forked from jasder/antlr
Merge pull request #1637 from bhamiltoncx/char-streams
Make BaseJavaTest and TestRig work with Unicode values > U+FFFF
This commit is contained in:
commit
ae5250685d
|
@ -918,11 +918,12 @@ public class BaseJavaTest implements RuntimeTestSupport {
|
|||
"import org.antlr.v4.runtime.*;\n" +
|
||||
"import org.antlr.v4.runtime.tree.*;\n" +
|
||||
"import org.antlr.v4.runtime.atn.*;\n" +
|
||||
"import java.nio.file.Paths;\n"+
|
||||
"import java.util.Arrays;\n"+
|
||||
"\n" +
|
||||
"public class Test {\n" +
|
||||
" public static void main(String[] args) throws Exception {\n" +
|
||||
" CharStream input = new ANTLRFileStream(args[0]);\n" +
|
||||
" CharStream input = CharStreams.createWithUTF8(Paths.get(args[0]));\n" +
|
||||
" <lexerName> lex = new <lexerName>(input);\n" +
|
||||
" CommonTokenStream tokens = new CommonTokenStream(lex);\n" +
|
||||
" <createParser>\n"+
|
||||
|
@ -974,11 +975,12 @@ public class BaseJavaTest implements RuntimeTestSupport {
|
|||
|
||||
protected void writeLexerTestFile(String lexerName, boolean showDFA) {
|
||||
ST outputFileST = new ST(
|
||||
"import java.nio.file.Paths;\n" +
|
||||
"import org.antlr.v4.runtime.*;\n" +
|
||||
"\n" +
|
||||
"public class Test {\n" +
|
||||
" public static void main(String[] args) throws Exception {\n" +
|
||||
" CharStream input = new ANTLRFileStream(args[0]);\n" +
|
||||
" CharStream input = CharStreams.createWithUTF8(Paths.get(args[0]));\n" +
|
||||
" <lexerName> lex = new <lexerName>(input);\n" +
|
||||
" CommonTokenStream tokens = new CommonTokenStream(lex);\n" +
|
||||
" tokens.fill();\n" +
|
||||
|
|
|
@ -0,0 +1,172 @@
|
|||
/*
|
||||
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
|
||||
* Use of this file is governed by the BSD 3-clause license that
|
||||
* can be found in the LICENSE.txt file in the project root.
|
||||
*/
|
||||
package org.antlr.v4.test.runtime.java;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import java.nio.channels.SeekableByteChannel;
|
||||
import java.nio.charset.CharacterCodingException;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.antlr.v4.runtime.CharStreams;
|
||||
import org.antlr.v4.runtime.CodePointCharStream;
|
||||
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.ExpectedException;
|
||||
import org.junit.rules.TemporaryFolder;
|
||||
|
||||
public class TestCharStreams {
|
||||
@Rule
|
||||
public TemporaryFolder folder = new TemporaryFolder();
|
||||
|
||||
@Rule
|
||||
public ExpectedException thrown = ExpectedException.none();
|
||||
|
||||
@Test
|
||||
public void createWithBMPStringHasExpectedSize() {
|
||||
CodePointCharStream s = CharStreams.createWithString("hello");
|
||||
assertEquals(5, s.size());
|
||||
assertEquals(0, s.index());
|
||||
assertEquals("hello", s.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void createWithSMPStringHasExpectedSize() {
|
||||
CodePointCharStream s = CharStreams.createWithString(
|
||||
"hello \uD83C\uDF0E");
|
||||
assertEquals(7, s.size());
|
||||
assertEquals(0, s.index());
|
||||
assertEquals("hello \uD83C\uDF0E", s.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void createWithBMPUTF8PathHasExpectedSize() throws Exception {
|
||||
Path p = folder.newFile().toPath();
|
||||
Files.write(p, "hello".getBytes(StandardCharsets.UTF_8));
|
||||
CodePointCharStream s = CharStreams.createWithUTF8(p);
|
||||
assertEquals(5, s.size());
|
||||
assertEquals(0, s.index());
|
||||
assertEquals("hello", s.toString());
|
||||
assertEquals(p.toString(), s.getSourceName());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void createWithSMPUTF8PathHasExpectedSize() throws Exception {
|
||||
Path p = folder.newFile().toPath();
|
||||
Files.write(p, "hello \uD83C\uDF0E".getBytes(StandardCharsets.UTF_8));
|
||||
CodePointCharStream s = CharStreams.createWithUTF8(p);
|
||||
assertEquals(7, s.size());
|
||||
assertEquals(0, s.index());
|
||||
assertEquals("hello \uD83C\uDF0E", s.toString());
|
||||
assertEquals(p.toString(), s.getSourceName());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void createWithBMPUTF8InputStreamHasExpectedSize() throws Exception {
|
||||
Path p = folder.newFile().toPath();
|
||||
Files.write(p, "hello".getBytes(StandardCharsets.UTF_8));
|
||||
try (InputStream is = Files.newInputStream(p)) {
|
||||
CodePointCharStream s = CharStreams.createWithUTF8Stream(is);
|
||||
assertEquals(5, s.size());
|
||||
assertEquals(0, s.index());
|
||||
assertEquals("hello", s.toString());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void createWithSMPUTF8InputStreamHasExpectedSize() throws Exception {
|
||||
Path p = folder.newFile().toPath();
|
||||
Files.write(p, "hello \uD83C\uDF0E".getBytes(StandardCharsets.UTF_8));
|
||||
try (InputStream is = Files.newInputStream(p)) {
|
||||
CodePointCharStream s = CharStreams.createWithUTF8Stream(is);
|
||||
assertEquals(7, s.size());
|
||||
assertEquals(0, s.index());
|
||||
assertEquals("hello \uD83C\uDF0E", s.toString());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void createWithBMPUTF8ChannelHasExpectedSize() throws Exception {
|
||||
Path p = folder.newFile().toPath();
|
||||
Files.write(p, "hello".getBytes(StandardCharsets.UTF_8));
|
||||
try (SeekableByteChannel c = Files.newByteChannel(p)) {
|
||||
CodePointCharStream s = CharStreams.createWithUTF8Channel(
|
||||
c, 4096, CodingErrorAction.REPLACE, "foo");
|
||||
assertEquals(5, s.size());
|
||||
assertEquals(0, s.index());
|
||||
assertEquals("hello", s.toString());
|
||||
assertEquals("foo", s.getSourceName());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void createWithSMPUTF8ChannelHasExpectedSize() throws Exception {
|
||||
Path p = folder.newFile().toPath();
|
||||
Files.write(p, "hello \uD83C\uDF0E".getBytes(StandardCharsets.UTF_8));
|
||||
try (SeekableByteChannel c = Files.newByteChannel(p)) {
|
||||
CodePointCharStream s = CharStreams.createWithUTF8Channel(
|
||||
c, 4096, CodingErrorAction.REPLACE, "foo");
|
||||
assertEquals(7, s.size());
|
||||
assertEquals(0, s.index());
|
||||
assertEquals("hello \uD83C\uDF0E", s.toString());
|
||||
assertEquals("foo", s.getSourceName());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void createWithInvalidUTF8BytesChannelReplacesWithSubstCharInReplaceMode()
|
||||
throws Exception {
|
||||
Path p = folder.newFile().toPath();
|
||||
byte[] toWrite = new byte[] { (byte)0xCA, (byte)0xFE, (byte)0xFE, (byte)0xED };
|
||||
Files.write(p, toWrite);
|
||||
try (SeekableByteChannel c = Files.newByteChannel(p)) {
|
||||
CodePointCharStream s = CharStreams.createWithUTF8Channel(
|
||||
c, 4096, CodingErrorAction.REPLACE, "foo");
|
||||
assertEquals(3, s.size());
|
||||
assertEquals(0, s.index());
|
||||
assertEquals("\uFFFD\uFFFD\uFFFD", s.toString());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void createWithInvalidUTF8BytesThrowsInReportMode() throws Exception {
|
||||
Path p = folder.newFile().toPath();
|
||||
byte[] toWrite = new byte[] { (byte)0xCA, (byte)0xFE };
|
||||
Files.write(p, toWrite);
|
||||
try (SeekableByteChannel c = Files.newByteChannel(p)) {
|
||||
thrown.expect(CharacterCodingException.class);
|
||||
CharStreams.createWithUTF8Channel(c, 4096, CodingErrorAction.REPORT, "foo");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void createWithSMPUTF8SequenceStraddlingBufferBoundary() throws Exception {
|
||||
Path p = folder.newFile().toPath();
|
||||
Files.write(p, "hello \uD83C\uDF0E".getBytes(StandardCharsets.UTF_8));
|
||||
try (SeekableByteChannel c = Files.newByteChannel(p)) {
|
||||
CodePointCharStream s = CharStreams.createWithUTF8Channel(
|
||||
c,
|
||||
// Note this buffer size ensures the SMP code point
|
||||
// straddles the boundary of two buffers
|
||||
8,
|
||||
CodingErrorAction.REPLACE,
|
||||
"foo");
|
||||
assertEquals(7, s.size());
|
||||
assertEquals(0, s.index());
|
||||
assertEquals("hello \uD83C\uDF0E", s.toString());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -10,6 +10,7 @@ import static org.junit.Assert.assertEquals;
|
|||
import java.io.IOException;
|
||||
import java.nio.IntBuffer;
|
||||
|
||||
import org.antlr.v4.runtime.CharStreams;
|
||||
import org.antlr.v4.runtime.CodePointCharStream;
|
||||
import org.antlr.v4.runtime.IntStream;
|
||||
|
||||
|
@ -25,21 +26,21 @@ public class TestCodePointCharStream {
|
|||
|
||||
@Test
|
||||
public void emptyBytesHasSize0() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("");
|
||||
CodePointCharStream s = CharStreams.createWithString("");
|
||||
assertEquals(0, s.size());
|
||||
assertEquals(0, s.index());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void emptyBytesLookAheadReturnsEOF() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("");
|
||||
CodePointCharStream s = CharStreams.createWithString("");
|
||||
assertEquals(IntStream.EOF, s.LA(1));
|
||||
assertEquals(0, s.index());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void consumingEmptyStreamShouldThrow() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("");
|
||||
CodePointCharStream s = CharStreams.createWithString("");
|
||||
thrown.expect(IllegalStateException.class);
|
||||
thrown.expectMessage("cannot consume EOF");
|
||||
s.consume();
|
||||
|
@ -47,13 +48,13 @@ public class TestCodePointCharStream {
|
|||
|
||||
@Test
|
||||
public void singleLatinCodePointHasSize1() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("X");
|
||||
CodePointCharStream s = CharStreams.createWithString("X");
|
||||
assertEquals(1, s.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void consumingSingleLatinCodePointShouldMoveIndex() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("X");
|
||||
CodePointCharStream s = CharStreams.createWithString("X");
|
||||
assertEquals(0, s.index());
|
||||
s.consume();
|
||||
assertEquals(1, s.index());
|
||||
|
@ -61,7 +62,7 @@ public class TestCodePointCharStream {
|
|||
|
||||
@Test
|
||||
public void consumingPastSingleLatinCodePointShouldThrow() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("X");
|
||||
CodePointCharStream s = CharStreams.createWithString("X");
|
||||
s.consume();
|
||||
thrown.expect(IllegalStateException.class);
|
||||
thrown.expectMessage("cannot consume EOF");
|
||||
|
@ -70,14 +71,14 @@ public class TestCodePointCharStream {
|
|||
|
||||
@Test
|
||||
public void singleLatinCodePointLookAheadShouldReturnCodePoint() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("X");
|
||||
CodePointCharStream s = CharStreams.createWithString("X");
|
||||
assertEquals('X', s.LA(1));
|
||||
assertEquals(0, s.index());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void multipleLatinCodePointsLookAheadShouldReturnCodePoints() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("XYZ");
|
||||
CodePointCharStream s = CharStreams.createWithString("XYZ");
|
||||
assertEquals('X', s.LA(1));
|
||||
assertEquals(0, s.index());
|
||||
assertEquals('Y', s.LA(2));
|
||||
|
@ -88,20 +89,20 @@ public class TestCodePointCharStream {
|
|||
|
||||
@Test
|
||||
public void singleLatinCodePointLookAheadPastEndShouldReturnEOF() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("X");
|
||||
CodePointCharStream s = CharStreams.createWithString("X");
|
||||
assertEquals(IntStream.EOF, s.LA(2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void singleCJKCodePointHasSize1() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("\u611B");
|
||||
CodePointCharStream s = CharStreams.createWithString("\u611B");
|
||||
assertEquals(1, s.size());
|
||||
assertEquals(0, s.index());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void consumingSingleCJKCodePointShouldMoveIndex() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("\u611B");
|
||||
CodePointCharStream s = CharStreams.createWithString("\u611B");
|
||||
assertEquals(0, s.index());
|
||||
s.consume();
|
||||
assertEquals(1, s.index());
|
||||
|
@ -109,7 +110,7 @@ public class TestCodePointCharStream {
|
|||
|
||||
@Test
|
||||
public void consumingPastSingleCJKCodePointShouldThrow() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("\u611B");
|
||||
CodePointCharStream s = CharStreams.createWithString("\u611B");
|
||||
s.consume();
|
||||
thrown.expect(IllegalStateException.class);
|
||||
thrown.expectMessage("cannot consume EOF");
|
||||
|
@ -118,21 +119,21 @@ public class TestCodePointCharStream {
|
|||
|
||||
@Test
|
||||
public void singleCJKCodePointLookAheadShouldReturnCodePoint() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("\u611B");
|
||||
CodePointCharStream s = CharStreams.createWithString("\u611B");
|
||||
assertEquals(0x611B, s.LA(1));
|
||||
assertEquals(0, s.index());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void singleCJKCodePointLookAheadPastEndShouldReturnEOF() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("\u611B");
|
||||
CodePointCharStream s = CharStreams.createWithString("\u611B");
|
||||
assertEquals(IntStream.EOF, s.LA(2));
|
||||
assertEquals(0, s.index());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void singleEmojiCodePointHasSize1() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString(
|
||||
CodePointCharStream s = CharStreams.createWithString(
|
||||
new StringBuilder().appendCodePoint(0x1F4A9).toString());
|
||||
assertEquals(1, s.size());
|
||||
assertEquals(0, s.index());
|
||||
|
@ -140,7 +141,7 @@ public class TestCodePointCharStream {
|
|||
|
||||
@Test
|
||||
public void consumingSingleEmojiCodePointShouldMoveIndex() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString(
|
||||
CodePointCharStream s = CharStreams.createWithString(
|
||||
new StringBuilder().appendCodePoint(0x1F4A9).toString());
|
||||
assertEquals(0, s.index());
|
||||
s.consume();
|
||||
|
@ -149,7 +150,7 @@ public class TestCodePointCharStream {
|
|||
|
||||
@Test
|
||||
public void consumingPastEndOfEmojiCodePointWithShouldThrow() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString(
|
||||
CodePointCharStream s = CharStreams.createWithString(
|
||||
new StringBuilder().appendCodePoint(0x1F4A9).toString());
|
||||
assertEquals(0, s.index());
|
||||
s.consume();
|
||||
|
@ -161,7 +162,7 @@ public class TestCodePointCharStream {
|
|||
|
||||
@Test
|
||||
public void singleEmojiCodePointLookAheadShouldReturnCodePoint() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString(
|
||||
CodePointCharStream s = CharStreams.createWithString(
|
||||
new StringBuilder().appendCodePoint(0x1F4A9).toString());
|
||||
assertEquals(0x1F4A9, s.LA(1));
|
||||
assertEquals(0, s.index());
|
||||
|
@ -169,7 +170,7 @@ public class TestCodePointCharStream {
|
|||
|
||||
@Test
|
||||
public void singleEmojiCodePointLookAheadPastEndShouldReturnEOF() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString(
|
||||
CodePointCharStream s = CharStreams.createWithString(
|
||||
new StringBuilder().appendCodePoint(0x1F4A9).toString());
|
||||
assertEquals(IntStream.EOF, s.LA(2));
|
||||
assertEquals(0, s.index());
|
||||
|
@ -177,19 +178,19 @@ public class TestCodePointCharStream {
|
|||
|
||||
@Test
|
||||
public void getTextWithLatin() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("0123456789");
|
||||
CodePointCharStream s = CharStreams.createWithString("0123456789");
|
||||
assertEquals("34567", s.getText(Interval.of(3, 7)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void getTextWithCJK() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("01234\u40946789");
|
||||
CodePointCharStream s = CharStreams.createWithString("01234\u40946789");
|
||||
assertEquals("34\u409467", s.getText(Interval.of(3, 7)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void getTextWithEmoji() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString(
|
||||
CodePointCharStream s = CharStreams.createWithString(
|
||||
new StringBuilder("01234")
|
||||
.appendCodePoint(0x1F522)
|
||||
.append("6789")
|
||||
|
@ -199,19 +200,19 @@ public class TestCodePointCharStream {
|
|||
|
||||
@Test
|
||||
public void toStringWithLatin() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("0123456789");
|
||||
CodePointCharStream s = CharStreams.createWithString("0123456789");
|
||||
assertEquals("0123456789", s.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void toStringWithCJK() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("01234\u40946789");
|
||||
CodePointCharStream s = CharStreams.createWithString("01234\u40946789");
|
||||
assertEquals("01234\u40946789", s.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void toStringWithEmoji() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString(
|
||||
CodePointCharStream s = CharStreams.createWithString(
|
||||
new StringBuilder("01234")
|
||||
.appendCodePoint(0x1F522)
|
||||
.append("6789")
|
||||
|
@ -221,19 +222,19 @@ public class TestCodePointCharStream {
|
|||
|
||||
@Test
|
||||
public void lookAheadWithLatin() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("0123456789");
|
||||
CodePointCharStream s = CharStreams.createWithString("0123456789");
|
||||
assertEquals('5', s.LA(6));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void lookAheadWithCJK() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("01234\u40946789");
|
||||
CodePointCharStream s = CharStreams.createWithString("01234\u40946789");
|
||||
assertEquals(0x4094, s.LA(6));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void lookAheadWithEmoji() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString(
|
||||
CodePointCharStream s = CharStreams.createWithString(
|
||||
new StringBuilder("01234")
|
||||
.appendCodePoint(0x1F522)
|
||||
.append("6789")
|
||||
|
@ -243,21 +244,21 @@ public class TestCodePointCharStream {
|
|||
|
||||
@Test
|
||||
public void seekWithLatin() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("0123456789");
|
||||
CodePointCharStream s = CharStreams.createWithString("0123456789");
|
||||
s.seek(5);
|
||||
assertEquals('5', s.LA(1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void seekWithCJK() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("01234\u40946789");
|
||||
CodePointCharStream s = CharStreams.createWithString("01234\u40946789");
|
||||
s.seek(5);
|
||||
assertEquals(0x4094, s.LA(1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void seekWithEmoji() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString(
|
||||
CodePointCharStream s = CharStreams.createWithString(
|
||||
new StringBuilder("01234")
|
||||
.appendCodePoint(0x1F522)
|
||||
.append("6789")
|
||||
|
@ -268,21 +269,21 @@ public class TestCodePointCharStream {
|
|||
|
||||
@Test
|
||||
public void lookBehindWithLatin() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("0123456789");
|
||||
CodePointCharStream s = CharStreams.createWithString("0123456789");
|
||||
s.seek(6);
|
||||
assertEquals('5', s.LA(-1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void lookBehindWithCJK() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString("01234\u40946789");
|
||||
CodePointCharStream s = CharStreams.createWithString("01234\u40946789");
|
||||
s.seek(6);
|
||||
assertEquals(0x4094, s.LA(-1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void lookBehindWithEmoji() {
|
||||
CodePointCharStream s = CodePointCharStream.createWithString(
|
||||
CodePointCharStream s = CharStreams.createWithString(
|
||||
new StringBuilder("01234")
|
||||
.appendCodePoint(0x1F522)
|
||||
.append("6789")
|
||||
|
|
|
@ -0,0 +1,99 @@
|
|||
/*
|
||||
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
|
||||
* Use of this file is governed by the BSD 3-clause license that
|
||||
* can be found in the LICENSE.txt file in the project root.
|
||||
*/
|
||||
package org.antlr.v4.runtime;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.IntBuffer;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
import java.nio.channels.Channels;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.channels.ReadableByteChannel;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
/**
|
||||
* Utility class to create {@link CodePointCharStream}s from
|
||||
* various sources of Unicode data.
|
||||
*/
|
||||
public final class CharStreams {
|
||||
private static final int DEFAULT_BUFFER_SIZE = 4096;
|
||||
|
||||
// Utility class; do not construct.
|
||||
private CharStreams() { }
|
||||
|
||||
/**
|
||||
* Convenience method to create a {@link CodePointCharStream}
|
||||
* for the Unicode code points in a Java {@link String}.
|
||||
*/
|
||||
public static CodePointCharStream createWithString(String s) {
|
||||
// Initial guess assumes no code points > U+FFFF: one code
|
||||
// point for each code unit in the string
|
||||
IntBuffer codePointBuffer = IntBuffer.allocate(s.length());
|
||||
int stringIdx = 0;
|
||||
while (stringIdx < s.length()) {
|
||||
if (!codePointBuffer.hasRemaining()) {
|
||||
// Grow the code point buffer size by 2.
|
||||
IntBuffer newBuffer = IntBuffer.allocate(codePointBuffer.capacity() * 2);
|
||||
codePointBuffer.flip();
|
||||
newBuffer.put(codePointBuffer);
|
||||
codePointBuffer = newBuffer;
|
||||
}
|
||||
int codePoint = Character.codePointAt(s, stringIdx);
|
||||
codePointBuffer.put(codePoint);
|
||||
stringIdx += Character.charCount(codePoint);
|
||||
}
|
||||
codePointBuffer.flip();
|
||||
return new CodePointCharStream(codePointBuffer, IntStream.UNKNOWN_SOURCE_NAME);
|
||||
}
|
||||
|
||||
public static CodePointCharStream createWithUTF8(Path path) throws IOException {
|
||||
try (ReadableByteChannel channel = Files.newByteChannel(path)) {
|
||||
return createWithUTF8Channel(
|
||||
channel,
|
||||
DEFAULT_BUFFER_SIZE,
|
||||
CodingErrorAction.REPLACE,
|
||||
path.toString());
|
||||
}
|
||||
}
|
||||
|
||||
public static CodePointCharStream createWithUTF8Stream(InputStream is) throws IOException {
|
||||
try (ReadableByteChannel channel = Channels.newChannel(is)) {
|
||||
return createWithUTF8Channel(
|
||||
channel,
|
||||
DEFAULT_BUFFER_SIZE,
|
||||
CodingErrorAction.REPLACE,
|
||||
IntStream.UNKNOWN_SOURCE_NAME);
|
||||
}
|
||||
}
|
||||
|
||||
public static CodePointCharStream createWithUTF8Channel(
|
||||
ReadableByteChannel channel,
|
||||
int bufferSize,
|
||||
CodingErrorAction decodingErrorAction,
|
||||
String sourceName
|
||||
) throws IOException {
|
||||
ByteBuffer utf8BytesIn = ByteBuffer.allocateDirect(bufferSize);
|
||||
IntBuffer codePointsOut = IntBuffer.allocate(bufferSize);
|
||||
boolean endOfInput = false;
|
||||
UTF8CodePointDecoder decoder = new UTF8CodePointDecoder(decodingErrorAction);
|
||||
while (!endOfInput) {
|
||||
int bytesRead = channel.read(utf8BytesIn);
|
||||
endOfInput = (bytesRead == -1);
|
||||
utf8BytesIn.flip();
|
||||
codePointsOut = decoder.decodeCodePointsFromBuffer(
|
||||
utf8BytesIn,
|
||||
codePointsOut,
|
||||
endOfInput);
|
||||
utf8BytesIn.compact();
|
||||
}
|
||||
codePointsOut.limit(codePointsOut.position());
|
||||
codePointsOut.flip();
|
||||
return new CodePointCharStream(codePointsOut, sourceName);
|
||||
}
|
||||
}
|
|
@ -23,31 +23,6 @@ public final class CodePointCharStream implements CharStream {
|
|||
private final int size;
|
||||
private final String name;
|
||||
|
||||
/**
|
||||
* Convenience method to create a {@link CodePointCharStream}
|
||||
* for the Unicode code points in a Java {@link String}.
|
||||
*/
|
||||
public static CodePointCharStream createWithString(String s) {
|
||||
// Initial guess assumes no code points > U+FFFF: one code
|
||||
// point for each code unit in the string
|
||||
IntBuffer codePointBuffer = IntBuffer.allocate(s.length());
|
||||
int stringIdx = 0;
|
||||
while (stringIdx < s.length()) {
|
||||
if (!codePointBuffer.hasRemaining()) {
|
||||
// Grow the code point buffer size by 2.
|
||||
IntBuffer newBuffer = IntBuffer.allocate(codePointBuffer.capacity() * 2);
|
||||
codePointBuffer.flip();
|
||||
newBuffer.put(codePointBuffer);
|
||||
codePointBuffer = newBuffer;
|
||||
}
|
||||
int codePoint = Character.codePointAt(s, stringIdx);
|
||||
codePointBuffer.put(codePoint);
|
||||
stringIdx += Character.charCount(codePoint);
|
||||
}
|
||||
codePointBuffer.flip();
|
||||
return new CodePointCharStream(codePointBuffer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a {@link CodePointCharStream} which provides access
|
||||
* to the Unicode code points stored in {@code codePointBuffer}.
|
||||
|
|
|
@ -8,6 +8,7 @@ package org.antlr.v4.gui;
|
|||
|
||||
import org.antlr.v4.runtime.ANTLRInputStream;
|
||||
import org.antlr.v4.runtime.CharStream;
|
||||
import org.antlr.v4.runtime.CharStreams;
|
||||
import org.antlr.v4.runtime.CommonToken;
|
||||
import org.antlr.v4.runtime.CommonTokenStream;
|
||||
import org.antlr.v4.runtime.DiagnosticErrorListener;
|
||||
|
@ -27,6 +28,9 @@ import java.io.Reader;
|
|||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -154,42 +158,36 @@ public class TestRig {
|
|||
parser = parserCtor.newInstance((TokenStream)null);
|
||||
}
|
||||
|
||||
Charset charset = ( encoding == null ? Charset.defaultCharset () : Charset.forName(encoding) );
|
||||
if ( inputFiles.size()==0 ) {
|
||||
InputStream is = System.in;
|
||||
Reader r;
|
||||
if ( encoding!=null ) {
|
||||
r = new InputStreamReader(is, encoding);
|
||||
CharStream charStream;
|
||||
if ( charset.equals(StandardCharsets.UTF_8)) {
|
||||
charStream = CharStreams.createWithUTF8Stream(System.in);
|
||||
} else {
|
||||
try ( InputStreamReader r = new InputStreamReader(System.in, charset) ) {
|
||||
charStream = new ANTLRInputStream(r);
|
||||
}
|
||||
else {
|
||||
r = new InputStreamReader(is);
|
||||
}
|
||||
|
||||
process(lexer, parserClass, parser, is, r);
|
||||
process(lexer, parserClass, parser, charStream);
|
||||
return;
|
||||
}
|
||||
for (String inputFile : inputFiles) {
|
||||
InputStream is = System.in;
|
||||
if ( inputFile!=null ) {
|
||||
is = new FileInputStream(inputFile);
|
||||
CharStream charStream;
|
||||
if ( charset.equals(StandardCharsets.UTF_8) ) {
|
||||
charStream = CharStreams.createWithUTF8(Paths.get(inputFile));
|
||||
} else {
|
||||
// Decode the named input file; reading System.in here would ignore inputFile for non-UTF-8 charsets.
try ( InputStreamReader r = new InputStreamReader(new FileInputStream(inputFile), charset) ) {
|
||||
charStream = new ANTLRInputStream(r);
|
||||
}
|
||||
Reader r;
|
||||
if ( encoding!=null ) {
|
||||
r = new InputStreamReader(is, encoding);
|
||||
}
|
||||
else {
|
||||
r = new InputStreamReader(is);
|
||||
}
|
||||
|
||||
if ( inputFiles.size()>1 ) {
|
||||
System.err.println(inputFile);
|
||||
}
|
||||
process(lexer, parserClass, parser, is, r);
|
||||
process(lexer, parserClass, parser, charStream);
|
||||
}
|
||||
}
|
||||
|
||||
protected void process(Lexer lexer, Class<? extends Parser> parserClass, Parser parser, InputStream is, Reader r) throws IOException, IllegalAccessException, InvocationTargetException, PrintException {
|
||||
try {
|
||||
ANTLRInputStream input = new ANTLRInputStream(r);
|
||||
protected void process(Lexer lexer, Class<? extends Parser> parserClass, Parser parser, CharStream input) throws IOException, IllegalAccessException, InvocationTargetException, PrintException {
|
||||
lexer.setInputStream(input);
|
||||
CommonTokenStream tokens = new CommonTokenStream(lexer);
|
||||
|
||||
|
@ -242,9 +240,4 @@ public class TestRig {
|
|||
System.err.println("No method for rule "+startRuleName+" or it has arguments");
|
||||
}
|
||||
}
|
||||
finally {
|
||||
if ( r!=null ) r.close();
|
||||
if ( is!=null ) is.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue