Merge pull request #1765 from bhamiltoncx/unicode-cleanup-and-doc
Tidy up CharStreams and add new doc/unicode.md
This commit is contained in:
commit
1f6a329692
|
@ -82,7 +82,7 @@ Make sure to use two-stage parsing. See example in [bug report](https://github.c
|
||||||
|
|
||||||
```Java
|
```Java
|
||||||
|
|
||||||
CharStream input = new ANTLRFileStream(args[0]);
|
CharStream input = CharStreams.fromPath(Paths.get(args[0]));
|
||||||
ExprLexer lexer = new ExprLexer(input);
|
ExprLexer lexer = new ExprLexer(input);
|
||||||
CommonTokenStream tokens = new CommonTokenStream(lexer);
|
CommonTokenStream tokens = new CommonTokenStream(lexer);
|
||||||
ExprParser parser = new ExprParser(tokens);
|
ExprParser parser = new ExprParser(tokens);
|
||||||
|
|
|
@ -30,7 +30,7 @@ public static ParseTree parse(String fileName,
|
||||||
throws IOException
|
throws IOException
|
||||||
{
|
{
|
||||||
final Grammar g = Grammar.load(combinedGrammarFileName);
|
final Grammar g = Grammar.load(combinedGrammarFileName);
|
||||||
LexerInterpreter lexEngine = g.createLexerInterpreter(new ANTLRFileStream(fileName));
|
LexerInterpreter lexEngine = g.createLexerInterpreter(CharStreams.fromPath(Paths.get(fileName)));
|
||||||
CommonTokenStream tokens = new CommonTokenStream(lexEngine);
|
CommonTokenStream tokens = new CommonTokenStream(lexEngine);
|
||||||
ParserInterpreter parser = g.createParserInterpreter(tokens);
|
ParserInterpreter parser = g.createParserInterpreter(tokens);
|
||||||
ParseTree t = parser.parse(g.getRule(startRule).index);
|
ParseTree t = parser.parse(g.getRule(startRule).index);
|
||||||
|
@ -58,7 +58,7 @@ public static ParseTree parse(String fileNameToParse,
|
||||||
{
|
{
|
||||||
final LexerGrammar lg = (LexerGrammar) Grammar.load(lexerGrammarFileName);
|
final LexerGrammar lg = (LexerGrammar) Grammar.load(lexerGrammarFileName);
|
||||||
final Grammar pg = Grammar.load(parserGrammarFileName, lg);
|
final Grammar pg = Grammar.load(parserGrammarFileName, lg);
|
||||||
ANTLRFileStream input = new ANTLRFileStream(fileNameToParse);
|
CharStream input = CharStreams.fromPath(Paths.get(fileNameToParse));
|
||||||
LexerInterpreter lexEngine = lg.createLexerInterpreter(input);
|
LexerInterpreter lexEngine = lg.createLexerInterpreter(input);
|
||||||
CommonTokenStream tokens = new CommonTokenStream(lexEngine);
|
CommonTokenStream tokens = new CommonTokenStream(lexEngine);
|
||||||
ParserInterpreter parser = pg.createParserInterpreter(tokens);
|
ParserInterpreter parser = pg.createParserInterpreter(tokens);
|
||||||
|
|
|
@ -0,0 +1,68 @@
|
||||||
|
# Lexers and Unicode text
|
||||||
|
|
||||||
|
Until ANTLR 4.7, generated lexers only supported part of the Unicode standard
|
||||||
|
(code points up to `U+FFFF`).
|
||||||
|
|
||||||
|
With ANTLR 4.7 and later, lexers as well as all languages' runtimes
|
||||||
|
support the full range of Unicode code points up to `U+10FFFF`, as
|
||||||
|
long as the input `CharStream` is opened using `CharStreams.fromPath()`
|
||||||
|
or the equivalent method for your runtime's language.
|
||||||
|
|
||||||
|
# Unicode Code Points in Lexer Grammars
|
||||||
|
|
||||||
|
To refer to Unicode [code points](https://en.wikipedia.org/wiki/Code_point)
|
||||||
|
in lexer grammars, use the `\u` string escape. For example, to create
|
||||||
|
a lexer rule for a single Cyrillic character, create a range from
|
||||||
|
`U+0400` to `U+04FF`:
|
||||||
|
|
||||||
|
```ANTLR
|
||||||
|
CYRILLIC : '\u0400'..'\u04FF' ;
|
||||||
|
```
|
||||||
|
|
||||||
|
Unicode code points larger than `U+FFFF` must use the extended `\u{12345}` syntax.
|
||||||
|
For example, to create a lexer rule for a selection of smiley faces
|
||||||
|
from the [Emoticons Unicode block](http://www.unicode.org/charts/PDF/U1F600.pdf):
|
||||||
|
|
||||||
|
```ANTLR
|
||||||
|
EMOTICONS : ('\u{1F600}' | '\u{1F602}' | '\u{1F615}') ;
|
||||||
|
```
|
||||||
|
|
||||||
|
Finally, lexer char sets can include Unicode properties:
|
||||||
|
|
||||||
|
```ANTLR
|
||||||
|
EMOJI : [\p{Emoji}] ;
|
||||||
|
JAPANESE : [\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Han}] ;
|
||||||
|
NOT_CYRILLIC : [\P{Script=Cyrillic}] ;
|
||||||
|
```
|
||||||
|
|
||||||
|
See [lexer-rules.md](lexer-rules.md#lexer-rule-elements) for more detail on Unicode
|
||||||
|
escapes in lexer rules.
|
||||||
|
|
||||||
|
# CharStreams and UTF-8
|
||||||
|
|
||||||
|
If your lexer grammar contains code points larger than `U+FFFF`, your
|
||||||
|
lexer client code must open the file using `CharStreams.fromPath()` or
|
||||||
|
equivalent in your runtime's language, or input values larger than
|
||||||
|
`U+FFFF` will *not* match.
|
||||||
|
|
||||||
|
For backwards compatibility, the existing `ANTLRInputStream` and
|
||||||
|
`ANTLRFileStream` APIs only support Unicode code points up to `U+FFFF`.
|
||||||
|
|
||||||
|
The existing `TestRig` command-line interface supports all Unicode
|
||||||
|
code points.
|
||||||
|
|
||||||
|
# Example
|
||||||
|
|
||||||
|
If you have generated a lexer named `UnicodeLexer`:
|
||||||
|
|
||||||
|
```Java
|
||||||
|
public static void main(String[] args) throws IOException {
|
||||||
|
CharStream charStream = CharStreams.fromPath(Paths.get(args[0]));
|
||||||
|
Lexer lexer = new UnicodeLexer(charStream);
|
||||||
|
CommonTokenStream tokens = new CommonTokenStream(lexer);
|
||||||
|
tokens.fill();
|
||||||
|
for (Token token : tokens.getTokens()) {
|
||||||
|
System.out.println("Got token: " + token.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
|
@ -923,7 +923,7 @@ public class BaseJavaTest implements RuntimeTestSupport {
|
||||||
"\n" +
|
"\n" +
|
||||||
"public class Test {\n" +
|
"public class Test {\n" +
|
||||||
" public static void main(String[] args) throws Exception {\n" +
|
" public static void main(String[] args) throws Exception {\n" +
|
||||||
" CharStream input = CharStreams.createWithUTF8(Paths.get(args[0]));\n" +
|
" CharStream input = CharStreams.fromPath(Paths.get(args[0]));\n" +
|
||||||
" <lexerName> lex = new <lexerName>(input);\n" +
|
" <lexerName> lex = new <lexerName>(input);\n" +
|
||||||
" CommonTokenStream tokens = new CommonTokenStream(lex);\n" +
|
" CommonTokenStream tokens = new CommonTokenStream(lex);\n" +
|
||||||
" <createParser>\n"+
|
" <createParser>\n"+
|
||||||
|
@ -980,7 +980,7 @@ public class BaseJavaTest implements RuntimeTestSupport {
|
||||||
"\n" +
|
"\n" +
|
||||||
"public class Test {\n" +
|
"public class Test {\n" +
|
||||||
" public static void main(String[] args) throws Exception {\n" +
|
" public static void main(String[] args) throws Exception {\n" +
|
||||||
" CharStream input = CharStreams.createWithUTF8(Paths.get(args[0]));\n" +
|
" CharStream input = CharStreams.fromPath(Paths.get(args[0]));\n" +
|
||||||
" <lexerName> lex = new <lexerName>(input);\n" +
|
" <lexerName> lex = new <lexerName>(input);\n" +
|
||||||
" CommonTokenStream tokens = new CommonTokenStream(lex);\n" +
|
" CommonTokenStream tokens = new CommonTokenStream(lex);\n" +
|
||||||
" tokens.fill();\n" +
|
" tokens.fill();\n" +
|
||||||
|
|
|
@ -10,6 +10,7 @@ import static org.junit.Assert.assertEquals;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.io.Reader;
|
||||||
|
|
||||||
import java.nio.channels.SeekableByteChannel;
|
import java.nio.channels.SeekableByteChannel;
|
||||||
import java.nio.charset.CharacterCodingException;
|
import java.nio.charset.CharacterCodingException;
|
||||||
|
@ -20,6 +21,7 @@ import java.nio.file.Path;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import org.antlr.v4.runtime.CharStream;
|
||||||
import org.antlr.v4.runtime.CharStreams;
|
import org.antlr.v4.runtime.CharStreams;
|
||||||
import org.antlr.v4.runtime.CodePointCharStream;
|
import org.antlr.v4.runtime.CodePointCharStream;
|
||||||
|
|
||||||
|
@ -36,16 +38,16 @@ public class TestCharStreams {
|
||||||
public ExpectedException thrown = ExpectedException.none();
|
public ExpectedException thrown = ExpectedException.none();
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void createWithBMPStringHasExpectedSize() {
|
public void fromBMPStringHasExpectedSize() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("hello");
|
CharStream s = CharStreams.fromString("hello");
|
||||||
assertEquals(5, s.size());
|
assertEquals(5, s.size());
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
assertEquals("hello", s.toString());
|
assertEquals("hello", s.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void createWithSMPStringHasExpectedSize() {
|
public void fromSMPStringHasExpectedSize() {
|
||||||
CodePointCharStream s = CharStreams.createWithString(
|
CharStream s = CharStreams.fromString(
|
||||||
"hello \uD83C\uDF0E");
|
"hello \uD83C\uDF0E");
|
||||||
assertEquals(7, s.size());
|
assertEquals(7, s.size());
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
|
@ -53,10 +55,10 @@ public class TestCharStreams {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void createWithBMPUTF8PathHasExpectedSize() throws Exception {
|
public void fromBMPUTF8PathHasExpectedSize() throws Exception {
|
||||||
Path p = folder.newFile().toPath();
|
Path p = folder.newFile().toPath();
|
||||||
Files.write(p, "hello".getBytes(StandardCharsets.UTF_8));
|
Files.write(p, "hello".getBytes(StandardCharsets.UTF_8));
|
||||||
CodePointCharStream s = CharStreams.createWithUTF8(p);
|
CharStream s = CharStreams.fromPath(p);
|
||||||
assertEquals(5, s.size());
|
assertEquals(5, s.size());
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
assertEquals("hello", s.toString());
|
assertEquals("hello", s.toString());
|
||||||
|
@ -64,10 +66,10 @@ public class TestCharStreams {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void createWithSMPUTF8PathHasExpectedSize() throws Exception {
|
public void fromSMPUTF8PathHasExpectedSize() throws Exception {
|
||||||
Path p = folder.newFile().toPath();
|
Path p = folder.newFile().toPath();
|
||||||
Files.write(p, "hello \uD83C\uDF0E".getBytes(StandardCharsets.UTF_8));
|
Files.write(p, "hello \uD83C\uDF0E".getBytes(StandardCharsets.UTF_8));
|
||||||
CodePointCharStream s = CharStreams.createWithUTF8(p);
|
CharStream s = CharStreams.fromPath(p);
|
||||||
assertEquals(7, s.size());
|
assertEquals(7, s.size());
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
assertEquals("hello \uD83C\uDF0E", s.toString());
|
assertEquals("hello \uD83C\uDF0E", s.toString());
|
||||||
|
@ -75,11 +77,11 @@ public class TestCharStreams {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void createWithBMPUTF8InputStreamHasExpectedSize() throws Exception {
|
public void fromBMPUTF8InputStreamHasExpectedSize() throws Exception {
|
||||||
Path p = folder.newFile().toPath();
|
Path p = folder.newFile().toPath();
|
||||||
Files.write(p, "hello".getBytes(StandardCharsets.UTF_8));
|
Files.write(p, "hello".getBytes(StandardCharsets.UTF_8));
|
||||||
try (InputStream is = Files.newInputStream(p)) {
|
try (InputStream is = Files.newInputStream(p)) {
|
||||||
CodePointCharStream s = CharStreams.createWithUTF8Stream(is);
|
CharStream s = CharStreams.fromStream(is);
|
||||||
assertEquals(5, s.size());
|
assertEquals(5, s.size());
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
assertEquals("hello", s.toString());
|
assertEquals("hello", s.toString());
|
||||||
|
@ -87,11 +89,11 @@ public class TestCharStreams {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void createWithSMPUTF8InputStreamHasExpectedSize() throws Exception {
|
public void fromSMPUTF8InputStreamHasExpectedSize() throws Exception {
|
||||||
Path p = folder.newFile().toPath();
|
Path p = folder.newFile().toPath();
|
||||||
Files.write(p, "hello \uD83C\uDF0E".getBytes(StandardCharsets.UTF_8));
|
Files.write(p, "hello \uD83C\uDF0E".getBytes(StandardCharsets.UTF_8));
|
||||||
try (InputStream is = Files.newInputStream(p)) {
|
try (InputStream is = Files.newInputStream(p)) {
|
||||||
CodePointCharStream s = CharStreams.createWithUTF8Stream(is);
|
CharStream s = CharStreams.fromStream(is);
|
||||||
assertEquals(7, s.size());
|
assertEquals(7, s.size());
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
assertEquals("hello \uD83C\uDF0E", s.toString());
|
assertEquals("hello \uD83C\uDF0E", s.toString());
|
||||||
|
@ -99,11 +101,11 @@ public class TestCharStreams {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void createWithBMPUTF8ChannelHasExpectedSize() throws Exception {
|
public void fromBMPUTF8ChannelHasExpectedSize() throws Exception {
|
||||||
Path p = folder.newFile().toPath();
|
Path p = folder.newFile().toPath();
|
||||||
Files.write(p, "hello".getBytes(StandardCharsets.UTF_8));
|
Files.write(p, "hello".getBytes(StandardCharsets.UTF_8));
|
||||||
try (SeekableByteChannel c = Files.newByteChannel(p)) {
|
try (SeekableByteChannel c = Files.newByteChannel(p)) {
|
||||||
CodePointCharStream s = CharStreams.createWithUTF8Channel(
|
CharStream s = CharStreams.fromChannel(
|
||||||
c, 4096, CodingErrorAction.REPLACE, "foo");
|
c, 4096, CodingErrorAction.REPLACE, "foo");
|
||||||
assertEquals(5, s.size());
|
assertEquals(5, s.size());
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
|
@ -113,11 +115,11 @@ public class TestCharStreams {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void createWithSMPUTF8ChannelHasExpectedSize() throws Exception {
|
public void fromSMPUTF8ChannelHasExpectedSize() throws Exception {
|
||||||
Path p = folder.newFile().toPath();
|
Path p = folder.newFile().toPath();
|
||||||
Files.write(p, "hello \uD83C\uDF0E".getBytes(StandardCharsets.UTF_8));
|
Files.write(p, "hello \uD83C\uDF0E".getBytes(StandardCharsets.UTF_8));
|
||||||
try (SeekableByteChannel c = Files.newByteChannel(p)) {
|
try (SeekableByteChannel c = Files.newByteChannel(p)) {
|
||||||
CodePointCharStream s = CharStreams.createWithUTF8Channel(
|
CharStream s = CharStreams.fromChannel(
|
||||||
c, 4096, CodingErrorAction.REPLACE, "foo");
|
c, 4096, CodingErrorAction.REPLACE, "foo");
|
||||||
assertEquals(7, s.size());
|
assertEquals(7, s.size());
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
|
@ -127,13 +129,13 @@ public class TestCharStreams {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void createWithInvalidUTF8BytesChannelReplacesWithSubstCharInReplaceMode()
|
public void fromInvalidUTF8BytesChannelReplacesWithSubstCharInReplaceMode()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
Path p = folder.newFile().toPath();
|
Path p = folder.newFile().toPath();
|
||||||
byte[] toWrite = new byte[] { (byte)0xCA, (byte)0xFE, (byte)0xFE, (byte)0xED };
|
byte[] toWrite = new byte[] { (byte)0xCA, (byte)0xFE, (byte)0xFE, (byte)0xED };
|
||||||
Files.write(p, toWrite);
|
Files.write(p, toWrite);
|
||||||
try (SeekableByteChannel c = Files.newByteChannel(p)) {
|
try (SeekableByteChannel c = Files.newByteChannel(p)) {
|
||||||
CodePointCharStream s = CharStreams.createWithUTF8Channel(
|
CharStream s = CharStreams.fromChannel(
|
||||||
c, 4096, CodingErrorAction.REPLACE, "foo");
|
c, 4096, CodingErrorAction.REPLACE, "foo");
|
||||||
assertEquals(3, s.size());
|
assertEquals(3, s.size());
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
|
@ -142,22 +144,22 @@ public class TestCharStreams {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void createWithInvalidUTF8BytesThrowsInReportMode() throws Exception {
|
public void fromInvalidUTF8BytesThrowsInReportMode() throws Exception {
|
||||||
Path p = folder.newFile().toPath();
|
Path p = folder.newFile().toPath();
|
||||||
byte[] toWrite = new byte[] { (byte)0xCA, (byte)0xFE };
|
byte[] toWrite = new byte[] { (byte)0xCA, (byte)0xFE };
|
||||||
Files.write(p, toWrite);
|
Files.write(p, toWrite);
|
||||||
try (SeekableByteChannel c = Files.newByteChannel(p)) {
|
try (SeekableByteChannel c = Files.newByteChannel(p)) {
|
||||||
thrown.expect(CharacterCodingException.class);
|
thrown.expect(CharacterCodingException.class);
|
||||||
CharStreams.createWithUTF8Channel(c, 4096, CodingErrorAction.REPORT, "foo");
|
CharStreams.fromChannel(c, 4096, CodingErrorAction.REPORT, "foo");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void createWithSMPUTF8SequenceStraddlingBufferBoundary() throws Exception {
|
public void fromSMPUTF8SequenceStraddlingBufferBoundary() throws Exception {
|
||||||
Path p = folder.newFile().toPath();
|
Path p = folder.newFile().toPath();
|
||||||
Files.write(p, "hello \uD83C\uDF0E".getBytes(StandardCharsets.UTF_8));
|
Files.write(p, "hello \uD83C\uDF0E".getBytes(StandardCharsets.UTF_8));
|
||||||
try (SeekableByteChannel c = Files.newByteChannel(p)) {
|
try (SeekableByteChannel c = Files.newByteChannel(p)) {
|
||||||
CodePointCharStream s = CharStreams.createWithUTF8Channel(
|
CharStream s = CharStreams.fromChannel(
|
||||||
c,
|
c,
|
||||||
// Note this buffer size ensures the SMP code point
|
// Note this buffer size ensures the SMP code point
|
||||||
// straddles the boundary of two buffers
|
// straddles the boundary of two buffers
|
||||||
|
@ -169,4 +171,40 @@ public class TestCharStreams {
|
||||||
assertEquals("hello \uD83C\uDF0E", s.toString());
|
assertEquals("hello \uD83C\uDF0E", s.toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void fromFileName() throws Exception {
|
||||||
|
Path p = folder.newFile().toPath();
|
||||||
|
Files.write(p, "hello \uD83C\uDF0E".getBytes(StandardCharsets.UTF_8));
|
||||||
|
CharStream s = CharStreams.fromFileName(p.toString());
|
||||||
|
assertEquals(7, s.size());
|
||||||
|
assertEquals(0, s.index());
|
||||||
|
assertEquals("hello \uD83C\uDF0E", s.toString());
|
||||||
|
assertEquals(p.toString(), s.getSourceName());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void fromFileNameWithLatin1() throws Exception {
|
||||||
|
Path p = folder.newFile().toPath();
|
||||||
|
Files.write(p, "hello \u00CA\u00FE".getBytes(StandardCharsets.ISO_8859_1));
|
||||||
|
CharStream s = CharStreams.fromFileName(p.toString(), StandardCharsets.ISO_8859_1);
|
||||||
|
assertEquals(8, s.size());
|
||||||
|
assertEquals(0, s.index());
|
||||||
|
assertEquals("hello \u00CA\u00FE", s.toString());
|
||||||
|
assertEquals(p.toString(), s.getSourceName());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void fromReader() throws Exception {
|
||||||
|
Path p = folder.newFile().toPath();
|
||||||
|
Files.write(p, "hello \uD83C\uDF0E".getBytes(StandardCharsets.UTF_8));
|
||||||
|
try (Reader r = Files.newBufferedReader(p, StandardCharsets.UTF_8)) {
|
||||||
|
CharStream s = CharStreams.fromReader(r);
|
||||||
|
assertEquals(7, s.size());
|
||||||
|
assertEquals(0, s.index());
|
||||||
|
assertEquals("hello \uD83C\uDF0E", s.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,21 +26,21 @@ public class TestCodePointCharStream {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void emptyBytesHasSize0() {
|
public void emptyBytesHasSize0() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("");
|
CodePointCharStream s = CharStreams.fromString("");
|
||||||
assertEquals(0, s.size());
|
assertEquals(0, s.size());
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void emptyBytesLookAheadReturnsEOF() {
|
public void emptyBytesLookAheadReturnsEOF() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("");
|
CodePointCharStream s = CharStreams.fromString("");
|
||||||
assertEquals(IntStream.EOF, s.LA(1));
|
assertEquals(IntStream.EOF, s.LA(1));
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void consumingEmptyStreamShouldThrow() {
|
public void consumingEmptyStreamShouldThrow() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("");
|
CodePointCharStream s = CharStreams.fromString("");
|
||||||
thrown.expect(IllegalStateException.class);
|
thrown.expect(IllegalStateException.class);
|
||||||
thrown.expectMessage("cannot consume EOF");
|
thrown.expectMessage("cannot consume EOF");
|
||||||
s.consume();
|
s.consume();
|
||||||
|
@ -48,13 +48,13 @@ public class TestCodePointCharStream {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void singleLatinCodePointHasSize1() {
|
public void singleLatinCodePointHasSize1() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("X");
|
CodePointCharStream s = CharStreams.fromString("X");
|
||||||
assertEquals(1, s.size());
|
assertEquals(1, s.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void consumingSingleLatinCodePointShouldMoveIndex() {
|
public void consumingSingleLatinCodePointShouldMoveIndex() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("X");
|
CodePointCharStream s = CharStreams.fromString("X");
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
s.consume();
|
s.consume();
|
||||||
assertEquals(1, s.index());
|
assertEquals(1, s.index());
|
||||||
|
@ -62,7 +62,7 @@ public class TestCodePointCharStream {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void consumingPastSingleLatinCodePointShouldThrow() {
|
public void consumingPastSingleLatinCodePointShouldThrow() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("X");
|
CodePointCharStream s = CharStreams.fromString("X");
|
||||||
s.consume();
|
s.consume();
|
||||||
thrown.expect(IllegalStateException.class);
|
thrown.expect(IllegalStateException.class);
|
||||||
thrown.expectMessage("cannot consume EOF");
|
thrown.expectMessage("cannot consume EOF");
|
||||||
|
@ -71,14 +71,14 @@ public class TestCodePointCharStream {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void singleLatinCodePointLookAheadShouldReturnCodePoint() {
|
public void singleLatinCodePointLookAheadShouldReturnCodePoint() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("X");
|
CodePointCharStream s = CharStreams.fromString("X");
|
||||||
assertEquals('X', s.LA(1));
|
assertEquals('X', s.LA(1));
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void multipleLatinCodePointsLookAheadShouldReturnCodePoints() {
|
public void multipleLatinCodePointsLookAheadShouldReturnCodePoints() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("XYZ");
|
CodePointCharStream s = CharStreams.fromString("XYZ");
|
||||||
assertEquals('X', s.LA(1));
|
assertEquals('X', s.LA(1));
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
assertEquals('Y', s.LA(2));
|
assertEquals('Y', s.LA(2));
|
||||||
|
@ -89,20 +89,20 @@ public class TestCodePointCharStream {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void singleLatinCodePointLookAheadPastEndShouldReturnEOF() {
|
public void singleLatinCodePointLookAheadPastEndShouldReturnEOF() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("X");
|
CodePointCharStream s = CharStreams.fromString("X");
|
||||||
assertEquals(IntStream.EOF, s.LA(2));
|
assertEquals(IntStream.EOF, s.LA(2));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void singleCJKCodePointHasSize1() {
|
public void singleCJKCodePointHasSize1() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("\u611B");
|
CodePointCharStream s = CharStreams.fromString("\u611B");
|
||||||
assertEquals(1, s.size());
|
assertEquals(1, s.size());
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void consumingSingleCJKCodePointShouldMoveIndex() {
|
public void consumingSingleCJKCodePointShouldMoveIndex() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("\u611B");
|
CodePointCharStream s = CharStreams.fromString("\u611B");
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
s.consume();
|
s.consume();
|
||||||
assertEquals(1, s.index());
|
assertEquals(1, s.index());
|
||||||
|
@ -110,7 +110,7 @@ public class TestCodePointCharStream {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void consumingPastSingleCJKCodePointShouldThrow() {
|
public void consumingPastSingleCJKCodePointShouldThrow() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("\u611B");
|
CodePointCharStream s = CharStreams.fromString("\u611B");
|
||||||
s.consume();
|
s.consume();
|
||||||
thrown.expect(IllegalStateException.class);
|
thrown.expect(IllegalStateException.class);
|
||||||
thrown.expectMessage("cannot consume EOF");
|
thrown.expectMessage("cannot consume EOF");
|
||||||
|
@ -119,21 +119,21 @@ public class TestCodePointCharStream {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void singleCJKCodePointLookAheadShouldReturnCodePoint() {
|
public void singleCJKCodePointLookAheadShouldReturnCodePoint() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("\u611B");
|
CodePointCharStream s = CharStreams.fromString("\u611B");
|
||||||
assertEquals(0x611B, s.LA(1));
|
assertEquals(0x611B, s.LA(1));
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void singleCJKCodePointLookAheadPastEndShouldReturnEOF() {
|
public void singleCJKCodePointLookAheadPastEndShouldReturnEOF() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("\u611B");
|
CodePointCharStream s = CharStreams.fromString("\u611B");
|
||||||
assertEquals(IntStream.EOF, s.LA(2));
|
assertEquals(IntStream.EOF, s.LA(2));
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void singleEmojiCodePointHasSize1() {
|
public void singleEmojiCodePointHasSize1() {
|
||||||
CodePointCharStream s = CharStreams.createWithString(
|
CodePointCharStream s = CharStreams.fromString(
|
||||||
new StringBuilder().appendCodePoint(0x1F4A9).toString());
|
new StringBuilder().appendCodePoint(0x1F4A9).toString());
|
||||||
assertEquals(1, s.size());
|
assertEquals(1, s.size());
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
|
@ -141,7 +141,7 @@ public class TestCodePointCharStream {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void consumingSingleEmojiCodePointShouldMoveIndex() {
|
public void consumingSingleEmojiCodePointShouldMoveIndex() {
|
||||||
CodePointCharStream s = CharStreams.createWithString(
|
CodePointCharStream s = CharStreams.fromString(
|
||||||
new StringBuilder().appendCodePoint(0x1F4A9).toString());
|
new StringBuilder().appendCodePoint(0x1F4A9).toString());
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
s.consume();
|
s.consume();
|
||||||
|
@ -150,7 +150,7 @@ public class TestCodePointCharStream {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void consumingPastEndOfEmojiCodePointWithShouldThrow() {
|
public void consumingPastEndOfEmojiCodePointWithShouldThrow() {
|
||||||
CodePointCharStream s = CharStreams.createWithString(
|
CodePointCharStream s = CharStreams.fromString(
|
||||||
new StringBuilder().appendCodePoint(0x1F4A9).toString());
|
new StringBuilder().appendCodePoint(0x1F4A9).toString());
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
s.consume();
|
s.consume();
|
||||||
|
@ -162,7 +162,7 @@ public class TestCodePointCharStream {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void singleEmojiCodePointLookAheadShouldReturnCodePoint() {
|
public void singleEmojiCodePointLookAheadShouldReturnCodePoint() {
|
||||||
CodePointCharStream s = CharStreams.createWithString(
|
CodePointCharStream s = CharStreams.fromString(
|
||||||
new StringBuilder().appendCodePoint(0x1F4A9).toString());
|
new StringBuilder().appendCodePoint(0x1F4A9).toString());
|
||||||
assertEquals(0x1F4A9, s.LA(1));
|
assertEquals(0x1F4A9, s.LA(1));
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
|
@ -170,7 +170,7 @@ public class TestCodePointCharStream {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void singleEmojiCodePointLookAheadPastEndShouldReturnEOF() {
|
public void singleEmojiCodePointLookAheadPastEndShouldReturnEOF() {
|
||||||
CodePointCharStream s = CharStreams.createWithString(
|
CodePointCharStream s = CharStreams.fromString(
|
||||||
new StringBuilder().appendCodePoint(0x1F4A9).toString());
|
new StringBuilder().appendCodePoint(0x1F4A9).toString());
|
||||||
assertEquals(IntStream.EOF, s.LA(2));
|
assertEquals(IntStream.EOF, s.LA(2));
|
||||||
assertEquals(0, s.index());
|
assertEquals(0, s.index());
|
||||||
|
@ -178,19 +178,19 @@ public class TestCodePointCharStream {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void getTextWithLatin() {
|
public void getTextWithLatin() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("0123456789");
|
CodePointCharStream s = CharStreams.fromString("0123456789");
|
||||||
assertEquals("34567", s.getText(Interval.of(3, 7)));
|
assertEquals("34567", s.getText(Interval.of(3, 7)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void getTextWithCJK() {
|
public void getTextWithCJK() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("01234\u40946789");
|
CodePointCharStream s = CharStreams.fromString("01234\u40946789");
|
||||||
assertEquals("34\u409467", s.getText(Interval.of(3, 7)));
|
assertEquals("34\u409467", s.getText(Interval.of(3, 7)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void getTextWithEmoji() {
|
public void getTextWithEmoji() {
|
||||||
CodePointCharStream s = CharStreams.createWithString(
|
CodePointCharStream s = CharStreams.fromString(
|
||||||
new StringBuilder("01234")
|
new StringBuilder("01234")
|
||||||
.appendCodePoint(0x1F522)
|
.appendCodePoint(0x1F522)
|
||||||
.append("6789")
|
.append("6789")
|
||||||
|
@ -200,19 +200,19 @@ public class TestCodePointCharStream {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void toStringWithLatin() {
|
public void toStringWithLatin() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("0123456789");
|
CodePointCharStream s = CharStreams.fromString("0123456789");
|
||||||
assertEquals("0123456789", s.toString());
|
assertEquals("0123456789", s.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void toStringWithCJK() {
|
public void toStringWithCJK() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("01234\u40946789");
|
CodePointCharStream s = CharStreams.fromString("01234\u40946789");
|
||||||
assertEquals("01234\u40946789", s.toString());
|
assertEquals("01234\u40946789", s.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void toStringWithEmoji() {
|
public void toStringWithEmoji() {
|
||||||
CodePointCharStream s = CharStreams.createWithString(
|
CodePointCharStream s = CharStreams.fromString(
|
||||||
new StringBuilder("01234")
|
new StringBuilder("01234")
|
||||||
.appendCodePoint(0x1F522)
|
.appendCodePoint(0x1F522)
|
||||||
.append("6789")
|
.append("6789")
|
||||||
|
@ -222,19 +222,19 @@ public class TestCodePointCharStream {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void lookAheadWithLatin() {
|
public void lookAheadWithLatin() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("0123456789");
|
CodePointCharStream s = CharStreams.fromString("0123456789");
|
||||||
assertEquals('5', s.LA(6));
|
assertEquals('5', s.LA(6));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void lookAheadWithCJK() {
|
public void lookAheadWithCJK() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("01234\u40946789");
|
CodePointCharStream s = CharStreams.fromString("01234\u40946789");
|
||||||
assertEquals(0x4094, s.LA(6));
|
assertEquals(0x4094, s.LA(6));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void lookAheadWithEmoji() {
|
public void lookAheadWithEmoji() {
|
||||||
CodePointCharStream s = CharStreams.createWithString(
|
CodePointCharStream s = CharStreams.fromString(
|
||||||
new StringBuilder("01234")
|
new StringBuilder("01234")
|
||||||
.appendCodePoint(0x1F522)
|
.appendCodePoint(0x1F522)
|
||||||
.append("6789")
|
.append("6789")
|
||||||
|
@ -244,21 +244,21 @@ public class TestCodePointCharStream {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void seekWithLatin() {
|
public void seekWithLatin() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("0123456789");
|
CodePointCharStream s = CharStreams.fromString("0123456789");
|
||||||
s.seek(5);
|
s.seek(5);
|
||||||
assertEquals('5', s.LA(1));
|
assertEquals('5', s.LA(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void seekWithCJK() {
|
public void seekWithCJK() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("01234\u40946789");
|
CodePointCharStream s = CharStreams.fromString("01234\u40946789");
|
||||||
s.seek(5);
|
s.seek(5);
|
||||||
assertEquals(0x4094, s.LA(1));
|
assertEquals(0x4094, s.LA(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void seekWithEmoji() {
|
public void seekWithEmoji() {
|
||||||
CodePointCharStream s = CharStreams.createWithString(
|
CodePointCharStream s = CharStreams.fromString(
|
||||||
new StringBuilder("01234")
|
new StringBuilder("01234")
|
||||||
.appendCodePoint(0x1F522)
|
.appendCodePoint(0x1F522)
|
||||||
.append("6789")
|
.append("6789")
|
||||||
|
@ -269,21 +269,21 @@ public class TestCodePointCharStream {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void lookBehindWithLatin() {
|
public void lookBehindWithLatin() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("0123456789");
|
CodePointCharStream s = CharStreams.fromString("0123456789");
|
||||||
s.seek(6);
|
s.seek(6);
|
||||||
assertEquals('5', s.LA(-1));
|
assertEquals('5', s.LA(-1));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void lookBehindWithCJK() {
|
public void lookBehindWithCJK() {
|
||||||
CodePointCharStream s = CharStreams.createWithString("01234\u40946789");
|
CodePointCharStream s = CharStreams.fromString("01234\u40946789");
|
||||||
s.seek(6);
|
s.seek(6);
|
||||||
assertEquals(0x4094, s.LA(-1));
|
assertEquals(0x4094, s.LA(-1));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void lookBehindWithEmoji() {
|
public void lookBehindWithEmoji() {
|
||||||
CodePointCharStream s = CharStreams.createWithString(
|
CodePointCharStream s = CharStreams.fromString(
|
||||||
new StringBuilder("01234")
|
new StringBuilder("01234")
|
||||||
.appendCodePoint(0x1F522)
|
.appendCodePoint(0x1F522)
|
||||||
.append("6789")
|
.append("6789")
|
||||||
|
|
|
@ -7,19 +7,27 @@ package org.antlr.v4.runtime;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.Reader;
|
||||||
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.IntBuffer;
|
import java.nio.IntBuffer;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
import java.nio.charset.CodingErrorAction;
|
import java.nio.charset.CodingErrorAction;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.nio.channels.Channels;
|
import java.nio.channels.Channels;
|
||||||
import java.nio.channels.FileChannel;
|
import java.nio.channels.FileChannel;
|
||||||
import java.nio.channels.ReadableByteChannel;
|
import java.nio.channels.ReadableByteChannel;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility class to create {@link CodePointCharStream}s from
|
* Utility class to create {@link CharStream}s from various sources of
|
||||||
* various sources of Unicode data.
|
* string data.
|
||||||
|
*
|
||||||
|
* Main entry points are the factory methods {@code CharStreams.fromPath()},
|
||||||
|
* {@code CharStreams.fromString()}, etc.
|
||||||
*/
|
*/
|
||||||
public final class CharStreams {
|
public final class CharStreams {
|
||||||
private static final int DEFAULT_BUFFER_SIZE = 4096;
|
private static final int DEFAULT_BUFFER_SIZE = 4096;
|
||||||
|
@ -28,14 +36,215 @@ public final class CharStreams {
|
||||||
private CharStreams() { }
|
private CharStreams() { }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convenience method to create a {@link CodePointCharStream}
|
* Creates a {@link CharStream} given a path to a UTF-8
|
||||||
* for the Unicode code points in a Java {@link String}.
|
* encoded file on disk.
|
||||||
|
*
|
||||||
|
* Reads the entire contents of the file into the result before returning.
|
||||||
*/
|
*/
|
||||||
public static CodePointCharStream createWithString(String s) {
|
public static CharStream fromPath(Path path) throws IOException {
|
||||||
return createWithString(s, IntStream.UNKNOWN_SOURCE_NAME);
|
return fromPath(path, StandardCharsets.UTF_8);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static CodePointCharStream createWithString(String s, String sourceName) {
|
/**
|
||||||
|
* Creates a {@link CharStream} given a path to a file on disk and the
|
||||||
|
* charset of the bytes contained in the file.
|
||||||
|
*
|
||||||
|
* Reads the entire contents of the file into the result before returning.
|
||||||
|
*
|
||||||
|
* For sources encoded in UTF-8, supports the full Unicode code point
|
||||||
|
* range.
|
||||||
|
*
|
||||||
|
* For other sources, only supports Unicode code points up to U+FFFF.
|
||||||
|
*/
|
||||||
|
public static CharStream fromPath(Path path, Charset charset) throws IOException {
|
||||||
|
if (charset.equals(StandardCharsets.UTF_8)) {
|
||||||
|
try (ReadableByteChannel channel = Files.newByteChannel(path)) {
|
||||||
|
return fromChannel(
|
||||||
|
channel,
|
||||||
|
DEFAULT_BUFFER_SIZE,
|
||||||
|
CodingErrorAction.REPLACE,
|
||||||
|
path.toString());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return new ANTLRFileStream(path.toString(), charset.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a {@link CharStream} given a string containing a
|
||||||
|
* path to a UTF-8 file on disk.
|
||||||
|
*
|
||||||
|
* Reads the entire contents of the file into the result before returning.
|
||||||
|
*/
|
||||||
|
public static CharStream fromFileName(String fileName) throws IOException {
|
||||||
|
return fromPath(Paths.get(fileName), StandardCharsets.UTF_8);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a {@link CharStream} given a string containing a
|
||||||
|
* path to a file on disk and the charset of the bytes
|
||||||
|
* contained in the file.
|
||||||
|
*
|
||||||
|
* Reads the entire contents of the file into the result before returning.
|
||||||
|
*
|
||||||
|
* For sources encoded in UTF-8, supports the full Unicode code point
|
||||||
|
* range.
|
||||||
|
*
|
||||||
|
* For other sources, only supports Unicode code points up to U+FFFF.
|
||||||
|
*/
|
||||||
|
public static CharStream fromFileName(String fileName, Charset charset) throws IOException {
|
||||||
|
return fromPath(Paths.get(fileName), charset);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a {@link CharStream} given an opened {@link InputStream}
|
||||||
|
* containing UTF-8 bytes.
|
||||||
|
*
|
||||||
|
* Reads the entire contents of the {@code InputStream} into
|
||||||
|
* the result before returning, then closes the {@code InputStream}.
|
||||||
|
*/
|
||||||
|
public static CharStream fromStream(InputStream is) throws IOException {
|
||||||
|
return fromStream(is, StandardCharsets.UTF_8);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a {@link CharStream} given an opened {@link InputStream} and the
|
||||||
|
* charset of the bytes contained in the stream.
|
||||||
|
*
|
||||||
|
* Reads the entire contents of the {@code InputStream} into
|
||||||
|
* the result before returning, then closes the {@code InputStream}.
|
||||||
|
*
|
||||||
|
* For sources encoded in UTF-8, supports the full Unicode code point
|
||||||
|
* range.
|
||||||
|
*
|
||||||
|
* For other sources, only supports Unicode code points up to U+FFFF.
|
||||||
|
*/
|
||||||
|
public static CharStream fromStream(InputStream is, Charset charset) throws IOException {
|
||||||
|
if (charset.equals(StandardCharsets.UTF_8)) {
|
||||||
|
try (ReadableByteChannel channel = Channels.newChannel(is)) {
|
||||||
|
return fromChannel(
|
||||||
|
channel,
|
||||||
|
DEFAULT_BUFFER_SIZE,
|
||||||
|
CodingErrorAction.REPLACE,
|
||||||
|
IntStream.UNKNOWN_SOURCE_NAME);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
try (InputStreamReader isr = new InputStreamReader(is, charset)) {
|
||||||
|
return new ANTLRInputStream(isr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a {@link CharStream} given an opened {@link ReadableByteChannel}
|
||||||
|
* containing UTF-8 bytes.
|
||||||
|
*
|
||||||
|
* Reads the entire contents of the {@code channel} into
|
||||||
|
* the result before returning, then closes the {@code channel}.
|
||||||
|
*/
|
||||||
|
public static CharStream fromChannel(ReadableByteChannel channel) throws IOException {
|
||||||
|
return fromChannel(channel, StandardCharsets.UTF_8);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a {@link CharStream} given an opened {@link ReadableByteChannel} and the
|
||||||
|
* charset of the bytes contained in the channel.
|
||||||
|
*
|
||||||
|
* Reads the entire contents of the {@code channel} into
|
||||||
|
* the result before returning, then closes the {@code channel}.
|
||||||
|
*
|
||||||
|
* For sources encoded in UTF-8, supports the full Unicode code point
|
||||||
|
* range.
|
||||||
|
*
|
||||||
|
* For other sources, only supports Unicode code points up to U+FFFF.
|
||||||
|
*/
|
||||||
|
public static CharStream fromChannel(ReadableByteChannel channel, Charset charset) throws IOException {
|
||||||
|
if (charset.equals(StandardCharsets.UTF_8)) {
|
||||||
|
return fromChannel(
|
||||||
|
channel,
|
||||||
|
DEFAULT_BUFFER_SIZE,
|
||||||
|
CodingErrorAction.REPLACE,
|
||||||
|
IntStream.UNKNOWN_SOURCE_NAME);
|
||||||
|
} else {
|
||||||
|
try (InputStream is = Channels.newInputStream(channel);
|
||||||
|
InputStreamReader isr = new InputStreamReader(Channels.newInputStream(channel), charset)) {
|
||||||
|
return new ANTLRInputStream(isr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a {@link CharStream} given a {@link Reader}. Closes
|
||||||
|
* the reader before returning.
|
||||||
|
*/
|
||||||
|
public static CodePointCharStream fromReader(Reader r) throws IOException {
|
||||||
|
return fromReader(r, IntStream.UNKNOWN_SOURCE_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a {@link CharStream} given a {@link Reader} and its
|
||||||
|
* source name. Closes the reader before returning.
|
||||||
|
*/
|
||||||
|
public static CodePointCharStream fromReader(Reader r, String sourceName) throws IOException {
|
||||||
|
IntBuffer codePointBuffer = IntBuffer.allocate(DEFAULT_BUFFER_SIZE);
|
||||||
|
int highSurrogate = -1;
|
||||||
|
int curCodeUnit;
|
||||||
|
try {
|
||||||
|
while ((curCodeUnit = r.read()) != -1) {
|
||||||
|
if (!codePointBuffer.hasRemaining()) {
|
||||||
|
// Grow the code point buffer size by 2.
|
||||||
|
IntBuffer newBuffer = IntBuffer.allocate(codePointBuffer.capacity() * 2);
|
||||||
|
codePointBuffer.flip();
|
||||||
|
newBuffer.put(codePointBuffer);
|
||||||
|
codePointBuffer = newBuffer;
|
||||||
|
}
|
||||||
|
if (Character.isHighSurrogate((char) curCodeUnit)) {
|
||||||
|
if (highSurrogate != -1) {
|
||||||
|
// Dangling high surrogate followed by another high surrogate.
|
||||||
|
codePointBuffer.put(highSurrogate);
|
||||||
|
}
|
||||||
|
highSurrogate = curCodeUnit;
|
||||||
|
} else if (Character.isLowSurrogate((char) curCodeUnit)) {
|
||||||
|
if (highSurrogate == -1) {
|
||||||
|
// Low surrogate not preceded by high surrogate.
|
||||||
|
codePointBuffer.put(curCodeUnit);
|
||||||
|
} else {
|
||||||
|
codePointBuffer.put(Character.toCodePoint((char) highSurrogate, (char) curCodeUnit));
|
||||||
|
highSurrogate = -1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (highSurrogate != -1) {
|
||||||
|
// Dangling high surrogate followed by a non-surrogate.
|
||||||
|
codePointBuffer.put(highSurrogate);
|
||||||
|
highSurrogate = -1;
|
||||||
|
}
|
||||||
|
codePointBuffer.put(curCodeUnit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (highSurrogate != -1) {
|
||||||
|
// Dangling high surrogate at end of file.
|
||||||
|
codePointBuffer.put(highSurrogate);
|
||||||
|
}
|
||||||
|
codePointBuffer.flip();
|
||||||
|
return new CodePointCharStream(codePointBuffer, sourceName);
|
||||||
|
} finally {
|
||||||
|
r.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a {@link CharStream} given a {@link String}.
|
||||||
|
*/
|
||||||
|
public static CodePointCharStream fromString(String s) {
|
||||||
|
return fromString(s, IntStream.UNKNOWN_SOURCE_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a {@link CharStream} given a {@link String} and the {@code sourceName}
|
||||||
|
* from which it came.
|
||||||
|
*/
|
||||||
|
public static CodePointCharStream fromString(String s, String sourceName) {
|
||||||
// Initial guess assumes no code points > U+FFFF: one code
|
// Initial guess assumes no code points > U+FFFF: one code
|
||||||
// point for each code unit in the string
|
// point for each code unit in the string
|
||||||
IntBuffer codePointBuffer = IntBuffer.allocate(s.length());
|
IntBuffer codePointBuffer = IntBuffer.allocate(s.length());
|
||||||
|
@ -56,48 +265,39 @@ public final class CharStreams {
|
||||||
return new CodePointCharStream(codePointBuffer, sourceName);
|
return new CodePointCharStream(codePointBuffer, sourceName);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static CodePointCharStream createWithUTF8(Path path) throws IOException {
|
/**
|
||||||
try (ReadableByteChannel channel = Files.newByteChannel(path)) {
|
* Creates a {@link CharStream} given an opened {@link ReadableByteChannel}
|
||||||
return createWithUTF8Channel(
|
* containing UTF-8 bytes.
|
||||||
channel,
|
*
|
||||||
DEFAULT_BUFFER_SIZE,
|
* Reads the entire contents of the {@code channel} into
|
||||||
CodingErrorAction.REPLACE,
|
* the result before returning, then closes the {@code channel}.
|
||||||
path.toString());
|
*/
|
||||||
}
|
public static CodePointCharStream fromChannel(
|
||||||
}
|
|
||||||
|
|
||||||
public static CodePointCharStream createWithUTF8Stream(InputStream is) throws IOException {
|
|
||||||
try (ReadableByteChannel channel = Channels.newChannel(is)) {
|
|
||||||
return createWithUTF8Channel(
|
|
||||||
channel,
|
|
||||||
DEFAULT_BUFFER_SIZE,
|
|
||||||
CodingErrorAction.REPLACE,
|
|
||||||
IntStream.UNKNOWN_SOURCE_NAME);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static CodePointCharStream createWithUTF8Channel(
|
|
||||||
ReadableByteChannel channel,
|
ReadableByteChannel channel,
|
||||||
int bufferSize,
|
int bufferSize,
|
||||||
CodingErrorAction decodingErrorAction,
|
CodingErrorAction decodingErrorAction,
|
||||||
String sourceName
|
String sourceName
|
||||||
) throws IOException {
|
) throws IOException {
|
||||||
ByteBuffer utf8BytesIn = ByteBuffer.allocateDirect(bufferSize);
|
try {
|
||||||
IntBuffer codePointsOut = IntBuffer.allocate(bufferSize);
|
ByteBuffer utf8BytesIn = ByteBuffer.allocateDirect(bufferSize);
|
||||||
boolean endOfInput = false;
|
IntBuffer codePointsOut = IntBuffer.allocate(bufferSize);
|
||||||
UTF8CodePointDecoder decoder = new UTF8CodePointDecoder(decodingErrorAction);
|
boolean endOfInput = false;
|
||||||
while (!endOfInput) {
|
UTF8CodePointDecoder decoder = new UTF8CodePointDecoder(decodingErrorAction);
|
||||||
int bytesRead = channel.read(utf8BytesIn);
|
while (!endOfInput) {
|
||||||
endOfInput = (bytesRead == -1);
|
int bytesRead = channel.read(utf8BytesIn);
|
||||||
utf8BytesIn.flip();
|
endOfInput = (bytesRead == -1);
|
||||||
codePointsOut = decoder.decodeCodePointsFromBuffer(
|
utf8BytesIn.flip();
|
||||||
utf8BytesIn,
|
codePointsOut = decoder.decodeCodePointsFromBuffer(
|
||||||
codePointsOut,
|
utf8BytesIn,
|
||||||
endOfInput);
|
codePointsOut,
|
||||||
utf8BytesIn.compact();
|
endOfInput);
|
||||||
|
utf8BytesIn.compact();
|
||||||
|
}
|
||||||
|
codePointsOut.limit(codePointsOut.position());
|
||||||
|
codePointsOut.flip();
|
||||||
|
return new CodePointCharStream(codePointsOut, sourceName);
|
||||||
|
} finally {
|
||||||
|
channel.close();
|
||||||
}
|
}
|
||||||
codePointsOut.limit(codePointsOut.position());
|
|
||||||
codePointsOut.flip();
|
|
||||||
return new CodePointCharStream(codePointsOut, sourceName);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -381,7 +381,7 @@ public class TestATNLexerInterpreter extends BaseJavaToolTest {
|
||||||
|
|
||||||
protected void checkLexerMatches(LexerGrammar lg, String inputString, String expecting) {
|
protected void checkLexerMatches(LexerGrammar lg, String inputString, String expecting) {
|
||||||
ATN atn = createATN(lg, true);
|
ATN atn = createATN(lg, true);
|
||||||
CharStream input = CharStreams.createWithString(inputString);
|
CharStream input = CharStreams.fromString(inputString);
|
||||||
ATNState startState = atn.modeNameToStartState.get("DEFAULT_MODE");
|
ATNState startState = atn.modeNameToStartState.get("DEFAULT_MODE");
|
||||||
DOTGenerator dot = new DOTGenerator(lg);
|
DOTGenerator dot = new DOTGenerator(lg);
|
||||||
// System.out.println(dot.getDOT(startState, true));
|
// System.out.println(dot.getDOT(startState, true));
|
||||||
|
|
|
@ -161,7 +161,7 @@ public class TestUnicodeGrammar extends BaseJavaToolTest {
|
||||||
String inputText) throws Exception {
|
String inputText) throws Exception {
|
||||||
Grammar grammar = new Grammar(grammarText);
|
Grammar grammar = new Grammar(grammarText);
|
||||||
LexerInterpreter lexEngine = grammar.createLexerInterpreter(
|
LexerInterpreter lexEngine = grammar.createLexerInterpreter(
|
||||||
CharStreams.createWithString(inputText));
|
CharStreams.fromString(inputText));
|
||||||
CommonTokenStream tokens = new CommonTokenStream(lexEngine);
|
CommonTokenStream tokens = new CommonTokenStream(lexEngine);
|
||||||
GrammarParserInterpreter parser = grammar.createGrammarParserInterpreter(tokens);
|
GrammarParserInterpreter parser = grammar.createGrammarParserInterpreter(tokens);
|
||||||
ParseTree parseTree = parser.parse(grammar.rules.get(rootRule).index);
|
ParseTree parseTree = parser.parse(grammar.rules.get(rootRule).index);
|
||||||
|
|
|
@ -26,7 +26,6 @@ import java.lang.reflect.Constructor;
|
||||||
import java.lang.reflect.InvocationTargetException;
|
import java.lang.reflect.InvocationTargetException;
|
||||||
import java.lang.reflect.Method;
|
import java.lang.reflect.Method;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -157,28 +156,12 @@ public class TestRig {
|
||||||
|
|
||||||
Charset charset = ( encoding == null ? Charset.defaultCharset () : Charset.forName(encoding) );
|
Charset charset = ( encoding == null ? Charset.defaultCharset () : Charset.forName(encoding) );
|
||||||
if ( inputFiles.size()==0 ) {
|
if ( inputFiles.size()==0 ) {
|
||||||
CharStream charStream;
|
CharStream charStream = CharStreams.fromStream(System.in, charset);
|
||||||
if ( charset.equals(StandardCharsets.UTF_8)) {
|
|
||||||
charStream = CharStreams.createWithUTF8Stream(System.in);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
try ( InputStreamReader r = new InputStreamReader(System.in, charset) ) {
|
|
||||||
charStream = new ANTLRInputStream(r);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
process(lexer, parserClass, parser, charStream);
|
process(lexer, parserClass, parser, charStream);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
for (String inputFile : inputFiles) {
|
for (String inputFile : inputFiles) {
|
||||||
CharStream charStream;
|
CharStream charStream = CharStreams.fromPath(Paths.get(inputFile), charset);
|
||||||
if ( charset.equals(StandardCharsets.UTF_8) ) {
|
|
||||||
charStream = CharStreams.createWithUTF8(Paths.get(inputFile));
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
try ( InputStreamReader r = new InputStreamReader(System.in, charset) ) {
|
|
||||||
charStream = new ANTLRInputStream(r);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ( inputFiles.size()>1 ) {
|
if ( inputFiles.size()>1 ) {
|
||||||
System.err.println(inputFile);
|
System.err.println(inputFile);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue