From 200379802aa69d9ec4d1c42377a25cea35f2fe6c Mon Sep 17 00:00:00 2001 From: Ben Hamilton Date: Wed, 29 Mar 2017 10:06:08 -0700 Subject: [PATCH] Tidy comments and add tests to ensure UTF-16 and UTF-32 support Unicode code points > U+FFFF --- .../v4/test/runtime/java/TestCharStreams.java | 25 +++++++++++++++++++ .../src/org/antlr/v4/runtime/CharStreams.java | 20 --------------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestCharStreams.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestCharStreams.java index 67cf5e636..5a17579f5 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestCharStreams.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestCharStreams.java @@ -14,6 +14,7 @@ import java.io.Reader; import java.nio.channels.SeekableByteChannel; import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; import java.nio.charset.CodingErrorAction; import java.nio.charset.StandardCharsets; import java.nio.file.Files; @@ -207,4 +208,28 @@ public class TestCharStreams { assertEquals("hello \uD83C\uDF0E", s.toString()); } } + + @Test + public void fromSMPUTF16LEPathSMPHasExpectedSize() throws Exception { + Path p = folder.newFile().toPath(); + Files.write(p, "hello \uD83C\uDF0E".getBytes(StandardCharsets.UTF_16LE)); + CharStream s = CharStreams.fromPath(p, StandardCharsets.UTF_16LE); + assertEquals(7, s.size()); + assertEquals(0, s.index()); + assertEquals("hello \uD83C\uDF0E", s.toString()); + assertEquals(p.toString(), s.getSourceName()); + } + + @Test + public void fromSMPUTF32LEPathSMPHasExpectedSize() throws Exception { + Path p = folder.newFile().toPath(); + // UTF-32 isn't popular enough to have an entry in StandardCharsets. 
+ Charset c = Charset.forName("UTF-32LE"); + Files.write(p, "hello \uD83C\uDF0E".getBytes(c)); + CharStream s = CharStreams.fromPath(p, c); + assertEquals(7, s.size()); + assertEquals(0, s.index()); + assertEquals("hello \uD83C\uDF0E", s.toString()); + assertEquals(p.toString(), s.getSourceName()); + } } diff --git a/runtime/Java/src/org/antlr/v4/runtime/CharStreams.java b/runtime/Java/src/org/antlr/v4/runtime/CharStreams.java index 05908ce34..b10731cac 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/CharStreams.java +++ b/runtime/Java/src/org/antlr/v4/runtime/CharStreams.java @@ -50,11 +50,6 @@ public final class CharStreams { * charset of the bytes contained in the file. * * Reads the entire contents of the file into the result before returning. - * - * For sources encoded in UTF-8, supports the full Unicode code point - * range. - * - * For other sources, only supports Unicode code points up to U+FFFF. */ public static CharStream fromPath(Path path, Charset charset) throws IOException { long size = Files.size(path); @@ -85,11 +80,6 @@ public final class CharStreams { * contained in the file. * * Reads the entire contents of the file into the result before returning. - * - * For sources encoded in UTF-8, supports the full Unicode code point - * range. - * - * For other sources, only supports Unicode code points up to U+FFFF. */ public static CharStream fromFileName(String fileName, Charset charset) throws IOException { return fromPath(Paths.get(fileName), charset); @@ -113,11 +103,6 @@ public final class CharStreams { * * Reads the entire contents of the {@code InputStream} into * the result before returning, then closes the {@code InputStream}. - * - * For sources encoded in UTF-8, supports the full Unicode code point - * range. - * - * For other sources, only supports Unicode code points up to U+FFFF. 
*/ public static CharStream fromStream(InputStream is, Charset charset) throws IOException { return fromStream(is, charset, -1); @@ -152,11 +137,6 @@ public final class CharStreams { * * Reads the entire contents of the {@code channel} into * the result before returning, then closes the {@code channel}. - * - * For sources encoded in UTF-8, supports the full Unicode code point - * range. - * - * For other sources, only supports Unicode code points up to U+FFFF. */ public static CharStream fromChannel(ReadableByteChannel channel, Charset charset) throws IOException { return fromChannel(