Tidy comments and add tests to ensure UTF-16 and UTF-32 support Unicode code points > U+FFFF

2017-03-29 10:06:08 -07:00 · 2017-03-29 10:06:08 -07:00 · 200379802a
parent 9764ff7c50
commit 200379802a
2 changed files with 25 additions and 20 deletions
--- a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestCharStreams.java
+++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestCharStreams.java
@ -14,6 +14,7 @@ import java.io.Reader;

 import java.nio.channels.SeekableByteChannel;
 import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
 import java.nio.charset.CodingErrorAction;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
@ -207,4 +208,28 @@ public class TestCharStreams {
 			assertEquals("hello \uD83C\uDF0E", s.toString());
 		}
 	}
+
+	@Test
+	public void fromSMPUTF16LEPathSMPHasExpectedSize() throws Exception {
+		Path p = folder.newFile().toPath();
+		Files.write(p, "hello \uD83C\uDF0E".getBytes(StandardCharsets.UTF_16LE));
+		CharStream s = CharStreams.fromPath(p, StandardCharsets.UTF_16LE);
+		assertEquals(7, s.size());
+		assertEquals(0, s.index());
+		assertEquals("hello \uD83C\uDF0E", s.toString());
+		assertEquals(p.toString(), s.getSourceName());
+	}
+
+	@Test
+	public void fromSMPUTF32LEPathSMPHasExpectedSize() throws Exception {
+		Path p = folder.newFile().toPath();
+		// UTF-32 isn't popular enough to have an entry in StandardCharsets.
+		Charset c = Charset.forName("UTF-32LE");
+		Files.write(p, "hello \uD83C\uDF0E".getBytes(c));
+		CharStream s = CharStreams.fromPath(p, c);
+		assertEquals(7, s.size());
+		assertEquals(0, s.index());
+		assertEquals("hello \uD83C\uDF0E", s.toString());
+		assertEquals(p.toString(), s.getSourceName());
+	}
 }
--- a/runtime/Java/src/org/antlr/v4/runtime/CharStreams.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/CharStreams.java
@ -50,11 +50,6 @@ public final class CharStreams {
 	 * charset of the bytes contained in the file.
 	 *
 	 * Reads the entire contents of the file into the result before returning.
-	 *
-	 * For sources encoded in UTF-8, supports the full Unicode code point
-	 * range.
-	 *
-	 * For other sources, only supports Unicode code points up to U+FFFF.
 	 */
 	public static CharStream fromPath(Path path, Charset charset) throws IOException {
 		long size = Files.size(path);
@ -85,11 +80,6 @@ public final class CharStreams {
 	 * contained in the file.
 	 *
 	 * Reads the entire contents of the file into the result before returning.
-	 *
-	 * For sources encoded in UTF-8, supports the full Unicode code point
-	 * range.
-	 *
-	 * For other sources, only supports Unicode code points up to U+FFFF.
 	 */
 	public static CharStream fromFileName(String fileName, Charset charset) throws IOException {
 		return fromPath(Paths.get(fileName), charset);
@ -113,11 +103,6 @@ public final class CharStreams {
 	 *
 	 * Reads the entire contents of the {@code InputStream} into
 	 * the result before returning, then closes the {@code InputStream}.
-	 *
-	 * For sources encoded in UTF-8, supports the full Unicode code point
-	 * range.
-	 *
-	 * For other sources, only supports Unicode code points up to U+FFFF.
 	 */
 	public static CharStream fromStream(InputStream is, Charset charset) throws IOException {
 		return fromStream(is, charset, -1);
@ -152,11 +137,6 @@ public final class CharStreams {
 	 *
 	 * Reads the entire contents of the {@code channel} into
 	 * the result before returning, then closes the {@code channel}.
-	 *
-	 * For sources encoded in UTF-8, supports the full Unicode code point
-	 * range.
-	 *
-	 * For other sources, only supports Unicode code points up to U+FFFF.
 	 */
 	public static CharStream fromChannel(ReadableByteChannel channel, Charset charset) throws IOException {
 		return fromChannel(