Tidy comments and add tests to ensure UTF-16 and UTF-32 support Unicode code points > U+FFFF

This commit is contained in:
Ben Hamilton 2017-03-29 10:06:08 -07:00
parent 9764ff7c50
commit 200379802a
2 changed files with 25 additions and 20 deletions

View File

@ -14,6 +14,7 @@ import java.io.Reader;
import java.nio.channels.SeekableByteChannel; import java.nio.channels.SeekableByteChannel;
import java.nio.charset.CharacterCodingException; import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CodingErrorAction; import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.nio.file.Files; import java.nio.file.Files;
@ -207,4 +208,28 @@ public class TestCharStreams {
assertEquals("hello \uD83C\uDF0E", s.toString()); assertEquals("hello \uD83C\uDF0E", s.toString());
} }
} }
/**
 * Writes a string ending in a supplementary-plane code point (U+1F30E,
 * stored in Java as a surrogate pair) to a temporary file as UTF-16LE and
 * checks that {@code CharStreams.fromPath} reads it back intact.
 *
 * The expected size is 7: six characters for "hello " plus one for the
 * supplementary code point, even though the Java string holds eight
 * UTF-16 code units.
 */
@Test
public void fromSMPUTF16LEPathSMPHasExpectedSize() throws Exception {
	final String text = "hello \uD83C\uDF0E";
	final Path file = folder.newFile().toPath();
	Files.write(file, text.getBytes(StandardCharsets.UTF_16LE));

	final CharStream stream = CharStreams.fromPath(file, StandardCharsets.UTF_16LE);

	assertEquals(7, stream.size());
	// A freshly created stream starts at the beginning of the input.
	assertEquals(0, stream.index());
	assertEquals(text, stream.toString());
	// The source name is expected to be the path the stream was read from.
	assertEquals(file.toString(), stream.getSourceName());
}
/**
 * Writes a string ending in a supplementary-plane code point (U+1F30E,
 * stored in Java as a surrogate pair) to a temporary file as UTF-32LE and
 * checks that {@code CharStreams.fromPath} reads it back intact.
 *
 * The expected size is 7: six characters for "hello " plus one for the
 * supplementary code point.
 */
@Test
public void fromSMPUTF32LEPathSMPHasExpectedSize() throws Exception {
	final String text = "hello \uD83C\uDF0E";
	final Path file = folder.newFile().toPath();
	// UTF-32LE is obtained by name rather than from StandardCharsets,
	// which this test does not rely on for UTF-32.
	final Charset utf32le = Charset.forName("UTF-32LE");
	Files.write(file, text.getBytes(utf32le));

	final CharStream stream = CharStreams.fromPath(file, utf32le);

	assertEquals(7, stream.size());
	// A freshly created stream starts at the beginning of the input.
	assertEquals(0, stream.index());
	assertEquals(text, stream.toString());
	// The source name is expected to be the path the stream was read from.
	assertEquals(file.toString(), stream.getSourceName());
}
} }

View File

@ -50,11 +50,6 @@ public final class CharStreams {
* charset of the bytes contained in the file. * charset of the bytes contained in the file.
* *
* Reads the entire contents of the file into the result before returning. * Reads the entire contents of the file into the result before returning.
*
* For sources encoded in UTF-8, supports the full Unicode code point
* range.
*
* For other sources, only supports Unicode code points up to U+FFFF.
*/ */
public static CharStream fromPath(Path path, Charset charset) throws IOException { public static CharStream fromPath(Path path, Charset charset) throws IOException {
long size = Files.size(path); long size = Files.size(path);
@ -85,11 +80,6 @@ public final class CharStreams {
* contained in the file. * contained in the file.
* *
* Reads the entire contents of the file into the result before returning. * Reads the entire contents of the file into the result before returning.
*
* For sources encoded in UTF-8, supports the full Unicode code point
* range.
*
* For other sources, only supports Unicode code points up to U+FFFF.
*/ */
public static CharStream fromFileName(String fileName, Charset charset) throws IOException { public static CharStream fromFileName(String fileName, Charset charset) throws IOException {
return fromPath(Paths.get(fileName), charset); return fromPath(Paths.get(fileName), charset);
@ -113,11 +103,6 @@ public final class CharStreams {
* *
* Reads the entire contents of the {@code InputStream} into * Reads the entire contents of the {@code InputStream} into
* the result before returning, then closes the {@code InputStream}. * the result before returning, then closes the {@code InputStream}.
*
* For sources encoded in UTF-8, supports the full Unicode code point
* range.
*
* For other sources, only supports Unicode code points up to U+FFFF.
*/ */
public static CharStream fromStream(InputStream is, Charset charset) throws IOException { public static CharStream fromStream(InputStream is, Charset charset) throws IOException {
return fromStream(is, charset, -1); return fromStream(is, charset, -1);
@ -152,11 +137,6 @@ public final class CharStreams {
* *
* Reads the entire contents of the {@code channel} into * Reads the entire contents of the {@code channel} into
* the result before returning, then closes the {@code channel}. * the result before returning, then closes the {@code channel}.
*
* For sources encoded in UTF-8, supports the full Unicode code point
* range.
*
* For other sources, only supports Unicode code points up to U+FFFF.
*/ */
public static CharStream fromChannel(ReadableByteChannel channel, Charset charset) throws IOException { public static CharStream fromChannel(ReadableByteChannel channel, Charset charset) throws IOException {
return fromChannel( return fromChannel(