forked from jasder/antlr
Tidy comments and add tests to ensure UTF-16 and UTF-32 support Unicode code points > U+FFFF
This commit is contained in:
parent
9764ff7c50
commit
200379802a
|
@ -14,6 +14,7 @@ import java.io.Reader;
|
||||||
|
|
||||||
import java.nio.channels.SeekableByteChannel;
|
import java.nio.channels.SeekableByteChannel;
|
||||||
import java.nio.charset.CharacterCodingException;
|
import java.nio.charset.CharacterCodingException;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
import java.nio.charset.CodingErrorAction;
|
import java.nio.charset.CodingErrorAction;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
|
@ -207,4 +208,28 @@ public class TestCharStreams {
|
||||||
assertEquals("hello \uD83C\uDF0E", s.toString());
|
assertEquals("hello \uD83C\uDF0E", s.toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void fromSMPUTF16LEPathSMPHasExpectedSize() throws Exception {
|
||||||
|
Path p = folder.newFile().toPath();
|
||||||
|
Files.write(p, "hello \uD83C\uDF0E".getBytes(StandardCharsets.UTF_16LE));
|
||||||
|
CharStream s = CharStreams.fromPath(p, StandardCharsets.UTF_16LE);
|
||||||
|
assertEquals(7, s.size());
|
||||||
|
assertEquals(0, s.index());
|
||||||
|
assertEquals("hello \uD83C\uDF0E", s.toString());
|
||||||
|
assertEquals(p.toString(), s.getSourceName());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void fromSMPUTF32LEPathSMPHasExpectedSize() throws Exception {
|
||||||
|
Path p = folder.newFile().toPath();
|
||||||
|
// UTF-32 isn't popular enough to have an entry in StandardCharsets.
|
||||||
|
Charset c = Charset.forName("UTF-32LE");
|
||||||
|
Files.write(p, "hello \uD83C\uDF0E".getBytes(c));
|
||||||
|
CharStream s = CharStreams.fromPath(p, c);
|
||||||
|
assertEquals(7, s.size());
|
||||||
|
assertEquals(0, s.index());
|
||||||
|
assertEquals("hello \uD83C\uDF0E", s.toString());
|
||||||
|
assertEquals(p.toString(), s.getSourceName());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,11 +50,6 @@ public final class CharStreams {
|
||||||
* charset of the bytes contained in the file.
|
* charset of the bytes contained in the file.
|
||||||
*
|
*
|
||||||
* Reads the entire contents of the file into the result before returning.
|
* Reads the entire contents of the file into the result before returning.
|
||||||
*
|
|
||||||
* For sources encoded in UTF-8, supports the full Unicode code point
|
|
||||||
* range.
|
|
||||||
*
|
|
||||||
* For other sources, only supports Unicode code points up to U+FFFF.
|
|
||||||
*/
|
*/
|
||||||
public static CharStream fromPath(Path path, Charset charset) throws IOException {
|
public static CharStream fromPath(Path path, Charset charset) throws IOException {
|
||||||
long size = Files.size(path);
|
long size = Files.size(path);
|
||||||
|
@ -85,11 +80,6 @@ public final class CharStreams {
|
||||||
* contained in the file.
|
* contained in the file.
|
||||||
*
|
*
|
||||||
* Reads the entire contents of the file into the result before returning.
|
* Reads the entire contents of the file into the result before returning.
|
||||||
*
|
|
||||||
* For sources encoded in UTF-8, supports the full Unicode code point
|
|
||||||
* range.
|
|
||||||
*
|
|
||||||
* For other sources, only supports Unicode code points up to U+FFFF.
|
|
||||||
*/
|
*/
|
||||||
public static CharStream fromFileName(String fileName, Charset charset) throws IOException {
|
public static CharStream fromFileName(String fileName, Charset charset) throws IOException {
|
||||||
return fromPath(Paths.get(fileName), charset);
|
return fromPath(Paths.get(fileName), charset);
|
||||||
|
@ -113,11 +103,6 @@ public final class CharStreams {
|
||||||
*
|
*
|
||||||
* Reads the entire contents of the {@code InputStream} into
|
* Reads the entire contents of the {@code InputStream} into
|
||||||
* the result before returning, then closes the {@code InputStream}.
|
* the result before returning, then closes the {@code InputStream}.
|
||||||
*
|
|
||||||
* For sources encoded in UTF-8, supports the full Unicode code point
|
|
||||||
* range.
|
|
||||||
*
|
|
||||||
* For other sources, only supports Unicode code points up to U+FFFF.
|
|
||||||
*/
|
*/
|
||||||
public static CharStream fromStream(InputStream is, Charset charset) throws IOException {
|
public static CharStream fromStream(InputStream is, Charset charset) throws IOException {
|
||||||
return fromStream(is, charset, -1);
|
return fromStream(is, charset, -1);
|
||||||
|
@ -152,11 +137,6 @@ public final class CharStreams {
|
||||||
*
|
*
|
||||||
* Reads the entire contents of the {@code channel} into
|
* Reads the entire contents of the {@code channel} into
|
||||||
* the result before returning, then closes the {@code channel}.
|
* the result before returning, then closes the {@code channel}.
|
||||||
*
|
|
||||||
* For sources encoded in UTF-8, supports the full Unicode code point
|
|
||||||
* range.
|
|
||||||
*
|
|
||||||
* For other sources, only supports Unicode code points up to U+FFFF.
|
|
||||||
*/
|
*/
|
||||||
public static CharStream fromChannel(ReadableByteChannel channel, Charset charset) throws IOException {
|
public static CharStream fromChannel(ReadableByteChannel channel, Charset charset) throws IOException {
|
||||||
return fromChannel(
|
return fromChannel(
|
||||||
|
|
Loading…
Reference in New Issue