Change UnbufferedCharStream to use 32-bit Unicode code points and 32-bit buffer

This commit is contained in:
Ben Hamilton 2017-03-29 12:41:20 -07:00
parent 1f0db00cd0
commit 8108b342ad
2 changed files with 69 additions and 9 deletions

View File

@ -25,7 +25,7 @@ public class UnbufferedCharStream implements CharStream {
* we keep adding to buffer. Otherwise, {@link #consume consume()} resets so
* we start filling at index 0 again.
*/
protected char[] data;
protected int[] data;
/**
* The number of characters currently in {@link #data data}.
@ -82,7 +82,7 @@ public class UnbufferedCharStream implements CharStream {
/**
 * Useful for subclasses that pull chars from a source other than this.input.
 * Starts with an empty buffer ({@code n = 0}) whose initial capacity is
 * {@code bufferSize} slots.
 */
public UnbufferedCharStream(int bufferSize) {
n = 0;
// NOTE(review): pre-change line of this diff (16-bit char buffer); it is superseded by the next line.
data = new char[bufferSize];
// Post-change line: the buffer holds ints so each slot can carry a full 32-bit code point.
data = new int[bufferSize];
}
public UnbufferedCharStream(InputStream input) {
@ -145,13 +145,36 @@ public class UnbufferedCharStream implements CharStream {
*/
protected int fill(int n) {
for (int i=0; i<n; i++) {
if (this.n > 0 && data[this.n - 1] == (char)IntStream.EOF) {
if (this.n > 0 && data[this.n - 1] == IntStream.EOF) {
return i;
}
try {
int c = nextChar();
add(c);
if (c > Character.MAX_VALUE || c == IntStream.EOF) {
add(c);
} else {
char ch = (char) c;
if (Character.isLowSurrogate(ch)) {
throw new RuntimeException("Invalid UTF-16 (low surrogate with no preceding high surrogate)");
} else if (Character.isHighSurrogate(ch)) {
int lowSurrogate = nextChar();
if (lowSurrogate > Character.MAX_VALUE) {
throw new RuntimeException("Invalid UTF-16 (high surrogate followed by code point > U+FFFF");
} else if (lowSurrogate == IntStream.EOF) {
throw new RuntimeException("Invalid UTF-16 (dangling high surrogate at end of file)");
} else {
char lowSurrogateChar = (char) lowSurrogate;
if (Character.isLowSurrogate(lowSurrogateChar)) {
add(Character.toCodePoint(ch, lowSurrogateChar));
} else {
throw new RuntimeException("Invalid UTF-16 (dangling high surrogate");
}
}
} else {
add(c);
}
}
}
catch (IOException ioe) {
throw new RuntimeException(ioe);
@ -173,7 +196,7 @@ public class UnbufferedCharStream implements CharStream {
if ( n>=data.length ) {
data = Arrays.copyOf(data, data.length * 2);
}
data[n++] = (char)c;
data[n++] = c;
}
@Override
@ -183,8 +206,8 @@ public class UnbufferedCharStream implements CharStream {
int index = p + i - 1;
if ( index < 0 ) throw new IndexOutOfBoundsException();
if ( index >= n ) return IntStream.EOF;
char c = data[index];
if ( c==(char)IntStream.EOF ) return IntStream.EOF;
int c = data[index];
if ( c==IntStream.EOF ) return IntStream.EOF;
return c;
}

View File

@ -313,6 +313,30 @@ public class TestUnbufferedCharStream extends BaseJavaToolTest {
assertEquals(expecting, tokens.getTokens().toString());
}
/**
 * A surrogate pair in the input must be exposed by LA(1) as one code point
 * above U+FFFF; once it is consumed, LA(1) reports EOF and the test buffer
 * view shows the EOF marker as U+FFFF.
 */
@Test
public void testUnicodeSMP() throws Exception {
    final String surrogatePair = "\uD83C\uDF0E";
    final int expectedCodePoint = 0x1F30E;
    TestingUnbufferedCharStream stream = createStream(surrogatePair);

    // Before consuming: the pair is visible as a single 32-bit code point.
    assertEquals(expectedCodePoint, stream.LA(1));
    assertEquals(surrogatePair, stream.getBuffer());

    stream.consume();

    // After consuming: only the EOF marker remains in the buffer.
    assertEquals(IntStream.EOF, stream.LA(1));
    assertEquals("\uFFFF", stream.getBuffer());
}
/** Input that ends right after a high surrogate must make stream creation throw. */
@Test(expected = RuntimeException.class)
public void testDanglingHighSurrogateAtEOFThrows() throws Exception {
    final String loneHighSurrogate = "\uD83C";
    createStream(loneHighSurrogate);
}
/** A high surrogate followed by a non-surrogate char must make stream creation throw. */
@Test(expected = RuntimeException.class)
public void testDanglingHighSurrogateThrows() throws Exception {
    final String highSurrogateThenOrdinaryChar = "\uD83C\u0123";
    createStream(highSurrogateThenOrdinaryChar);
}
/** A low surrogate with no preceding high surrogate must make stream creation throw. */
@Test(expected = RuntimeException.class)
public void testDanglingLowSurrogateThrows() throws Exception {
    final String loneLowSurrogate = "\uDF0E";
    createStream(loneLowSurrogate);
}
/**
 * Builds a testing stream that reads its characters from {@code text}.
 *
 * @param text the input the stream will read
 * @return a new stream wrapping a {@link StringReader} over {@code text}
 */
protected static TestingUnbufferedCharStream createStream(String text) {
    StringReader source = new StringReader(text);
    return new TestingUnbufferedCharStream(source);
}
@ -336,7 +360,13 @@ public class TestUnbufferedCharStream extends BaseJavaToolTest {
*/
// Returns the buffered content from index p up to n as a String, or "" when the
// buffer is empty. A trailing IntStream.EOF slot is rendered as U+FFFF.
public String getRemainingBuffer() {
if ( n==0 ) return "";
// NOTE(review): pre-change line of this diff (char[] buffer); the lines below replace it.
return new String(data,p,n-p);
int len = n;
// The last slot may hold the EOF sentinel, which must not reach the String constructor.
if (data[len-1] == IntStream.EOF) {
// Don't pass -1 to new String().
return new String(data,p,len-p-1) + "\uFFFF";
} else {
return new String(data,p,len-p);
}
}
/** For testing. What's in the moving window buffer into the data stream.
@ -344,7 +374,14 @@ public class TestUnbufferedCharStream extends BaseJavaToolTest {
*/
// Returns the whole buffer (indices 0 up to n) as a String, or "" when the
// buffer is empty. A trailing IntStream.EOF slot is rendered as U+FFFF.
public String getBuffer() {
if ( n==0 ) return "";
// NOTE(review): pre-change line of this diff (char[] buffer); the lines below replace it.
return new String(data,0,n);
int len = n;
// The last slot may hold the EOF sentinel, which must not reach the String constructor.
if (data[len-1] == IntStream.EOF) {
// Don't pass -1 to new String().
return new String(data,0,len-1) + "\uFFFF";
} else {
return new String(data,0,len);
}
}
}