Support Unicode escape sequences in JavaUnicodeInputStream with indirection (multiple 'u' characters) (fixes #287)

This commit is contained in:
Sam Harwell 2013-06-24 11:44:57 -05:00
parent e575dc40e3
commit 82330a8a78
1 changed file with 21 additions and 10 deletions

View File

@ -44,6 +44,7 @@ public class JavaUnicodeInputStream implements CharStream {
private final CharStream source;
private final IntegerList escapeIndexes = new IntegerList();
private final IntegerList escapeCharacters = new IntegerList();
private final IntegerList escapeIndirectionLevels = new IntegerList();
private int escapeListIndex;
private int range;
@ -98,7 +99,8 @@ public class JavaUnicodeInputStream implements CharStream {
slashCount++;
}
else {
for (int i = 0; i < 6; i++) {
int indirectionLevel = escapeIndirectionLevels.get(escapeListIndex);
for (int i = 0; i < 6 + indirectionLevel; i++) {
source.consume();
}
@ -119,8 +121,8 @@ public class JavaUnicodeInputStream implements CharStream {
if (i <= 0) {
int desiredIndex = index() + i;
for (int j = escapeListIndex - 1; j >= 0; j--) {
if (escapeIndexes.get(j) + 6 > desiredIndex) {
desiredIndex -= 5;
if (escapeIndexes.get(j) + 6 + escapeIndirectionLevels.get(j) > desiredIndex) {
desiredIndex -= 5 + escapeIndirectionLevels.get(j);
}
if (escapeIndexes.get(j) == desiredIndex) {
@ -137,7 +139,7 @@ public class JavaUnicodeInputStream implements CharStream {
return escapeCharacters.get(j);
}
else if (escapeIndexes.get(j) < desiredIndex) {
desiredIndex += 5;
desiredIndex += 5 + escapeIndirectionLevels.get(j);
}
else {
return source.LA(desiredIndex - index() + 1);
@ -146,13 +148,15 @@ public class JavaUnicodeInputStream implements CharStream {
int[] currentIndex = { index() };
int[] slashCountPtr = { slashCount };
int[] indirectionLevelPtr = { 0 };
for (int j = 0; j < i; j++) {
int previousIndex = currentIndex[0];
int c = readCharAt(currentIndex, slashCountPtr);
int c = readCharAt(currentIndex, slashCountPtr, indirectionLevelPtr);
if (currentIndex[0] > range) {
if (currentIndex[0] - previousIndex > 1) {
escapeIndexes.add(previousIndex);
escapeCharacters.add(c);
escapeIndirectionLevels.add(indirectionLevelPtr[0]);
}
range = currentIndex[0];
@ -219,9 +223,10 @@ public class JavaUnicodeInputStream implements CharStream {
throw new IllegalArgumentException("c");
}
private int readCharAt(int[] nextIndexPtr, int[] slashCountPtr) {
private int readCharAt(int[] nextIndexPtr, int[] slashCountPtr, int[] indirectionLevelPtr) {
assert nextIndexPtr != null && nextIndexPtr.length == 1;
assert slashCountPtr != null && slashCountPtr.length == 1;
assert indirectionLevelPtr != null && indirectionLevelPtr.length == 1;
boolean blockUnicodeEscape = (slashCountPtr[0] % 2) != 0;
@ -233,16 +238,22 @@ public class JavaUnicodeInputStream implements CharStream {
int c1 = source.LA(nextIndexPtr[0] - index() + 2);
if (c1 == 'u') {
int c2 = source.LA(nextIndexPtr[0] - index() + 3);
int c3 = source.LA(nextIndexPtr[0] - index() + 4);
int c4 = source.LA(nextIndexPtr[0] - index() + 5);
int c5 = source.LA(nextIndexPtr[0] - index() + 6);
indirectionLevelPtr[0] = 0;
while (c2 == 'u') {
indirectionLevelPtr[0]++;
c2 = source.LA(nextIndexPtr[0] - index() + 3 + indirectionLevelPtr[0]);
}
int c3 = source.LA(nextIndexPtr[0] - index() + 4 + indirectionLevelPtr[0]);
int c4 = source.LA(nextIndexPtr[0] - index() + 5 + indirectionLevelPtr[0]);
int c5 = source.LA(nextIndexPtr[0] - index() + 6 + indirectionLevelPtr[0]);
if (isHexDigit(c2) && isHexDigit(c3) && isHexDigit(c4) && isHexDigit(c5)) {
int value = hexValue(c2);
value = (value << 4) + hexValue(c3);
value = (value << 4) + hexValue(c4);
value = (value << 4) + hexValue(c5);
nextIndexPtr[0] += 6;
nextIndexPtr[0] += 6 + indirectionLevelPtr[0];
slashCountPtr[0] = 0;
return value;
}