Support Unicode escape sequences in JavaUnicodeInputStream with indirection (multiple 'u' characters) (fixes #287)
This commit is contained in:
parent
e575dc40e3
commit
82330a8a78
|
@ -44,6 +44,7 @@ public class JavaUnicodeInputStream implements CharStream {
|
|||
private final CharStream source;
|
||||
private final IntegerList escapeIndexes = new IntegerList();
|
||||
private final IntegerList escapeCharacters = new IntegerList();
|
||||
private final IntegerList escapeIndirectionLevels = new IntegerList();
|
||||
|
||||
private int escapeListIndex;
|
||||
private int range;
|
||||
|
@ -98,7 +99,8 @@ public class JavaUnicodeInputStream implements CharStream {
|
|||
slashCount++;
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < 6; i++) {
|
||||
int indirectionLevel = escapeIndirectionLevels.get(escapeListIndex);
|
||||
for (int i = 0; i < 6 + indirectionLevel; i++) {
|
||||
source.consume();
|
||||
}
|
||||
|
||||
|
@ -119,8 +121,8 @@ public class JavaUnicodeInputStream implements CharStream {
|
|||
if (i <= 0) {
|
||||
int desiredIndex = index() + i;
|
||||
for (int j = escapeListIndex - 1; j >= 0; j--) {
|
||||
if (escapeIndexes.get(j) + 6 > desiredIndex) {
|
||||
desiredIndex -= 5;
|
||||
if (escapeIndexes.get(j) + 6 + escapeIndirectionLevels.get(j) > desiredIndex) {
|
||||
desiredIndex -= 5 + escapeIndirectionLevels.get(j);
|
||||
}
|
||||
|
||||
if (escapeIndexes.get(j) == desiredIndex) {
|
||||
|
@ -137,7 +139,7 @@ public class JavaUnicodeInputStream implements CharStream {
|
|||
return escapeCharacters.get(j);
|
||||
}
|
||||
else if (escapeIndexes.get(j) < desiredIndex) {
|
||||
desiredIndex += 5;
|
||||
desiredIndex += 5 + escapeIndirectionLevels.get(j);
|
||||
}
|
||||
else {
|
||||
return source.LA(desiredIndex - index() + 1);
|
||||
|
@ -146,13 +148,15 @@ public class JavaUnicodeInputStream implements CharStream {
|
|||
|
||||
int[] currentIndex = { index() };
|
||||
int[] slashCountPtr = { slashCount };
|
||||
int[] indirectionLevelPtr = { 0 };
|
||||
for (int j = 0; j < i; j++) {
|
||||
int previousIndex = currentIndex[0];
|
||||
int c = readCharAt(currentIndex, slashCountPtr);
|
||||
int c = readCharAt(currentIndex, slashCountPtr, indirectionLevelPtr);
|
||||
if (currentIndex[0] > range) {
|
||||
if (currentIndex[0] - previousIndex > 1) {
|
||||
escapeIndexes.add(previousIndex);
|
||||
escapeCharacters.add(c);
|
||||
escapeIndirectionLevels.add(indirectionLevelPtr[0]);
|
||||
}
|
||||
|
||||
range = currentIndex[0];
|
||||
|
@ -219,9 +223,10 @@ public class JavaUnicodeInputStream implements CharStream {
|
|||
throw new IllegalArgumentException("c");
|
||||
}
|
||||
|
||||
private int readCharAt(int[] nextIndexPtr, int[] slashCountPtr) {
|
||||
private int readCharAt(int[] nextIndexPtr, int[] slashCountPtr, int[] indirectionLevelPtr) {
|
||||
assert nextIndexPtr != null && nextIndexPtr.length == 1;
|
||||
assert slashCountPtr != null && slashCountPtr.length == 1;
|
||||
assert indirectionLevelPtr != null && indirectionLevelPtr.length == 1;
|
||||
|
||||
boolean blockUnicodeEscape = (slashCountPtr[0] % 2) != 0;
|
||||
|
||||
|
@ -233,16 +238,22 @@ public class JavaUnicodeInputStream implements CharStream {
|
|||
int c1 = source.LA(nextIndexPtr[0] - index() + 2);
|
||||
if (c1 == 'u') {
|
||||
int c2 = source.LA(nextIndexPtr[0] - index() + 3);
|
||||
int c3 = source.LA(nextIndexPtr[0] - index() + 4);
|
||||
int c4 = source.LA(nextIndexPtr[0] - index() + 5);
|
||||
int c5 = source.LA(nextIndexPtr[0] - index() + 6);
|
||||
indirectionLevelPtr[0] = 0;
|
||||
while (c2 == 'u') {
|
||||
indirectionLevelPtr[0]++;
|
||||
c2 = source.LA(nextIndexPtr[0] - index() + 3 + indirectionLevelPtr[0]);
|
||||
}
|
||||
|
||||
int c3 = source.LA(nextIndexPtr[0] - index() + 4 + indirectionLevelPtr[0]);
|
||||
int c4 = source.LA(nextIndexPtr[0] - index() + 5 + indirectionLevelPtr[0]);
|
||||
int c5 = source.LA(nextIndexPtr[0] - index() + 6 + indirectionLevelPtr[0]);
|
||||
if (isHexDigit(c2) && isHexDigit(c3) && isHexDigit(c4) && isHexDigit(c5)) {
|
||||
int value = hexValue(c2);
|
||||
value = (value << 4) + hexValue(c3);
|
||||
value = (value << 4) + hexValue(c4);
|
||||
value = (value << 4) + hexValue(c5);
|
||||
|
||||
nextIndexPtr[0] += 6;
|
||||
nextIndexPtr[0] += 6 + indirectionLevelPtr[0];
|
||||
slashCountPtr[0] = 0;
|
||||
return value;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue