Fixes #1815. Add info on what the invalid escape is. Match \x for any x but report an error. This prevents \x from being treated as a sequence of 2 chars. Updated unit tests.
This commit is contained in:
parent
8af0080103
commit
80aa7907a4
|
@ -8,7 +8,6 @@ package org.antlr.v4.test.tool;
|
||||||
|
|
||||||
import org.antlr.v4.misc.EscapeSequenceParsing;
|
import org.antlr.v4.misc.EscapeSequenceParsing;
|
||||||
import org.antlr.v4.runtime.misc.IntervalSet;
|
import org.antlr.v4.runtime.misc.IntervalSet;
|
||||||
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import static org.antlr.v4.misc.EscapeSequenceParsing.Result;
|
import static org.antlr.v4.misc.EscapeSequenceParsing.Result;
|
||||||
|
@ -18,113 +17,113 @@ public class TestEscapeSequenceParsing {
|
||||||
@Test
|
@Test
|
||||||
public void testParseEmpty() {
|
public void testParseEmpty() {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
EscapeSequenceParsing.Result.INVALID,
|
EscapeSequenceParsing.Result.Type.INVALID,
|
||||||
EscapeSequenceParsing.parseEscape("", 0));
|
EscapeSequenceParsing.parseEscape("", 0).type);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseJustBackslash() {
|
public void testParseJustBackslash() {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
EscapeSequenceParsing.Result.INVALID,
|
EscapeSequenceParsing.Result.Type.INVALID,
|
||||||
EscapeSequenceParsing.parseEscape("\\", 0));
|
EscapeSequenceParsing.parseEscape("\\", 0).type);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseInvalidEscape() {
|
public void testParseInvalidEscape() {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
EscapeSequenceParsing.Result.INVALID,
|
EscapeSequenceParsing.Result.Type.INVALID,
|
||||||
EscapeSequenceParsing.parseEscape("\\z", 0));
|
EscapeSequenceParsing.parseEscape("\\z", 0).type);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseNewline() {
|
public void testParseNewline() {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
new Result(Result.Type.CODE_POINT, '\n', IntervalSet.EMPTY_SET, 2),
|
new Result(Result.Type.CODE_POINT, '\n', IntervalSet.EMPTY_SET, 0,2),
|
||||||
EscapeSequenceParsing.parseEscape("\\n", 0));
|
EscapeSequenceParsing.parseEscape("\\n", 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseTab() {
|
public void testParseTab() {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
new Result(Result.Type.CODE_POINT, '\t', IntervalSet.EMPTY_SET, 2),
|
new Result(Result.Type.CODE_POINT, '\t', IntervalSet.EMPTY_SET, 0,2),
|
||||||
EscapeSequenceParsing.parseEscape("\\t", 0));
|
EscapeSequenceParsing.parseEscape("\\t", 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseUnicodeTooShort() {
|
public void testParseUnicodeTooShort() {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
EscapeSequenceParsing.Result.INVALID,
|
EscapeSequenceParsing.Result.Type.INVALID,
|
||||||
EscapeSequenceParsing.parseEscape("\\uABC", 0));
|
EscapeSequenceParsing.parseEscape("\\uABC", 0).type);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseUnicodeBMP() {
|
public void testParseUnicodeBMP() {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
new Result(Result.Type.CODE_POINT, 0xABCD, IntervalSet.EMPTY_SET, 6),
|
new Result(Result.Type.CODE_POINT, 0xABCD, IntervalSet.EMPTY_SET, 0,6),
|
||||||
EscapeSequenceParsing.parseEscape("\\uABCD", 0));
|
EscapeSequenceParsing.parseEscape("\\uABCD", 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseUnicodeSMPTooShort() {
|
public void testParseUnicodeSMPTooShort() {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
EscapeSequenceParsing.Result.INVALID,
|
EscapeSequenceParsing.Result.Type.INVALID,
|
||||||
EscapeSequenceParsing.parseEscape("\\u{}", 0));
|
EscapeSequenceParsing.parseEscape("\\u{}", 0).type);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseUnicodeSMPMissingCloseBrace() {
|
public void testParseUnicodeSMPMissingCloseBrace() {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
EscapeSequenceParsing.Result.INVALID,
|
EscapeSequenceParsing.Result.Type.INVALID,
|
||||||
EscapeSequenceParsing.parseEscape("\\u{12345", 0));
|
EscapeSequenceParsing.parseEscape("\\u{12345", 0).type);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseUnicodeTooBig() {
|
public void testParseUnicodeTooBig() {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
EscapeSequenceParsing.Result.INVALID,
|
EscapeSequenceParsing.Result.Type.INVALID,
|
||||||
EscapeSequenceParsing.parseEscape("\\u{110000}", 0));
|
EscapeSequenceParsing.parseEscape("\\u{110000}", 0).type);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseUnicodeSMP() {
|
public void testParseUnicodeSMP() {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
new Result(Result.Type.CODE_POINT, 0x10ABCD, IntervalSet.EMPTY_SET, 10),
|
new Result(Result.Type.CODE_POINT, 0x10ABCD, IntervalSet.EMPTY_SET, 0,10),
|
||||||
EscapeSequenceParsing.parseEscape("\\u{10ABCD}", 0));
|
EscapeSequenceParsing.parseEscape("\\u{10ABCD}", 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseUnicodePropertyTooShort() {
|
public void testParseUnicodePropertyTooShort() {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
EscapeSequenceParsing.Result.INVALID,
|
EscapeSequenceParsing.Result.Type.INVALID,
|
||||||
EscapeSequenceParsing.parseEscape("\\p{}", 0));
|
EscapeSequenceParsing.parseEscape("\\p{}", 0).type);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseUnicodePropertyMissingCloseBrace() {
|
public void testParseUnicodePropertyMissingCloseBrace() {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
EscapeSequenceParsing.Result.INVALID,
|
EscapeSequenceParsing.Result.Type.INVALID,
|
||||||
EscapeSequenceParsing.parseEscape("\\p{1234", 0));
|
EscapeSequenceParsing.parseEscape("\\p{1234", 0).type);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseUnicodeProperty() {
|
public void testParseUnicodeProperty() {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
new Result(Result.Type.PROPERTY, -1, IntervalSet.of(66560, 66639), 11),
|
new Result(Result.Type.PROPERTY, -1, IntervalSet.of(66560, 66639), 0,11),
|
||||||
EscapeSequenceParsing.parseEscape("\\p{Deseret}", 0));
|
EscapeSequenceParsing.parseEscape("\\p{Deseret}", 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseUnicodePropertyInvertedTooShort() {
|
public void testParseUnicodePropertyInvertedTooShort() {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
EscapeSequenceParsing.Result.INVALID,
|
EscapeSequenceParsing.Result.Type.INVALID,
|
||||||
EscapeSequenceParsing.parseEscape("\\P{}", 0));
|
EscapeSequenceParsing.parseEscape("\\P{}", 0).type);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseUnicodePropertyInvertedMissingCloseBrace() {
|
public void testParseUnicodePropertyInvertedMissingCloseBrace() {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
EscapeSequenceParsing.Result.INVALID,
|
EscapeSequenceParsing.Result.Type.INVALID,
|
||||||
EscapeSequenceParsing.parseEscape("\\P{Deseret", 0));
|
EscapeSequenceParsing.parseEscape("\\P{Deseret", 0).type);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -132,7 +131,7 @@ public class TestEscapeSequenceParsing {
|
||||||
IntervalSet expected = IntervalSet.of(0, 66559);
|
IntervalSet expected = IntervalSet.of(0, 66559);
|
||||||
expected.add(66640, Character.MAX_CODE_POINT);
|
expected.add(66640, Character.MAX_CODE_POINT);
|
||||||
assertEquals(
|
assertEquals(
|
||||||
new Result(Result.Type.PROPERTY, -1, expected, 11),
|
new Result(Result.Type.PROPERTY, -1, expected, 0, 11),
|
||||||
EscapeSequenceParsing.parseEscape("\\P{Deseret}", 0));
|
EscapeSequenceParsing.parseEscape("\\P{Deseret}", 0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -462,9 +462,9 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest {
|
||||||
"lexer grammar A;\n" +
|
"lexer grammar A;\n" +
|
||||||
"RULE : 'Foo \\uAABG \\x \\u';\n";
|
"RULE : 'Foo \\uAABG \\x \\u';\n";
|
||||||
String expected =
|
String expected =
|
||||||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:12: invalid escape sequence\n" +
|
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:12: invalid escape sequence \\uAABG\n" +
|
||||||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:19: invalid escape sequence\n" +
|
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:19: invalid escape sequence \\x\n" +
|
||||||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:22: invalid escape sequence\n";
|
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:22: invalid escape sequence \\u\n";
|
||||||
|
|
||||||
String[] pair = new String[] {
|
String[] pair = new String[] {
|
||||||
grammar,
|
grammar,
|
||||||
|
@ -516,13 +516,13 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest {
|
||||||
"VALID_CHAR_SET: [`\\-=\\]];";
|
"VALID_CHAR_SET: [`\\-=\\]];";
|
||||||
|
|
||||||
String expected =
|
String expected =
|
||||||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:31: invalid escape sequence\n" +
|
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:31: invalid escape sequence \\\"\n" +
|
||||||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:38: invalid escape sequence\n" +
|
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:38: invalid escape sequence \\]\n" +
|
||||||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:45: invalid escape sequence\n" +
|
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:45: invalid escape sequence \\u24\n" +
|
||||||
"error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:3:30: multi-character literals are not allowed in lexer sets: 'GH'\n" +
|
"error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:3:30: multi-character literals are not allowed in lexer sets: 'GH'\n" +
|
||||||
"error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:3:36: multi-character literals are not allowed in lexer sets: 'LM'\n" +
|
"error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:3:36: multi-character literals are not allowed in lexer sets: 'LM'\n" +
|
||||||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:30: invalid escape sequence\n" +
|
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:30: invalid escape sequence \\u24\\u\n" +
|
||||||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:40: invalid escape sequence\n" +
|
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:40: invalid escape sequence \\{\n" +
|
||||||
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:33: string literals and sets cannot be empty: 'F'..'A'\n" +
|
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:33: string literals and sets cannot be empty: 'F'..'A'\n" +
|
||||||
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: 'f'..'a'\n" +
|
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: 'f'..'a'\n" +
|
||||||
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n";
|
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n";
|
||||||
|
@ -552,14 +552,14 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest {
|
||||||
"INVERTED_UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\P{Uppercase_Letter}-\\P{Number}];\n";
|
"INVERTED_UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\P{Uppercase_Letter}-\\P{Number}];\n";
|
||||||
|
|
||||||
String expected =
|
String expected =
|
||||||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:32: invalid escape sequence\n" +
|
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:32: invalid escape sequence \\u{}\n"+
|
||||||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:3:41: invalid escape sequence\n" +
|
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:3:41: invalid escape sequence \\u{\n"+
|
||||||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:35: invalid escape sequence\n" +
|
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:35: invalid escape sequence \\u{110\n"+
|
||||||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:5:32: invalid escape sequence\n" +
|
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:5:32: invalid escape sequence \\p{}\n"+
|
||||||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:6:41: invalid escape sequence\n" +
|
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:6:41: invalid escape sequence \\p{\n"+
|
||||||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:7:41: invalid escape sequence\n" +
|
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:7:41: invalid escape sequence \\P{}\n"+
|
||||||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:8:34: invalid escape sequence\n" +
|
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:8:34: invalid escape sequence \\p{NotAProperty}\n"+
|
||||||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:9:43: invalid escape sequence\n" +
|
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:9:43: invalid escape sequence \\P{NotAProperty}\n"+
|
||||||
"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:10:39: unicode property escapes not allowed in lexer charset range: [\\p{Uppercase_Letter}-\\p{Lowercase_Letter}]\n" +
|
"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:10:39: unicode property escapes not allowed in lexer charset range: [\\p{Uppercase_Letter}-\\p{Lowercase_Letter}]\n" +
|
||||||
"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:11:41: unicode property escapes not allowed in lexer charset range: [\\p{Letter}-Z]\n" +
|
"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:11:41: unicode property escapes not allowed in lexer charset range: [\\p{Letter}-Z]\n" +
|
||||||
"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:12:41: unicode property escapes not allowed in lexer charset range: [A-\\p{Number}]\n" +
|
"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:12:41: unicode property escapes not allowed in lexer charset range: [A-\\p{Number}]\n" +
|
||||||
|
|
|
@ -349,7 +349,7 @@ public class LexerATNFactory extends ParserATNFactory {
|
||||||
chars = CharSupport.getStringFromGrammarStringLiteral(chars);
|
chars = CharSupport.getStringFromGrammarStringLiteral(chars);
|
||||||
if (chars == null) {
|
if (chars == null) {
|
||||||
g.tool.errMgr.grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE,
|
g.tool.errMgr.grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE,
|
||||||
g.fileName, stringLiteralAST.getToken());
|
g.fileName, stringLiteralAST.getToken(), chars);
|
||||||
return new Handle(left, left);
|
return new Handle(left, left);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -462,8 +462,10 @@ public class LexerATNFactory extends ParserATNFactory {
|
||||||
EscapeSequenceParsing.parseEscape(chars, i);
|
EscapeSequenceParsing.parseEscape(chars, i);
|
||||||
switch (escapeParseResult.type) {
|
switch (escapeParseResult.type) {
|
||||||
case INVALID:
|
case INVALID:
|
||||||
|
String invalid = chars.substring(escapeParseResult.startOffset,
|
||||||
|
escapeParseResult.startOffset+escapeParseResult.parseLength);
|
||||||
g.tool.errMgr.grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE,
|
g.tool.errMgr.grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE,
|
||||||
g.fileName, charSetAST.getToken(), charSetAST.getText());
|
g.fileName, charSetAST.getToken(), invalid);
|
||||||
state = CharSetParseState.ERROR;
|
state = CharSetParseState.ERROR;
|
||||||
break;
|
break;
|
||||||
case CODE_POINT:
|
case CODE_POINT:
|
||||||
|
|
|
@ -29,17 +29,17 @@ public abstract class EscapeSequenceParsing {
|
||||||
PROPERTY
|
PROPERTY
|
||||||
};
|
};
|
||||||
|
|
||||||
public static Result INVALID = new Result(Type.INVALID, -1, IntervalSet.EMPTY_SET, -1);
|
|
||||||
|
|
||||||
public final Type type;
|
public final Type type;
|
||||||
public final int codePoint;
|
public final int codePoint;
|
||||||
public final IntervalSet propertyIntervalSet;
|
public final IntervalSet propertyIntervalSet;
|
||||||
|
public final int startOffset;
|
||||||
public final int parseLength;
|
public final int parseLength;
|
||||||
|
|
||||||
public Result(Type type, int codePoint, IntervalSet propertyIntervalSet, int parseLength) {
|
public Result(Type type, int codePoint, IntervalSet propertyIntervalSet, int startOffset, int parseLength) {
|
||||||
this.type = type;
|
this.type = type;
|
||||||
this.codePoint = codePoint;
|
this.codePoint = codePoint;
|
||||||
this.propertyIntervalSet = propertyIntervalSet;
|
this.propertyIntervalSet = propertyIntervalSet;
|
||||||
|
this.startOffset = startOffset;
|
||||||
this.parseLength = parseLength;
|
this.parseLength = parseLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -78,12 +78,12 @@ public abstract class EscapeSequenceParsing {
|
||||||
/**
|
/**
|
||||||
* Parses a single escape sequence starting at {@code startOff}.
|
* Parses a single escape sequence starting at {@code startOff}.
|
||||||
*
|
*
|
||||||
* Returns {@link Result#INVALID} if no valid escape sequence was found, a Result otherwise.
|
* Returns a Result whose type is INVALID if no valid escape sequence was found; otherwise a Result describing the parsed escape.
|
||||||
*/
|
*/
|
||||||
public static Result parseEscape(String s, int startOff) {
|
public static Result parseEscape(String s, int startOff) {
|
||||||
int offset = startOff;
|
int offset = startOff;
|
||||||
if (offset + 2 > s.length() || s.codePointAt(offset) != '\\') {
|
if (offset + 2 > s.length() || s.codePointAt(offset) != '\\') {
|
||||||
return Result.INVALID;
|
return invalid(startOff, s.length()-1);
|
||||||
}
|
}
|
||||||
// Move past backslash
|
// Move past backslash
|
||||||
offset++;
|
offset++;
|
||||||
|
@ -93,21 +93,21 @@ public abstract class EscapeSequenceParsing {
|
||||||
if (escaped == 'u') {
|
if (escaped == 'u') {
|
||||||
// \\u{1} is the shortest we support
|
// \\u{1} is the shortest we support
|
||||||
if (offset + 3 > s.length()) {
|
if (offset + 3 > s.length()) {
|
||||||
return Result.INVALID;
|
return invalid(startOff, s.length()-1);
|
||||||
}
|
}
|
||||||
int hexStartOffset;
|
int hexStartOffset;
|
||||||
int hexEndOffset;
|
int hexEndOffset; // appears to be exclusive
|
||||||
if (s.codePointAt(offset) == '{') {
|
if (s.codePointAt(offset) == '{') {
|
||||||
hexStartOffset = offset + 1;
|
hexStartOffset = offset + 1;
|
||||||
hexEndOffset = s.indexOf('}', hexStartOffset);
|
hexEndOffset = s.indexOf('}', hexStartOffset);
|
||||||
if (hexEndOffset == -1) {
|
if (hexEndOffset == -1) {
|
||||||
return Result.INVALID;
|
return invalid(startOff, s.length()-1);
|
||||||
}
|
}
|
||||||
offset = hexEndOffset + 1;
|
offset = hexEndOffset + 1;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (offset + 4 > s.length()) {
|
if (offset + 4 > s.length()) {
|
||||||
return Result.INVALID;
|
return invalid(startOff, s.length()-1);
|
||||||
}
|
}
|
||||||
hexStartOffset = offset;
|
hexStartOffset = offset;
|
||||||
hexEndOffset = offset + 4;
|
hexEndOffset = offset + 4;
|
||||||
|
@ -115,28 +115,32 @@ public abstract class EscapeSequenceParsing {
|
||||||
}
|
}
|
||||||
int codePointValue = CharSupport.parseHexValue(s, hexStartOffset, hexEndOffset);
|
int codePointValue = CharSupport.parseHexValue(s, hexStartOffset, hexEndOffset);
|
||||||
if (codePointValue == -1 || codePointValue > Character.MAX_CODE_POINT) {
|
if (codePointValue == -1 || codePointValue > Character.MAX_CODE_POINT) {
|
||||||
return Result.INVALID;
|
return invalid(startOff, startOff+6-1);
|
||||||
}
|
}
|
||||||
return new Result(
|
return new Result(
|
||||||
Result.Type.CODE_POINT,
|
Result.Type.CODE_POINT,
|
||||||
codePointValue,
|
codePointValue,
|
||||||
IntervalSet.EMPTY_SET,
|
IntervalSet.EMPTY_SET,
|
||||||
|
startOff,
|
||||||
offset - startOff);
|
offset - startOff);
|
||||||
}
|
}
|
||||||
else if (escaped == 'p' || escaped == 'P') {
|
else if (escaped == 'p' || escaped == 'P') {
|
||||||
// \p{L} is the shortest we support
|
// \p{L} is the shortest we support
|
||||||
if (offset + 3 > s.length() || s.codePointAt(offset) != '{') {
|
if (offset + 3 > s.length()) {
|
||||||
return Result.INVALID;
|
return invalid(startOff, s.length()-1);
|
||||||
|
}
|
||||||
|
if (s.codePointAt(offset) != '{') {
|
||||||
|
return invalid(startOff, offset);
|
||||||
}
|
}
|
||||||
int openBraceOffset = offset;
|
int openBraceOffset = offset;
|
||||||
int closeBraceOffset = s.indexOf('}', openBraceOffset);
|
int closeBraceOffset = s.indexOf('}', openBraceOffset);
|
||||||
if (closeBraceOffset == -1) {
|
if (closeBraceOffset == -1) {
|
||||||
return Result.INVALID;
|
return invalid(startOff, s.length()-1);
|
||||||
}
|
}
|
||||||
String propertyName = s.substring(openBraceOffset + 1, closeBraceOffset);
|
String propertyName = s.substring(openBraceOffset + 1, closeBraceOffset);
|
||||||
IntervalSet propertyIntervalSet = UnicodeData.getPropertyCodePoints(propertyName);
|
IntervalSet propertyIntervalSet = UnicodeData.getPropertyCodePoints(propertyName);
|
||||||
if (propertyIntervalSet == null) {
|
if (propertyIntervalSet == null) {
|
||||||
return Result.INVALID;
|
return invalid(startOff, closeBraceOffset);
|
||||||
}
|
}
|
||||||
offset = closeBraceOffset + 1;
|
offset = closeBraceOffset + 1;
|
||||||
if (escaped == 'P') {
|
if (escaped == 'P') {
|
||||||
|
@ -146,13 +150,14 @@ public abstract class EscapeSequenceParsing {
|
||||||
Result.Type.PROPERTY,
|
Result.Type.PROPERTY,
|
||||||
-1,
|
-1,
|
||||||
propertyIntervalSet,
|
propertyIntervalSet,
|
||||||
|
startOff,
|
||||||
offset - startOff);
|
offset - startOff);
|
||||||
}
|
}
|
||||||
else if (escaped < CharSupport.ANTLRLiteralEscapedCharValue.length) {
|
else if (escaped < CharSupport.ANTLRLiteralEscapedCharValue.length) {
|
||||||
int codePoint = CharSupport.ANTLRLiteralEscapedCharValue[escaped];
|
int codePoint = CharSupport.ANTLRLiteralEscapedCharValue[escaped];
|
||||||
if (codePoint == 0) {
|
if (codePoint == 0) {
|
||||||
if (escaped != ']' && escaped != '-') { // escape ']' and '-' only in char sets.
|
if (escaped != ']' && escaped != '-') { // escape ']' and '-' only in char sets.
|
||||||
return Result.INVALID;
|
return invalid(startOff, startOff+1);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
codePoint = escaped;
|
codePoint = escaped;
|
||||||
|
@ -162,10 +167,20 @@ public abstract class EscapeSequenceParsing {
|
||||||
Result.Type.CODE_POINT,
|
Result.Type.CODE_POINT,
|
||||||
codePoint,
|
codePoint,
|
||||||
IntervalSet.EMPTY_SET,
|
IntervalSet.EMPTY_SET,
|
||||||
|
startOff,
|
||||||
offset - startOff);
|
offset - startOff);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return Result.INVALID;
|
return invalid(startOff,s.length()-1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static Result invalid(int start, int stop) { // start..stop is inclusive
|
||||||
|
return new Result(
|
||||||
|
Result.Type.INVALID,
|
||||||
|
0,
|
||||||
|
IntervalSet.EMPTY_SET,
|
||||||
|
start,
|
||||||
|
stop - start + 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -146,6 +146,7 @@ tokens { SEMPRED; TOKEN_REF; RULE_REF; LEXER_CHAR_SET; ARG_ACTION; }
|
||||||
*/
|
*/
|
||||||
package org.antlr.v4.parse;
|
package org.antlr.v4.parse;
|
||||||
import org.antlr.v4.tool.*;
|
import org.antlr.v4.tool.*;
|
||||||
|
import org.antlr.v4.runtime.misc.Interval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -643,27 +644,23 @@ fragment
|
||||||
ESC_SEQ
|
ESC_SEQ
|
||||||
: '\\'
|
: '\\'
|
||||||
(
|
(
|
||||||
// The standard escaped character set such as tab, newline,
|
// The standard escaped character set such as tab, newline, etc...
|
||||||
// etc.
|
|
||||||
//
|
|
||||||
'b'|'t'|'n'|'f'|'r'|'\''|'\\'
|
'b'|'t'|'n'|'f'|'r'|'\''|'\\'
|
||||||
|
|
||||||
| // A Java style Unicode escape sequence
|
| // A Java style Unicode escape sequence
|
||||||
//
|
|
||||||
UNICODE_ESC
|
UNICODE_ESC
|
||||||
|
|
||||||
| // A Swift/Hack style Unicode escape sequence
|
| // A Swift/Hack style Unicode escape sequence
|
||||||
//
|
|
||||||
UNICODE_EXTENDED_ESC
|
UNICODE_EXTENDED_ESC
|
||||||
|
|
||||||
| // An illegal escape sequence
|
| // An illegal escape sequence
|
||||||
//
|
~('b'|'t'|'n'|'f'|'r'|'\''|'\\'|'u') // \x for any invalid x (make sure to match char here)
|
||||||
{
|
{
|
||||||
Token t = new CommonToken(input, state.type, state.channel, getCharIndex()-1, getCharIndex());
|
Token t = new CommonToken(input, state.type, state.channel, getCharIndex()-2, getCharIndex()-1);
|
||||||
t.setText(t.getText());
|
t.setText(t.getText());
|
||||||
t.setLine(input.getLine());
|
t.setLine(input.getLine());
|
||||||
t.setCharPositionInLine(input.getCharPositionInLine()-1);
|
t.setCharPositionInLine(input.getCharPositionInLine()-2);
|
||||||
grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t);
|
grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t, input.substring(getCharIndex()-2,getCharIndex()-1));
|
||||||
if ( state.text==null ) {
|
if ( state.text==null ) {
|
||||||
setText(input.substring(state.tokenStartCharIndex, getCharIndex()-2));
|
setText(input.substring(state.tokenStartCharIndex, getCharIndex()-2));
|
||||||
}
|
}
|
||||||
|
@ -673,7 +670,6 @@ ESC_SEQ
|
||||||
|
|
||||||
fragment
|
fragment
|
||||||
UNICODE_ESC
|
UNICODE_ESC
|
||||||
|
|
||||||
@init {
|
@init {
|
||||||
|
|
||||||
// Flag to tell us whether we have a valid number of
|
// Flag to tell us whether we have a valid number of
|
||||||
|
@ -717,14 +713,19 @@ UNICODE_ESC
|
||||||
// Now check the digit count and issue an error if we need to
|
// Now check the digit count and issue an error if we need to
|
||||||
//
|
//
|
||||||
{
|
{
|
||||||
if (hCount != 4) {
|
if (hCount < 4) {
|
||||||
Token t = new CommonToken(input, state.type, state.channel, getCharIndex()-3-hCount, getCharIndex()-1);
|
Interval badRange = Interval.of(getCharIndex()-2-hCount, getCharIndex());
|
||||||
t.setText(t.getText());
|
String lastChar = input.substring(badRange.b, badRange.b);
|
||||||
t.setLine(input.getLine());
|
if ( lastChar.codePointAt(0)=='\'' ) {
|
||||||
t.setCharPositionInLine(input.getCharPositionInLine()-hCount-2);
|
badRange.b--;
|
||||||
grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t);
|
}
|
||||||
|
String bad = input.substring(badRange.a, badRange.b);
|
||||||
|
Token t = new CommonToken(input, state.type, state.channel, badRange.a, badRange.b);
|
||||||
|
t.setLine(input.getLine());
|
||||||
|
t.setCharPositionInLine(input.getCharPositionInLine()-hCount-2);
|
||||||
|
grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t, bad);
|
||||||
if ( state.text==null ) {
|
if ( state.text==null ) {
|
||||||
setText(input.substring(state.tokenStartCharIndex, getCharIndex()-hCount-3));
|
setText(bad);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -746,7 +747,7 @@ UNICODE_EXTENDED_ESC
|
||||||
t.setText(t.getText());
|
t.setText(t.getText());
|
||||||
t.setLine(input.getLine());
|
t.setLine(input.getLine());
|
||||||
t.setCharPositionInLine(input.getCharPositionInLine()-numDigits);
|
t.setCharPositionInLine(input.getCharPositionInLine()-numDigits);
|
||||||
grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t);
|
grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t, input.substring(state.tokenStartCharIndex,getCharIndex()-1));
|
||||||
if ( state.text==null ) {
|
if ( state.text==null ) {
|
||||||
setText(input.substring(state.tokenStartCharIndex, getCharIndex()-numDigits-3));
|
setText(input.substring(state.tokenStartCharIndex, getCharIndex()-numDigits-3));
|
||||||
}
|
}
|
||||||
|
|
|
@ -824,7 +824,7 @@ public enum ErrorType {
|
||||||
*
|
*
|
||||||
* @since 4.2.1
|
* @since 4.2.1
|
||||||
*/
|
*/
|
||||||
INVALID_ESCAPE_SEQUENCE(156, "invalid escape sequence", ErrorSeverity.WARNING),
|
INVALID_ESCAPE_SEQUENCE(156, "invalid escape sequence <arg>", ErrorSeverity.WARNING),
|
||||||
/**
|
/**
|
||||||
* Compiler Warning 157.
|
* Compiler Warning 157.
|
||||||
*
|
*
|
||||||
|
|
|
@ -43,7 +43,7 @@ public abstract class GrammarASTWithOptions extends GrammarAST {
|
||||||
if ( v.startsWith("'") || v.startsWith("\"") ) {
|
if ( v.startsWith("'") || v.startsWith("\"") ) {
|
||||||
v = CharSupport.getStringFromGrammarStringLiteral(v);
|
v = CharSupport.getStringFromGrammarStringLiteral(v);
|
||||||
if (v == null) {
|
if (v == null) {
|
||||||
g.tool.errMgr.grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, g.fileName, value.getToken());
|
g.tool.errMgr.grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, g.fileName, value.getToken(), value.getText());
|
||||||
v = "";
|
v = "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue