Fixes #1815. Add info on what the invalid escape is. Match \x for any x but give an error. This prevents \x from appearing as a sequence of 2 chars. Updated unit tests.

This commit is contained in:
parrt 2017-04-06 10:26:03 -07:00
parent 8af0080103
commit 80aa7907a4
7 changed files with 100 additions and 83 deletions

View File

@ -8,7 +8,6 @@ package org.antlr.v4.test.tool;
import org.antlr.v4.misc.EscapeSequenceParsing; import org.antlr.v4.misc.EscapeSequenceParsing;
import org.antlr.v4.runtime.misc.IntervalSet; import org.antlr.v4.runtime.misc.IntervalSet;
import org.junit.Test; import org.junit.Test;
import static org.antlr.v4.misc.EscapeSequenceParsing.Result; import static org.antlr.v4.misc.EscapeSequenceParsing.Result;
@ -18,113 +17,113 @@ public class TestEscapeSequenceParsing {
@Test @Test
public void testParseEmpty() { public void testParseEmpty() {
assertEquals( assertEquals(
EscapeSequenceParsing.Result.INVALID, EscapeSequenceParsing.Result.Type.INVALID,
EscapeSequenceParsing.parseEscape("", 0)); EscapeSequenceParsing.parseEscape("", 0).type);
} }
@Test @Test
public void testParseJustBackslash() { public void testParseJustBackslash() {
assertEquals( assertEquals(
EscapeSequenceParsing.Result.INVALID, EscapeSequenceParsing.Result.Type.INVALID,
EscapeSequenceParsing.parseEscape("\\", 0)); EscapeSequenceParsing.parseEscape("\\", 0).type);
} }
@Test @Test
public void testParseInvalidEscape() { public void testParseInvalidEscape() {
assertEquals( assertEquals(
EscapeSequenceParsing.Result.INVALID, EscapeSequenceParsing.Result.Type.INVALID,
EscapeSequenceParsing.parseEscape("\\z", 0)); EscapeSequenceParsing.parseEscape("\\z", 0).type);
} }
@Test @Test
public void testParseNewline() { public void testParseNewline() {
assertEquals( assertEquals(
new Result(Result.Type.CODE_POINT, '\n', IntervalSet.EMPTY_SET, 2), new Result(Result.Type.CODE_POINT, '\n', IntervalSet.EMPTY_SET, 0,2),
EscapeSequenceParsing.parseEscape("\\n", 0)); EscapeSequenceParsing.parseEscape("\\n", 0));
} }
@Test @Test
public void testParseTab() { public void testParseTab() {
assertEquals( assertEquals(
new Result(Result.Type.CODE_POINT, '\t', IntervalSet.EMPTY_SET, 2), new Result(Result.Type.CODE_POINT, '\t', IntervalSet.EMPTY_SET, 0,2),
EscapeSequenceParsing.parseEscape("\\t", 0)); EscapeSequenceParsing.parseEscape("\\t", 0));
} }
@Test @Test
public void testParseUnicodeTooShort() { public void testParseUnicodeTooShort() {
assertEquals( assertEquals(
EscapeSequenceParsing.Result.INVALID, EscapeSequenceParsing.Result.Type.INVALID,
EscapeSequenceParsing.parseEscape("\\uABC", 0)); EscapeSequenceParsing.parseEscape("\\uABC", 0).type);
} }
@Test @Test
public void testParseUnicodeBMP() { public void testParseUnicodeBMP() {
assertEquals( assertEquals(
new Result(Result.Type.CODE_POINT, 0xABCD, IntervalSet.EMPTY_SET, 6), new Result(Result.Type.CODE_POINT, 0xABCD, IntervalSet.EMPTY_SET, 0,6),
EscapeSequenceParsing.parseEscape("\\uABCD", 0)); EscapeSequenceParsing.parseEscape("\\uABCD", 0));
} }
@Test @Test
public void testParseUnicodeSMPTooShort() { public void testParseUnicodeSMPTooShort() {
assertEquals( assertEquals(
EscapeSequenceParsing.Result.INVALID, EscapeSequenceParsing.Result.Type.INVALID,
EscapeSequenceParsing.parseEscape("\\u{}", 0)); EscapeSequenceParsing.parseEscape("\\u{}", 0).type);
} }
@Test @Test
public void testParseUnicodeSMPMissingCloseBrace() { public void testParseUnicodeSMPMissingCloseBrace() {
assertEquals( assertEquals(
EscapeSequenceParsing.Result.INVALID, EscapeSequenceParsing.Result.Type.INVALID,
EscapeSequenceParsing.parseEscape("\\u{12345", 0)); EscapeSequenceParsing.parseEscape("\\u{12345", 0).type);
} }
@Test @Test
public void testParseUnicodeTooBig() { public void testParseUnicodeTooBig() {
assertEquals( assertEquals(
EscapeSequenceParsing.Result.INVALID, EscapeSequenceParsing.Result.Type.INVALID,
EscapeSequenceParsing.parseEscape("\\u{110000}", 0)); EscapeSequenceParsing.parseEscape("\\u{110000}", 0).type);
} }
@Test @Test
public void testParseUnicodeSMP() { public void testParseUnicodeSMP() {
assertEquals( assertEquals(
new Result(Result.Type.CODE_POINT, 0x10ABCD, IntervalSet.EMPTY_SET, 10), new Result(Result.Type.CODE_POINT, 0x10ABCD, IntervalSet.EMPTY_SET, 0,10),
EscapeSequenceParsing.parseEscape("\\u{10ABCD}", 0)); EscapeSequenceParsing.parseEscape("\\u{10ABCD}", 0));
} }
@Test @Test
public void testParseUnicodePropertyTooShort() { public void testParseUnicodePropertyTooShort() {
assertEquals( assertEquals(
EscapeSequenceParsing.Result.INVALID, EscapeSequenceParsing.Result.Type.INVALID,
EscapeSequenceParsing.parseEscape("\\p{}", 0)); EscapeSequenceParsing.parseEscape("\\p{}", 0).type);
} }
@Test @Test
public void testParseUnicodePropertyMissingCloseBrace() { public void testParseUnicodePropertyMissingCloseBrace() {
assertEquals( assertEquals(
EscapeSequenceParsing.Result.INVALID, EscapeSequenceParsing.Result.Type.INVALID,
EscapeSequenceParsing.parseEscape("\\p{1234", 0)); EscapeSequenceParsing.parseEscape("\\p{1234", 0).type);
} }
@Test @Test
public void testParseUnicodeProperty() { public void testParseUnicodeProperty() {
assertEquals( assertEquals(
new Result(Result.Type.PROPERTY, -1, IntervalSet.of(66560, 66639), 11), new Result(Result.Type.PROPERTY, -1, IntervalSet.of(66560, 66639), 0,11),
EscapeSequenceParsing.parseEscape("\\p{Deseret}", 0)); EscapeSequenceParsing.parseEscape("\\p{Deseret}", 0));
} }
@Test @Test
public void testParseUnicodePropertyInvertedTooShort() { public void testParseUnicodePropertyInvertedTooShort() {
assertEquals( assertEquals(
EscapeSequenceParsing.Result.INVALID, EscapeSequenceParsing.Result.Type.INVALID,
EscapeSequenceParsing.parseEscape("\\P{}", 0)); EscapeSequenceParsing.parseEscape("\\P{}", 0).type);
} }
@Test @Test
public void testParseUnicodePropertyInvertedMissingCloseBrace() { public void testParseUnicodePropertyInvertedMissingCloseBrace() {
assertEquals( assertEquals(
EscapeSequenceParsing.Result.INVALID, EscapeSequenceParsing.Result.Type.INVALID,
EscapeSequenceParsing.parseEscape("\\P{Deseret", 0)); EscapeSequenceParsing.parseEscape("\\P{Deseret", 0).type);
} }
@Test @Test
@ -132,7 +131,7 @@ public class TestEscapeSequenceParsing {
IntervalSet expected = IntervalSet.of(0, 66559); IntervalSet expected = IntervalSet.of(0, 66559);
expected.add(66640, Character.MAX_CODE_POINT); expected.add(66640, Character.MAX_CODE_POINT);
assertEquals( assertEquals(
new Result(Result.Type.PROPERTY, -1, expected, 11), new Result(Result.Type.PROPERTY, -1, expected, 0, 11),
EscapeSequenceParsing.parseEscape("\\P{Deseret}", 0)); EscapeSequenceParsing.parseEscape("\\P{Deseret}", 0));
} }
} }

View File

@ -462,9 +462,9 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest {
"lexer grammar A;\n" + "lexer grammar A;\n" +
"RULE : 'Foo \\uAABG \\x \\u';\n"; "RULE : 'Foo \\uAABG \\x \\u';\n";
String expected = String expected =
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:12: invalid escape sequence\n" + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:12: invalid escape sequence \\uAABG\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:19: invalid escape sequence\n" + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:19: invalid escape sequence \\x\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:22: invalid escape sequence\n"; "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:22: invalid escape sequence \\u\n";
String[] pair = new String[] { String[] pair = new String[] {
grammar, grammar,
@ -516,13 +516,13 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest {
"VALID_CHAR_SET: [`\\-=\\]];"; "VALID_CHAR_SET: [`\\-=\\]];";
String expected = String expected =
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:31: invalid escape sequence\n" + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:31: invalid escape sequence \\\"\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:38: invalid escape sequence\n" + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:38: invalid escape sequence \\]\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:45: invalid escape sequence\n" + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:45: invalid escape sequence \\u24\n" +
"error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:3:30: multi-character literals are not allowed in lexer sets: 'GH'\n" + "error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:3:30: multi-character literals are not allowed in lexer sets: 'GH'\n" +
"error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:3:36: multi-character literals are not allowed in lexer sets: 'LM'\n" + "error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:3:36: multi-character literals are not allowed in lexer sets: 'LM'\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:30: invalid escape sequence\n" + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:30: invalid escape sequence \\u24\\u\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:40: invalid escape sequence\n" + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:40: invalid escape sequence \\{\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:33: string literals and sets cannot be empty: 'F'..'A'\n" + "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:33: string literals and sets cannot be empty: 'F'..'A'\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: 'f'..'a'\n" + "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: 'f'..'a'\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n"; "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n";
@ -552,14 +552,14 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest {
"INVERTED_UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\P{Uppercase_Letter}-\\P{Number}];\n"; "INVERTED_UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\P{Uppercase_Letter}-\\P{Number}];\n";
String expected = String expected =
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:32: invalid escape sequence\n" + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:32: invalid escape sequence \\u{}\n"+
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:3:41: invalid escape sequence\n" + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:3:41: invalid escape sequence \\u{\n"+
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:35: invalid escape sequence\n" + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:35: invalid escape sequence \\u{110\n"+
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:5:32: invalid escape sequence\n" + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:5:32: invalid escape sequence \\p{}\n"+
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:6:41: invalid escape sequence\n" + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:6:41: invalid escape sequence \\p{\n"+
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:7:41: invalid escape sequence\n" + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:7:41: invalid escape sequence \\P{}\n"+
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:8:34: invalid escape sequence\n" + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:8:34: invalid escape sequence \\p{NotAProperty}\n"+
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:9:43: invalid escape sequence\n" + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:9:43: invalid escape sequence \\P{NotAProperty}\n"+
"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:10:39: unicode property escapes not allowed in lexer charset range: [\\p{Uppercase_Letter}-\\p{Lowercase_Letter}]\n" + "error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:10:39: unicode property escapes not allowed in lexer charset range: [\\p{Uppercase_Letter}-\\p{Lowercase_Letter}]\n" +
"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:11:41: unicode property escapes not allowed in lexer charset range: [\\p{Letter}-Z]\n" + "error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:11:41: unicode property escapes not allowed in lexer charset range: [\\p{Letter}-Z]\n" +
"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:12:41: unicode property escapes not allowed in lexer charset range: [A-\\p{Number}]\n" + "error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:12:41: unicode property escapes not allowed in lexer charset range: [A-\\p{Number}]\n" +

View File

@ -349,7 +349,7 @@ public class LexerATNFactory extends ParserATNFactory {
chars = CharSupport.getStringFromGrammarStringLiteral(chars); chars = CharSupport.getStringFromGrammarStringLiteral(chars);
if (chars == null) { if (chars == null) {
g.tool.errMgr.grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, g.tool.errMgr.grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE,
g.fileName, stringLiteralAST.getToken()); g.fileName, stringLiteralAST.getToken(), chars);
return new Handle(left, left); return new Handle(left, left);
} }
@ -462,8 +462,10 @@ public class LexerATNFactory extends ParserATNFactory {
EscapeSequenceParsing.parseEscape(chars, i); EscapeSequenceParsing.parseEscape(chars, i);
switch (escapeParseResult.type) { switch (escapeParseResult.type) {
case INVALID: case INVALID:
String invalid = chars.substring(escapeParseResult.startOffset,
escapeParseResult.startOffset+escapeParseResult.parseLength);
g.tool.errMgr.grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, g.tool.errMgr.grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE,
g.fileName, charSetAST.getToken(), charSetAST.getText()); g.fileName, charSetAST.getToken(), invalid);
state = CharSetParseState.ERROR; state = CharSetParseState.ERROR;
break; break;
case CODE_POINT: case CODE_POINT:

View File

@ -29,17 +29,17 @@ public abstract class EscapeSequenceParsing {
PROPERTY PROPERTY
}; };
public static Result INVALID = new Result(Type.INVALID, -1, IntervalSet.EMPTY_SET, -1);
public final Type type; public final Type type;
public final int codePoint; public final int codePoint;
public final IntervalSet propertyIntervalSet; public final IntervalSet propertyIntervalSet;
public final int startOffset;
public final int parseLength; public final int parseLength;
public Result(Type type, int codePoint, IntervalSet propertyIntervalSet, int parseLength) { public Result(Type type, int codePoint, IntervalSet propertyIntervalSet, int startOffset, int parseLength) {
this.type = type; this.type = type;
this.codePoint = codePoint; this.codePoint = codePoint;
this.propertyIntervalSet = propertyIntervalSet; this.propertyIntervalSet = propertyIntervalSet;
this.startOffset = startOffset;
this.parseLength = parseLength; this.parseLength = parseLength;
} }
@ -78,12 +78,12 @@ public abstract class EscapeSequenceParsing {
/** /**
* Parses a single escape sequence starting at {@code startOff}. * Parses a single escape sequence starting at {@code startOff}.
* *
* Returns {@link Result#INVALID} if no valid escape sequence was found, a Result otherwise. * Returns a type of INVALID if no valid escape sequence was found, a Result otherwise.
*/ */
public static Result parseEscape(String s, int startOff) { public static Result parseEscape(String s, int startOff) {
int offset = startOff; int offset = startOff;
if (offset + 2 > s.length() || s.codePointAt(offset) != '\\') { if (offset + 2 > s.length() || s.codePointAt(offset) != '\\') {
return Result.INVALID; return invalid(startOff, s.length()-1);
} }
// Move past backslash // Move past backslash
offset++; offset++;
@ -93,21 +93,21 @@ public abstract class EscapeSequenceParsing {
if (escaped == 'u') { if (escaped == 'u') {
// \\u{1} is the shortest we support // \\u{1} is the shortest we support
if (offset + 3 > s.length()) { if (offset + 3 > s.length()) {
return Result.INVALID; return invalid(startOff, s.length()-1);
} }
int hexStartOffset; int hexStartOffset;
int hexEndOffset; int hexEndOffset; // appears to be exclusive
if (s.codePointAt(offset) == '{') { if (s.codePointAt(offset) == '{') {
hexStartOffset = offset + 1; hexStartOffset = offset + 1;
hexEndOffset = s.indexOf('}', hexStartOffset); hexEndOffset = s.indexOf('}', hexStartOffset);
if (hexEndOffset == -1) { if (hexEndOffset == -1) {
return Result.INVALID; return invalid(startOff, s.length()-1);
} }
offset = hexEndOffset + 1; offset = hexEndOffset + 1;
} }
else { else {
if (offset + 4 > s.length()) { if (offset + 4 > s.length()) {
return Result.INVALID; return invalid(startOff, s.length()-1);
} }
hexStartOffset = offset; hexStartOffset = offset;
hexEndOffset = offset + 4; hexEndOffset = offset + 4;
@ -115,28 +115,32 @@ public abstract class EscapeSequenceParsing {
} }
int codePointValue = CharSupport.parseHexValue(s, hexStartOffset, hexEndOffset); int codePointValue = CharSupport.parseHexValue(s, hexStartOffset, hexEndOffset);
if (codePointValue == -1 || codePointValue > Character.MAX_CODE_POINT) { if (codePointValue == -1 || codePointValue > Character.MAX_CODE_POINT) {
return Result.INVALID; return invalid(startOff, startOff+6-1);
} }
return new Result( return new Result(
Result.Type.CODE_POINT, Result.Type.CODE_POINT,
codePointValue, codePointValue,
IntervalSet.EMPTY_SET, IntervalSet.EMPTY_SET,
startOff,
offset - startOff); offset - startOff);
} }
else if (escaped == 'p' || escaped == 'P') { else if (escaped == 'p' || escaped == 'P') {
// \p{L} is the shortest we support // \p{L} is the shortest we support
if (offset + 3 > s.length() || s.codePointAt(offset) != '{') { if (offset + 3 > s.length()) {
return Result.INVALID; return invalid(startOff, s.length()-1);
}
if (s.codePointAt(offset) != '{') {
return invalid(startOff, offset);
} }
int openBraceOffset = offset; int openBraceOffset = offset;
int closeBraceOffset = s.indexOf('}', openBraceOffset); int closeBraceOffset = s.indexOf('}', openBraceOffset);
if (closeBraceOffset == -1) { if (closeBraceOffset == -1) {
return Result.INVALID; return invalid(startOff, s.length()-1);
} }
String propertyName = s.substring(openBraceOffset + 1, closeBraceOffset); String propertyName = s.substring(openBraceOffset + 1, closeBraceOffset);
IntervalSet propertyIntervalSet = UnicodeData.getPropertyCodePoints(propertyName); IntervalSet propertyIntervalSet = UnicodeData.getPropertyCodePoints(propertyName);
if (propertyIntervalSet == null) { if (propertyIntervalSet == null) {
return Result.INVALID; return invalid(startOff, closeBraceOffset);
} }
offset = closeBraceOffset + 1; offset = closeBraceOffset + 1;
if (escaped == 'P') { if (escaped == 'P') {
@ -146,13 +150,14 @@ public abstract class EscapeSequenceParsing {
Result.Type.PROPERTY, Result.Type.PROPERTY,
-1, -1,
propertyIntervalSet, propertyIntervalSet,
startOff,
offset - startOff); offset - startOff);
} }
else if (escaped < CharSupport.ANTLRLiteralEscapedCharValue.length) { else if (escaped < CharSupport.ANTLRLiteralEscapedCharValue.length) {
int codePoint = CharSupport.ANTLRLiteralEscapedCharValue[escaped]; int codePoint = CharSupport.ANTLRLiteralEscapedCharValue[escaped];
if (codePoint == 0) { if (codePoint == 0) {
if (escaped != ']' && escaped != '-') { // escape ']' and '-' only in char sets. if (escaped != ']' && escaped != '-') { // escape ']' and '-' only in char sets.
return Result.INVALID; return invalid(startOff, startOff+1);
} }
else { else {
codePoint = escaped; codePoint = escaped;
@ -162,10 +167,20 @@ public abstract class EscapeSequenceParsing {
Result.Type.CODE_POINT, Result.Type.CODE_POINT,
codePoint, codePoint,
IntervalSet.EMPTY_SET, IntervalSet.EMPTY_SET,
startOff,
offset - startOff); offset - startOff);
} }
else { else {
return Result.INVALID; return invalid(startOff,s.length()-1);
} }
} }
private static Result invalid(int start, int stop) { // start..stop is inclusive
return new Result(
Result.Type.INVALID,
0,
IntervalSet.EMPTY_SET,
start,
stop - start + 1);
}
} }

View File

@ -146,6 +146,7 @@ tokens { SEMPRED; TOKEN_REF; RULE_REF; LEXER_CHAR_SET; ARG_ACTION; }
*/ */
package org.antlr.v4.parse; package org.antlr.v4.parse;
import org.antlr.v4.tool.*; import org.antlr.v4.tool.*;
import org.antlr.v4.runtime.misc.Interval;
} }
@ -643,27 +644,23 @@ fragment
ESC_SEQ ESC_SEQ
: '\\' : '\\'
( (
// The standard escaped character set such as tab, newline, // The standard escaped character set such as tab, newline, etc...
// etc.
//
'b'|'t'|'n'|'f'|'r'|'\''|'\\' 'b'|'t'|'n'|'f'|'r'|'\''|'\\'
| // A Java style Unicode escape sequence | // A Java style Unicode escape sequence
//
UNICODE_ESC UNICODE_ESC
| // A Swift/Hack style Unicode escape sequence | // A Swift/Hack style Unicode escape sequence
//
UNICODE_EXTENDED_ESC UNICODE_EXTENDED_ESC
| // An illegal escape sequence | // An illegal escape sequence
// ~('b'|'t'|'n'|'f'|'r'|'\''|'\\'|'u') // \x for any invalid x (make sure to match char here)
{ {
Token t = new CommonToken(input, state.type, state.channel, getCharIndex()-1, getCharIndex()); Token t = new CommonToken(input, state.type, state.channel, getCharIndex()-2, getCharIndex()-1);
t.setText(t.getText()); t.setText(t.getText());
t.setLine(input.getLine()); t.setLine(input.getLine());
t.setCharPositionInLine(input.getCharPositionInLine()-1); t.setCharPositionInLine(input.getCharPositionInLine()-2);
grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t); grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t, input.substring(getCharIndex()-2,getCharIndex()-1));
if ( state.text==null ) { if ( state.text==null ) {
setText(input.substring(state.tokenStartCharIndex, getCharIndex()-2)); setText(input.substring(state.tokenStartCharIndex, getCharIndex()-2));
} }
@ -673,7 +670,6 @@ ESC_SEQ
fragment fragment
UNICODE_ESC UNICODE_ESC
@init { @init {
// Flag to tell us whether we have a valid number of // Flag to tell us whether we have a valid number of
@ -717,14 +713,19 @@ UNICODE_ESC
// Now check the digit count and issue an error if we need to // Now check the digit count and issue an error if we need to
// //
{ {
if (hCount != 4) { if (hCount < 4) {
Token t = new CommonToken(input, state.type, state.channel, getCharIndex()-3-hCount, getCharIndex()-1); Interval badRange = Interval.of(getCharIndex()-2-hCount, getCharIndex());
t.setText(t.getText()); String lastChar = input.substring(badRange.b, badRange.b);
t.setLine(input.getLine()); if ( lastChar.codePointAt(0)=='\'' ) {
t.setCharPositionInLine(input.getCharPositionInLine()-hCount-2); badRange.b--;
grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t); }
String bad = input.substring(badRange.a, badRange.b);
Token t = new CommonToken(input, state.type, state.channel, badRange.a, badRange.b);
t.setLine(input.getLine());
t.setCharPositionInLine(input.getCharPositionInLine()-hCount-2);
grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t, bad);
if ( state.text==null ) { if ( state.text==null ) {
setText(input.substring(state.tokenStartCharIndex, getCharIndex()-hCount-3)); setText(bad);
} }
} }
} }
@ -746,7 +747,7 @@ UNICODE_EXTENDED_ESC
t.setText(t.getText()); t.setText(t.getText());
t.setLine(input.getLine()); t.setLine(input.getLine());
t.setCharPositionInLine(input.getCharPositionInLine()-numDigits); t.setCharPositionInLine(input.getCharPositionInLine()-numDigits);
grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t); grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t, input.substring(state.tokenStartCharIndex,getCharIndex()-1));
if ( state.text==null ) { if ( state.text==null ) {
setText(input.substring(state.tokenStartCharIndex, getCharIndex()-numDigits-3)); setText(input.substring(state.tokenStartCharIndex, getCharIndex()-numDigits-3));
} }

View File

@ -824,7 +824,7 @@ public enum ErrorType {
* *
* @since 4.2.1 * @since 4.2.1
*/ */
INVALID_ESCAPE_SEQUENCE(156, "invalid escape sequence", ErrorSeverity.WARNING), INVALID_ESCAPE_SEQUENCE(156, "invalid escape sequence <arg>", ErrorSeverity.WARNING),
/** /**
* Compiler Warning 157. * Compiler Warning 157.
* *

View File

@ -43,7 +43,7 @@ public abstract class GrammarASTWithOptions extends GrammarAST {
if ( v.startsWith("'") || v.startsWith("\"") ) { if ( v.startsWith("'") || v.startsWith("\"") ) {
v = CharSupport.getStringFromGrammarStringLiteral(v); v = CharSupport.getStringFromGrammarStringLiteral(v);
if (v == null) { if (v == null) {
g.tool.errMgr.grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, g.fileName, value.getToken()); g.tool.errMgr.grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, g.fileName, value.getToken(), value.getText());
v = ""; v = "";
} }
} }