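Fixes #1815. Include the offending text in "invalid escape sequence" messages so the user can see which escape is invalid. The lexer now matches \x for any character x and reports it as invalid, which prevents an unrecognized \x from being treated as a sequence of two separate characters. Updated unit tests.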

2017-04-06 10:26:03 -07:00 · 2017-04-06 10:26:03 -07:00 · 80aa7907a4
parent 8af0080103
commit 80aa7907a4
7 changed files with 100 additions and 83 deletions
--- a/tool-testsuite/test/org/antlr/v4/test/tool/TestEscapeSequenceParsing.java
+++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestEscapeSequenceParsing.java
@ -8,7 +8,6 @@ package org.antlr.v4.test.tool;
 import org.antlr.v4.misc.EscapeSequenceParsing;
 import org.antlr.v4.runtime.misc.IntervalSet;
 import org.junit.Test;
 import static org.antlr.v4.misc.EscapeSequenceParsing.Result;
@ -18,113 +17,113 @@ public class TestEscapeSequenceParsing {
 	@Test
 	public void testParseEmpty() {
 		assertEquals(
-				EscapeSequenceParsing.Result.INVALID,
+				EscapeSequenceParsing.Result.Type.INVALID,
-				EscapeSequenceParsing.parseEscape("", 0));
+				EscapeSequenceParsing.parseEscape("", 0).type);
 	}
 	@Test
 	public void testParseJustBackslash() {
 		assertEquals(
-				EscapeSequenceParsing.Result.INVALID,
+				EscapeSequenceParsing.Result.Type.INVALID,
-				EscapeSequenceParsing.parseEscape("\\", 0));
+				EscapeSequenceParsing.parseEscape("\\", 0).type);
 	}
 	@Test
 	public void testParseInvalidEscape() {
 		assertEquals(
-				EscapeSequenceParsing.Result.INVALID,
+				EscapeSequenceParsing.Result.Type.INVALID,
-				EscapeSequenceParsing.parseEscape("\\z", 0));
+				EscapeSequenceParsing.parseEscape("\\z", 0).type);
 	}
 	@Test
 	public void testParseNewline() {
 		assertEquals(
-				new Result(Result.Type.CODE_POINT, '\n', IntervalSet.EMPTY_SET, 2),
+				new Result(Result.Type.CODE_POINT, '\n', IntervalSet.EMPTY_SET, 0,2),
 				EscapeSequenceParsing.parseEscape("\\n", 0));
 	}
 	@Test
 	public void testParseTab() {
 		assertEquals(
-				new Result(Result.Type.CODE_POINT, '\t', IntervalSet.EMPTY_SET, 2),
+				new Result(Result.Type.CODE_POINT, '\t', IntervalSet.EMPTY_SET, 0,2),
 				EscapeSequenceParsing.parseEscape("\\t", 0));
 	}
 	@Test
 	public void testParseUnicodeTooShort() {
 		assertEquals(
-				EscapeSequenceParsing.Result.INVALID,
+				EscapeSequenceParsing.Result.Type.INVALID,
-				EscapeSequenceParsing.parseEscape("\\uABC", 0));
+				EscapeSequenceParsing.parseEscape("\\uABC", 0).type);
 	}
 	@Test
 	public void testParseUnicodeBMP() {
 		assertEquals(
-				new Result(Result.Type.CODE_POINT, 0xABCD, IntervalSet.EMPTY_SET, 6),
+				new Result(Result.Type.CODE_POINT, 0xABCD, IntervalSet.EMPTY_SET, 0,6),
 				EscapeSequenceParsing.parseEscape("\\uABCD", 0));
 	}
 	@Test
 	public void testParseUnicodeSMPTooShort() {
 		assertEquals(
-				EscapeSequenceParsing.Result.INVALID,
+				EscapeSequenceParsing.Result.Type.INVALID,
-				EscapeSequenceParsing.parseEscape("\\u{}", 0));
+				EscapeSequenceParsing.parseEscape("\\u{}", 0).type);
 	}
 	@Test
 	public void testParseUnicodeSMPMissingCloseBrace() {
 		assertEquals(
-				EscapeSequenceParsing.Result.INVALID,
+				EscapeSequenceParsing.Result.Type.INVALID,
-				EscapeSequenceParsing.parseEscape("\\u{12345", 0));
+				EscapeSequenceParsing.parseEscape("\\u{12345", 0).type);
 	}
 	@Test
 	public void testParseUnicodeTooBig() {
 		assertEquals(
-				EscapeSequenceParsing.Result.INVALID,
+				EscapeSequenceParsing.Result.Type.INVALID,
-				EscapeSequenceParsing.parseEscape("\\u{110000}", 0));
+				EscapeSequenceParsing.parseEscape("\\u{110000}", 0).type);
 	}
 	@Test
 	public void testParseUnicodeSMP() {
 		assertEquals(
-				new Result(Result.Type.CODE_POINT, 0x10ABCD, IntervalSet.EMPTY_SET, 10),
+				new Result(Result.Type.CODE_POINT, 0x10ABCD, IntervalSet.EMPTY_SET, 0,10),
 				EscapeSequenceParsing.parseEscape("\\u{10ABCD}", 0));
 	}
 	@Test
 	public void testParseUnicodePropertyTooShort() {
 		assertEquals(
-				EscapeSequenceParsing.Result.INVALID,
+				EscapeSequenceParsing.Result.Type.INVALID,
-				EscapeSequenceParsing.parseEscape("\\p{}", 0));
+				EscapeSequenceParsing.parseEscape("\\p{}", 0).type);
 	}
 	@Test
 	public void testParseUnicodePropertyMissingCloseBrace() {
 		assertEquals(
-				EscapeSequenceParsing.Result.INVALID,
+				EscapeSequenceParsing.Result.Type.INVALID,
-				EscapeSequenceParsing.parseEscape("\\p{1234", 0));
+				EscapeSequenceParsing.parseEscape("\\p{1234", 0).type);
 	}
 	@Test
 	public void testParseUnicodeProperty() {
 		assertEquals(
-				new Result(Result.Type.PROPERTY, -1, IntervalSet.of(66560, 66639), 11),
+				new Result(Result.Type.PROPERTY, -1, IntervalSet.of(66560, 66639), 0,11),
 				EscapeSequenceParsing.parseEscape("\\p{Deseret}", 0));
 	}
 	@Test
 	public void testParseUnicodePropertyInvertedTooShort() {
 		assertEquals(
-				EscapeSequenceParsing.Result.INVALID,
+				EscapeSequenceParsing.Result.Type.INVALID,
-				EscapeSequenceParsing.parseEscape("\\P{}", 0));
+				EscapeSequenceParsing.parseEscape("\\P{}", 0).type);
 	}
 	@Test
 	public void testParseUnicodePropertyInvertedMissingCloseBrace() {
 		assertEquals(
-				EscapeSequenceParsing.Result.INVALID,
+				EscapeSequenceParsing.Result.Type.INVALID,
-				EscapeSequenceParsing.parseEscape("\\P{Deseret", 0));
+				EscapeSequenceParsing.parseEscape("\\P{Deseret", 0).type);
 	}
 	@Test
@ -132,7 +131,7 @@ public class TestEscapeSequenceParsing {
 		IntervalSet expected = IntervalSet.of(0, 66559);
 		expected.add(66640, Character.MAX_CODE_POINT);
 		assertEquals(
-				new Result(Result.Type.PROPERTY, -1, expected, 11),
+				new Result(Result.Type.PROPERTY, -1, expected, 0, 11),
 				EscapeSequenceParsing.parseEscape("\\P{Deseret}", 0));
 	}
 }
--- a/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java
+++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java
@ -462,9 +462,9 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest {
 			"lexer grammar A;\n" +
 			"RULE : 'Foo \\uAABG \\x \\u';\n";
 		String expected =
-			"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:12: invalid escape sequence\n" +
+			"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:12: invalid escape sequence \\uAABG\n" +
-			"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:19: invalid escape sequence\n" +
+			"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:19: invalid escape sequence \\x\n" +
-			"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:22: invalid escape sequence\n";
+			"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:22: invalid escape sequence \\u\n";
 		String[] pair = new String[] {
 			grammar,
@ -516,13 +516,13 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest {
 				"VALID_CHAR_SET:               [`\\-=\\]];";
 		String expected =
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:31: invalid escape sequence\n" +
+				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:31: invalid escape sequence \\\"\n" +
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:38: invalid escape sequence\n" +
+				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:38: invalid escape sequence \\]\n" +
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:45: invalid escape sequence\n" +
+				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:45: invalid escape sequence \\u24\n" +
 				"error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:3:30: multi-character literals are not allowed in lexer sets: 'GH'\n" +
 				"error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:3:36: multi-character literals are not allowed in lexer sets: 'LM'\n" +
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:30: invalid escape sequence\n" +
+				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:30: invalid escape sequence \\u24\\u\n" +
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:40: invalid escape sequence\n" +
+				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:40: invalid escape sequence \\{\n" +
 				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:33: string literals and sets cannot be empty: 'F'..'A'\n" +
 				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: 'f'..'a'\n" +
 				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n";
@ -552,14 +552,14 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest {
 				"INVERTED_UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\P{Uppercase_Letter}-\\P{Number}];\n";
 		String expected =
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:32: invalid escape sequence\n" +
+				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:32: invalid escape sequence \\u{}\n"+
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:3:41: invalid escape sequence\n" +
+				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:3:41: invalid escape sequence \\u{\n"+
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:35: invalid escape sequence\n" +
+				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:35: invalid escape sequence \\u{110\n"+
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:5:32: invalid escape sequence\n" +
+				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:5:32: invalid escape sequence \\p{}\n"+
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:6:41: invalid escape sequence\n" +
+				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:6:41: invalid escape sequence \\p{\n"+
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:7:41: invalid escape sequence\n" +
+				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:7:41: invalid escape sequence \\P{}\n"+
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:8:34: invalid escape sequence\n" +
+				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:8:34: invalid escape sequence \\p{NotAProperty}\n"+
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:9:43: invalid escape sequence\n" +
+				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:9:43: invalid escape sequence \\P{NotAProperty}\n"+
 				"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:10:39: unicode property escapes not allowed in lexer charset range: [\\p{Uppercase_Letter}-\\p{Lowercase_Letter}]\n" +
 				"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:11:41: unicode property escapes not allowed in lexer charset range: [\\p{Letter}-Z]\n" +
 				"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:12:41: unicode property escapes not allowed in lexer charset range: [A-\\p{Number}]\n" +
--- a/tool/src/org/antlr/v4/automata/LexerATNFactory.java
+++ b/tool/src/org/antlr/v4/automata/LexerATNFactory.java
@ -349,7 +349,7 @@ public class LexerATNFactory extends ParserATNFactory {
 		chars = CharSupport.getStringFromGrammarStringLiteral(chars);
 		if (chars == null) {
 			g.tool.errMgr.grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE,
-					g.fileName, stringLiteralAST.getToken());
+					g.fileName, stringLiteralAST.getToken(), chars);
 			return new Handle(left, left);
 		}
@ -462,8 +462,10 @@ public class LexerATNFactory extends ParserATNFactory {
 					EscapeSequenceParsing.parseEscape(chars, i);
 				switch (escapeParseResult.type) {
 					case INVALID:
 						String invalid = chars.substring(escapeParseResult.startOffset,
 						                                 escapeParseResult.startOffset+escapeParseResult.parseLength);
 						g.tool.errMgr.grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE,
-									   g.fileName, charSetAST.getToken(), charSetAST.getText());
+						                           g.fileName, charSetAST.getToken(), invalid);
 						state = CharSetParseState.ERROR;
 						break;
 					case CODE_POINT:
--- a/tool/src/org/antlr/v4/misc/EscapeSequenceParsing.java
+++ b/tool/src/org/antlr/v4/misc/EscapeSequenceParsing.java
@ -29,17 +29,17 @@ public abstract class EscapeSequenceParsing {
 			PROPERTY
 		};
 		public static Result INVALID = new Result(Type.INVALID, -1, IntervalSet.EMPTY_SET, -1);
 		public final Type type;
 		public final int codePoint;
 		public final IntervalSet propertyIntervalSet;
 		public final int startOffset;
 		public final int parseLength;
-		public Result(Type type, int codePoint, IntervalSet propertyIntervalSet, int parseLength) {
+		public Result(Type type, int codePoint, IntervalSet propertyIntervalSet, int startOffset, int parseLength) {
 			this.type = type;
 			this.codePoint = codePoint;
 			this.propertyIntervalSet = propertyIntervalSet;
 			this.startOffset = startOffset;
 			this.parseLength = parseLength;
 		}
@ -78,12 +78,12 @@ public abstract class EscapeSequenceParsing {
 	/**
 	 * Parses a single escape sequence starting at {@code startOff}.
 	 *
-	 * Returns {@link Result#INVALID} if no valid escape sequence was found, a Result otherwise.
+	 * Returns a type of INVALID if no valid escape sequence was found, a Result otherwise.
 	 */
 	public static Result parseEscape(String s, int startOff) {
 		int offset = startOff;
 		if (offset + 2 > s.length() || s.codePointAt(offset) != '\\') {
-			return Result.INVALID;
+			return invalid(startOff, s.length()-1);
 		}
 		// Move past backslash
 		offset++;
@ -93,21 +93,21 @@ public abstract class EscapeSequenceParsing {
 		if (escaped == 'u') {
 			// \\u{1} is the shortest we support
 			if (offset + 3 > s.length()) {
-				return Result.INVALID;
+				return invalid(startOff, s.length()-1);
 			}
 			int hexStartOffset;
-			int hexEndOffset;
+			int hexEndOffset; // appears to be exclusive
 			if (s.codePointAt(offset) == '{') {
 				hexStartOffset = offset + 1;
 				hexEndOffset = s.indexOf('}', hexStartOffset);
 				if (hexEndOffset == -1) {
-					return Result.INVALID;
+					return invalid(startOff, s.length()-1);
 				}
 				offset = hexEndOffset + 1;
 			}
 			else {
 				if (offset + 4 > s.length()) {
-					return Result.INVALID;
+					return invalid(startOff, s.length()-1);
 				}
 				hexStartOffset = offset;
 				hexEndOffset = offset + 4;
@ -115,28 +115,32 @@ public abstract class EscapeSequenceParsing {
 			}
 			int codePointValue = CharSupport.parseHexValue(s, hexStartOffset, hexEndOffset);
 			if (codePointValue == -1 || codePointValue > Character.MAX_CODE_POINT) {
-				return Result.INVALID;
+				return invalid(startOff, startOff+6-1);
 			}
 			return new Result(
 				Result.Type.CODE_POINT,
 				codePointValue,
 				IntervalSet.EMPTY_SET,
 				startOff,
 				offset - startOff);
 		}
 		else if (escaped == 'p' || escaped == 'P') {
 			// \p{L} is the shortest we support
-			if (offset + 3 > s.length() || s.codePointAt(offset) != '{') {
+			if (offset + 3 > s.length()) {
-				return Result.INVALID;
+				return invalid(startOff, s.length()-1);
 			}
 			if (s.codePointAt(offset) != '{') {
 				return invalid(startOff, offset);
 			}
 			int openBraceOffset = offset;
 			int closeBraceOffset = s.indexOf('}', openBraceOffset);
 			if (closeBraceOffset == -1) {
-				return Result.INVALID;
+				return invalid(startOff, s.length()-1);
 			}
 			String propertyName = s.substring(openBraceOffset + 1, closeBraceOffset);
 			IntervalSet propertyIntervalSet = UnicodeData.getPropertyCodePoints(propertyName);
 			if (propertyIntervalSet == null) {
-				return Result.INVALID;
+				return invalid(startOff, closeBraceOffset);
 			}
 			offset = closeBraceOffset + 1;
 			if (escaped == 'P') {
@ -146,13 +150,14 @@ public abstract class EscapeSequenceParsing {
 				Result.Type.PROPERTY,
 				-1,
 				propertyIntervalSet,
 				startOff,
 				offset - startOff);
 		}
 		else if (escaped < CharSupport.ANTLRLiteralEscapedCharValue.length) {
 			int codePoint = CharSupport.ANTLRLiteralEscapedCharValue[escaped];
 			if (codePoint == 0) {
 				if (escaped != ']' && escaped != '-') { // escape ']' and '-' only in char sets.
-					return Result.INVALID;
+					return invalid(startOff, startOff+1);
 				}
 				else {
 					codePoint = escaped;
@ -162,10 +167,20 @@ public abstract class EscapeSequenceParsing {
 				Result.Type.CODE_POINT,
 				codePoint,
 				IntervalSet.EMPTY_SET,
 				startOff,
 				offset - startOff);
 		}
 		else {
-			return Result.INVALID;
+			return invalid(startOff,s.length()-1);
 		}
 	}
 	private static Result invalid(int start, int stop) { // start..stop is inclusive
 		return new Result(
 			Result.Type.INVALID,
 			0,
 			IntervalSet.EMPTY_SET,
 			start,
 			stop - start + 1);
 	}
 }
--- a/tool/src/org/antlr/v4/parse/ANTLRLexer.g
+++ b/tool/src/org/antlr/v4/parse/ANTLRLexer.g
@ -146,6 +146,7 @@ tokens { SEMPRED; TOKEN_REF; RULE_REF; LEXER_CHAR_SET; ARG_ACTION; }
 */
 package org.antlr.v4.parse;
 import org.antlr.v4.tool.*;
 import org.antlr.v4.runtime.misc.Interval;
 }
@ -643,27 +644,23 @@ fragment
 ESC_SEQ
    : '\\'
        (
-              // The standard escaped character set such as tab, newline,
+              // The standard escaped character set such as tab, newline, etc...
              // etc.
              //
    		  'b'|'t'|'n'|'f'|'r'|'\''|'\\'
    	    | // A Java style Unicode escape sequence
    	      //
    	      UNICODE_ESC
            | // A Swift/Hack style Unicode escape sequence
              //
              UNICODE_EXTENDED_ESC
    	    | // An illegal escape seqeunce
-    	      //
+    	      ~('b'|'t'|'n'|'f'|'r'|'\''|'\\'|'u') // \x for any invalid x (make sure to match char here)
    	      {
-                Token t = new CommonToken(input, state.type, state.channel, getCharIndex()-1, getCharIndex());
+                Token t = new CommonToken(input, state.type, state.channel, getCharIndex()-2, getCharIndex()-1);
                t.setText(t.getText());
                t.setLine(input.getLine());
-                t.setCharPositionInLine(input.getCharPositionInLine()-1);
+                t.setCharPositionInLine(input.getCharPositionInLine()-2);
-                grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t);
+                grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t, input.substring(getCharIndex()-2,getCharIndex()-1));
 				if ( state.text==null ) {
 					setText(input.substring(state.tokenStartCharIndex, getCharIndex()-2));
 				}
@ -673,7 +670,6 @@ ESC_SEQ
 fragment
 UNICODE_ESC
@init {
 	// Flag to tell us whether we have a valid number of
@ -717,14 +713,19 @@ UNICODE_ESC
    	// Now check the digit count and issue an error if we need to
    	//
    	{
-    		if (hCount != 4) {
+    		if (hCount < 4) {
-                Token t = new CommonToken(input, state.type, state.channel, getCharIndex()-3-hCount, getCharIndex()-1);
+				Interval badRange = Interval.of(getCharIndex()-2-hCount, getCharIndex());
-                t.setText(t.getText());
+				String lastChar = input.substring(badRange.b, badRange.b);
-                t.setLine(input.getLine());
+				if ( lastChar.codePointAt(0)=='\'' ) {
-                t.setCharPositionInLine(input.getCharPositionInLine()-hCount-2);
+					badRange.b--;
-                grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t);
+				}
 				String bad = input.substring(badRange.a, badRange.b);
 				Token t = new CommonToken(input, state.type, state.channel, badRange.a, badRange.b);
 				t.setLine(input.getLine());
 				t.setCharPositionInLine(input.getCharPositionInLine()-hCount-2);
 				grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t, bad);
 				if ( state.text==null ) {
-					setText(input.substring(state.tokenStartCharIndex, getCharIndex()-hCount-3));
+					setText(bad);
 				}
    		}
    	}
@ -746,7 +747,7 @@ UNICODE_EXTENDED_ESC
                t.setText(t.getText());
                t.setLine(input.getLine());
                t.setCharPositionInLine(input.getCharPositionInLine()-numDigits);
-                grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t);
+                grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t, input.substring(state.tokenStartCharIndex,getCharIndex()-1));
 				if ( state.text==null ) {
 					setText(input.substring(state.tokenStartCharIndex, getCharIndex()-numDigits-3));
 				}
--- a/tool/src/org/antlr/v4/tool/ErrorType.java
+++ b/tool/src/org/antlr/v4/tool/ErrorType.java
@ -824,7 +824,7 @@ public enum ErrorType {
 	 *
 	 * @since 4.2.1
 	 */
-	INVALID_ESCAPE_SEQUENCE(156, "invalid escape sequence", ErrorSeverity.WARNING),
+	INVALID_ESCAPE_SEQUENCE(156, "invalid escape sequence <arg>", ErrorSeverity.WARNING),
 	/**
 	 * Compiler Warning 157.
 	 *
--- a/tool/src/org/antlr/v4/tool/ast/GrammarASTWithOptions.java
+++ b/tool/src/org/antlr/v4/tool/ast/GrammarASTWithOptions.java
@ -43,7 +43,7 @@ public abstract class GrammarASTWithOptions extends GrammarAST {
 			if ( v.startsWith("'") || v.startsWith("\"") ) {
 				v = CharSupport.getStringFromGrammarStringLiteral(v);
 				if (v == null) {
-					g.tool.errMgr.grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, g.fileName, value.getToken());
+					g.tool.errMgr.grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, g.fileName, value.getToken(), value.getText());
 					v = "";
 				}
 			}