diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LexerErrorsDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LexerErrorsDescriptors.java index 4a88fbe94..a7bcc3d7d 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LexerErrorsDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LexerErrorsDescriptors.java @@ -222,10 +222,11 @@ public class LexerErrorsDescriptors { public String startRule = ""; public String grammarName = "L"; + // ST interprets \\ as \, so the grammar text below needs \\\\ in order to yield \\ /** lexer grammar L; ACTION2 : '[' (STRING | ~'"')*? ']'; - STRING : '"' ('\\"' | .)*? '"'; + STRING : '"' ('\\\\' '"' | .)*? '"'; WS : [ \t\r\n]+ -> skip; */ @CommentHasStringValue diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java index 210918cbc..3219c0bad 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java @@ -432,6 +432,26 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest { super.testErrors(pair, true); } + /** + * Regression test for https://github.com/antlr/antlr4/issues/1815 ("Null ptr exception in SqlBase.g4"). + * A lexer rule with two string literals that each contain the escape \" is expected to yield one INVALID_ESCAPE_SEQUENCE warning per literal. + */ + @Test public void testDoubleQuoteInTwoStringLiterals() { + String grammar = + "lexer grammar A;\n" + + "STRING : '\\\"' '\\\"' 'x' ;"; + String expected = + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:10: invalid escape sequence \\\"\n"+ + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:15: invalid escape sequence \\\"\n"; + + String[] pair = new String[] { + grammar, + expected + }; + + super.testErrors(pair, true); + } + /** * This test ensures that the {@link ErrorType#INVALID_ESCAPE_SEQUENCE} * error is not reported for escape sequences that are known to be valid.
@@ -464,7 +484,8 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest { String expected = "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:12: invalid escape sequence \\uAABG\n" + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:19: invalid escape sequence \\x\n" + - "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:22: invalid escape sequence \\u\n"; + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:22: invalid escape sequence \\u\n" + + "warning("+ErrorType.EPSILON_TOKEN.code+"): A.g4:2:0: non-fragment lexer rule RULE can match the empty string\n"; String[] pair = new String[] { grammar, @@ -525,7 +546,8 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest { "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:40: invalid escape sequence \\{\n" + "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:33: string literals and sets cannot be empty: 'F'..'A'\n" + "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: 'f'..'a'\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n"; + "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n" + + "warning("+ ErrorType.EPSILON_TOKEN.code + "): Test.g4:2:0: non-fragment lexer rule INVALID_STRING_LITERAL can match the empty string\n"; String[] pair = new String[] { grammar, diff --git a/tool/src/org/antlr/v4/automata/LexerATNFactory.java b/tool/src/org/antlr/v4/automata/LexerATNFactory.java index b356ddd3c..fb332e59f 100644 --- a/tool/src/org/antlr/v4/automata/LexerATNFactory.java +++ b/tool/src/org/antlr/v4/automata/LexerATNFactory.java @@ -346,19 +346,18 @@ public class LexerATNFactory extends ParserATNFactory { String chars = stringLiteralAST.getText(); ATNState left = 
newState(stringLiteralAST); ATNState right; - chars = CharSupport.getStringFromGrammarStringLiteral(chars); - if (chars == null) { - g.tool.errMgr.grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, - g.fileName, stringLiteralAST.getToken(), chars); + String s = CharSupport.getStringFromGrammarStringLiteral(chars); + if (s == null) { + // null means the literal had an invalid escape; the lexer will already have given an error, so do not report it again return new Handle(left, left); } - int n = chars.length(); + int n = s.length(); ATNState prev = left; right = null; for (int i = 0; i < n; ) { right = newState(stringLiteralAST); - int codePoint = chars.codePointAt(i); + int codePoint = s.codePointAt(i); prev.addTransition(CodePointTransitions.createWithCodePoint(right, codePoint)); prev = right; i += Character.charCount(codePoint); diff --git a/tool/src/org/antlr/v4/codegen/Target.java b/tool/src/org/antlr/v4/codegen/Target.java index 0133a097b..025b42ad9 100644 --- a/tool/src/org/antlr/v4/codegen/Target.java +++ b/tool/src/org/antlr/v4/codegen/Target.java @@ -242,10 +242,12 @@ public abstract class Target { else { toAdvance += 4; } - String fullEscape = is.substring(i, i + toAdvance); - appendUnicodeEscapedCodePoint( + if ( i+toAdvance <= is.length() ) { // guard against a truncated escape such as \\uAB that would run past the end of the string + String fullEscape = is.substring(i, i+toAdvance); + appendUnicodeEscapedCodePoint( CharSupport.getCharValueFromCharInGrammarLiteral(fullEscape), sb); + } break; default: if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(escapedCodePoint)) { diff --git a/tool/src/org/antlr/v4/parse/ANTLRLexer.g b/tool/src/org/antlr/v4/parse/ANTLRLexer.g index 33c4e4efd..4f113a7f5 100644 --- a/tool/src/org/antlr/v4/parse/ANTLRLexer.g +++ b/tool/src/org/antlr/v4/parse/ANTLRLexer.g @@ -661,9 +661,6 @@ ESC_SEQ t.setLine(input.getLine()); t.setCharPositionInLine(input.getCharPositionInLine()-2); grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t, input.substring(getCharIndex()-2,getCharIndex()-1)); - if ( state.text==null ) { -
setText(input.substring(state.tokenStartCharIndex, getCharIndex()-2)); - } } ) ; @@ -724,9 +721,6 @@ UNICODE_ESC t.setLine(input.getLine()); t.setCharPositionInLine(input.getCharPositionInLine()-hCount-2); grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t, bad); - if ( state.text==null ) { - setText(bad); - } } } ; @@ -748,9 +742,6 @@ UNICODE_EXTENDED_ESC t.setLine(input.getLine()); t.setCharPositionInLine(input.getCharPositionInLine()-numDigits); grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t, input.substring(state.tokenStartCharIndex,getCharIndex()-1)); - if ( state.text==null ) { - setText(input.substring(state.tokenStartCharIndex, getCharIndex()-numDigits-3)); - } } } ; diff --git a/tool/src/org/antlr/v4/parse/ATNBuilder.g b/tool/src/org/antlr/v4/parse/ATNBuilder.g index 32cdec5cd..dfd47dc72 100644 --- a/tool/src/org/antlr/v4/parse/ATNBuilder.g +++ b/tool/src/org/antlr/v4/parse/ATNBuilder.g @@ -137,7 +137,7 @@ element returns [ATNFactory.Handle p] | ^(ACTION .) {$p = factory.action((ActionAST)$ACTION);} | ^(SEMPRED .) {$p = factory.sempred((PredAST)$SEMPRED);} | ^(NOT b=blockSet[true]) {$p = $b.p;} - | LEXER_CHAR_SET {$p = factory.charSetLiteral($start);} + | LEXER_CHAR_SET {$p = factory.charSetLiteral($start);} ; astOperand returns [ATNFactory.Handle p]