Merge pull request #1818 from parrt/fix-1815-again

Fixes #1815 (for real this time!)
This commit is contained in:
Terence Parr 2017-04-06 14:36:22 -07:00 committed by GitHub
commit 9519bfc36f
6 changed files with 36 additions and 21 deletions

View File

@ -222,10 +222,11 @@ public class LexerErrorsDescriptors {
public String startRule = "";
public String grammarName = "L";
// ST interprets \\ as \ so we need \\\\ to get \\
/**
lexer grammar L;
ACTION2 : '[' (STRING | ~'"')*? ']';
STRING : '"' ('\\"' | .)*? '"';
STRING : '"' ('\\\\' '"' | .)*? '"';
WS : [ \t\r\n]+ -> skip;
*/
@CommentHasStringValue

View File

@ -432,6 +432,26 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest {
super.testErrors(pair, true);
}
/**
 * Regression test for https://github.com/antlr/antlr4/issues/1815
 * ("Null ptr exception in SqlBase.g4").
 *
 * <p>The lexer rule in the test grammar starts with two adjacent string
 * literals that each contain the escape {@code \"}. The expected output is one
 * {@link ErrorType#INVALID_ESCAPE_SEQUENCE} warning per literal, reported at
 * columns 10 and 15 of line 2 (the positions of the two backslashes).
 */
@Test public void testDoubleQuoteInTwoStringLiterals() {
    // Grammar text as the tool sees it:  STRING : '\"' '\"' 'x' ;
    String lexerGrammar =
        "lexer grammar A;\n" +
        "STRING : '\\\"' '\\\"' 'x' ;";
    // One warning line per offending literal, in source order.
    String expectedMessages =
        "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:10: invalid escape sequence \\\"\n"+
        "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:15: invalid escape sequence \\\"\n";
    // testErrors takes the grammar and its expected messages as a two-element array.
    super.testErrors(new String[] { lexerGrammar, expectedMessages }, true);
}
/**
* This test ensures that the {@link ErrorType#INVALID_ESCAPE_SEQUENCE}
* error is not reported for escape sequences that are known to be valid.
@ -464,7 +484,8 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest {
String expected =
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:12: invalid escape sequence \\uAABG\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:19: invalid escape sequence \\x\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:22: invalid escape sequence \\u\n";
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:22: invalid escape sequence \\u\n" +
"warning("+ErrorType.EPSILON_TOKEN.code+"): A.g4:2:0: non-fragment lexer rule RULE can match the empty string\n";
String[] pair = new String[] {
grammar,
@ -525,7 +546,8 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest {
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:40: invalid escape sequence \\{\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:33: string literals and sets cannot be empty: 'F'..'A'\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: 'f'..'a'\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n";
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n" +
"warning("+ ErrorType.EPSILON_TOKEN.code + "): Test.g4:2:0: non-fragment lexer rule INVALID_STRING_LITERAL can match the empty string\n";
String[] pair = new String[] {
grammar,

View File

@ -346,19 +346,18 @@ public class LexerATNFactory extends ParserATNFactory {
String chars = stringLiteralAST.getText();
ATNState left = newState(stringLiteralAST);
ATNState right;
chars = CharSupport.getStringFromGrammarStringLiteral(chars);
if (chars == null) {
g.tool.errMgr.grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE,
g.fileName, stringLiteralAST.getToken(), chars);
String s = CharSupport.getStringFromGrammarStringLiteral(chars);
if (s == null) {
// the lexer will already have given an error
return new Handle(left, left);
}
int n = chars.length();
int n = s.length();
ATNState prev = left;
right = null;
for (int i = 0; i < n; ) {
right = newState(stringLiteralAST);
int codePoint = chars.codePointAt(i);
int codePoint = s.codePointAt(i);
prev.addTransition(CodePointTransitions.createWithCodePoint(right, codePoint));
prev = right;
i += Character.charCount(codePoint);

View File

@ -242,10 +242,12 @@ public abstract class Target {
else {
toAdvance += 4;
}
String fullEscape = is.substring(i, i + toAdvance);
appendUnicodeEscapedCodePoint(
if ( i+toAdvance <= is.length() ) { // we might have an invalid \\uAB or something
String fullEscape = is.substring(i, i+toAdvance);
appendUnicodeEscapedCodePoint(
CharSupport.getCharValueFromCharInGrammarLiteral(fullEscape),
sb);
}
break;
default:
if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(escapedCodePoint)) {

View File

@ -661,9 +661,6 @@ ESC_SEQ
t.setLine(input.getLine());
t.setCharPositionInLine(input.getCharPositionInLine()-2);
grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t, input.substring(getCharIndex()-2,getCharIndex()-1));
if ( state.text==null ) {
setText(input.substring(state.tokenStartCharIndex, getCharIndex()-2));
}
}
)
;
@ -724,9 +721,6 @@ UNICODE_ESC
t.setLine(input.getLine());
t.setCharPositionInLine(input.getCharPositionInLine()-hCount-2);
grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t, bad);
if ( state.text==null ) {
setText(bad);
}
}
}
;
@ -748,9 +742,6 @@ UNICODE_EXTENDED_ESC
t.setLine(input.getLine());
t.setCharPositionInLine(input.getCharPositionInLine()-numDigits);
grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t, input.substring(state.tokenStartCharIndex,getCharIndex()-1));
if ( state.text==null ) {
setText(input.substring(state.tokenStartCharIndex, getCharIndex()-numDigits-3));
}
}
}
;

View File

@ -137,7 +137,7 @@ element returns [ATNFactory.Handle p]
| ^(ACTION .) {$p = factory.action((ActionAST)$ACTION);}
| ^(SEMPRED .) {$p = factory.sempred((PredAST)$SEMPRED);}
| ^(NOT b=blockSet[true]) {$p = $b.p;}
| LEXER_CHAR_SET {$p = factory.charSetLiteral($start);}
| LEXER_CHAR_SET {$p = factory.charSetLiteral($start);}
;
astOperand returns [ATNFactory.Handle p]