Added an error for bad sets in the lexer. Some tests in TestSets appeared to allow ~('a'|B), but that was only working by chance. ('a'|B) works without the ~, though it doesn't collapse to a set. Fixes antlr/antlr4#70

This commit is contained in:
Terence Parr 2012-12-01 15:43:15 -08:00
parent fc79752748
commit 6d6389eef2
4 changed files with 27 additions and 33 deletions

View File

@ -6,6 +6,11 @@ December 1, 2012
line 2:3 token recognition error at: '\t' line 2:3 token recognition error at: '\t'
line 2:4 token recognition error at: '\n' line 2:4 token recognition error at: '\n'
* added error for bad sets in lexer; e.g.:
lexer set element A is invalid (either lexer rule ref or literal with > 1 char)
Some tests in TestSets appeared to allow ~('a'|B), but that was only working by chance.
('a'|B) works, though it doesn't collapse to a set.
November 30, 2012 November 30, 2012
* Maven updates (cleanup, unification, and specify Java 6 bootstrap classpath) * Maven updates (cleanup, unification, and specify Java 6 bootstrap classpath)

View File

@ -46,6 +46,7 @@ import org.antlr.v4.runtime.atn.TokensStartState;
import org.antlr.v4.runtime.atn.Transition; import org.antlr.v4.runtime.atn.Transition;
import org.antlr.v4.runtime.misc.Interval; import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.IntervalSet; import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.tool.ErrorType;
import org.antlr.v4.tool.LexerGrammar; import org.antlr.v4.tool.LexerGrammar;
import org.antlr.v4.tool.Rule; import org.antlr.v4.tool.Rule;
import org.antlr.v4.tool.ast.ActionAST; import org.antlr.v4.tool.ast.ActionAST;
@ -179,9 +180,20 @@ public class LexerATNFactory extends ParserATNFactory {
else if ( t.getType()==ANTLRParser.LEXER_CHAR_SET ) { else if ( t.getType()==ANTLRParser.LEXER_CHAR_SET ) {
set.addAll(getSetFromCharSetLiteral(t)); set.addAll(getSetFromCharSetLiteral(t));
} }
else { else if ( t.getType()==ANTLRParser.STRING_LITERAL ) {
int c = CharSupport.getCharValueFromGrammarCharLiteral(t.getText()); int c = CharSupport.getCharValueFromGrammarCharLiteral(t.getText());
set.add(c); if ( c != -1 ) {
set.add(c);
}
else {
g.tool.errMgr.grammarError(ErrorType.INVALID_LEXER_SET_ELEMENT,
g.fileName, t.getToken(), t.getText());
}
}
else if ( t.getType()==ANTLRParser.TOKEN_REF ) {
g.tool.errMgr.grammarError(ErrorType.INVALID_LEXER_SET_ELEMENT,
g.fileName, t.getToken(), t.getText());
} }
} }
if ( invert ) { if ( invert ) {

View File

@ -115,6 +115,7 @@ public enum ErrorType {
LEXER_ACTION_PLACEMENT_ISSUE(132, "action in lexer rule <arg> must be last element of single outermost alt", ErrorSeverity.ERROR), LEXER_ACTION_PLACEMENT_ISSUE(132, "action in lexer rule <arg> must be last element of single outermost alt", ErrorSeverity.ERROR),
LEXER_COMMAND_PLACEMENT_ISSUE(133, "->command in lexer rule <arg> must be last element of single outermost alt", ErrorSeverity.ERROR), LEXER_COMMAND_PLACEMENT_ISSUE(133, "->command in lexer rule <arg> must be last element of single outermost alt", ErrorSeverity.ERROR),
USE_OF_BAD_WORD(134, "symbol <arg> conflicts with generated code in target language or runtime", ErrorSeverity.ERROR), USE_OF_BAD_WORD(134, "symbol <arg> conflicts with generated code in target language or runtime", ErrorSeverity.ERROR),
INVALID_LEXER_SET_ELEMENT(134, "lexer set element <arg> is invalid (either lexer rule ref or literal with > 1 char)", ErrorSeverity.ERROR),
// Backward incompatibility errors // Backward incompatibility errors
V3_TREE_GRAMMAR(200, "tree grammars are not supported in ANTLR v4", ErrorSeverity.ERROR), V3_TREE_GRAMMAR(200, "tree grammars are not supported in ANTLR v4", ErrorSeverity.ERROR),

View File

@ -227,32 +227,22 @@ public class TestSets extends BaseTest {
} }
@Test public void testNotCharSetWithRuleRef() throws Exception { @Test public void testNotCharSetWithRuleRef() throws Exception {
String grammar = // might be a useful feature to add someday
String[] pair = new String[] {
"grammar T;\n" + "grammar T;\n" +
"a : A {System.out.println($A.text);} ;\n" + "a : A {System.out.println($A.text);} ;\n" +
"A : ~('a'|B) ;\n" + "A : ~('a'|B) ;\n" +
"B : 'b' ;\n"; "B : 'b' ;\n",
String found = execParser("T.g4", grammar, "TParser", "TLexer", "error(134): T.g4:3:10: lexer set element B is invalid (either lexer rule ref or literal with > 1 char)\n"
"a", "x", debug); };
assertEquals("x\n", found); super.testErrors(pair, true);
}
@Test public void testNotCharSetWithRuleRef2() throws Exception {
String grammar =
"grammar T;\n" +
"a : A {System.out.println($A.text);} ;\n" +
"A : ~('a'|B) ;\n" +
"B : 'b'|'c' ;\n";
String found = execParser("T.g4", grammar, "TParser", "TLexer",
"a", "x", debug);
assertEquals("x\n", found);
} }
@Test public void testNotCharSetWithRuleRef3() throws Exception { @Test public void testNotCharSetWithRuleRef3() throws Exception {
String grammar = String grammar =
"grammar T;\n" + "grammar T;\n" +
"a : A {System.out.println($A.text);} ;\n" + "a : A {System.out.println($A.text);} ;\n" +
"A : ('a'|B) ;\n" + "A : ('a'|B) ;\n" + // this doesn't collapse to set but works
"fragment\n" + "fragment\n" +
"B : ~('a'|'c') ;\n"; "B : ~('a'|'c') ;\n";
String found = execParser("T.g4", grammar, "TParser", "TLexer", String found = execParser("T.g4", grammar, "TParser", "TLexer",
@ -260,20 +250,6 @@ public class TestSets extends BaseTest {
assertEquals("x\n", found); assertEquals("x\n", found);
} }
@Test public void testNotCharSetWithRuleRef4() throws Exception {
String grammar =
"grammar T;\n" +
"a : A {System.out.println($A.text);} ;\n" +
"A : ('a'|B) ;\n" +
"fragment\n" +
"B : ~('a'|C) ;\n" +
"fragment\n" +
"C : 'c'|'d' ;\n ";
String found = execParser("T.g4", grammar, "TParser", "TLexer",
"a", "x", debug);
assertEquals("x\n", found);
}
@Test public void testCharSetLiteral() throws Exception { @Test public void testCharSetLiteral() throws Exception {
String grammar = String grammar =
"grammar T;\n" + "grammar T;\n" +