Added error for bad sets in lexer. Some tests in TestSets appeared to allow ~('a'|B), but that only worked by chance. ('a'|B) works without the ~, though it doesn't collapse to a set. fixes antlr/antlr4#70

2012-12-01 15:43:15 -08:00 · 2012-12-01 15:43:15 -08:00 · 6d6389eef2
parent fc79752748
commit 6d6389eef2
4 changed files with 27 additions and 33 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -6,6 +6,11 @@ December 1, 2012
  line 2:3 token recognition error at: '\t'
  line 2:4 token recognition error at: '\n'

+* added error for bad sets in lexer; e.g.:
+  lexer set element A is invalid (either lexer rule ref or literal with > 1 char)
+  Some tests in TestSets appeared to allow ~('a'|B), but that only worked by chance.
+  ('a'|B) works, though it doesn't collapse to a set.
+
 November 30, 2012

 * Maven updates (cleanup, unification, and specify Java 6 bootstrap classpath)
--- a/tool/src/org/antlr/v4/automata/LexerATNFactory.java
+++ b/tool/src/org/antlr/v4/automata/LexerATNFactory.java
@ -46,6 +46,7 @@ import org.antlr.v4.runtime.atn.TokensStartState;
 import org.antlr.v4.runtime.atn.Transition;
 import org.antlr.v4.runtime.misc.Interval;
 import org.antlr.v4.runtime.misc.IntervalSet;
+import org.antlr.v4.tool.ErrorType;
 import org.antlr.v4.tool.LexerGrammar;
 import org.antlr.v4.tool.Rule;
 import org.antlr.v4.tool.ast.ActionAST;
@ -179,9 +180,20 @@ public class LexerATNFactory extends ParserATNFactory {
 			else if ( t.getType()==ANTLRParser.LEXER_CHAR_SET ) {
 				set.addAll(getSetFromCharSetLiteral(t));
 			}
-			else {
+			else if ( t.getType()==ANTLRParser.STRING_LITERAL ) {
 				int c = CharSupport.getCharValueFromGrammarCharLiteral(t.getText());
-				set.add(c);
+				if ( c != -1 ) {
+					set.add(c);
+				}
+				else {
+					g.tool.errMgr.grammarError(ErrorType.INVALID_LEXER_SET_ELEMENT,
+											   g.fileName, t.getToken(), t.getText());
+
+				}
+			}
+			else if ( t.getType()==ANTLRParser.TOKEN_REF ) {
+				g.tool.errMgr.grammarError(ErrorType.INVALID_LEXER_SET_ELEMENT,
+										   g.fileName, t.getToken(), t.getText());
 			}
 		}
 		if ( invert ) {
--- a/tool/src/org/antlr/v4/tool/ErrorType.java
+++ b/tool/src/org/antlr/v4/tool/ErrorType.java
@ -115,6 +115,7 @@ public enum ErrorType {
 	LEXER_ACTION_PLACEMENT_ISSUE(132, "action in lexer rule <arg> must be last element of single outermost alt", ErrorSeverity.ERROR),
 	LEXER_COMMAND_PLACEMENT_ISSUE(133, "->command in lexer rule <arg> must be last element of single outermost alt", ErrorSeverity.ERROR),
 	USE_OF_BAD_WORD(134, "symbol <arg> conflicts with generated code in target language or runtime", ErrorSeverity.ERROR),
+	INVALID_LEXER_SET_ELEMENT(134, "lexer set element <arg> is invalid (either lexer rule ref or literal with > 1 char)", ErrorSeverity.ERROR),

 	// Backward incompatibility errors
 	V3_TREE_GRAMMAR(200, "tree grammars are not supported in ANTLR v4", ErrorSeverity.ERROR),
--- a/tool/test/org/antlr/v4/test/TestSets.java
+++ b/tool/test/org/antlr/v4/test/TestSets.java
@ -227,32 +227,22 @@ public class TestSets extends BaseTest {
 	}

 	@Test public void testNotCharSetWithRuleRef() throws Exception {
-		String grammar =
+		// might be a useful feature to add someday
+		String[] pair = new String[] {
 			"grammar T;\n" +
 			"a : A {System.out.println($A.text);} ;\n" +
 			"A : ~('a'|B) ;\n" +
-			"B : 'b' ;\n";
-		String found = execParser("T.g4", grammar, "TParser", "TLexer",
-								  "a", "x", debug);
-		assertEquals("x\n", found);
-	}
-
-	@Test public void testNotCharSetWithRuleRef2() throws Exception {
-		String grammar =
-			"grammar T;\n" +
-			"a : A {System.out.println($A.text);} ;\n" +
-			"A : ~('a'|B) ;\n" +
-			"B : 'b'|'c' ;\n";
-		String found = execParser("T.g4", grammar, "TParser", "TLexer",
-								  "a", "x", debug);
-		assertEquals("x\n", found);
+			"B : 'b' ;\n",
+			"error(134): T.g4:3:10: lexer set element B is invalid (either lexer rule ref or literal with > 1 char)\n"
+		};
+		super.testErrors(pair, true);
 	}

 	@Test public void testNotCharSetWithRuleRef3() throws Exception {
 		String grammar =
 			"grammar T;\n" +
 			"a : A {System.out.println($A.text);} ;\n" +
-			"A : ('a'|B) ;\n" +
+			"A : ('a'|B) ;\n" +  // this doesn't collapse to set but works
 			"fragment\n" +
 			"B : ~('a'|'c') ;\n";
 		String found = execParser("T.g4", grammar, "TParser", "TLexer",
@ -260,20 +250,6 @@ public class TestSets extends BaseTest {
 		assertEquals("x\n", found);
 	}

-	@Test public void testNotCharSetWithRuleRef4() throws Exception {
-		String grammar =
-			"grammar T;\n" +
-			"a : A {System.out.println($A.text);} ;\n" +
-			"A : ('a'|B) ;\n" +
-			"fragment\n" +
-			"B : ~('a'|C) ;\n" +
-			"fragment\n" +
-			"C : 'c'|'d' ;\n ";
-		String found = execParser("T.g4", grammar, "TParser", "TLexer",
-								  "a", "x", debug);
-		assertEquals("x\n", found);
-	}
-
 	@Test public void testCharSetLiteral() throws Exception {
 		String grammar =
 			"grammar T;\n" +