From 3521a00dbbe2f79b80f917bb8fe5e90870e3f497 Mon Sep 17 00:00:00 2001 From: Sam Harwell Date: Mon, 14 Jan 2013 18:02:17 -0600 Subject: [PATCH] Add error 146: non-fragment lexer rule can match the empty string --- .../antlr/v4/analysis/AnalysisPipeline.java | 29 +++++++++++++++++-- tool/src/org/antlr/v4/tool/ErrorType.java | 1 + .../org/antlr/v4/test/TestSymbolIssues.java | 16 ++++++++++ 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/tool/src/org/antlr/v4/analysis/AnalysisPipeline.java b/tool/src/org/antlr/v4/analysis/AnalysisPipeline.java index b24f7c94a..37472b499 100644 --- a/tool/src/org/antlr/v4/analysis/AnalysisPipeline.java +++ b/tool/src/org/antlr/v4/analysis/AnalysisPipeline.java @@ -31,10 +31,14 @@ package org.antlr.v4.analysis; import org.antlr.v4.misc.Utils; +import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.atn.DecisionState; import org.antlr.v4.runtime.atn.LL1Analyzer; import org.antlr.v4.runtime.misc.IntervalSet; +import org.antlr.v4.tool.ErrorType; import org.antlr.v4.tool.Grammar; +import org.antlr.v4.tool.Rule; +import org.antlr.v4.tool.ast.GrammarAST; import java.util.ArrayList; import java.util.Arrays; @@ -52,11 +56,30 @@ public class AnalysisPipeline { lr.check(); if ( !lr.listOfRecursiveCycles.isEmpty() ) return; // bail out - // BUILD DFA FOR EACH DECISION - if ( !g.isLexer() ) processParser(); + if (g.isLexer()) { + processLexer(); + } else { + // BUILD DFA FOR EACH DECISION + processParser(); + } } - void processParser() { + protected void processLexer() { + // make sure all non-fragment lexer rules must match at least one symbol + for (Rule rule : g.rules.values()) { + if (rule.isFragment()) { + continue; + } + + LL1Analyzer analyzer = new LL1Analyzer(g.atn); + IntervalSet look = analyzer.LOOK(g.atn.ruleToStartState[rule.index], null); + if (look.contains(Token.EPSILON)) { + g.tool.errMgr.grammarError(ErrorType.EPSILON_TOKEN, g.fileName, ((GrammarAST)rule.ast.getChild(0)).getToken(), rule.name); + } + } + } + + 
protected void processParser() { g.decisionLOOK = new ArrayList(g.atn.getNumberOfDecisions()+1); for (DecisionState s : g.atn.decisionToState) { g.tool.log("LL1", "\nDECISION "+s.decision+" in rule "+g.getRule(s.ruleIndex).name); diff --git a/tool/src/org/antlr/v4/tool/ErrorType.java b/tool/src/org/antlr/v4/tool/ErrorType.java index 13654332e..542be506b 100644 --- a/tool/src/org/antlr/v4/tool/ErrorType.java +++ b/tool/src/org/antlr/v4/tool/ErrorType.java @@ -128,6 +128,7 @@ public enum ErrorType { LOCAL_CONFLICTS_WITH_RETVAL(143, "local '<arg>' conflicts with return value with same name", ErrorSeverity.ERROR), INVALID_LITERAL_IN_LEXER_SET(144, "multi-character literals are not allowed in lexer sets: <arg>", ErrorSeverity.ERROR), MODE_WITHOUT_RULES(145, "lexer mode '<arg>' must contain at least one non-fragment rule", ErrorSeverity.ERROR), + EPSILON_TOKEN(146, "non-fragment lexer rule '<arg>' can match the empty string", ErrorSeverity.ERROR), // Backward incompatibility errors V3_TREE_GRAMMAR(200, "tree grammars are not supported in ANTLR 4", ErrorSeverity.ERROR), diff --git a/tool/test/org/antlr/v4/test/TestSymbolIssues.java b/tool/test/org/antlr/v4/test/TestSymbolIssues.java index 8c6367bda..d4abe1a72 100644 --- a/tool/test/org/antlr/v4/test/TestSymbolIssues.java +++ b/tool/test/org/antlr/v4/test/TestSymbolIssues.java @@ -152,4 +152,20 @@ public class TestSymbolIssues extends BaseTest { testErrors(test, false); } + + @Test public void testEmptyLexerRuleDetection() throws Exception { + String[] test = { + "lexer grammar L;\n" + + "A : 'a';\n" + + "WS : [ \t]* -> skip;\n" + + "mode X;\n" + + " B : C;\n" + + " fragment C : A | (A C)?;", + + "error(" + ErrorType.EPSILON_TOKEN.code + "): L.g4:3:0: non-fragment lexer rule 'WS' can match the empty string\n" + + "error(" + ErrorType.EPSILON_TOKEN.code + "): L.g4:5:2: non-fragment lexer rule 'B' can match the empty string\n" + }; + + testErrors(test, false); + } }