diff --git a/tool/src/org/antlr/v4/semantics/SymbolChecks.java b/tool/src/org/antlr/v4/semantics/SymbolChecks.java index 172d934cd..cf69c3ae5 100644 --- a/tool/src/org/antlr/v4/semantics/SymbolChecks.java +++ b/tool/src/org/antlr/v4/semantics/SymbolChecks.java @@ -327,6 +327,16 @@ public class SymbolChecks { } } + /** + * Algorithm steps: + * 1. Collect all simple string literals (i.e. 'asdf', 'as' 'df', but not [a-z]+, 'a'..'z') + * for all lexer rules in each mode except of autogenerated tokens ({@link #getSingleTokenValues(Rule) getSingleTokenValues}) + * 2. Compare every string literal with each other ({@link #checkForOverlap(Grammar, Rule, Rule, List, List) checkForOverlap}) + * and throw TOKEN_UNREACHABLE warning if the same string found. + * Complexity: O(m * n^2 / 2), approximately equals to O(n^2) + * where m - number of modes, n - average number of lexer rules per mode. + * See also testUnreachableTokens unit test for details. + */ public void checkForUnreachableTokens(Grammar g) { if (g.isLexer()) { LexerGrammar lexerGrammar = (LexerGrammar) g; @@ -360,33 +370,22 @@ public class SymbolChecks { } } - private void checkForOverlap(Grammar g, Rule rule1, Rule rule2, List firstTokenStringValues, List secondTokenStringValues) { - for (int i = 0; i < firstTokenStringValues.size(); i++) { - int secondTokenInd = rule1 == rule2 ? i + 1 : 0; // Compare with self or not - String str1 = firstTokenStringValues.get(i); - for (int j = secondTokenInd; j < secondTokenStringValues.size(); j++) { - String str2 = secondTokenStringValues.get(j); - if (str1.equals(str2)) { - errMgr.grammarError(ErrorType.TOKEN_UNREACHABLE, g.fileName, - ((GrammarAST) rule2.ast.getChild(0)).token, rule2.name, str2, rule1.name); - } - } - } - } - - public List getSingleTokenValues(Rule rule) + /** + * {@return} list of simple string literals for rule {@param rule} + */ + private List getSingleTokenValues(Rule rule) { List values = new ArrayList<>(); for (Alternative alt : rule.alt) { if (alt != null) { // select first alt if token has a command Tree rootNode = alt.ast.getChildCount() == 2 && - alt.ast.getChild(0) instanceof AltAST && alt.ast.getChild(1) instanceof GrammarAST - ? alt.ast.getChild(0) - : alt.ast; + alt.ast.getChild(0) instanceof AltAST && alt.ast.getChild(1) instanceof GrammarAST + ? alt.ast.getChild(0) + : alt.ast; if (rootNode.getTokenStartIndex() == -1) { - continue; // ignore tokens from parser that start as T__ + continue; // ignore autogenerated tokens from combined grammars that start with T__ } // Ignore alt if contains not only string literals (repetition, optional) @@ -417,8 +416,29 @@ public class SymbolChecks { } return values; } - // CAN ONLY CALL THE TWO NEXT METHODS AFTER GRAMMAR HAS RULE DEFS (see semanticpipeline) + /** + * For same rule compare values from next index: + * TOKEN_WITH_SAME_VALUES: 'asdf' | 'asdf'; + * For different rules compare from start value: + * TOKEN1: 'asdf'; + * TOKEN2: 'asdf'; + */ + private void checkForOverlap(Grammar g, Rule rule1, Rule rule2, List firstTokenStringValues, List secondTokenStringValues) { + for (int i = 0; i < firstTokenStringValues.size(); i++) { + int secondTokenInd = rule1 == rule2 ? i + 1 : 0; + String str1 = firstTokenStringValues.get(i); + for (int j = secondTokenInd; j < secondTokenStringValues.size(); j++) { + String str2 = secondTokenStringValues.get(j); + if (str1.equals(str2)) { + errMgr.grammarError(ErrorType.TOKEN_UNREACHABLE, g.fileName, + ((GrammarAST) rule2.ast.getChild(0)).token, rule2.name, str2, rule1.name); + } + } + } + } + + // CAN ONLY CALL THE TWO NEXT METHODS AFTER GRAMMAR HAS RULE DEFS (see semanticpipeline) public void checkRuleArgs(Grammar g, List rulerefs) { if (rulerefs == null) return; for (GrammarAST ref : rulerefs) {