Added comments to checkForUnreachableTokens and getSingleTokenValues methods.

2017-10-28 16:29:20 +03:00 · 2017-10-28 16:29:20 +03:00 · f4c4e8b4a7
parent 58349e5ad3
commit f4c4e8b4a7
1 changed files with 40 additions and 20 deletions
--- a/tool/src/org/antlr/v4/semantics/SymbolChecks.java
+++ b/tool/src/org/antlr/v4/semantics/SymbolChecks.java
@ -327,6 +327,16 @@ public class SymbolChecks {
 		}
 	}

+	/**
+	 * Algorithm steps:
+	 * 1. Collect all simple string literals (e.g. 'asdf', 'as' 'df', but not [a-z]+, 'a'..'z')
+	 *    for all lexer rules in each mode, except for autogenerated tokens ({@link #getSingleTokenValues(Rule) getSingleTokenValues}).
+	 * 2. Compare every string literal with every other one ({@link #checkForOverlap(Grammar, Rule, Rule, List, List) checkForOverlap})
+	 *    and report a TOKEN_UNREACHABLE warning if the same string is found.
+	 * Complexity: O(m * n^2 / 2), approximately equal to O(n^2),
+	 * where m is the number of modes and n is the average number of lexer rules per mode.
+	 * See also testUnreachableTokens unit test for details.
+	 */
 	public void checkForUnreachableTokens(Grammar g) {
 		if (g.isLexer()) {
 			LexerGrammar lexerGrammar = (LexerGrammar) g;
@ -360,21 +370,10 @@ public class SymbolChecks {
 		}
 	}

-	private void checkForOverlap(Grammar g, Rule rule1, Rule rule2, List<String> firstTokenStringValues, List<String> secondTokenStringValues) {
-		for (int i = 0; i < firstTokenStringValues.size(); i++) {
-			int secondTokenInd = rule1 == rule2 ? i + 1 : 0; // Compare with self or not
-			String str1 = firstTokenStringValues.get(i);
-			for (int j = secondTokenInd; j < secondTokenStringValues.size(); j++) {
-				String str2 = secondTokenStringValues.get(j);
-				if (str1.equals(str2)) {
-					errMgr.grammarError(ErrorType.TOKEN_UNREACHABLE, g.fileName,
-							((GrammarAST) rule2.ast.getChild(0)).token, rule2.name, str2, rule1.name);
-				}
-			}
-		}
-	}
-
-	public List<String> getSingleTokenValues(Rule rule)
+	/**
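+	 * Returns the list of simple string literals for the given rule.
+	 *
+	 * @param rule rule whose alternatives are scanned
+	 * @return list of simple string literals found in {@code rule}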
+	 */
+	private List<String> getSingleTokenValues(Rule rule)
 	{
 		List<String> values = new ArrayList<>();
 		for (Alternative alt : rule.alt) {
@ -386,7 +385,7 @@ public class SymbolChecks {
 						: alt.ast;

 				if (rootNode.getTokenStartIndex() == -1) {
-					continue; // ignore tokens from parser that start as T__
+					continue; // ignore autogenerated tokens from combined grammars that start with T__
 				}

 				// Ignore alt if contains not only string literals (repetition, optional)
@ -417,8 +416,29 @@ public class SymbolChecks {
 		}
 		return values;
 	}
-	// CAN ONLY CALL THE TWO NEXT METHODS AFTER GRAMMAR HAS RULE DEFS (see semanticpipeline)

+	/**
+	 * Reports {@code TOKEN_UNREACHABLE} for every string in {@code secondTokenStringValues}
+	 * that equals a string in {@code firstTokenStringValues}.
+	 * <p>
+	 * When both rules are the same object, each value is compared only with the values after it:
+	 * TOKEN_WITH_SAME_VALUES: 'asdf' | 'asdf';
+	 * When the rules differ, each value of the first list is compared with every value of the second:
+	 * TOKEN1: 'asdf';
+	 * TOKEN2: 'asdf';
+	 * <p>
+	 * Each match is reported through {@code errMgr.grammarError}, located at the token of the
+	 * first child of {@code rule2}'s AST, with {@code rule2.name}, the duplicated string and
+	 * {@code rule1.name} as message arguments.
+	 * NOTE(review): in the same-rule case both lists are presumably the same list; confirm at the call site.
+	 *
+	 * @param g grammar whose {@code fileName} is attached to each reported error
+	 * @param rule1 rule associated with {@code firstTokenStringValues}
+	 * @param rule2 rule associated with {@code secondTokenStringValues}; errors are located on this rule
+	 * @param firstTokenStringValues string values to look for
+	 * @param secondTokenStringValues string values searched for duplicates
+	 */
+	private void checkForOverlap(Grammar g, Rule rule1, Rule rule2, List<String> firstTokenStringValues, List<String> secondTokenStringValues) {
+		for (int i = 0; i < firstTokenStringValues.size(); i++) {
+			int secondTokenInd = rule1 == rule2 ? i + 1 : 0; // same rule: start after i so a value is never compared with itself
+			String str1 = firstTokenStringValues.get(i);
+			for (int j = secondTokenInd; j < secondTokenStringValues.size(); j++) {
+				String str2 = secondTokenStringValues.get(j);
+				if (str1.equals(str2)) {
+					errMgr.grammarError(ErrorType.TOKEN_UNREACHABLE, g.fileName,
+							((GrammarAST) rule2.ast.getChild(0)).token, rule2.name, str2, rule1.name);
+				}
+			}
+		}
+	}
+
+	// CAN ONLY CALL THE TWO NEXT METHODS AFTER GRAMMAR HAS RULE DEFS (see semanticpipeline)
 	public void checkRuleArgs(Grammar g, List<GrammarAST> rulerefs) {
 		if (rulerefs == null) return;
 		for (GrammarAST ref : rulerefs) {