Added comments to checkForUnreachableTokens and getSingleTokenValues methods.

This commit is contained in:
Ivan Kochurkin 2017-10-28 16:29:20 +03:00
parent 58349e5ad3
commit f4c4e8b4a7
1 changed files with 40 additions and 20 deletions

View File

@ -327,6 +327,16 @@ public class SymbolChecks {
}
}
/**
* Algorithm steps:
* 1. Collect all simple string literals (i.e. 'asdf', 'as' 'df', but not [a-z]+, 'a'..'z')
* for all lexer rules in each mode except of autogenerated tokens ({@link #getSingleTokenValues(Rule) getSingleTokenValues})
* 2. Compare every string literal with each other ({@link #checkForOverlap(Grammar, Rule, Rule, List<String>, List<String>) checkForOverlap})
* and throw TOKEN_UNREACHABLE warning if the same string found.
* Complexity: O(m * n^2 / 2), approximately equals to O(n^2)
* where m - number of modes, n - average number of lexer rules per mode.
* See also testUnreachableTokens unit test for details.
*/
public void checkForUnreachableTokens(Grammar g) {
if (g.isLexer()) {
LexerGrammar lexerGrammar = (LexerGrammar) g;
@ -360,33 +370,22 @@ public class SymbolChecks {
}
}
private void checkForOverlap(Grammar g, Rule rule1, Rule rule2, List<String> firstTokenStringValues, List<String> secondTokenStringValues) {
for (int i = 0; i < firstTokenStringValues.size(); i++) {
int secondTokenInd = rule1 == rule2 ? i + 1 : 0; // Compare with self or not
String str1 = firstTokenStringValues.get(i);
for (int j = secondTokenInd; j < secondTokenStringValues.size(); j++) {
String str2 = secondTokenStringValues.get(j);
if (str1.equals(str2)) {
errMgr.grammarError(ErrorType.TOKEN_UNREACHABLE, g.fileName,
((GrammarAST) rule2.ast.getChild(0)).token, rule2.name, str2, rule1.name);
}
}
}
}
public List<String> getSingleTokenValues(Rule rule)
/**
* {@return} list of simple string literals for rule {@param rule}
*/
private List<String> getSingleTokenValues(Rule rule)
{
List<String> values = new ArrayList<>();
for (Alternative alt : rule.alt) {
if (alt != null) {
// select first alt if token has a command
Tree rootNode = alt.ast.getChildCount() == 2 &&
alt.ast.getChild(0) instanceof AltAST && alt.ast.getChild(1) instanceof GrammarAST
? alt.ast.getChild(0)
: alt.ast;
alt.ast.getChild(0) instanceof AltAST && alt.ast.getChild(1) instanceof GrammarAST
? alt.ast.getChild(0)
: alt.ast;
if (rootNode.getTokenStartIndex() == -1) {
continue; // ignore tokens from parser that start as T__
continue; // ignore autogenerated tokens from combined grammars that start with T__
}
// Ignore alt if contains not only string literals (repetition, optional)
@ -417,8 +416,29 @@ public class SymbolChecks {
}
return values;
}
// CAN ONLY CALL THE TWO NEXT METHODS AFTER GRAMMAR HAS RULE DEFS (see semanticpipeline)
/**
* For same rule compare values from next index:
* TOKEN_WITH_SAME_VALUES: 'asdf' | 'asdf';
* For different rules compare from start value:
* TOKEN1: 'asdf';
* TOKEN2: 'asdf';
*/
private void checkForOverlap(Grammar g, Rule rule1, Rule rule2, List<String> firstTokenStringValues, List<String> secondTokenStringValues) {
for (int i = 0; i < firstTokenStringValues.size(); i++) {
int secondTokenInd = rule1 == rule2 ? i + 1 : 0;
String str1 = firstTokenStringValues.get(i);
for (int j = secondTokenInd; j < secondTokenStringValues.size(); j++) {
String str2 = secondTokenStringValues.get(j);
if (str1.equals(str2)) {
errMgr.grammarError(ErrorType.TOKEN_UNREACHABLE, g.fileName,
((GrammarAST) rule2.ast.getChild(0)).token, rule2.name, str2, rule1.name);
}
}
}
}
// CAN ONLY CALL THE TWO NEXT METHODS AFTER GRAMMAR HAS RULE DEFS (see semanticpipeline)
public void checkRuleArgs(Grammar g, List<GrammarAST> rulerefs) {
if (rulerefs == null) return;
for (GrammarAST ref : rulerefs) {