Fixed TOKEN_UNREACHABLE warning checks.

This commit is contained in:
Ivan Kochurkin 2017-10-22 15:39:54 +03:00
parent 585325fe12
commit 328cdf3583
2 changed files with 68 additions and 27 deletions

View File

@ -7,10 +7,12 @@
package org.antlr.v4.semantics; package org.antlr.v4.semantics;
import org.antlr.runtime.tree.CommonTree; import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.Tree;
import org.antlr.v4.automata.LexerATNFactory; import org.antlr.v4.automata.LexerATNFactory;
import org.antlr.v4.parse.ANTLRLexer; import org.antlr.v4.parse.ANTLRLexer;
import org.antlr.v4.parse.ANTLRParser; import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.MultiMap; import org.antlr.v4.runtime.misc.MultiMap;
import org.antlr.v4.tool.Alternative; import org.antlr.v4.tool.Alternative;
import org.antlr.v4.tool.Attribute; import org.antlr.v4.tool.Attribute;
@ -320,34 +322,44 @@ public class SymbolChecks {
if (g.isLexer()) { if (g.isLexer()) {
LexerGrammar lexerGrammar = (LexerGrammar) g; LexerGrammar lexerGrammar = (LexerGrammar) g;
for (List<Rule> rules : lexerGrammar.modes.values()) { for (List<Rule> rules : lexerGrammar.modes.values()) {
// Collect string literal lexer rules for each mode
// Collect string literal lexer rules
List<Rule> stringLiteralRules = new ArrayList<>(); List<Rule> stringLiteralRules = new ArrayList<>();
List<List<String>> stringLiteralValues = new ArrayList<>(); List<List<String>> stringLiteralValues = new ArrayList<>();
for (int i = 0; i < rules.size(); i++) { for (int i = 0; i < rules.size(); i++) {
Rule rule = rules.get(i); Rule rule = rules.get(i);
if (!rule.isFragment()) {
List<String> ruleStringAlts = getSingleTokenValues(rule); List<String> ruleStringAlts = getSingleTokenValues(rule);
if (ruleStringAlts != null && ruleStringAlts.size() > 0) { if (ruleStringAlts != null && ruleStringAlts.size() > 0) {
stringLiteralRules.add(rule); stringLiteralRules.add(rule);
stringLiteralValues.add(ruleStringAlts); stringLiteralValues.add(ruleStringAlts);
} }
} }
}
// Check string sets intersection // Check string sets intersection
for (int i = 0; i < stringLiteralRules.size(); i++) { for (int i = 0; i < stringLiteralRules.size(); i++) {
List<String> firstTokenStringValues = stringLiteralValues.get(i); List<String> firstTokenStringValues = stringLiteralValues.get(i);
Rule rule1 = stringLiteralRules.get(i);
checkForOverlap(g, rule1, rule1, firstTokenStringValues, stringLiteralValues.get(i));
for (int j = i + 1; j < stringLiteralRules.size(); j++) { for (int j = i + 1; j < stringLiteralRules.size(); j++) {
List<String> secondTokenStringValues = stringLiteralValues.get(j); checkForOverlap(g, rule1, stringLiteralRules.get(j), firstTokenStringValues, stringLiteralValues.get(j));
for (String str1 : firstTokenStringValues) { }
for (String str2 : secondTokenStringValues) { }
}
}
}
private void checkForOverlap(Grammar g, Rule rule1, Rule rule2, List<String> firstTokenStringValues, List<String> secondTokenStringValues) {
for (int i = 0; i < firstTokenStringValues.size(); i++) {
int secondTokenInd = rule1 == rule2 ? i + 1 : 0; // Compare with self or not
String str1 = firstTokenStringValues.get(i);
for (int j = secondTokenInd; j < secondTokenStringValues.size(); j++) {
String str2 = secondTokenStringValues.get(j);
if (str1.equals(str2)) { if (str1.equals(str2)) {
errMgr.grammarError(ErrorType.TOKEN_UNREACHABLE, g.fileName, errMgr.grammarError(ErrorType.TOKEN_UNREACHABLE, g.fileName,
((GrammarAST) stringLiteralRules.get(j).ast.getChild(0)).token, ((GrammarAST) rule2.ast.getChild(0)).token, rule2.name, str2, rule1.name);
stringLiteralRules.get(j).name, str2, stringLiteralRules.get(i).name);
}
}
}
}
} }
} }
} }
@ -358,25 +370,44 @@ public class SymbolChecks {
List<String> values = new ArrayList<>(); List<String> values = new ArrayList<>();
for (Alternative alt : rule.alt) { for (Alternative alt : rule.alt) {
if (alt != null) { if (alt != null) {
List<GrammarAST> stringLiterals = alt.ast.getNodesWithType(ANTLRLexer.STRING_LITERAL); // select first alt if token has a command
Tree rootNode = alt.ast.getChildCount() == 2 &&
alt.ast.getChild(0) instanceof AltAST && alt.ast.getChild(1) instanceof GrammarAST
? alt.ast.getChild(0)
: alt.ast;
// TODO: Support all tokens but not only string literals. if (rootNode.getTokenStartIndex() == -1) {
if (stringLiterals.size() == 0 || alt.ast.getChildCount() != stringLiterals.size()) { continue; // ignore tokens from parser that start as T__
continue;
} }
// Ignore the alt if it contains anything other than string literals (e.g. repetition, optional)
boolean ignore = false;
StringBuilder currentValue = new StringBuilder(); StringBuilder currentValue = new StringBuilder();
for (GrammarAST ast : stringLiterals) { for (int i = 0; i < rootNode.getChildCount(); i++) {
TerminalAST stringLiteral = (TerminalAST) ast; Tree child = rootNode.getChild(i);
String text = stringLiteral.token.getText(); if (!(child instanceof TerminalAST)) {
ignore = true;
break;
}
TerminalAST terminalAST = (TerminalAST)child;
if (terminalAST.token.getType() != ANTLRLexer.STRING_LITERAL) {
ignore = true;
break;
}
else {
String text = terminalAST.token.getText();
currentValue.append(text.substring(1, text.length() - 1)); currentValue.append(text.substring(1, text.length() - 1));
} }
}
if (!ignore) {
values.add(currentValue.toString()); values.add(currentValue.toString());
} }
} }
}
return values; return values;
} }
// CAN ONLY CALL THE TWO NEXT METHODS AFTER GRAMMAR HAS RULE DEFS (see semanticpipeline) // CAN ONLY CALL THE TWO NEXT METHODS AFTER GRAMMAR HAS RULE DEFS (see semanticpipeline)
public void checkRuleArgs(Grammar g, List<GrammarAST> rulerefs) { public void checkRuleArgs(Grammar g, List<GrammarAST> rulerefs) {

View File

@ -1074,9 +1074,19 @@ public enum ErrorType {
"unicode property escapes not allowed in lexer charset range: <arg>", "unicode property escapes not allowed in lexer charset range: <arg>",
ErrorSeverity.ERROR), ErrorSeverity.ERROR),
/**
* Compiler Warning 184.
*
* <p>The token value is overlapped by another token or by itself.</p>
*
* <pre>
* TOKEN1: 'value';
* TOKEN2: 'value'; // warning
* </pre>
*/
TOKEN_UNREACHABLE( TOKEN_UNREACHABLE(
184, 184,
"token <arg> unreachable. Its value <arg2> is always overlapped by token <arg3>", "One of the token <arg> values unreachable. <arg2> is always overlapped by token <arg3>",
ErrorSeverity.WARNING), ErrorSeverity.WARNING),
/* /*