forked from jasder/antlr
Merge pull request #2066 from KvanTTT/unreachable-tokens
New unreachable token value warning
This commit is contained in:
commit
489a11b7a2
|
@ -401,4 +401,39 @@ public class TestSymbolIssues extends BaseJavaToolTest {
|
|||
|
||||
testErrors(test, false);
|
||||
}
|
||||
|
||||
@Test public void testUnreachableTokens() {
|
||||
String[] test = {
|
||||
"lexer grammar Test;\n" +
|
||||
"TOKEN1: 'as' 'df' | 'qwer';\n" +
|
||||
"TOKEN2: [0-9];\n" +
|
||||
"TOKEN3: 'asdf';\n" +
|
||||
"TOKEN4: 'q' 'w' 'e' 'r' | A;\n" +
|
||||
"TOKEN5: 'aaaa';\n" +
|
||||
"TOKEN6: 'asdf';\n" +
|
||||
"TOKEN7: 'qwer'+;\n" +
|
||||
"TOKEN8: 'a' 'b' | 'b' | 'a' 'b';\n" +
|
||||
"fragment\n" +
|
||||
"TOKEN9: 'asdf' | 'qwer' | 'qwer';\n" +
|
||||
"TOKEN10: '\\r\\n' | '\\r\\n';\n" +
|
||||
"TOKEN11: '\\r\\n';\n" +
|
||||
"\n" +
|
||||
"mode MODE1;\n" +
|
||||
"TOKEN12: 'asdf';\n" +
|
||||
"\n" +
|
||||
"fragment A: 'A';",
|
||||
|
||||
"warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:4:0: One of the token TOKEN3 values unreachable. asdf is always overlapped by token TOKEN1\n" +
|
||||
"warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:5:0: One of the token TOKEN4 values unreachable. qwer is always overlapped by token TOKEN1\n" +
|
||||
"warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:7:0: One of the token TOKEN6 values unreachable. asdf is always overlapped by token TOKEN1\n" +
|
||||
"warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:7:0: One of the token TOKEN6 values unreachable. asdf is always overlapped by token TOKEN3\n" +
|
||||
"warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:9:0: One of the token TOKEN8 values unreachable. ab is always overlapped by token TOKEN8\n" +
|
||||
"warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:11:0: One of the token TOKEN9 values unreachable. qwer is always overlapped by token TOKEN9\n" +
|
||||
"warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:12:0: One of the token TOKEN10 values unreachable. \\r\\n is always overlapped by token TOKEN10\n" +
|
||||
"warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:13:0: One of the token TOKEN11 values unreachable. \\r\\n is always overlapped by token TOKEN10\n" +
|
||||
"warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:13:0: One of the token TOKEN11 values unreachable. \\r\\n is always overlapped by token TOKEN10\n"
|
||||
};
|
||||
|
||||
testErrors(test, false);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -269,11 +269,11 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest {
|
|||
"grammar A;\n" +
|
||||
"tokens{Foo}\n" +
|
||||
"b : Foo ;\n" +
|
||||
"X : 'foo' -> popmode;\n" + // "meant" to use -> popMode
|
||||
"Y : 'foo' -> token(Foo);", // "meant" to use -> type(Foo)
|
||||
"X : 'foo1' -> popmode;\n" + // "meant" to use -> popMode
|
||||
"Y : 'foo2' -> token(Foo);", // "meant" to use -> type(Foo)
|
||||
|
||||
"error(" + ErrorType.INVALID_LEXER_COMMAND.code + "): A.g4:4:13: lexer command popmode does not exist or is not supported by the current target\n" +
|
||||
"error(" + ErrorType.INVALID_LEXER_COMMAND.code + "): A.g4:5:13: lexer command token does not exist or is not supported by the current target\n"
|
||||
"error(" + ErrorType.INVALID_LEXER_COMMAND.code + "): A.g4:4:14: lexer command popmode does not exist or is not supported by the current target\n" +
|
||||
"error(" + ErrorType.INVALID_LEXER_COMMAND.code + "): A.g4:5:14: lexer command token does not exist or is not supported by the current target\n"
|
||||
};
|
||||
super.testErrors(pair, true);
|
||||
}
|
||||
|
@ -283,11 +283,11 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest {
|
|||
"grammar A;\n" +
|
||||
"tokens{Foo}\n" +
|
||||
"b : Foo ;\n" +
|
||||
"X : 'foo' -> popMode(Foo);\n" + // "meant" to use -> popMode
|
||||
"Y : 'foo' -> type;", // "meant" to use -> type(Foo)
|
||||
"X : 'foo1' -> popMode(Foo);\n" + // "meant" to use -> popMode
|
||||
"Y : 'foo2' -> type;", // "meant" to use -> type(Foo)
|
||||
|
||||
"error(" + ErrorType.UNWANTED_LEXER_COMMAND_ARGUMENT.code + "): A.g4:4:13: lexer command popMode does not take any arguments\n" +
|
||||
"error(" + ErrorType.MISSING_LEXER_COMMAND_ARGUMENT.code + "): A.g4:5:13: missing argument for lexer command type\n"
|
||||
"error(" + ErrorType.UNWANTED_LEXER_COMMAND_ARGUMENT.code + "): A.g4:4:14: lexer command popMode does not take any arguments\n" +
|
||||
"error(" + ErrorType.MISSING_LEXER_COMMAND_ARGUMENT.code + "): A.g4:5:14: missing argument for lexer command type\n"
|
||||
};
|
||||
super.testErrors(pair, true);
|
||||
}
|
||||
|
|
|
@ -108,6 +108,7 @@ public class SemanticPipeline {
|
|||
}
|
||||
|
||||
symcheck.checkForModeConflicts(g);
|
||||
symcheck.checkForUnreachableTokens(g);
|
||||
|
||||
assignChannelTypes(g, collector.channelDefs);
|
||||
|
||||
|
|
|
@ -7,7 +7,9 @@
|
|||
package org.antlr.v4.semantics;
|
||||
|
||||
import org.antlr.runtime.tree.CommonTree;
|
||||
import org.antlr.runtime.tree.Tree;
|
||||
import org.antlr.v4.automata.LexerATNFactory;
|
||||
import org.antlr.v4.parse.ANTLRLexer;
|
||||
import org.antlr.v4.parse.ANTLRParser;
|
||||
import org.antlr.v4.runtime.Token;
|
||||
import org.antlr.v4.tool.Alternative;
|
||||
|
@ -23,6 +25,7 @@ import org.antlr.v4.tool.LexerGrammar;
|
|||
import org.antlr.v4.tool.Rule;
|
||||
import org.antlr.v4.tool.ast.AltAST;
|
||||
import org.antlr.v4.tool.ast.GrammarAST;
|
||||
import org.antlr.v4.tool.ast.TerminalAST;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
|
@ -39,42 +42,31 @@ import java.util.Set;
|
|||
* Side-effect: strip away redef'd rules.
|
||||
*/
|
||||
public class SymbolChecks {
|
||||
Grammar g;
|
||||
SymbolCollector collector;
|
||||
Map<String, Rule> nameToRuleMap = new HashMap<String, Rule>();
|
||||
Grammar g;
|
||||
SymbolCollector collector;
|
||||
Map<String, Rule> nameToRuleMap = new HashMap<String, Rule>();
|
||||
Set<String> tokenIDs = new HashSet<String>();
|
||||
Map<String, Set<String>> actionScopeToActionNames = new HashMap<String, Set<String>>();
|
||||
// DoubleKeyMap<String, String, GrammarAST> namedActions =
|
||||
// new DoubleKeyMap<String, String, GrammarAST>();
|
||||
Map<String, Set<String>> actionScopeToActionNames = new HashMap<String, Set<String>>();
|
||||
|
||||
public ErrorManager errMgr;
|
||||
|
||||
protected final Set<String> reservedNames = new HashSet<String>();
|
||||
|
||||
{
|
||||
reservedNames.addAll(LexerATNFactory.getCommonConstants());
|
||||
}
|
||||
|
||||
public SymbolChecks(Grammar g, SymbolCollector collector) {
|
||||
this.g = g;
|
||||
this.collector = collector;
|
||||
public SymbolChecks(Grammar g, SymbolCollector collector) {
|
||||
this.g = g;
|
||||
this.collector = collector;
|
||||
this.errMgr = g.tool.errMgr;
|
||||
|
||||
for (GrammarAST tokenId : collector.tokenIDRefs) {
|
||||
tokenIDs.add(tokenId.getText());
|
||||
}
|
||||
/*
|
||||
System.out.println("rules="+collector.rules);
|
||||
System.out.println("rulerefs="+collector.rulerefs);
|
||||
System.out.println("tokenIDRefs="+collector.tokenIDRefs);
|
||||
System.out.println("terminals="+collector.terminals);
|
||||
System.out.println("strings="+collector.strings);
|
||||
System.out.println("tokensDef="+collector.tokensDefs);
|
||||
System.out.println("actions="+collector.actions);
|
||||
System.out.println("scopes="+collector.scopes);
|
||||
*/
|
||||
}
|
||||
for (GrammarAST tokenId : collector.tokenIDRefs) {
|
||||
tokenIDs.add(tokenId.getText());
|
||||
}
|
||||
}
|
||||
|
||||
public void process() {
|
||||
public void process() {
|
||||
// methods affect fields, but no side-effects outside this object
|
||||
// So, call order sensitive
|
||||
// First collect all rules for later use in checkForLabelConflict()
|
||||
|
@ -83,7 +75,6 @@ public class SymbolChecks {
|
|||
}
|
||||
checkReservedNames(g.rules.values());
|
||||
checkActionRedefinitions(collector.namedActions);
|
||||
checkForTokenConflicts(collector.tokenIDRefs); // sets tokenIDs
|
||||
checkForLabelConflicts(g.rules.values());
|
||||
}
|
||||
|
||||
|
@ -116,21 +107,14 @@ public class SymbolChecks {
|
|||
}
|
||||
}
|
||||
|
||||
public void checkForTokenConflicts(List<GrammarAST> tokenIDRefs) {
|
||||
// for (GrammarAST a : tokenIDRefs) {
|
||||
// Token t = a.token;
|
||||
// String ID = t.getText();
|
||||
// tokenIDs.add(ID);
|
||||
// }
|
||||
}
|
||||
|
||||
/** Make sure a label doesn't conflict with another symbol.
|
||||
* Labels must not conflict with: rules, tokens, scope names,
|
||||
* return values, parameters, and rule-scope dynamic attributes
|
||||
* defined in surrounding rule. Also they must have same type
|
||||
* for repeated defs.
|
||||
*/
|
||||
public void checkForLabelConflicts(Collection<Rule> rules) {
|
||||
/**
|
||||
* Make sure a label doesn't conflict with another symbol.
|
||||
* Labels must not conflict with: rules, tokens, scope names,
|
||||
* return values, parameters, and rule-scope dynamic attributes
|
||||
* defined in surrounding rule. Also they must have same type
|
||||
* for repeated defs.
|
||||
*/
|
||||
public void checkForLabelConflicts(Collection<Rule> rules) {
|
||||
for (Rule r : rules) {
|
||||
checkForAttributeConflicts(r);
|
||||
|
||||
|
@ -213,7 +197,7 @@ public class SymbolChecks {
|
|||
// Such behavior is referring to the fact that the warning is typically reported on the actual label redefinition,
|
||||
// but for left-recursive rules the warning is reported on the enclosing rule.
|
||||
org.antlr.runtime.Token token = r instanceof LeftRecursiveRule
|
||||
? ((GrammarAST) r.ast.getChild(0)).getToken()
|
||||
? ((GrammarAST) r.ast.getChild(0)).getToken()
|
||||
: labelPair.label.token;
|
||||
errMgr.grammarError(
|
||||
ErrorType.LABEL_TYPE_CONFLICT,
|
||||
|
@ -227,7 +211,7 @@ public class SymbolChecks {
|
|||
(labelPair.type.equals(LabelType.RULE_LABEL) || labelPair.type.equals(LabelType.RULE_LIST_LABEL))) {
|
||||
|
||||
org.antlr.runtime.Token token = r instanceof LeftRecursiveRule
|
||||
? ((GrammarAST) r.ast.getChild(0)).getToken()
|
||||
? ((GrammarAST) r.ast.getChild(0)).getToken()
|
||||
: labelPair.label.token;
|
||||
String prevLabelOp = prevLabelPair.type.equals(LabelType.RULE_LIST_LABEL) ? "+=" : "=";
|
||||
String labelOp = labelPair.type.equals(LabelType.RULE_LIST_LABEL) ? "+=" : "=";
|
||||
|
@ -291,11 +275,11 @@ public class SymbolChecks {
|
|||
for (Attribute attribute : attributes.attributes.values()) {
|
||||
if (ruleNames.contains(attribute.name)) {
|
||||
errMgr.grammarError(
|
||||
errorType,
|
||||
g.fileName,
|
||||
attribute.token != null ? attribute.token : ((GrammarAST)r.ast.getChild(0)).token,
|
||||
attribute.name,
|
||||
r.name);
|
||||
errorType,
|
||||
g.fileName,
|
||||
attribute.token != null ? attribute.token : ((GrammarAST) r.ast.getChild(0)).token,
|
||||
attribute.name,
|
||||
r.name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -308,11 +292,11 @@ public class SymbolChecks {
|
|||
Set<String> conflictingKeys = attributes.intersection(referenceAttributes);
|
||||
for (String key : conflictingKeys) {
|
||||
errMgr.grammarError(
|
||||
errorType,
|
||||
g.fileName,
|
||||
attributes.get(key).token != null ? attributes.get(key).token : ((GrammarAST) r.ast.getChild(0)).token,
|
||||
key,
|
||||
r.name);
|
||||
errorType,
|
||||
g.fileName,
|
||||
attributes.get(key).token != null ? attributes.get(key).token : ((GrammarAST)r.ast.getChild(0)).token,
|
||||
key,
|
||||
r.name);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -341,8 +325,122 @@ public class SymbolChecks {
|
|||
}
|
||||
}
|
||||
|
||||
// CAN ONLY CALL THE TWO NEXT METHODS AFTER GRAMMAR HAS RULE DEFS (see semanticpipeline)
|
||||
/**
|
||||
* Algorithm steps:
|
||||
* 1. Collect all simple string literals (i.e. 'asdf', 'as' 'df', but not [a-z]+, 'a'..'z')
|
||||
* for all lexer rules in each mode except of autogenerated tokens ({@link #getSingleTokenValues(Rule) getSingleTokenValues})
|
||||
* 2. Compare every string literal with each other ({@link #checkForOverlap(Grammar, Rule, Rule, List<String>, List<String>) checkForOverlap})
|
||||
* and throw TOKEN_UNREACHABLE warning if the same string found.
|
||||
* Complexity: O(m * n^2 / 2), approximately equals to O(n^2)
|
||||
* where m - number of modes, n - average number of lexer rules per mode.
|
||||
* See also testUnreachableTokens unit test for details.
|
||||
*/
|
||||
public void checkForUnreachableTokens(Grammar g) {
|
||||
if (g.isLexer()) {
|
||||
LexerGrammar lexerGrammar = (LexerGrammar)g;
|
||||
for (List<Rule> rules : lexerGrammar.modes.values()) {
|
||||
// Collect string literal lexer rules for each mode
|
||||
List<Rule> stringLiteralRules = new ArrayList<>();
|
||||
List<List<String>> stringLiteralValues = new ArrayList<>();
|
||||
for (int i = 0; i < rules.size(); i++) {
|
||||
Rule rule = rules.get(i);
|
||||
|
||||
List<String> ruleStringAlts = getSingleTokenValues(rule);
|
||||
if (ruleStringAlts != null && ruleStringAlts.size() > 0) {
|
||||
stringLiteralRules.add(rule);
|
||||
stringLiteralValues.add(ruleStringAlts);
|
||||
}
|
||||
}
|
||||
|
||||
// Check string sets intersection
|
||||
for (int i = 0; i < stringLiteralRules.size(); i++) {
|
||||
List<String> firstTokenStringValues = stringLiteralValues.get(i);
|
||||
Rule rule1 = stringLiteralRules.get(i);
|
||||
checkForOverlap(g, rule1, rule1, firstTokenStringValues, stringLiteralValues.get(i));
|
||||
|
||||
// Check fragment rules only with themself
|
||||
if (!rule1.isFragment()) {
|
||||
for (int j = i + 1; j < stringLiteralRules.size(); j++) {
|
||||
Rule rule2 = stringLiteralRules.get(j);
|
||||
if (!rule2.isFragment()) {
|
||||
checkForOverlap(g, rule1, stringLiteralRules.get(j), firstTokenStringValues, stringLiteralValues.get(j));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* {@return} list of simple string literals for rule {@param rule}
|
||||
*/
|
||||
private List<String> getSingleTokenValues(Rule rule)
|
||||
{
|
||||
List<String> values = new ArrayList<>();
|
||||
for (Alternative alt : rule.alt) {
|
||||
if (alt != null) {
|
||||
// select first alt if token has a command
|
||||
Tree rootNode = alt.ast.getChildCount() == 2 &&
|
||||
alt.ast.getChild(0) instanceof AltAST && alt.ast.getChild(1) instanceof GrammarAST
|
||||
? alt.ast.getChild(0)
|
||||
: alt.ast;
|
||||
|
||||
if (rootNode.getTokenStartIndex() == -1) {
|
||||
continue; // ignore autogenerated tokens from combined grammars that start with T__
|
||||
}
|
||||
|
||||
// Ignore alt if contains not only string literals (repetition, optional)
|
||||
boolean ignore = false;
|
||||
StringBuilder currentValue = new StringBuilder();
|
||||
for (int i = 0; i < rootNode.getChildCount(); i++) {
|
||||
Tree child = rootNode.getChild(i);
|
||||
if (!(child instanceof TerminalAST)) {
|
||||
ignore = true;
|
||||
break;
|
||||
}
|
||||
|
||||
TerminalAST terminalAST = (TerminalAST)child;
|
||||
if (terminalAST.token.getType() != ANTLRLexer.STRING_LITERAL) {
|
||||
ignore = true;
|
||||
break;
|
||||
}
|
||||
else {
|
||||
String text = terminalAST.token.getText();
|
||||
currentValue.append(text.substring(1, text.length() - 1));
|
||||
}
|
||||
}
|
||||
|
||||
if (!ignore) {
|
||||
values.add(currentValue.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
return values;
|
||||
}
|
||||
|
||||
/**
|
||||
* For same rule compare values from next index:
|
||||
* TOKEN_WITH_SAME_VALUES: 'asdf' | 'asdf';
|
||||
* For different rules compare from start value:
|
||||
* TOKEN1: 'asdf';
|
||||
* TOKEN2: 'asdf';
|
||||
*/
|
||||
private void checkForOverlap(Grammar g, Rule rule1, Rule rule2, List<String> firstTokenStringValues, List<String> secondTokenStringValues) {
|
||||
for (int i = 0; i < firstTokenStringValues.size(); i++) {
|
||||
int secondTokenInd = rule1 == rule2 ? i + 1 : 0;
|
||||
String str1 = firstTokenStringValues.get(i);
|
||||
for (int j = secondTokenInd; j < secondTokenStringValues.size(); j++) {
|
||||
String str2 = secondTokenStringValues.get(j);
|
||||
if (str1.equals(str2)) {
|
||||
errMgr.grammarError(ErrorType.TOKEN_UNREACHABLE, g.fileName,
|
||||
((GrammarAST) rule2.ast.getChild(0)).token, rule2.name, str2, rule1.name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// CAN ONLY CALL THE TWO NEXT METHODS AFTER GRAMMAR HAS RULE DEFS (see semanticpipeline)
|
||||
public void checkRuleArgs(Grammar g, List<GrammarAST> rulerefs) {
|
||||
if ( rulerefs==null ) return;
|
||||
for (GrammarAST ref : rulerefs) {
|
||||
|
@ -351,12 +449,12 @@ public class SymbolChecks {
|
|||
GrammarAST arg = (GrammarAST)ref.getFirstChildWithType(ANTLRParser.ARG_ACTION);
|
||||
if ( arg!=null && (r==null || r.args==null) ) {
|
||||
errMgr.grammarError(ErrorType.RULE_HAS_NO_ARGS,
|
||||
g.fileName, ref.token, ruleName);
|
||||
g.fileName, ref.token, ruleName);
|
||||
|
||||
}
|
||||
else if ( arg==null && (r!=null&&r.args!=null) ) {
|
||||
else if ( arg==null && (r!=null && r.args!=null) ) {
|
||||
errMgr.grammarError(ErrorType.MISSING_RULE_ARGS,
|
||||
g.fileName, ref.token, ruleName);
|
||||
g.fileName, ref.token, ruleName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -365,18 +463,18 @@ public class SymbolChecks {
|
|||
for (GrammarAST dot : qualifiedRuleRefs) {
|
||||
GrammarAST grammar = (GrammarAST)dot.getChild(0);
|
||||
GrammarAST rule = (GrammarAST)dot.getChild(1);
|
||||
g.tool.log("semantics", grammar.getText()+"."+rule.getText());
|
||||
g.tool.log("semantics", grammar.getText()+"."+rule.getText());
|
||||
Grammar delegate = g.getImportedGrammar(grammar.getText());
|
||||
if ( delegate==null ) {
|
||||
errMgr.grammarError(ErrorType.NO_SUCH_GRAMMAR_SCOPE,
|
||||
g.fileName, grammar.token, grammar.getText(),
|
||||
rule.getText());
|
||||
g.fileName, grammar.token, grammar.getText(),
|
||||
rule.getText());
|
||||
}
|
||||
else {
|
||||
if ( g.getRule(grammar.getText(), rule.getText())==null ) {
|
||||
errMgr.grammarError(ErrorType.NO_SUCH_RULE_IN_SCOPE,
|
||||
g.fileName, rule.token, grammar.getText(),
|
||||
rule.getText());
|
||||
g.fileName, rule.token, grammar.getText(),
|
||||
rule.getText());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1074,6 +1074,21 @@ public enum ErrorType {
|
|||
"unicode property escapes not allowed in lexer charset range: <arg>",
|
||||
ErrorSeverity.ERROR),
|
||||
|
||||
/**
|
||||
* Compiler Warning 184.
|
||||
*
|
||||
* <p>The token value overlapped by another token or self</p>
|
||||
*
|
||||
* <pre>
|
||||
* TOKEN1: 'value';
|
||||
* TOKEN2: 'value'; // warning
|
||||
* </pre>
|
||||
*/
|
||||
TOKEN_UNREACHABLE(
|
||||
184,
|
||||
"One of the token <arg> values unreachable. <arg2> is always overlapped by token <arg3>",
|
||||
ErrorSeverity.WARNING),
|
||||
|
||||
/*
|
||||
* Backward incompatibility errors
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue