Merge pull request #2358 from KvanTTT/AssignTokenNamesToStringLiteralsInParseRuleContexts

Assign token names to string literals in parse rule contexts
This commit is contained in:
Terence Parr 2018-11-19 11:06:42 -08:00 committed by GitHub
commit 5fc056d1d1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 89 additions and 10 deletions

View File

@ -31,6 +31,7 @@ import java.util.ArrayList;
import java.util.List;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
public class TestCodeGeneration extends BaseJavaToolTest {
@Before
@ -52,6 +53,24 @@ public class TestCodeGeneration extends BaseJavaToolTest {
}
}
/**
 * A parser rule that references the literal {@code 't1'} exactly once, where
 * that literal is also declared as the lexer rule {@code Token}, is expected
 * to yield at least one evaluation entry for the search text
 * {@code "() { return getToken("}.
 */
@Test
public void AssignTokenNamesToStringLiteralsInGeneratedParserRuleContexts() throws Exception {
    String grammarText =
        "grammar T;\n" +
        "root: 't1';\n" +
        "Token: 't1';";
    String getterPrefix = "() { return getToken(";

    List<String> matches = getEvalInfoForString(grammarText, getterPrefix);

    // At least one entry must be reported for the single-token getter.
    assertNotEquals(0, matches.size());
}
/**
 * A parser rule that references the literal {@code 't1'} twice, where that
 * literal is also declared as the lexer rule {@code Token}, is expected to
 * yield at least one evaluation entry for the search text
 * {@code "() { return getTokens("}.
 */
@Test
public void AssignTokenNamesToStringLiteralArraysInGeneratedParserRuleContexts() throws Exception {
    String grammarText =
        "grammar T;\n" +
        "root: 't1' 't1';\n" +
        "Token: 't1';";
    String getterPrefix = "() { return getTokens(";

    List<String> matches = getEvalInfoForString(grammarText, getterPrefix);

    // At least one entry must be reported for the list-returning getter.
    assertNotEquals(0, matches.size());
}
/** Add tags around each attribute/template/value write */
public static class DebugInterpreter extends Interpreter {
List<String> evals = new ArrayList<String>();

View File

@ -149,6 +149,16 @@ public class ElementFrequenciesVisitor extends GrammarTreeVisitor {
minFrequencies.peek().add(ref.getText());
}
/**
 * Counts a string literal reference under the name of the token it maps to.
 * Literals with no token name, or whose token name starts with
 * {@code "T__"}, are not counted.
 *
 * @param ref the string literal node being visited
 */
@Override
public void stringRef(TerminalAST ref) {
    String name = ref.g.getTokenName(ref.getText());
    if (name == null || name.startsWith("T__")) {
        return;
    }
    frequencies.peek().add(name);
    minFrequencies.peek().add(name);
}
/*
* Parser rules
*/

View File

@ -7,6 +7,7 @@
package org.antlr.v4.codegen.model;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.v4.codegen.OutputModelFactory;
import org.antlr.v4.codegen.model.decl.AltLabelStructDecl;
@ -32,6 +33,7 @@ import org.antlr.v4.tool.Rule;
import org.antlr.v4.tool.ast.ActionAST;
import org.antlr.v4.tool.ast.AltAST;
import org.antlr.v4.tool.ast.GrammarAST;
import org.antlr.v4.tool.ast.PredAST;
import java.util.ArrayList;
import java.util.Collection;
@ -43,6 +45,7 @@ import java.util.Map;
import java.util.Set;
import static org.antlr.v4.parse.ANTLRParser.RULE_REF;
import static org.antlr.v4.parse.ANTLRParser.STRING_LITERAL;
import static org.antlr.v4.parse.ANTLRParser.TOKEN_REF;
/** */
@ -166,7 +169,7 @@ public class RuleFunction extends OutputModelObject {
}
/** for all alts, find which ref X or r needs List
Must see across alts. If any alt needs X or r as list, then
Must see across alts. If any alt needs X or r as list, then
define as list.
*/
public Set<Decl> getDeclsForAllElements(List<AltAST> altASTs) {
@ -174,21 +177,24 @@ public class RuleFunction extends OutputModelObject {
Set<String> nonOptional = new HashSet<String>();
List<GrammarAST> allRefs = new ArrayList<GrammarAST>();
boolean firstAlt = true;
IntervalSet reftypes = new IntervalSet(RULE_REF, TOKEN_REF, STRING_LITERAL);
for (AltAST ast : altASTs) {
IntervalSet reftypes = new IntervalSet(RULE_REF, TOKEN_REF);
List<GrammarAST> refs = ast.getNodesWithType(reftypes);
List<GrammarAST> refs = getRuleTokens(ast.getNodesWithType(reftypes));
allRefs.addAll(refs);
Pair<FrequencySet<String>, FrequencySet<String>> minAndAltFreq = getElementFrequenciesForAlt(ast);
FrequencySet<String> minFreq = minAndAltFreq.a;
FrequencySet<String> altFreq = minAndAltFreq.b;
for (GrammarAST t : refs) {
String refLabelName = t.getText();
if ( altFreq.count(refLabelName)>1 ) {
needsList.add(refLabelName);
}
String refLabelName = getName(t);
if (firstAlt && minFreq.count(refLabelName) != 0) {
nonOptional.add(refLabelName);
if (refLabelName != null) {
if (altFreq.count(refLabelName) > 1) {
needsList.add(refLabelName);
}
if (firstAlt && minFreq.count(refLabelName) != 0) {
nonOptional.add(refLabelName);
}
}
}
@ -202,7 +208,12 @@ public class RuleFunction extends OutputModelObject {
}
Set<Decl> decls = new LinkedHashSet<Decl>();
for (GrammarAST t : allRefs) {
String refLabelName = t.getText();
String refLabelName = getName(t);
if (refLabelName == null) {
continue;
}
List<Decl> d = getDeclForAltElement(t,
refLabelName,
needsList.contains(refLabelName),
@ -212,6 +223,35 @@ public class RuleFunction extends OutputModelObject {
return decls;
}
/**
 * Returns the given references minus every node that has a {@link PredAST}
 * somewhere on its path to the root (the node itself included).
 *
 * @param refs candidate reference nodes
 * @return a new list with the surviving nodes, in their original order
 */
private List<GrammarAST> getRuleTokens(List<GrammarAST> refs) {
    List<GrammarAST> kept = new ArrayList<>(refs.size());
    for (GrammarAST candidate : refs) {
        boolean insidePredicate = false;
        // Walk from the node up through its ancestors looking for a predicate.
        for (CommonTree node = candidate; node != null; node = node.parent) {
            if (node instanceof PredAST) {
                // Ignore string literals in predicates
                insidePredicate = true;
                break;
            }
        }
        if (insidePredicate) {
            continue;
        }
        kept.add(candidate);
    }
    return kept;
}
/**
 * Resolves the name under which a reference node is declared.
 * For a {@code STRING_LITERAL} node the name is looked up through the node's
 * grammar; for any other node type the node text is used as-is.
 *
 * @param token the reference node
 * @return the resolved name, or {@code null} when no name is available or the
 *         name starts with {@code "T__"}
 */
private String getName(GrammarAST token) {
    String text = token.getText();
    String name;
    if (token.getType() == STRING_LITERAL) {
        name = token.g.getTokenName(text);
    } else {
        name = text;
    }
    // Do not include tokens with auto generated names
    if (name == null || name.startsWith("T__")) {
        return null;
    }
    return name;
}
/** Given list of X and r refs in alt, compute how many of each there are */
protected Pair<FrequencySet<String>, FrequencySet<String>> getElementFrequenciesForAlt(AltAST ast) {
try {

View File

@ -631,6 +631,16 @@ public class Grammar implements AttributeResolver {
return i;
}
/**
 * Looks up the token name associated with a string literal, searching this
 * grammar first and then each {@code parent} grammar in turn.
 *
 * @param literal the string literal text used as the lookup key
 * @return the token name from the first grammar in the chain whose
 *         {@code stringLiteralToTypeMap} contains the literal, or
 *         {@code null} if none does
 */
public String getTokenName(String literal) {
    for (Grammar current = this; current != null; current = current.parent) {
        if (current.stringLiteralToTypeMap.containsKey(literal)) {
            return current.getTokenName(current.stringLiteralToTypeMap.get(literal));
        }
    }
    return null;
}
/** Given a token type, get a meaningful name for it such as the ID
* or string literal. If this is a lexer and the ttype is in the
* char vocabulary, compute an ANTLR-valid (possibly escaped) char literal.