From 0a6dbc37b736871f779c5eef9a32d6decd3838f5 Mon Sep 17 00:00:00 2001 From: Ivan Kochurkin Date: Sun, 9 Sep 2018 11:51:15 +0300 Subject: [PATCH 1/4] Grammar.java: add getTokenName(String literal) method --- tool/src/org/antlr/v4/tool/Grammar.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tool/src/org/antlr/v4/tool/Grammar.java b/tool/src/org/antlr/v4/tool/Grammar.java index b86d62235..fc98fcf4a 100644 --- a/tool/src/org/antlr/v4/tool/Grammar.java +++ b/tool/src/org/antlr/v4/tool/Grammar.java @@ -631,6 +631,16 @@ public class Grammar implements AttributeResolver { return i; } + public String getTokenName(String literal) { + Grammar grammar = this; + while (grammar != null) { + if (grammar.stringLiteralToTypeMap.containsKey(literal)) + return grammar.getTokenName(grammar.stringLiteralToTypeMap.get(literal)); + grammar = grammar.parent; + } + return null; + } + /** Given a token type, get a meaningful name for it such as the ID * or string literal. If this is a lexer and the ttype is in the * char vocabulary, compute an ANTLR-valid (possibly escaped) char literal. From d3745c7b9399e88fcd9561c21fa17b3b3816f94a Mon Sep 17 00:00:00 2001 From: Ivan Kochurkin Date: Sun, 9 Sep 2018 11:54:15 +0300 Subject: [PATCH 2/4] RuleFunction: process STRING_LITERAL along with RULE_REF and TOKEN_REF. Add getRuleTokens method --- .../antlr/v4/codegen/model/RuleFunction.java | 60 +++++++++++++++---- 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/tool/src/org/antlr/v4/codegen/model/RuleFunction.java b/tool/src/org/antlr/v4/codegen/model/RuleFunction.java index d8b69b5db..d6b745f0a 100644 --- a/tool/src/org/antlr/v4/codegen/model/RuleFunction.java +++ b/tool/src/org/antlr/v4/codegen/model/RuleFunction.java @@ -7,6 +7,7 @@ package org.antlr.v4.codegen.model; import org.antlr.runtime.RecognitionException; +import org.antlr.runtime.tree.CommonTree; import org.antlr.runtime.tree.CommonTreeNodeStream; import org.antlr.v4.codegen.OutputModelFactory; import org.antlr.v4.codegen.model.decl.AltLabelStructDecl; @@ -32,6 +33,7 @@ import org.antlr.v4.tool.Rule; import org.antlr.v4.tool.ast.ActionAST; import org.antlr.v4.tool.ast.AltAST; import org.antlr.v4.tool.ast.GrammarAST; +import org.antlr.v4.tool.ast.PredAST; import java.util.ArrayList; import java.util.Collection; @@ -43,6 +45,7 @@ import java.util.Map; import java.util.Set; import static org.antlr.v4.parse.ANTLRParser.RULE_REF; +import static org.antlr.v4.parse.ANTLRParser.STRING_LITERAL; import static org.antlr.v4.parse.ANTLRParser.TOKEN_REF; /** */ @@ -166,7 +169,7 @@ public class RuleFunction extends OutputModelObject { } /** for all alts, find which ref X or r needs List - Must see across alts. If any alt needs X or r as list, then + Must see across alts. If any alt needs X or r as list, then define as list. */ public Set getDeclsForAllElements(List altASTs) { @@ -174,21 +177,24 @@ public class RuleFunction extends OutputModelObject { Set nonOptional = new HashSet(); List allRefs = new ArrayList(); boolean firstAlt = true; + IntervalSet reftypes = new IntervalSet(RULE_REF, TOKEN_REF, STRING_LITERAL); for (AltAST ast : altASTs) { - IntervalSet reftypes = new IntervalSet(RULE_REF, TOKEN_REF); - List refs = ast.getNodesWithType(reftypes); + List refs = getRuleTokens(ast.getNodesWithType(reftypes)); allRefs.addAll(refs); Pair, FrequencySet> minAndAltFreq = getElementFrequenciesForAlt(ast); FrequencySet minFreq = minAndAltFreq.a; FrequencySet altFreq = minAndAltFreq.b; for (GrammarAST t : refs) { - String refLabelName = t.getText(); - if ( altFreq.count(refLabelName)>1 ) { - needsList.add(refLabelName); - } + String refLabelName = getName(t); - if (firstAlt && minFreq.count(refLabelName) != 0) { - nonOptional.add(refLabelName); + if (refLabelName != null) { + if (altFreq.count(refLabelName) > 1) { + needsList.add(refLabelName); + } + + if (firstAlt && minFreq.count(refLabelName) != 0) { + nonOptional.add(refLabelName); + } } } @@ -202,7 +208,12 @@ public class RuleFunction extends OutputModelObject { } Set decls = new LinkedHashSet(); for (GrammarAST t : allRefs) { - String refLabelName = t.getText(); + String refLabelName = getName(t); + + if (refLabelName == null) { + continue; + } + List d = getDeclForAltElement(t, refLabelName, needsList.contains(refLabelName), @@ -212,6 +223,35 @@ public class RuleFunction extends OutputModelObject { return decls; } + private List getRuleTokens(List refs) { + List result = new ArrayList<>(refs.size()); + for (GrammarAST ref : refs) { + CommonTree r = ref; + + boolean ignore = false; + while (r != null) { + // Ignore string literals in predicates + if (r instanceof PredAST) { + ignore = true; + break; + } + r = r.parent; + } + + if (!ignore) { + result.add(ref); + } + } + + return result; + } + + private String getName(GrammarAST token) { + String tokenText = token.getText(); + String tokenName = token.getType() != STRING_LITERAL ? tokenText : token.g.getTokenName(tokenText); + return tokenName == null || tokenName.startsWith("T__") ? null : tokenName; // Do not include tokens with auto generated names + } + /** Given list of X and r refs in alt, compute how many of each there are */ protected Pair, FrequencySet> getElementFrequenciesForAlt(AltAST ast) { try { From 70e3e5de70bbcd46fe41e17ad161cc5362a62842 Mon Sep 17 00:00:00 2001 From: Ivan Kochurkin Date: Sun, 9 Sep 2018 11:55:13 +0300 Subject: [PATCH 3/4] ElementFrequenciesVisitor: override stringRef(TerminalAST ref) for correct processing of arrays of string literals --- .../v4/codegen/model/ElementFrequenciesVisitor.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tool/src/org/antlr/v4/codegen/model/ElementFrequenciesVisitor.java b/tool/src/org/antlr/v4/codegen/model/ElementFrequenciesVisitor.java index ccd5c805f..ef1aae2d4 100644 --- a/tool/src/org/antlr/v4/codegen/model/ElementFrequenciesVisitor.java +++ b/tool/src/org/antlr/v4/codegen/model/ElementFrequenciesVisitor.java @@ -149,6 +149,16 @@ public class ElementFrequenciesVisitor extends GrammarTreeVisitor { minFrequencies.peek().add(ref.getText()); } + @Override + public void stringRef(TerminalAST ref) { + String tokenName = ref.g.getTokenName(ref.getText()); + + if (tokenName != null && !tokenName.startsWith("T__")) { + frequencies.peek().add(tokenName); + minFrequencies.peek().add(tokenName); + } + } + /* * Parser rules */ From 2f7c5bfe4a314e606803fd2c1df36b2b27bed69c Mon Sep 17 00:00:00 2001 From: Ivan Kochurkin Date: Sun, 9 Sep 2018 11:55:41 +0300 Subject: [PATCH 4/4] Add AssignTokenNamesToStringLiteralsInGeneratedParserRuleContexts and AssignTokenNamesToStringLiteralArraysInGeneratedParserRuleContexts unit-tests --- .../v4/test/tool/TestCodeGeneration.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestCodeGeneration.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestCodeGeneration.java index 8fb6fb71e..6fa967421 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestCodeGeneration.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestCodeGeneration.java @@ -31,6 +31,7 @@ import java.util.ArrayList; import java.util.List; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; public class TestCodeGeneration extends BaseJavaToolTest { @Before @@ -52,6 +53,24 @@ public class TestCodeGeneration extends BaseJavaToolTest { } } + @Test public void AssignTokenNamesToStringLiteralsInGeneratedParserRuleContexts() throws Exception { + String g = + "grammar T;\n" + + "root: 't1';\n" + + "Token: 't1';"; + List evals = getEvalInfoForString(g, "() { return getToken("); + assertNotEquals(0, evals.size()); + } + + @Test public void AssignTokenNamesToStringLiteralArraysInGeneratedParserRuleContexts() throws Exception { + String g = + "grammar T;\n" + + "root: 't1' 't1';\n" + + "Token: 't1';"; + List evals = getEvalInfoForString(g, "() { return getTokens("); + assertNotEquals(0, evals.size()); + } + /** Add tags around each attribute/template/value write */ public static class DebugInterpreter extends Interpreter { List evals = new ArrayList();