Add utility methods and track token indexes through the left recursive rule transformation to allow external tools to associate ATN states and transitions with original locations in the grammar source file

Terence Parr 2014-06-01 22:27:48 -05:00 committed by Sam Harwell
parent fb49d7c6a2
commit 68b186e77d
16 changed files with 542 additions and 41 deletions
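The point of the commit, end to end: ATN states now map back to token intervals in the grammar the user actually wrote, even after the left-recursion rewrite. Below is a minimal sketch, not taken from this commit, of how an external tool might consume the new Grammar.getStateToGrammarRegion API; the grammar text and class name are illustrative, and it assumes the Grammar(String) constructor runs the tool pipeline so that g.atn is populated.

import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.tool.Grammar;

public class StateToSourceDemo {
    public static void main(String[] args) throws Exception {
        // Illustrative grammar; rule e is left-recursive and gets rewritten internally.
        Grammar g = new Grammar(
            "grammar T;\n" +
            "s : e ';' ;\n" +
            "e : e '*' e | ID ;\n" +
            "ID : [a-z]+ ;\n");
        for (ATNState state : g.atn.states) {
            if (state == null) continue;                  // removed states leave null slots
            Interval region = g.getStateToGrammarRegion(state.stateNumber);
            if (region == null) continue;                 // state not derived from a grammar AST node
            System.out.println("state " + state.stateNumber +
                               " <- grammar token interval " + region.a + ".." + region.b);
        }
    }
}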

View File

@@ -32,6 +32,7 @@ package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.BailErrorStrategy;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.FailedPredicateException;
import org.antlr.v4.runtime.IntStream;
import org.antlr.v4.runtime.NoViableAltException;
import org.antlr.v4.runtime.Parser;
@@ -1904,4 +1905,8 @@ public class ParserATNSimulator extends ATNSimulator {
public final PredictionMode getPredictionMode() {
return mode;
}
public Parser getParser() {
return parser;
}
}
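A small illustrative use of the new accessor (the class and method names below are made up): code that is handed only the simulator, for example from a debugging hook, can recover the Parser it is driving.

import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.atn.ParserATNSimulator;

public class SimulatorOwner {
    // Hypothetical helper for a tool that only sees the ATN simulator.
    public static void describe(ParserATNSimulator sim) {
        Parser p = sim.getParser();   // new accessor
        System.out.println("simulating for a parser with " + p.getRuleNames().length + " rules");
    }
}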

View File

@@ -198,8 +198,9 @@ public abstract class SemanticContext {
}
@Override
// precedence >= _precedenceStack.peek()
public String toString() {
return super.toString();
return "{"+precedence+">=prec}?";
}
}

View File

@@ -31,6 +31,7 @@
package org.antlr.v4.analysis;
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.Token;
import org.antlr.runtime.TokenStream;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.runtime.tree.Tree;
@@ -44,6 +45,8 @@ import org.antlr.v4.runtime.misc.Pair;
import org.antlr.v4.tool.ErrorType;
import org.antlr.v4.tool.ast.AltAST;
import org.antlr.v4.tool.ast.GrammarAST;
import org.antlr.v4.tool.ast.GrammarASTWithOptions;
import org.antlr.v4.tool.ast.RuleRefAST;
import org.stringtemplate.v4.ST;
import org.stringtemplate.v4.STGroup;
import org.stringtemplate.v4.STGroupFile;
@@ -151,7 +154,7 @@ public class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker {
if ( lrlabel!=null ) {
leftRecursiveRuleRefLabels.add(new Pair<GrammarAST,String>(lrlabel,altLabel));
}
stripAssocOptions(altTree);
stripAltLabel(altTree);
// rewrite e to be e_[rec_arg]
@@ -257,37 +260,18 @@ public class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker {
if ( t==null ) return null;
// get all top-level rule refs from ALT
List<GrammarAST> outerAltRuleRefs = t.getNodesWithTypePreorderDFS(IntervalSet.of(RULE_REF));
for (GrammarAST rref : outerAltRuleRefs) {
for (GrammarAST x : outerAltRuleRefs) {
RuleRefAST rref = (RuleRefAST)x;
boolean recursive = rref.getText().equals(ruleName);
boolean rightmost = rref == outerAltRuleRefs.get(outerAltRuleRefs.size()-1);
if ( recursive && rightmost ) {
rref.setText(ruleName+"<"+LeftRecursiveRuleTransformer.PRECEDENCE_OPTION_NAME+"="+prec+">");
GrammarAST dummyValueNode = new GrammarAST(new CommonToken(ANTLRParser.INT, ""+prec));
rref.setOption(LeftRecursiveRuleTransformer.PRECEDENCE_OPTION_NAME, dummyValueNode);
}
}
return t;
}
public void stripAssocOptions(GrammarAST t) {
if ( t==null ) return;
for (GrammarAST options : t.getNodesWithType(ELEMENT_OPTIONS)) {
int i=0;
while ( i<options.getChildCount() ) {
GrammarAST c = (GrammarAST)options.getChild(i);
if ( c.getChild(0).getText().equals("assoc") ) {
options.deleteChild(i); // kill this option
}
else {
i++;
}
}
if ( options.getChildCount()==0 ) {
Tree parent = options.getParent();
parent.deleteChild(options.getChildIndex()); // no more options
return;
}
}
}
/**
* Match (RULE RULE_REF (BLOCK (ALT .*) (ALT RULE_REF[self] .*) (ALT .*)))
* Match (RULE RULE_REF (BLOCK (ALT .*) (ALT (ASSIGN ID RULE_REF[self]) .*) (ALT .*)))
@@ -349,9 +333,69 @@ public class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker {
public String text(GrammarAST t) {
if ( t==null ) return "";
CommonToken ta = (CommonToken) tokenStream.get(t.getTokenStartIndex());
CommonToken tb = (CommonToken) tokenStream.get(t.getTokenStopIndex());
return tokenStream.toString(ta, tb);
int tokenStartIndex = t.getTokenStartIndex();
int tokenStopIndex = t.getTokenStopIndex();
// ignore tokens from existing option subtrees like:
// (ELEMENT_OPTIONS (= assoc right))
//
// element options are added back according to the values in the map
// returned by getOptions().
IntervalSet ignore = new IntervalSet();
List<GrammarAST> optionsSubTrees = t.getNodesWithType(ELEMENT_OPTIONS);
for (GrammarAST sub : optionsSubTrees) {
ignore.add(sub.getTokenStartIndex(), sub.getTokenStopIndex());
}
// Individual labels appear as RULE_REF or TOKEN_REF tokens in the tree,
// but do not support the ELEMENT_OPTIONS syntax. Make sure to not try
// and add the tokenIndex option when writing these tokens.
IntervalSet noOptions = new IntervalSet();
List<GrammarAST> labeledSubTrees = t.getNodesWithType(new IntervalSet(ASSIGN,PLUS_ASSIGN));
for (GrammarAST sub : labeledSubTrees) {
noOptions.add(sub.getChild(0).getTokenStartIndex());
}
StringBuilder buf = new StringBuilder();
for (int i=tokenStartIndex; i<=tokenStopIndex; i++) {
if ( ignore.contains(i) ) {
continue;
}
Token tok = tokenStream.get(i);
StringBuilder elementOptions = new StringBuilder();
if (!noOptions.contains(i)) {
GrammarAST node = t.getNodeWithTokenIndex(tok.getTokenIndex());
if ( node!=null &&
(tok.getType()==TOKEN_REF ||
tok.getType()==STRING_LITERAL ||
tok.getType()==RULE_REF) )
{
elementOptions.append("tokenIndex=").append(tok.getTokenIndex());
}
if ( node instanceof GrammarASTWithOptions ) {
GrammarASTWithOptions o = (GrammarASTWithOptions)node;
for (Map.Entry<String, GrammarAST> entry : o.getOptions().entrySet()) {
if (elementOptions.length() > 0) {
elementOptions.append(',');
}
elementOptions.append(entry.getKey());
elementOptions.append('=');
elementOptions.append(entry.getValue().getText());
}
}
}
buf.append(tok.getText());
if (elementOptions.length() > 0) {
buf.append('<').append(elementOptions).append('>');
}
}
return buf.toString();
}
public int precedence(int alt) {
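The effect of the rewritten text() method is easiest to see on a whole transformed grammar: surviving element options are re-serialized, and each terminal and rule reference picks up a tokenIndex option (recursive references also get p). A hedged sketch; the grammar is illustrative and the exact index values depend on its text (compare the expected trees in the tests at the end of this diff).

import org.antlr.v4.tool.Grammar;

public class ShowInjectedOptions {
    public static void main(String[] args) throws Exception {
        Grammar g = new Grammar(
            "grammar T;\n" +
            "e : e '*' e | ID ;\n" +
            "ID : [a-z]+ ;\n");
        // ELEMENT_OPTIONS subtrees in the rewritten rule now carry entries
        // such as (= tokenIndex N) and (= p N).
        System.out.println(g.ast.toStringTree());
    }
}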

View File

@@ -31,7 +31,6 @@
package org.antlr.v4.analysis;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.ParserRuleReturnScope;
import org.antlr.runtime.RecognitionException;
@@ -71,6 +70,7 @@ import java.util.List;
*/
public class LeftRecursiveRuleTransformer {
public static final String PRECEDENCE_OPTION_NAME = "p";
public static final String TOKENINDEX_OPTION_NAME = "tokenIndex";
public GrammarRootAST ast;
public Collection<Rule> rules;
@@ -91,6 +91,7 @@ public class LeftRecursiveRuleTransformer {
for (Rule r : rules) {
if ( !Grammar.isTokenName(r.name) ) {
if ( LeftRecursiveRuleAnalyzer.hasImmediateRecursiveRuleRefs(r.ast, r.name) ) {
g.originalTokenStream = g.tokenStream;
boolean fitsPattern = translateLeftRecursiveRule(ast, (LeftRecursiveRule)r, language);
if ( fitsPattern ) leftRecursiveRuleNames.add(r.name);
}
@@ -114,7 +115,6 @@ public class LeftRecursiveRuleTransformer {
String language)
{
//tool.log("grammar", ruleAST.toStringTree());
Grammar g = r.ast.g;
GrammarAST prevRuleAST = r.ast;
String ruleName = prevRuleAST.getChild(0).getText();
LeftRecursiveRuleAnalyzer leftRecursiveRuleWalker =
@@ -210,6 +210,7 @@ public class LeftRecursiveRuleTransformer {
ParserRuleReturnScope r = p.rule();
RuleAST tree = (RuleAST)r.getTree();
GrammarTransformPipeline.setGrammarPtr(g, tree);
GrammarTransformPipeline.augmentTokensWithOriginalPosition(g, tree);
return tree;
}
catch (Exception e) {

View File

@@ -818,8 +818,8 @@ blockSet
;
setElement
: TOKEN_REF<TerminalAST>
| STRING_LITERAL<TerminalAST>
: TOKEN_REF<TerminalAST>^ elementOptions?
| STRING_LITERAL<TerminalAST>^ elementOptions?
| range
| LEXER_CHAR_SET
;

View File

@@ -164,7 +164,9 @@ blockSet[boolean invert] returns [ATNFactory.Handle p]
/** Don't combine with atom otherwise it will build spurious ATN nodes */
setElement
: STRING_LITERAL
: ^(STRING_LITERAL .)
| ^(TOKEN_REF .)
| STRING_LITERAL
| TOKEN_REF
| ^(RANGE a=STRING_LITERAL b=STRING_LITERAL)
| LEXER_CHAR_SET

View File

@@ -106,10 +106,24 @@ setElement[boolean inLexer]
@after {
GrammarTransformPipeline.setGrammarPtr(g, $tree);
}
: ( a=STRING_LITERAL {!inLexer || CharSupport.getCharValueFromGrammarCharLiteral($a.getText())!=-1}?
| {!inLexer}?=> TOKEN_REF
: ( ^(a=STRING_LITERAL elementOptions) {!inLexer || CharSupport.getCharValueFromGrammarCharLiteral($a.getText())!=-1}?
| a=STRING_LITERAL {!inLexer || CharSupport.getCharValueFromGrammarCharLiteral($a.getText())!=-1}?
| {!inLexer}?=> ^(TOKEN_REF elementOptions)
| {!inLexer}?=> TOKEN_REF
| {inLexer}?=> ^(RANGE a=STRING_LITERAL b=STRING_LITERAL)
{CharSupport.getCharValueFromGrammarCharLiteral($a.getText())!=-1 &&
CharSupport.getCharValueFromGrammarCharLiteral($b.getText())!=-1}?
)
;
elementOptions
: ^(ELEMENT_OPTIONS elementOption*)
;
elementOption
: ID
| ^(ASSIGN id=ID v=ID)
| ^(ASSIGN ID v=STRING_LITERAL)
| ^(ASSIGN ID v=ACTION)
| ^(ASSIGN ID v=INT)
;

View File

@@ -0,0 +1,98 @@
/*
* [The "BSD license"]
* Copyright (c) 2014 Terence Parr
* Copyright (c) 2014 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.parse;
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.Token;
import org.antlr.v4.tool.Grammar;
/** A CommonToken that can also track its original location,
* derived from options on the element ref like BEGIN<line=34,...>.
*/
public class GrammarToken extends CommonToken {
public Grammar g;
public int originalTokenIndex = -1;
public GrammarToken(Grammar g, Token oldToken) {
super(oldToken);
this.g = g;
}
@Override
public int getCharPositionInLine() {
if ( originalTokenIndex>=0 ) return g.originalTokenStream.get(originalTokenIndex).getCharPositionInLine();
return super.getCharPositionInLine();
}
@Override
public int getLine() {
if ( originalTokenIndex>=0 ) return g.originalTokenStream.get(originalTokenIndex).getLine();
return super.getLine();
}
@Override
public int getTokenIndex() {
return originalTokenIndex;
}
@Override
public int getStartIndex() {
if ( originalTokenIndex>=0 ) {
return ((CommonToken)g.originalTokenStream.get(originalTokenIndex)).getStartIndex();
}
return super.getStartIndex();
}
@Override
public int getStopIndex() {
int n = super.getStopIndex() - super.getStartIndex() + 1;
return getStartIndex() + n - 1;
}
@Override
public String toString() {
String channelStr = "";
if ( channel>0 ) {
channelStr=",channel="+channel;
}
String txt = getText();
if ( txt!=null ) {
txt = txt.replaceAll("\n","\\\\n");
txt = txt.replaceAll("\r","\\\\r");
txt = txt.replaceAll("\t","\\\\t");
}
else {
txt = "<no text>";
}
return "[@"+getTokenIndex()+","+getStartIndex()+":"+getStopIndex()+
"='"+txt+"',<"+getType()+">"+channelStr+","+getLine()+":"+getCharPositionInLine()+"]";
}
}

View File

@@ -912,8 +912,10 @@ setElement
@after {
exitSetElement($start);
}
: STRING_LITERAL {stringRef((TerminalAST)$STRING_LITERAL);}
| TOKEN_REF {tokenRef((TerminalAST)$TOKEN_REF);}
: ^(STRING_LITERAL elementOptions) {stringRef((TerminalAST)$STRING_LITERAL);}
| ^(TOKEN_REF elementOptions) {tokenRef((TerminalAST)$TOKEN_REF);}
| STRING_LITERAL {stringRef((TerminalAST)$STRING_LITERAL);}
| TOKEN_REF {tokenRef((TerminalAST)$TOKEN_REF);}
| ^(RANGE a=STRING_LITERAL b=STRING_LITERAL)
{
stringRef((TerminalAST)$a);

View File

@@ -185,7 +185,9 @@ element
;
setElement
: STRING_LITERAL
: ^(STRING_LITERAL elementOptions)
| ^(TOKEN_REF elementOptions)
| STRING_LITERAL
| TOKEN_REF
;

View File

@@ -47,6 +47,7 @@ import org.antlr.v4.tool.ast.GrammarAST;
import org.antlr.v4.tool.ast.GrammarASTWithOptions;
import org.antlr.v4.tool.ast.GrammarRootAST;
import org.antlr.v4.tool.ast.RuleAST;
import org.antlr.v4.tool.ast.RuleRefAST;
import org.antlr.v4.tool.ast.TerminalAST;
import org.stringtemplate.v4.misc.MultiMap;
@@ -528,6 +529,9 @@ public class BasicSemanticChecks extends GrammarTreeVisitor {
}
}
if ( elem instanceof RuleRefAST ) {
return checkRuleRefOptions((RuleRefAST)elem, ID, valueAST);
}
if ( elem instanceof TerminalAST ) {
return checkTokenOptions((TerminalAST)elem, ID, valueAST);
}
@@ -548,7 +552,7 @@ public class BasicSemanticChecks extends GrammarTreeVisitor {
return false;
}
boolean checkRuleRefOptions(TerminalAST elem, GrammarAST ID, GrammarAST valueAST) {
boolean checkRuleRefOptions(RuleRefAST elem, GrammarAST ID, GrammarAST valueAST) {
Token optionID = ID.token;
String fileName = optionID.getInputStream().getSourceName();
// don't care about id<SimpleValue> options

View File

@@ -48,9 +48,10 @@ import org.antlr.v4.runtime.TokenStream;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNDeserializer;
import org.antlr.v4.runtime.atn.ATNSerializer;
import org.antlr.v4.runtime.atn.SemanticContext;
import org.antlr.v4.runtime.dfa.DFA;
import org.antlr.v4.runtime.misc.IntSet;
import org.antlr.v4.runtime.misc.IntegerList;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
@@ -60,6 +61,7 @@ import org.antlr.v4.tool.ast.GrammarAST;
import org.antlr.v4.tool.ast.GrammarASTWithOptions;
import org.antlr.v4.tool.ast.GrammarRootAST;
import org.antlr.v4.tool.ast.PredAST;
import org.antlr.v4.tool.ast.RuleAST;
import org.antlr.v4.tool.ast.TerminalAST;
import java.io.IOException;
@@ -114,12 +116,14 @@ public class Grammar implements AttributeResolver {
public static final Set<String> ruleRefOptions = new HashSet<String>();
static {
ruleRefOptions.add(LeftRecursiveRuleTransformer.PRECEDENCE_OPTION_NAME);
ruleRefOptions.add(LeftRecursiveRuleTransformer.TOKENINDEX_OPTION_NAME);
}
/** Legal options for terminal refs like ID<assoc=right> */
public static final Set<String> tokenOptions = new HashSet<String>();
static {
tokenOptions.add("assoc");
tokenOptions.add(LeftRecursiveRuleTransformer.TOKENINDEX_OPTION_NAME);
}
public static final Set<String> actionOptions = new HashSet<String>();
@@ -151,6 +155,9 @@ public class Grammar implements AttributeResolver {
/** Track stream used to create this grammar */
@NotNull
public final org.antlr.runtime.TokenStream tokenStream;
/** If we transform grammar, track original unaltered token stream */
public org.antlr.runtime.TokenStream originalTokenStream;
public String text; // testing only
public String fileName;
@@ -180,6 +187,8 @@ public class Grammar implements AttributeResolver {
*/
public ATN atn;
public Map<Integer, Interval> stateToGrammarRegionMap;
public Map<Integer, DFA> decisionDFAs = new HashMap<Integer, DFA>();
public List<IntervalSet[]> decisionLOOK;
@@ -238,6 +247,8 @@ public class Grammar implements AttributeResolver {
* sempred index is 0..n-1
*/
public LinkedHashMap<PredAST, Integer> sempreds = new LinkedHashMap<PredAST, Integer>();
/** Map the other direction upon demand */
public LinkedHashMap<Integer, PredAST> indexToPredMap;
public static final String AUTO_GENERATED_TOKEN_NAME_PREFIX = "T__";
@@ -706,6 +717,58 @@ public class Grammar implements AttributeResolver {
return tokenNames;
}
/** Given an arbitrarily complex SemanticContext, walk the "tree" and get display string.
* Pull predicates from grammar text.
*/
public String getSemanticContextDisplayString(SemanticContext semctx) {
if ( semctx instanceof SemanticContext.Predicate ) {
return getPredicateDisplayString((SemanticContext.Predicate)semctx);
}
if ( semctx instanceof SemanticContext.AND ) {
SemanticContext.AND and = (SemanticContext.AND)semctx;
return joinPredicateOperands(and, " and ");
}
if ( semctx instanceof SemanticContext.OR ) {
SemanticContext.OR or = (SemanticContext.OR)semctx;
return joinPredicateOperands(or, " or ");
}
return semctx.toString();
}
public String joinPredicateOperands(SemanticContext.Operator op, String separator) {
StringBuilder buf = new StringBuilder();
for (SemanticContext operand : op.getOperands()) {
if (buf.length() > 0) {
buf.append(separator);
}
buf.append(getSemanticContextDisplayString(operand));
}
return buf.toString();
}
public LinkedHashMap<Integer, PredAST> getIndexToPredicateMap() {
LinkedHashMap<Integer, PredAST> indexToPredMap = new LinkedHashMap<Integer, PredAST>();
for (Rule r : rules.values()) {
for (ActionAST a : r.actions) {
if (a instanceof PredAST) {
PredAST p = (PredAST) a;
indexToPredMap.put(sempreds.get(p), p);
}
}
}
return indexToPredMap;
}
public String getPredicateDisplayString(SemanticContext.Predicate pred) {
if ( indexToPredMap==null ) {
indexToPredMap = getIndexToPredicateMap();
}
ActionAST actionAST = indexToPredMap.get(pred.predIndex);
return actionAST.getText();
}
/** What is the max char value possible for this grammar's target? Use
* unicode max if no target defined.
*/
@@ -1004,6 +1067,49 @@ public class Grammar implements AttributeResolver {
decisionDFAs.put(decision, lookaheadDFA);
}
public static Map<Integer, Interval> getStateToGrammarRegionMap(GrammarRootAST ast, IntervalSet grammarTokenTypes) {
Map<Integer, Interval> stateToGrammarRegionMap = new HashMap<Integer, Interval>();
if ( ast==null ) return stateToGrammarRegionMap;
List<GrammarAST> nodes = ast.getNodesWithType(grammarTokenTypes);
for (GrammarAST n : nodes) {
if (n.atnState != null) {
Interval tokenRegion = Interval.of(n.getTokenStartIndex(), n.getTokenStopIndex());
org.antlr.runtime.tree.Tree ruleNode = null;
// RULEs, BLOCKs of transformed recursive rules point to original token interval
switch ( n.getType() ) {
case ANTLRParser.RULE :
ruleNode = n;
break;
case ANTLRParser.BLOCK :
case ANTLRParser.CLOSURE :
ruleNode = n.getAncestor(ANTLRParser.RULE);
break;
}
if ( ruleNode instanceof RuleAST ) {
String ruleName = ((RuleAST) ruleNode).getRuleName();
Rule r = ast.g.getRule(ruleName);
if ( r instanceof LeftRecursiveRule ) {
RuleAST originalAST = ((LeftRecursiveRule) r).getOriginalAST();
tokenRegion = Interval.of(originalAST.getTokenStartIndex(), originalAST.getTokenStopIndex());
}
}
stateToGrammarRegionMap.put(n.atnState.stateNumber, tokenRegion);
}
}
return stateToGrammarRegionMap;
}
/** Given an ATN state number, return the token index range within the grammar from which that ATN state was derived. */
public Interval getStateToGrammarRegion(int atnStateNumber) {
if ( stateToGrammarRegionMap==null ) {
stateToGrammarRegionMap = getStateToGrammarRegionMap(ast, null); // map all nodes with non-null atn state ptr
}
if ( stateToGrammarRegionMap==null ) return Interval.INVALID;
return stateToGrammarRegionMap.get(atnStateNumber);
}
public LexerInterpreter createLexerInterpreter(CharStream input) {
if (this.isParser()) {
throw new IllegalStateException("A lexer interpreter can only be created for a lexer or combined grammar.");
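A sketch of the new predicate display helpers added to Grammar above (the grammar text is illustrative, and it assumes the sempreds map is populated by the tool pipeline before ATN construction, so predIndex values line up): walk the ATN, find predicate transitions, and render each one as the text written in the grammar.

import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.atn.PredicateTransition;
import org.antlr.v4.runtime.atn.Transition;
import org.antlr.v4.tool.Grammar;

public class PredicateDisplayDemo {
    public static void main(String[] args) throws Exception {
        Grammar g = new Grammar(
            "grammar P;\n" +
            "s : {true}? ID | ID ID ;\n" +
            "ID : [a-z]+ ;\n");
        for (ATNState s : g.atn.states) {
            if (s == null) continue;
            for (int i = 0; i < s.getNumberOfTransitions(); i++) {
                Transition t = s.transition(i);
                if (t instanceof PredicateTransition) {
                    PredicateTransition pt = (PredicateTransition) t;
                    // prints the predicate as written in the grammar source, e.g. {true}?
                    System.out.println(g.getSemanticContextDisplayString(pt.getPredicate()));
                }
            }
        }
    }
}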

View File

@@ -31,14 +31,17 @@
package org.antlr.v4.tool;
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.runtime.tree.Tree;
import org.antlr.runtime.tree.TreeVisitor;
import org.antlr.runtime.tree.TreeVisitorAction;
import org.antlr.v4.Tool;
import org.antlr.v4.analysis.LeftRecursiveRuleTransformer;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.parse.BlockSetTransformer;
import org.antlr.v4.parse.GrammarASTAdaptor;
import org.antlr.v4.parse.GrammarToken;
import org.antlr.v4.runtime.misc.DoubleKeyMap;
import org.antlr.v4.runtime.misc.Pair;
import org.antlr.v4.tool.ast.AltAST;
@@ -53,6 +56,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/** Handle left-recursion and block-set transforms */
@@ -126,6 +130,24 @@ public class GrammarTransformPipeline {
});
}
public static void augmentTokensWithOriginalPosition(final Grammar g, GrammarAST tree) {
if ( tree==null ) return;
List<GrammarAST> optionsSubTrees = tree.getNodesWithType(ANTLRParser.ELEMENT_OPTIONS);
for (int i = 0; i < optionsSubTrees.size(); i++) {
GrammarAST t = optionsSubTrees.get(i);
CommonTree elWithOpt = t.parent;
if ( elWithOpt instanceof GrammarASTWithOptions ) {
Map<String, GrammarAST> options = ((GrammarASTWithOptions) elWithOpt).getOptions();
if ( options.containsKey(LeftRecursiveRuleTransformer.TOKENINDEX_OPTION_NAME) ) {
GrammarToken newTok = new GrammarToken(g, elWithOpt.getToken());
newTok.originalTokenIndex = Integer.valueOf(options.get(LeftRecursiveRuleTransformer.TOKENINDEX_OPTION_NAME).getText());
elWithOpt.token = newTok;
}
}
}
}
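Once augmentTokensWithOriginalPosition has swapped in GrammarTokens, position queries on nodes of the rewritten rule resolve against g.originalTokenStream. A small illustrative check (grammar text made up; the tests at the end of this diff assert the same behavior in detail):

import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.ast.GrammarAST;

public class OriginalPositionDemo {
    public static void main(String[] args) throws Exception {
        Grammar g = new Grammar(
            "grammar T;\n" +
            "e : e '*' e | ID ;\n" +
            "ID : [a-z]+ ;\n");
        // Token refs inside the transformed rule still report line:column
        // of the grammar text above, not of the generated rule text.
        for (GrammarAST n : g.ast.getNodesWithType(ANTLRParser.TOKEN_REF)) {
            System.out.println(n.getText() + " @ " + n.getToken().getLine() +
                               ":" + n.getToken().getCharPositionInLine());
        }
    }
}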
/** Merge all the rules, token definitions, and named actions from
imported grammars into the root grammar tree. Perform:

View File

@@ -45,6 +45,7 @@ import java.util.Map;
public class LeftRecursiveRule extends Rule {
public List<LeftRecursiveRuleAltInfo> recPrimaryAlts;
public OrderedHashMap<Integer, LeftRecursiveRuleAltInfo> recOpAlts;
public RuleAST originalAST;
/** Did we delete any labels on direct left-recur refs? Points at ID of ^(= ID el) */
public List<Pair<GrammarAST,String>> leftRecursiveRuleRefLabels =
@@ -52,6 +53,7 @@ public class LeftRecursiveRule extends Rule {
public LeftRecursiveRule(Grammar g, String name, RuleAST ast) {
super(g, name, ast, 1);
originalAST = ast;
alt = new Alternative[numberOfAlts+1]; // always just one
for (int i=1; i<=numberOfAlts; i++) alt[i] = new Alternative(this, i);
}
@@ -69,6 +71,10 @@ public class LeftRecursiveRule extends Rule {
return n;
}
public RuleAST getOriginalAST() {
return originalAST;
}
@Override
public List<AltAST> getUnlabeledAltASTs() {
List<AltAST> alts = new ArrayList<AltAST>();

View File

@@ -102,7 +102,7 @@ public class GrammarAST extends CommonTree {
GrammarAST t;
while ( !work.isEmpty() ) {
t = work.remove(0);
if ( types.contains(t.getType()) ) nodes.add(t);
if ( types==null || types.contains(t.getType()) ) nodes.add(t);
if ( t.children!=null ) {
work.addAll(Arrays.asList(t.getChildrenAsArray()));
}
@@ -125,6 +125,21 @@ public class GrammarAST extends CommonTree {
}
}
public GrammarAST getNodeWithTokenIndex(int index) {
if ( this.getToken().getTokenIndex()==index ) {
return this;
}
// walk all children of root.
for (int i= 0; i < getChildCount(); i++) {
GrammarAST child = (GrammarAST)getChild(i);
GrammarAST result = child.getNodeWithTokenIndex(index);
if ( result!=null ) {
return result;
}
}
return null;
}
public AltAST getOutermostAltNode() {
if ( this instanceof AltAST && parent.parent instanceof RuleAST ) {
return (AltAST)this;
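Two small GrammarAST changes appear in this file: getNodesWithType now accepts a null type set to mean "every node", and getNodeWithTokenIndex finds the node created from a given token index. An illustrative round trip (grammar text made up):

import java.util.List;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.ast.GrammarAST;

public class NodeLookupDemo {
    public static void main(String[] args) throws Exception {
        Grammar g = new Grammar(
            "grammar T;\n" +
            "r : ID ';' ;\n" +
            "ID : [a-z]+ ;\n");
        List<GrammarAST> all = g.ast.getNodesWithType((IntervalSet) null);   // null = all nodes
        for (GrammarAST n : all) {
            int idx = n.getToken().getTokenIndex();        // -1 for imaginary nodes
            GrammarAST found = g.ast.getNodeWithTokenIndex(idx);
            System.out.println(idx + " -> " + (found != null ? found.getText() : "<none>"));
        }
    }
}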

View File

@@ -0,0 +1,179 @@
/*
* [The "BSD license"]
* Copyright (c) 2014 Terence Parr
* Copyright (c) 2014 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.test;
import org.antlr.runtime.Token;
import org.antlr.v4.misc.Utils;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.ast.GrammarAST;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
import static org.junit.Assert.assertEquals;
public class TestTokenPositionOptions extends BaseTest {
@Test public void testLeftRecursionRewrite() throws Exception {
Grammar g = new Grammar(
"grammar T;\n" +
"s : e ';' ;\n" +
"e : e '*' e\n" +
" | e '+' e\n" +
" | e '.' ID\n" +
" | '-' e\n" +
" | ID\n" +
" ;\n" +
"ID : [a-z]+ ;\n"
);
String expectedTree =
"(COMBINED_GRAMMAR T (RULES (RULE s (BLOCK (ALT e ';'))) (RULE e (BLOCK (ALT (BLOCK (ALT {} ('-' (ELEMENT_OPTIONS (= tokenIndex 43))) (e (ELEMENT_OPTIONS (= tokenIndex 45) (= p 2)))) (ALT (ID (ELEMENT_OPTIONS (= tokenIndex 49))))) (* (BLOCK (ALT ({precpred(_ctx, 5)}? (ELEMENT_OPTIONS (= p 5))) ('*' (ELEMENT_OPTIONS (= tokenIndex 21))) (e (ELEMENT_OPTIONS (= tokenIndex 23) (= p 6)))) (ALT ({precpred(_ctx, 4)}? (ELEMENT_OPTIONS (= p 4))) ('+' (ELEMENT_OPTIONS (= tokenIndex 29))) (e (ELEMENT_OPTIONS (= tokenIndex 31) (= p 5)))) (ALT ({precpred(_ctx, 3)}? (ELEMENT_OPTIONS (= p 3))) ('.' (ELEMENT_OPTIONS (= tokenIndex 37))) (ID (ELEMENT_OPTIONS (= tokenIndex 39)))))))))))";
assertEquals(expectedTree, g.ast.toStringTree());
String expectedElementTokens =
"[@5,11:11='s',<56>,2:0]\n" +
"[@9,15:15='e',<56>,2:4]\n" +
"[@11,17:19='';'',<61>,2:6]\n" +
"[@15,23:23='e',<56>,3:0]\n" +
"[@43,64:66=''-'',<61>,6:4]\n" +
"[@45,68:68='e',<56>,6:8]\n" +
"[@49,74:75='ID',<65>,7:4]\n" +
"[@21,29:31=''*'',<61>,3:6]\n" +
"[@23,33:33='e',<56>,3:10]\n" +
"[@29,41:43=''+'',<61>,4:6]\n" +
"[@31,45:45='e',<56>,4:10]\n" +
"[@37,53:55=''.'',<61>,5:6]\n" +
"[@39,57:58='ID',<65>,5:10]";
IntervalSet types =
new IntervalSet(ANTLRParser.TOKEN_REF,
ANTLRParser.STRING_LITERAL,
ANTLRParser.RULE_REF);
List<GrammarAST> nodes = g.ast.getNodesWithTypePreorderDFS(types);
List<Token> tokens = new ArrayList<Token>();
for (GrammarAST node : nodes) {
tokens.add(node.getToken());
}
assertEquals(expectedElementTokens, Utils.join(tokens.toArray(), "\n"));
}
@Test public void testLeftRecursionWithLabels() throws Exception {
Grammar g = new Grammar(
"grammar T;\n" +
"s : e ';' ;\n" +
"e : e '*' x=e\n" +
" | e '+' e\n" +
" | e '.' y=ID\n" +
" | '-' e\n" +
" | ID\n" +
" ;\n" +
"ID : [a-z]+ ;\n"
);
String expectedTree =
"(COMBINED_GRAMMAR T (RULES (RULE s (BLOCK (ALT e ';'))) (RULE e (BLOCK (ALT (BLOCK (ALT {} ('-' (ELEMENT_OPTIONS (= tokenIndex 47))) (e (ELEMENT_OPTIONS (= tokenIndex 49) (= p 2)))) (ALT (ID (ELEMENT_OPTIONS (= tokenIndex 53))))) (* (BLOCK (ALT ({precpred(_ctx, 5)}? (ELEMENT_OPTIONS (= p 5))) ('*' (ELEMENT_OPTIONS (= tokenIndex 21))) (= x (e (ELEMENT_OPTIONS (= tokenIndex 25) (= p 6))))) (ALT ({precpred(_ctx, 4)}? (ELEMENT_OPTIONS (= p 4))) ('+' (ELEMENT_OPTIONS (= tokenIndex 31))) (e (ELEMENT_OPTIONS (= tokenIndex 33) (= p 5)))) (ALT ({precpred(_ctx, 3)}? (ELEMENT_OPTIONS (= p 3))) ('.' (ELEMENT_OPTIONS (= tokenIndex 39))) (= y (ID (ELEMENT_OPTIONS (= tokenIndex 43))))))))))))";
assertEquals(expectedTree, g.ast.toStringTree());
String expectedElementTokens =
"[@5,11:11='s',<56>,2:0]\n" +
"[@9,15:15='e',<56>,2:4]\n" +
"[@11,17:19='';'',<61>,2:6]\n" +
"[@15,23:23='e',<56>,3:0]\n" +
"[@47,68:70=''-'',<61>,6:4]\n" +
"[@49,72:72='e',<56>,6:8]\n" +
"[@53,78:79='ID',<65>,7:4]\n" +
"[@21,29:31=''*'',<61>,3:6]\n" +
"[@25,35:35='e',<56>,3:12]\n" +
"[@31,43:45=''+'',<61>,4:6]\n" +
"[@33,47:47='e',<56>,4:10]\n" +
"[@39,55:57=''.'',<61>,5:6]\n" +
"[@43,61:62='ID',<65>,5:12]";
IntervalSet types =
new IntervalSet(ANTLRParser.TOKEN_REF,
ANTLRParser.STRING_LITERAL,
ANTLRParser.RULE_REF);
List<GrammarAST> nodes = g.ast.getNodesWithTypePreorderDFS(types);
List<Token> tokens = new ArrayList<Token>();
for (GrammarAST node : nodes) {
tokens.add(node.getToken());
}
assertEquals(expectedElementTokens, Utils.join(tokens.toArray(), "\n"));
}
@Test public void testLeftRecursionWithSet() throws Exception {
Grammar g = new Grammar(
"grammar T;\n" +
"s : e ';' ;\n" +
"e : e op=('*'|'/') e\n" +
" | e '+' e\n" +
" | e '.' ID\n" +
" | '-' e\n" +
" | ID\n" +
" ;\n" +
"ID : [a-z]+ ;\n"
);
String expectedTree =
"(COMBINED_GRAMMAR T (RULES (RULE s (BLOCK (ALT e ';'))) (RULE e (BLOCK (ALT (BLOCK (ALT {} ('-' (ELEMENT_OPTIONS (= tokenIndex 49))) (e (ELEMENT_OPTIONS (= tokenIndex 51) (= p 2)))) (ALT (ID (ELEMENT_OPTIONS (= tokenIndex 55))))) (* (BLOCK (ALT ({precpred(_ctx, 5)}? (ELEMENT_OPTIONS (= p 5))) (= op (SET ('*' (ELEMENT_OPTIONS (= tokenIndex 24))) ('/' (ELEMENT_OPTIONS (= tokenIndex 26))))) (e (ELEMENT_OPTIONS (= tokenIndex 29) (= p 6)))) (ALT ({precpred(_ctx, 4)}? (ELEMENT_OPTIONS (= p 4))) ('+' (ELEMENT_OPTIONS (= tokenIndex 35))) (e (ELEMENT_OPTIONS (= tokenIndex 37) (= p 5)))) (ALT ({precpred(_ctx, 3)}? (ELEMENT_OPTIONS (= p 3))) ('.' (ELEMENT_OPTIONS (= tokenIndex 43))) (ID (ELEMENT_OPTIONS (= tokenIndex 45)))))))))))";
assertEquals(expectedTree, g.ast.toStringTree());
String expectedElementTokens =
"[@5,11:11='s',<56>,2:0]\n" +
"[@9,15:15='e',<56>,2:4]\n" +
"[@11,17:19='';'',<61>,2:6]\n" +
"[@15,23:23='e',<56>,3:0]\n" +
"[@49,73:75=''-'',<61>,6:4]\n" +
"[@51,77:77='e',<56>,6:8]\n" +
"[@55,83:84='ID',<65>,7:4]\n" +
"[@24,33:35=''*'',<61>,3:10]\n" +
"[@26,37:39=''/'',<61>,3:14]\n" +
"[@29,42:42='e',<56>,3:19]\n" +
"[@35,50:52=''+'',<61>,4:6]\n" +
"[@37,54:54='e',<56>,4:10]\n" +
"[@43,62:64=''.'',<61>,5:6]\n" +
"[@45,66:67='ID',<65>,5:10]";
IntervalSet types =
new IntervalSet(ANTLRParser.TOKEN_REF,
ANTLRParser.STRING_LITERAL,
ANTLRParser.RULE_REF);
List<GrammarAST> nodes = g.ast.getNodesWithTypePreorderDFS(types);
List<Token> tokens = new ArrayList<Token>();
for (GrammarAST node : nodes) {
tokens.add(node.getToken());
}
assertEquals(expectedElementTokens, Utils.join(tokens.toArray(), "\n"));
}
}