From 94bef386cb3e0539f934ed797d80c970add29dea Mon Sep 17 00:00:00 2001 From: parrt Date: Wed, 17 Jun 2015 18:06:22 -0700 Subject: [PATCH] add new interpreter that knows how to track alternatives in rule nodes; some cleanup and improvement to other interpreter --- .../Java/src/org/antlr/v4/runtime/Parser.java | 112 ------- .../antlr/v4/runtime/ParserInterpreter.java | 60 ++-- .../v4/runtime/atn/ParserATNSimulator.java | 1 + .../v4/runtime/atn/ProfilingATNSimulator.java | 4 + .../src/org/antlr/v4/runtime/tree/Trees.java | 16 +- tool/src/org/antlr/v4/tool/Grammar.java | 20 +- .../tool/GrammarInterpreterRuleContext.java | 25 ++ .../v4/tool/GrammarParserInterpreter.java | 292 ++++++++++++++++++ .../v4/test/tool/TestAmbigParseTrees.java | 14 +- 9 files changed, 401 insertions(+), 143 deletions(-) create mode 100644 tool/src/org/antlr/v4/tool/GrammarInterpreterRuleContext.java create mode 100644 tool/src/org/antlr/v4/tool/GrammarParserInterpreter.java diff --git a/runtime/Java/src/org/antlr/v4/runtime/Parser.java b/runtime/Java/src/org/antlr/v4/runtime/Parser.java index 9243bf375..eb299ecd1 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/Parser.java +++ b/runtime/Java/src/org/antlr/v4/runtime/Parser.java @@ -32,7 +32,6 @@ package org.antlr.v4.runtime; import org.antlr.v4.runtime.atn.ATN; import org.antlr.v4.runtime.atn.ATNDeserializationOptions; import org.antlr.v4.runtime.atn.ATNDeserializer; -import org.antlr.v4.runtime.atn.ATNSerializer; import org.antlr.v4.runtime.atn.ATNSimulator; import org.antlr.v4.runtime.atn.ATNState; import org.antlr.v4.runtime.atn.AmbiguityInfo; @@ -48,7 +47,6 @@ import org.antlr.v4.runtime.tree.ErrorNode; import org.antlr.v4.runtime.tree.ParseTreeListener; import org.antlr.v4.runtime.tree.ParseTreeWalker; import org.antlr.v4.runtime.tree.TerminalNode; -import org.antlr.v4.runtime.tree.Trees; import org.antlr.v4.runtime.tree.pattern.ParseTreePattern; import org.antlr.v4.runtime.tree.pattern.ParseTreePatternMatcher; @@ -768,116 +766,6 @@ public abstract class Parser extends Recognizer { return false; } - /** Given an AmbiguityInfo object that contains information about an - * ambiguous decision event, return the list of ambiguous parse trees. - * An ambiguity occurs when a specific token sequence can be recognized - * in more than one way by the grammar. These ambiguities are detected only - * at decision points. - * - * The list of trees includes the actual interpretation (that for - * the minimum alternative number) and all ambiguous alternatives. - * The actual interpretation is always first. - * - * This method reuses the same physical input token stream used to - * detect the ambiguity by the original parser in the first place. - * This method resets/seeks within but does not alter originalParser. - * The input position is restored upon exit from this method. - * Parsers using a {@link UnbufferedTokenStream} may not be able to - * perform the necessary save index() / seek(saved_index) operation. - * - * The trees are rooted at the node whose start..stop token indices - * include the start and stop indices of this ambiguity event. That is, - * the trees returns will always include the complete ambiguous subphrase - * identified by the ambiguity event. - * - * Be aware that this method does NOT notify error or parse listeners as - * it would trigger duplicate or otherwise unwanted events. - * - * This uses a temporary ParserATNSimulator and a ParserInterpreter - * so we don't mess up any statistics, event lists, etc... - * The parse tree constructed while identifying/making ambiguityInfo is - * not affected by this method as it creates a new parser interp to - * get the ambiguous interpretations. - * - * Nodes in the returned ambig trees are independent of the original parse - * tree (constructed while identifying/creating ambiguityInfo). - * - * @since 4.5.1 - * - * @param originalParser The parser used to create ambiguityInfo; it - * is not modified by this routine and can be either - * a generated or interpreted parser. It's token - * stream *is* reset/seek()'d. - * @param ambiguityInfo The information about an ambiguous decision event - * for which you want ambiguous parse trees. - * @param startRuleIndex The start rule for the entire grammar, not - * the ambiguous decision. We re-parse the entire input - * and so we need the original start rule. - * - * @return The list of all possible interpretations of - * the input for the decision in ambiguityInfo. - * The actual interpretation chosen by the parser - * is always given first because this method - * retests the input in alternative order and - * ANTLR always resolves ambiguities by choosing - * the first alternative that matches the input. - * - * @throws RecognitionException Throws upon syntax error while matching - * ambig input. - */ - public static List getAmbiguousParseTrees(Parser originalParser, - AmbiguityInfo ambiguityInfo, - int startRuleIndex) - throws RecognitionException - { - List trees = new ArrayList(); - int saveTokenInputPosition = originalParser.getTokenStream().index(); - try { - // Create a new parser interpreter to parse the ambiguous subphrase - ParserInterpreter parser; - if ( originalParser instanceof ParserInterpreter ) { - parser = ((ParserInterpreter)originalParser).copyFrom((ParserInterpreter)originalParser); - } - else { - char[] serializedAtn = ATNSerializer.getSerializedAsChars(originalParser.getATN()); - ATN deserialized = new ATNDeserializer().deserialize(serializedAtn); - parser = new ParserInterpreter(originalParser.getGrammarFileName(), - originalParser.getVocabulary(), - Arrays.asList(originalParser.getRuleNames()), - deserialized, - originalParser.getTokenStream()); - } - - // Make sure that we don't get any error messages from using this temporary parser - parser.removeErrorListeners(); - parser.removeParseListeners(); - parser.getInterpreter().setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION); - - // get ambig trees - int alt = ambiguityInfo.ambigAlts.nextSetBit(0); - while ( alt>=0 ) { - // re-parse entire input for all ambiguous alternatives - // (don't have to do first as it's been parsed, but do again for simplicity - // using this temp parser.) - parser.reset(); - parser.getTokenStream().seek(0); // rewind the input all the way for re-parsing - parser.overrideDecision = ambiguityInfo.decision; - parser.overrideDecisionInputIndex = ambiguityInfo.startIndex; - parser.overrideDecisionAlt = alt; - ParserRuleContext t = parser.parse(startRuleIndex); - ParserRuleContext ambigSubTree = - Trees.getRootOfSubtreeEnclosingRegion(t, ambiguityInfo.startIndex, ambiguityInfo.stopIndex); - trees.add(ambigSubTree); - alt = ambiguityInfo.ambigAlts.nextSetBit(alt+1); - } - } - finally { - originalParser.getTokenStream().seek(saveTokenInputPosition); - } - - return trees; - } - /** * Checks whether or not {@code symbol} can follow the current state in the * ATN. The behavior of this method is equivalent to the following, but is diff --git a/runtime/Java/src/org/antlr/v4/runtime/ParserInterpreter.java b/runtime/Java/src/org/antlr/v4/runtime/ParserInterpreter.java index f8f68f721..7037c70fc 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/ParserInterpreter.java +++ b/runtime/Java/src/org/antlr/v4/runtime/ParserInterpreter.java @@ -48,7 +48,6 @@ import org.antlr.v4.runtime.dfa.DFA; import org.antlr.v4.runtime.misc.Pair; import java.util.ArrayDeque; -import java.util.Arrays; import java.util.Collection; import java.util.Deque; @@ -100,6 +99,16 @@ public class ParserInterpreter extends Parser { protected int overrideDecision = -1; protected int overrideDecisionInputIndex = -1; protected int overrideDecisionAlt = -1; + protected boolean overrideDecisionReached = false; // latch and only override once; error might trigger infinite loop + + /** What is the current context when we override a decisions? This tells + * us what the root of the parse tree is when using override + * for an ambiguity/lookahead check. + */ + protected InterpreterRuleContext overrideDecisionRoot = null; + + + protected InterpreterRuleContext rootContext; /** * @deprecated Use {@link #ParserInterpreter(String, Vocabulary, Collection, ATN, TokenStream)} instead. @@ -138,17 +147,11 @@ public class ParserInterpreter extends Parser { sharedContextCache)); } - /** A factory-like copy constructor that creates a new parser interpreter by reusing - * the fields of a previous interpreter. - * - * @since 4.5.1 - * - * @param old The interpreter to copy - */ - public ParserInterpreter copyFrom(ParserInterpreter old) { - return new ParserInterpreter(old.grammarFileName, old.vocabulary, - Arrays.asList(old.ruleNames), - old.atn, old.getTokenStream()); + @Override + public void reset() { + super.reset(); + overrideDecisionReached = false; + overrideDecisionRoot = null; } @Override @@ -181,7 +184,7 @@ public class ParserInterpreter extends Parser { public ParserRuleContext parse(int startRuleIndex) { RuleStartState startRuleStartState = atn.ruleToStartState[startRuleIndex]; - InterpreterRuleContext rootContext = createInterpreterRuleContext(null, ATNState.INVALID_STATE_NUMBER, startRuleIndex); + rootContext = createInterpreterRuleContext(null, ATNState.INVALID_STATE_NUMBER, startRuleIndex); if (startRuleStartState.isLeftRecursiveRule) { enterRecursionRule(rootContext, startRuleStartState.stateNumber, startRuleIndex, 0); } @@ -239,12 +242,12 @@ public class ParserInterpreter extends Parser { protected void visitState(ATNState p) { // System.out.println("visitState "+p.stateNumber); - int edge = 1; + int predictedAlt = 1; if ( p instanceof DecisionState ) { - edge = visitDecisionsState((DecisionState) p); + predictedAlt = visitDecisionState((DecisionState) p); } - Transition transition = p.transition(edge - 1); + Transition transition = p.transition(predictedAlt - 1); switch (transition.getSerializationType()) { case Transition.EPSILON: if ( p.getStateType()==ATNState.STAR_LOOP_ENTRY && @@ -318,21 +321,22 @@ public class ParserInterpreter extends Parser { setState(transition.target.stateNumber); } - protected int visitDecisionsState(DecisionState p) { - int edge = 1; + protected int visitDecisionState(DecisionState p) { + int predictedAlt = 1; if ( p.getNumberOfTransitions()>1 ) { - int predictedAlt; getErrorHandler().sync(this); int decision = p.decision; - if (decision == overrideDecision && _input.index() == overrideDecisionInputIndex) { + if ( !overrideDecisionReached && + decision == overrideDecision && _input.index() == overrideDecisionInputIndex) + { predictedAlt = overrideDecisionAlt; + overrideDecisionReached = true; } else { predictedAlt = getInterpreter().adaptivePredict(_input, decision, _ctx); } - edge = predictedAlt; } - return edge; + return predictedAlt; } /** Provide simple "factory" for InterpreterRuleContext's. */ @@ -405,6 +409,10 @@ public class ParserInterpreter extends Parser { overrideDecisionAlt = forcedAlt; } + public InterpreterRuleContext getOverrideDecisionRoot() { + return overrideDecisionRoot; + } + /** Rely on the error handler for this parser but, if no tokens are consumed * to recover, add an error node. Otherwise, nothing is seen in the parse * tree. @@ -418,10 +426,6 @@ public class ParserInterpreter extends Parser { InputMismatchException ime = (InputMismatchException)e; Token tok = e.getOffendingToken(); int expectedTokenType = ime.getExpectedTokens().getMinElement(); // get any element - String tokenText; - if ( expectedTokenType== Token.EOF ) tokenText = ""; - else tokenText = ""; - Token errToken = getTokenFactory().create(new Pair(tok.getTokenSource(), tok.getTokenSource().getInputStream()), expectedTokenType, tok.getText(), @@ -446,4 +450,8 @@ public class ParserInterpreter extends Parser { protected Token recoverInline() { return _errHandler.recoverInline(this); } + + public InterpreterRuleContext getRootContext() { + return rootContext; + } } diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java index a07a90e4a..15f71ec58 100755 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java @@ -398,6 +398,7 @@ public class ParserATNSimulator extends ATNSimulator { * appropriate start state for the precedence level rather * than simply setting DFA.s0. */ + dfa.s0.configs = s0_closure; // not used for prediction but useful to know start configs anyway s0_closure = applyPrecedenceFilter(s0_closure); s0 = addDFAState(dfa, new DFAState(s0_closure)); dfa.setPrecedenceStartState(parser.getPrecedence(), s0); diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ProfilingATNSimulator.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ProfilingATNSimulator.java index 77ac65010..20ffbd297 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/ProfilingATNSimulator.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ProfilingATNSimulator.java @@ -244,4 +244,8 @@ public class ProfilingATNSimulator extends ParserATNSimulator { public DecisionInfo[] getDecisionInfo() { return decisions; } + + public DFAState getCurrentState() { + return currentState; + } } diff --git a/runtime/Java/src/org/antlr/v4/runtime/tree/Trees.java b/runtime/Java/src/org/antlr/v4/runtime/tree/Trees.java index 4bce486e8..cae1db694 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/tree/Trees.java +++ b/runtime/Java/src/org/antlr/v4/runtime/tree/Trees.java @@ -178,7 +178,6 @@ public class Trees { /** Return a list of all ancestors of this node. The first node of * list is the root and the last is the parent of this node. */ - public static List getAncestors(Tree t) { if ( t.getParent()==null ) return Collections.emptyList(); List ancestors = new ArrayList(); @@ -190,6 +189,21 @@ public class Trees { return ancestors; } + /** Return true if t is u's parent or a node on path to root from u. + * Use == not equals(). + * + * @since 4.5.1 + */ + public static boolean isAncestorOf(Tree t, Tree u) { + if ( t==null || u==null || t.getParent()==null ) return false; + Tree p = u.getParent(); + while ( p!=null ) { + if ( t == p ) return true; + p = p.getParent(); + } + return false; + } + public static Collection findAllTokenNodes(ParseTree t, int ttype) { return findAllNodes(t, ttype, true); } diff --git a/tool/src/org/antlr/v4/tool/Grammar.java b/tool/src/org/antlr/v4/tool/Grammar.java index 4de32bb6f..5632ab451 100644 --- a/tool/src/org/antlr/v4/tool/Grammar.java +++ b/tool/src/org/antlr/v4/tool/Grammar.java @@ -32,6 +32,7 @@ package org.antlr.v4.tool; import org.antlr.v4.Tool; import org.antlr.v4.analysis.LeftRecursiveRuleTransformer; +import org.antlr.v4.automata.ParserATNFactory; import org.antlr.v4.misc.CharSupport; import org.antlr.v4.misc.OrderedHashMap; import org.antlr.v4.misc.Utils; @@ -203,7 +204,6 @@ public class Grammar implements AttributeResolver { public List decisionLOOK; - public final Tool tool; /** Token names and literal tokens like "void" are uniquely indexed. @@ -507,6 +507,14 @@ public class Grammar implements AttributeResolver { */ } + public ATN getATN() { + if ( atn==null ) { + ParserATNFactory factory = new ParserATNFactory(this); + atn = factory.createATN(); + } + return atn; + } + public Rule getRule(int index) { return indexToRule.get(index); } public Rule getRule(String grammarName, String ruleName) { @@ -1313,6 +1321,16 @@ public class Grammar implements AttributeResolver { return new LexerInterpreter(fileName, getVocabulary(), Arrays.asList(getRuleNames()), ((LexerGrammar)this).modes.keySet(), deserialized, input); } + /** @since 4.5.1 */ + public GrammarParserInterpreter createGrammarParserInterpreter(TokenStream tokenStream) { + if (this.isLexer()) { + throw new IllegalStateException("A parser interpreter can only be created for a parser or combined grammar."); + } + char[] serializedAtn = ATNSerializer.getSerializedAsChars(atn); + ATN deserialized = new ATNDeserializer().deserialize(serializedAtn); + return new GrammarParserInterpreter(this, deserialized, tokenStream); + } + public ParserInterpreter createParserInterpreter(TokenStream tokenStream) { if (this.isLexer()) { throw new IllegalStateException("A parser interpreter can only be created for a parser or combined grammar."); diff --git a/tool/src/org/antlr/v4/tool/GrammarInterpreterRuleContext.java b/tool/src/org/antlr/v4/tool/GrammarInterpreterRuleContext.java new file mode 100644 index 000000000..88b1a7f26 --- /dev/null +++ b/tool/src/org/antlr/v4/tool/GrammarInterpreterRuleContext.java @@ -0,0 +1,25 @@ +package org.antlr.v4.tool; + +import org.antlr.v4.runtime.InterpreterRuleContext; +import org.antlr.v4.runtime.ParserRuleContext; + +/** + * @since 4.5.1 + */ +public class GrammarInterpreterRuleContext extends InterpreterRuleContext { + protected int outerAltNum = 1; + + public GrammarInterpreterRuleContext(ParserRuleContext parent, int invokingStateNumber, int ruleIndex) { + super(parent, invokingStateNumber, ruleIndex); + } + + /** The predicted outermost alternative for the rule associated + * with this context object. If left recursive, the true original + * outermost alternative is returned. + */ + public int getOuterAltNum() { return outerAltNum; } + + public void setOuterAltNum(int outerAltNum) { + this.outerAltNum = outerAltNum; + } +} diff --git a/tool/src/org/antlr/v4/tool/GrammarParserInterpreter.java b/tool/src/org/antlr/v4/tool/GrammarParserInterpreter.java new file mode 100644 index 000000000..7f46ddfbe --- /dev/null +++ b/tool/src/org/antlr/v4/tool/GrammarParserInterpreter.java @@ -0,0 +1,292 @@ +package org.antlr.v4.tool; + +import org.antlr.v4.runtime.BailErrorStrategy; +import org.antlr.v4.runtime.InterpreterRuleContext; +import org.antlr.v4.runtime.Parser; +import org.antlr.v4.runtime.ParserInterpreter; +import org.antlr.v4.runtime.ParserRuleContext; +import org.antlr.v4.runtime.RecognitionException; +import org.antlr.v4.runtime.TokenStream; +import org.antlr.v4.runtime.UnbufferedTokenStream; +import org.antlr.v4.runtime.Vocabulary; +import org.antlr.v4.runtime.atn.ATN; +import org.antlr.v4.runtime.atn.ATNDeserializer; +import org.antlr.v4.runtime.atn.ATNSerializer; +import org.antlr.v4.runtime.atn.ATNState; +import org.antlr.v4.runtime.atn.DecisionState; +import org.antlr.v4.runtime.atn.PredictionMode; +import org.antlr.v4.runtime.atn.RuleStartState; +import org.antlr.v4.runtime.atn.StarLoopEntryState; +import org.antlr.v4.runtime.tree.Trees; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class GrammarParserInterpreter extends ParserInterpreter { + /** The grammar associated with this interpreter. Unlike the + * {@link ParserInterpreter} from the standard distribution, + * this can reference Grammar, which is in the tools area not + * purely runtime. + */ + protected final Grammar g; + + protected BitSet decisionStatesThatSetOuterAltNumInContext; + + /** Cache {@link LeftRecursiveRule#getPrimaryAlts()} and + * {@link LeftRecursiveRule#getRecursiveOpAlts()} for states in + * {@link #decisionStatesThatSetOuterAltNumInContext}. + */ + protected final Map stateToAltsMap = new HashMap(); + + public GrammarParserInterpreter(Grammar g, + String grammarFileName, + Vocabulary vocabulary, + Collection ruleNames, + ATN atn, + TokenStream input) { + super(grammarFileName, vocabulary, ruleNames, atn, input); + this.g = g; + } + + public GrammarParserInterpreter(Grammar g, ATN atn, TokenStream input) { + super(g.fileName, g.getVocabulary(), + Arrays.asList(g.getRuleNames()), + atn, // must run ATN through serializer to set some state flags + input); + this.g = g; + decisionStatesThatSetOuterAltNumInContext = findOuterMostDecisionStates(); + } + + @Override + protected InterpreterRuleContext createInterpreterRuleContext(ParserRuleContext parent, + int invokingStateNumber, + int ruleIndex) + { + return new GrammarInterpreterRuleContext(parent, invokingStateNumber, ruleIndex); + } + + @Override + public void reset() { + super.reset(); + overrideDecisionRoot = null; + } + + /** identify the ATN states where we need to set the outer alt number. + * For regular rules, that's the block at the target to rule start state. + * For left-recursive rules, we track the primary block, which looks just + * like a regular rule's outer block, and the star loop block (always + * there even if 1 alt). + */ + public BitSet findOuterMostDecisionStates() { + BitSet track = new BitSet(atn.states.size()); + int numberOfDecisions = atn.getNumberOfDecisions(); + for (int i = 0; i < numberOfDecisions; i++) { + DecisionState decisionState = atn.getDecisionState(i); + RuleStartState startState = atn.ruleToStartState[decisionState.ruleIndex]; + // Look for StarLoopEntryState that is in any left recursive rule + if ( decisionState instanceof StarLoopEntryState) { + StarLoopEntryState loopEntry = (StarLoopEntryState)decisionState; + if ( loopEntry.isPrecedenceDecision ) { + // Recursive alts always result in a (...)* in the transformed + // left recursive rule and that always has a BasicBlockStartState + // even if just 1 recursive alt exists. + ATNState blockStart = loopEntry.transition(0).target; + // track the StarBlockStartState associated with the recursive alternatives + track.set(blockStart.stateNumber); + } + } + else if ( startState.transition(0).target == decisionState ) { + // always track outermost block for any rule if it exists + track.set(decisionState.stateNumber); + } + } + return track; + } + + /**In the case of left-recursive rules, + * there is typically a decision for the primary alternatives and a + * decision to choose between the recursive operator alternatives. + * For example, the following left recursive rule has two primary and 2 + * recursive alternatives.

+ * + e : e '*' e + | '-' INT + | e '+' e + | ID + ; + + *

ANTLR rewrites that rule to be

+ + e[int precedence] + : ('-' INT | ID) + ( {...}? '*' e[5] + | {...}? '+' e[3] + )* + ; + + * + *

So, there are two decisions associated with picking the outermost alt. + * This complicates our tracking significantly. The outermost alternative number + * is a function of the decision (ATN state) within a left recursive rule and the + * predicted alternative coming back from adaptivePredict(). + */ + @Override + protected int visitDecisionState(DecisionState p) { + int predictedAlt = super.visitDecisionState(p); + if( p.getNumberOfTransitions() > 1) { +// System.out.print("decision "+p.decision+": "+predictedAlt); + if( p.decision == this.overrideDecision && + this._input.index() == this.overrideDecisionInputIndex) + { +// System.out.print(" OVERRIDE"); + overrideDecisionRoot = (GrammarInterpreterRuleContext)getContext(); + } +// System.out.println(); + } + + GrammarInterpreterRuleContext ctx = (GrammarInterpreterRuleContext)_ctx; + if ( decisionStatesThatSetOuterAltNumInContext.get(p.stateNumber) ) { + ctx.outerAltNum = predictedAlt; + Rule r = g.getRule(p.ruleIndex); + if ( atn.ruleToStartState[r.index].isLeftRecursiveRule ) { + int[] alts = stateToAltsMap.get(p); + LeftRecursiveRule lr = (LeftRecursiveRule) g.getRule(p.ruleIndex); + if (p.getStateType() == ATNState.BLOCK_START) { + if ( alts==null ) { + alts = lr.getPrimaryAlts(); + stateToAltsMap.put(p, alts); // cache it + } + } + else if (p.getStateType() == ATNState.STAR_BLOCK_START) { + if ( alts==null ) { + alts = lr.getRecursiveOpAlts(); + stateToAltsMap.put(p, alts); // cache it + } + } + ctx.outerAltNum = alts[predictedAlt]; + } + } + + return predictedAlt; + } + + /** Given an AmbiguityInfo object that contains information about an + * ambiguous decision event, return the list of ambiguous parse trees. + * An ambiguity occurs when a specific token sequence can be recognized + * in more than one way by the grammar. These ambiguities are detected only + * at decision points. + * + * The list of trees includes the actual interpretation (that for + * the minimum alternative number) and all ambiguous alternatives. + * The actual interpretation is always first. + * + * This method reuses the same physical input token stream used to + * detect the ambiguity by the original parser in the first place. + * This method resets/seeks within but does not alter originalParser. + * The input position is restored upon exit from this method. + * Parsers using a {@link UnbufferedTokenStream} may not be able to + * perform the necessary save index() / seek(saved_index) operation. + * + * The trees are rooted at the node whose start..stop token indices + * include the start and stop indices of this ambiguity event. That is, + * the trees returns will always include the complete ambiguous subphrase + * identified by the ambiguity event. + * + * Be aware that this method does NOT notify error or parse listeners as + * it would trigger duplicate or otherwise unwanted events. + * + * This uses a temporary ParserATNSimulator and a ParserInterpreter + * so we don't mess up any statistics, event lists, etc... + * The parse tree constructed while identifying/making ambiguityInfo is + * not affected by this method as it creates a new parser interp to + * get the ambiguous interpretations. + * + * Nodes in the returned ambig trees are independent of the original parse + * tree (constructed while identifying/creating ambiguityInfo). + * + * @since 4.5.1 + * + * @param originalParser The parser used to create ambiguityInfo; it + * is not modified by this routine and can be either + * a generated or interpreted parser. It's token + * stream *is* reset/seek()'d. + * @param ambiguityInfo The information about an ambiguous decision event + * for which you want ambiguous parse trees. + * @param startRuleIndex The start rule for the entire grammar, not + * the ambiguous decision. We re-parse the entire input + * and so we need the original start rule. + * + * @return The list of all possible interpretations of + * the input for the decision in ambiguityInfo. + * The actual interpretation chosen by the parser + * is always given first because this method + * retests the input in alternative order and + * ANTLR always resolves ambiguities by choosing + * the first alternative that matches the input. + * + * @throws RecognitionException Throws upon syntax error while matching + * ambig input. + */ + public static List getAllPossibleParseTrees(Grammar g, + Parser originalParser, + TokenStream tokens, + int decision, + BitSet alts, + int startIndex, + int stopIndex, + int startRuleIndex) + throws RecognitionException + { + List trees = new ArrayList(); + // Create a new parser interpreter to parse the ambiguous subphrase + ParserInterpreter parser; + if (originalParser instanceof ParserInterpreter) { + parser = new GrammarParserInterpreter(g, originalParser.getATN(), originalParser.getTokenStream()); + } + else { + char[] serializedAtn = ATNSerializer.getSerializedAsChars(originalParser.getATN()); + ATN deserialized = new ATNDeserializer().deserialize(serializedAtn); + parser = new ParserInterpreter(originalParser.getGrammarFileName(), + originalParser.getVocabulary(), + Arrays.asList(originalParser.getRuleNames()), + deserialized, + tokens); + } + + parser.setInputStream(tokens); + + // Make sure that we don't get any error messages from using this temporary parser + parser.setErrorHandler(new BailErrorStrategy()); + parser.removeErrorListeners(); + parser.removeParseListeners(); + parser.getInterpreter().setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION); + + // get ambig trees + int alt = alts.nextSetBit(0); + while (alt >= 0) { + // re-parse entire input for all ambiguous alternatives + // (don't have to do first as it's been parsed, but do again for simplicity + // using this temp parser.) + parser.reset(); + parser.getTokenStream().seek(0); // rewind the input all the way for re-parsing + parser.addDecisionOverride(decision, startIndex, alt); + ParserRuleContext t = parser.parse(startRuleIndex); + GrammarInterpreterRuleContext ambigSubTree = + (GrammarInterpreterRuleContext) Trees.getRootOfSubtreeEnclosingRegion(t, startIndex, stopIndex); + // Use higher of overridden decision tree or tree enclosing all tokens + if ( Trees.isAncestorOf(parser.getOverrideDecisionRoot(), ambigSubTree) ) { + ambigSubTree = (GrammarInterpreterRuleContext)parser.getOverrideDecisionRoot(); + } + trees.add(ambigSubTree); + alt = alts.nextSetBit(alt + 1); + } + + return trees; + } + +} diff --git a/tool/test/org/antlr/v4/test/tool/TestAmbigParseTrees.java b/tool/test/org/antlr/v4/test/tool/TestAmbigParseTrees.java index 7fd5e2912..0b1fac959 100644 --- a/tool/test/org/antlr/v4/test/tool/TestAmbigParseTrees.java +++ b/tool/test/org/antlr/v4/test/tool/TestAmbigParseTrees.java @@ -3,7 +3,6 @@ package org.antlr.v4.test.tool; import org.antlr.v4.runtime.ANTLRInputStream; import org.antlr.v4.runtime.CommonTokenStream; import org.antlr.v4.runtime.LexerInterpreter; -import org.antlr.v4.runtime.Parser; import org.antlr.v4.runtime.ParserInterpreter; import org.antlr.v4.runtime.ParserRuleContext; import org.antlr.v4.runtime.atn.ATNState; @@ -16,6 +15,7 @@ import org.antlr.v4.runtime.atn.RuleStartState; import org.antlr.v4.runtime.atn.Transition; import org.antlr.v4.runtime.tree.ParseTree; import org.antlr.v4.tool.Grammar; +import org.antlr.v4.tool.GrammarParserInterpreter; import org.antlr.v4.tool.LexerGrammar; import org.junit.Test; @@ -211,7 +211,7 @@ public class TestAmbigParseTrees { { LexerInterpreter lexEngine = lg.createLexerInterpreter(new ANTLRInputStream(input)); CommonTokenStream tokens = new CommonTokenStream(lexEngine); - final ParserInterpreter parser = g.createParserInterpreter(tokens); + final GrammarParserInterpreter parser = g.createGrammarParserInterpreter(tokens); parser.setProfile(true); parser.getInterpreter().setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION); @@ -226,7 +226,15 @@ public class TestAmbigParseTrees { assertEquals(1, ambiguities.size()); AmbiguityInfo ambiguityInfo = ambiguities.get(0); - List ambiguousParseTrees = Parser.getAmbiguousParseTrees(parser, ambiguityInfo, ruleIndex); + List ambiguousParseTrees = + GrammarParserInterpreter.getAllPossibleParseTrees(g, + parser, + tokens, + ambiguityInfo.decision, + ambiguityInfo.ambigAlts, + ambiguityInfo.startIndex, + ambiguityInfo.stopIndex, + ruleIndex); assertEquals(expectedAmbigAlts, ambiguityInfo.ambigAlts.toString()); assertEquals(ambiguityInfo.ambigAlts.cardinality(), ambiguousParseTrees.size());