Merge pull request #1427 from ericvergnaud/poor-left-recursive-rule-performance
Fix poor left recursive rule performance in python + javascript
This commit is contained in:
commit
595fb089b0
|
@ -3,6 +3,8 @@ package org.antlr.v4.test.runtime.descriptors;
|
|||
import org.antlr.v4.test.runtime.BaseParserTestDescriptor;
|
||||
import org.antlr.v4.test.runtime.CommentHasStringValue;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class PerformanceDescriptors {
|
||||
/*
|
||||
* This is a regression test for antlr/antlr4#192 "Poor performance of
|
||||
|
@ -105,7 +107,7 @@ public class PerformanceDescriptors {
|
|||
|
||||
@Override
|
||||
public boolean ignore(String targetName) {
|
||||
return !targetName.equals("Java");
|
||||
return !Arrays.asList("Java", "Python2", "Python3", "Node").contains(targetName);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -485,7 +485,7 @@ ATNDeserializer.prototype.stateIsEndStateFor = function(state, idx) {
|
|||
|
||||
//
|
||||
// Analyze the {@link StarLoopEntryState} states in the specified ATN to set
|
||||
// the {@link StarLoopEntryState//precedenceRuleDecision} field to the
|
||||
// the {@link StarLoopEntryState//isPrecedenceDecision} field to the
|
||||
// correct value.
|
||||
//
|
||||
// @param atn The ATN.
|
||||
|
@ -505,7 +505,7 @@ ATNDeserializer.prototype.markPrecedenceDecisions = function(atn) {
|
|||
if (maybeLoopEndState instanceof LoopEndState) {
|
||||
if ( maybeLoopEndState.epsilonOnlyTransitions &&
|
||||
(maybeLoopEndState.transitions[0].target instanceof RuleStopState)) {
|
||||
state.precedenceRuleDecision = true;
|
||||
state.isPrecedenceDecision = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -303,7 +303,7 @@ function StarLoopEntryState() {
|
|||
this.stateType = ATNState.STAR_LOOP_ENTRY;
|
||||
this.loopBackState = null;
|
||||
// Indicates whether this state can benefit from a precedence DFA during SLL decision making.
|
||||
this.precedenceRuleDecision = null;
|
||||
this.isPrecedenceDecision = null;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
|
|
@ -262,6 +262,7 @@ var Set = Utils.Set;
|
|||
var BitSet = Utils.BitSet;
|
||||
var DoubleDict = Utils.DoubleDict;
|
||||
var ATN = require('./ATN').ATN;
|
||||
var ATNState = require('./ATNState').ATNState;
|
||||
var ATNConfig = require('./ATNConfig').ATNConfig;
|
||||
var ATNConfigSet = require('./ATNConfigSet').ATNConfigSet;
|
||||
var Token = require('./../Token').Token;
|
||||
|
@ -359,16 +360,7 @@ ParserATNSimulator.prototype.adaptivePredict = function(input, decision, outerCo
|
|||
" exec LA(1)==" + this.getLookaheadName(input) +
|
||||
", outerContext=" + outerContext.toString(this.parser.ruleNames));
|
||||
}
|
||||
// If this is not a precedence DFA, we check the ATN start state
|
||||
// to determine if this ATN start state is the decision for the
|
||||
// closure block that determines whether a precedence rule
|
||||
// should continue or complete.
|
||||
//
|
||||
if (!dfa.precedenceDfa && (dfa.atnStartState instanceof StarLoopEntryState)) {
|
||||
if (dfa.atnStartState.precedenceRuleDecision) {
|
||||
dfa.setPrecedenceDfa(true);
|
||||
}
|
||||
}
|
||||
|
||||
var fullCtx = false;
|
||||
var s0_closure = this.computeStartState(dfa.atnStartState, RuleContext.EMPTY, fullCtx);
|
||||
|
||||
|
@ -379,6 +371,7 @@ ParserATNSimulator.prototype.adaptivePredict = function(input, decision, outerCo
|
|||
// appropriate start state for the precedence level rather
|
||||
// than simply setting DFA.s0.
|
||||
//
|
||||
dfa.s0.configs = s0_closure; // not used for prediction but useful to know start configs anyway
|
||||
s0_closure = this.applyPrecedenceFilter(s0_closure);
|
||||
s0 = this.addDFAState(dfa, new DFAState(null, s0_closure));
|
||||
dfa.setPrecedenceStartState(this.parser.getPrecedence(), s0);
|
||||
|
@ -1293,6 +1286,9 @@ ParserATNSimulator.prototype.closure_ = function(config, configs, closureBusy, c
|
|||
// both epsilon transitions and non-epsilon transitions.
|
||||
}
|
||||
for(var i = 0;i<p.transitions.length; i++) {
|
||||
if(i==0 && this.canDropLoopEntryEdgeInLeftRecursiveRule(config))
|
||||
continue;
|
||||
|
||||
var t = p.transitions[i];
|
||||
var continueCollecting = collectPredicates && !(t instanceof ActionTransition);
|
||||
var c = this.getEpsilonTarget(config, t, continueCollecting, depth === 0, fullCtx, treatEofAsEpsilon);
|
||||
|
@ -1337,6 +1333,69 @@ ParserATNSimulator.prototype.closure_ = function(config, configs, closureBusy, c
|
|||
}
|
||||
};
|
||||
|
||||
|
||||
// Decides whether closure may skip transition 0 (the loop entry edge) of the
// state held by the given config.
//
// The answer is true only when config.state is a StarLoopEntryState flagged as
// a precedence decision (i.e. produced by left-recursion elimination), the
// context has no empty stack / empty path, and every return state on the
// context stack lies in the same rule and reaches the loop entry state again
// through epsilon edges only.
//
// @param config The ATN configuration currently being closed over; its
// {@code state} and {@code context} fields are read.
// @return {@code true} if the loop entry edge can be dropped, {@code false}
// otherwise.
ParserATNSimulator.prototype.canDropLoopEntryEdgeInLeftRecursiveRule = function(config) {
    var p = config.state;
    var i, returnState;

    // First check to see if we are in StarLoopEntryState generated during
    // left-recursion elimination. For efficiency, also check if
    // the context has an empty stack case. If so, it would mean
    // global FOLLOW so we can't perform optimization
    // Are we the special loop entry/exit state? or SLL wildcard
    if (p.stateType !== ATNState.STAR_LOOP_ENTRY || !p.isPrecedenceDecision ||
            config.context.isEmpty() || config.context.hasEmptyPath()) {
        return false;
    }

    // Require all return states to return back to the same rule that p is in.
    var numCtxs = config.context.length;
    for (i = 0; i < numCtxs; i++) { // for each stack context
        returnState = this.atn.states[config.context.getReturnState(i)];
        if (returnState.ruleIndex !== p.ruleIndex) {
            return false;
        }
    }

    // Transition 0 of p targets the start of the (...)* internal block; the
    // matching block end is looked up through the ATN by state number.
    var decisionStartState = p.transitions[0].target;
    var blockEndStateNum = decisionStartState.endState.stateNumber;
    var blockEndState = this.atn.states[blockEndStateNum];

    // Verify that the top of each stack context leads to loop entry/exit
    // state through epsilon edges and w/o leaving rule.
    for (i = 0; i < numCtxs; i++) { // for each stack context
        returnState = this.atn.states[config.context.getReturnState(i)];

        // all states must have single outgoing epsilon edge
        if (returnState.transitions.length !== 1 || !returnState.transitions[0].isEpsilon) {
            return false;
        }

        // Look for prefix op case like 'not' expr, '(' type ')' expr
        var returnStateTarget = returnState.transitions[0].target;
        if (returnState.stateType === ATNState.BLOCK_END && returnStateTarget === p) {
            continue;
        }

        // Look for 'expr op expr' or case where expr's return state is block end
        // of (...)* internal block; the block end points to loop back
        // which points to p but we don't need to check that
        if (returnState === blockEndState) {
            continue;
        }

        // Look for ternary expr ? expr : expr. The return state points at block end,
        // which points at loop entry state
        if (returnStateTarget === blockEndState) {
            continue;
        }

        // Look for complex prefix 'between expr and expr' case where 2nd expr's
        // return state points at block end state of (...)* internal block.
        // The transition count must be read via transitions.length; a misspelt
        // property is undefined and would make this case unreachable.
        if (returnStateTarget.stateType === ATNState.BLOCK_END &&
                returnStateTarget.transitions.length === 1 &&
                returnStateTarget.transitions[0].isEpsilon &&
                returnStateTarget.transitions[0].target === p) {
            continue;
        }

        // anything else ain't conforming
        return false;
    }
    return true;
};
|
||||
|
||||
|
||||
ParserATNSimulator.prototype.getRuleName = function( index) {
|
||||
if (this.parser!==null && index>=0) {
|
||||
return this.parser.ruleNames[index];
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
var DFAState = require('./DFAState').DFAState;
|
||||
var StarLoopEntryState = require('../atn/ATNState').StarLoopEntryState;
|
||||
var ATNConfigSet = require('./../atn/ATNConfigSet').ATNConfigSet;
|
||||
var DFASerializer = require('./DFASerializer').DFASerializer;
|
||||
var LexerDFASerializer = require('./DFASerializer').LexerDFASerializer;
|
||||
|
@ -58,6 +59,17 @@ function DFA(atnStartState, decision) {
|
|||
// {@code false}. This is the backing field for {@link //isPrecedenceDfa},
|
||||
// {@link //setPrecedenceDfa}.
|
||||
this.precedenceDfa = false;
|
||||
if (atnStartState instanceof StarLoopEntryState)
|
||||
{
|
||||
if (atnStartState.isPrecedenceDecision) {
|
||||
this.precedenceDfa = true;
|
||||
precedenceState = new DFAState(null, new ATNConfigSet());
|
||||
precedenceState.edges = [];
|
||||
precedenceState.isAcceptState = false;
|
||||
precedenceState.requiresFullContext = false;
|
||||
this.s0 = precedenceState;
|
||||
}
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -125,7 +137,7 @@ DFA.prototype.setPrecedenceDfa = function(precedenceDfa) {
|
|||
if (this.precedenceDfa!==precedenceDfa) {
|
||||
this._states = new DFAStatesSet();
|
||||
if (precedenceDfa) {
|
||||
var precedenceState = new DFAState(new ATNConfigSet());
|
||||
var precedenceState = new DFAState(null, new ATNConfigSet());
|
||||
precedenceState.edges = [];
|
||||
precedenceState.isAcceptState = false;
|
||||
precedenceState.requiresFullContext = false;
|
||||
|
|
|
@ -69,7 +69,7 @@ class ParserInterpreter(Parser):
|
|||
for state in atn.states:
|
||||
if not isinstance(state, StarLoopEntryState):
|
||||
continue
|
||||
if state.precedenceRuleDecision:
|
||||
if state.isPrecedenceDecision:
|
||||
self.pushRecursionContextStates.add(state.stateNumber)
|
||||
# get atn simulator that knows how to do predictions
|
||||
self._interp = ParserATNSimulator(self, atn, self.decisionToDFA, self.sharedContextCache)
|
||||
|
|
|
@ -368,7 +368,7 @@ class ATNDeserializer (object):
|
|||
|
||||
#
|
||||
# Analyze the {@link StarLoopEntryState} states in the specified ATN to set
|
||||
# the {@link StarLoopEntryState#precedenceRuleDecision} field to the
|
||||
# the {@link StarLoopEntryState#isPrecedenceDecision} field to the
|
||||
# correct value.
|
||||
#
|
||||
# @param atn The ATN.
|
||||
|
@ -387,7 +387,7 @@ class ATNDeserializer (object):
|
|||
if isinstance(maybeLoopEndState, LoopEndState):
|
||||
if maybeLoopEndState.epsilonOnlyTransitions and \
|
||||
isinstance(maybeLoopEndState.transitions[0].target, RuleStopState):
|
||||
state.precedenceRuleDecision = True
|
||||
state.isPrecedenceDecision = True
|
||||
|
||||
def verifyATN(self, atn):
|
||||
if not self.deserializationOptions.verifyATN:
|
||||
|
|
|
@ -262,7 +262,7 @@ class StarLoopEntryState(DecisionState):
|
|||
self.stateType = self.STAR_LOOP_ENTRY
|
||||
self.loopBackState = None
|
||||
# Indicates whether this state can benefit from a precedence DFA during SLL decision making.
|
||||
self.precedenceRuleDecision = None
|
||||
self.isPrecedenceDecision = None
|
||||
|
||||
# Mark the end of a * or + loop.
|
||||
class LoopEndState(ATNState):
|
||||
|
|
|
@ -266,7 +266,7 @@ from antlr4.atn.ATN import ATN
|
|||
from antlr4.atn.ATNConfig import ATNConfig
|
||||
from antlr4.atn.ATNConfigSet import ATNConfigSet
|
||||
from antlr4.atn.ATNSimulator import ATNSimulator
|
||||
from antlr4.atn.ATNState import StarLoopEntryState, RuleStopState
|
||||
from antlr4.atn.ATNState import StarLoopEntryState, RuleStopState, ATNState
|
||||
from antlr4.atn.PredictionMode import PredictionMode
|
||||
from antlr4.atn.SemanticContext import SemanticContext, AND, andContext, orContext
|
||||
from antlr4.atn.Transition import Transition, RuleTransition, ActionTransition, AtomTransition, SetTransition, NotSetTransition
|
||||
|
@ -341,15 +341,6 @@ class ParserATNSimulator(ATNSimulator):
|
|||
" exec LA(1)==" + self.getLookaheadName(input) +
|
||||
", outerContext=" + outerContext.toString(self.parser.literalNames, None))
|
||||
|
||||
# If this is not a precedence DFA, we check the ATN start state
|
||||
# to determine if this ATN start state is the decision for the
|
||||
# closure block that determines whether a precedence rule
|
||||
# should continue or complete.
|
||||
#
|
||||
if not dfa.precedenceDfa and isinstance(dfa.atnStartState, StarLoopEntryState):
|
||||
if dfa.atnStartState.precedenceRuleDecision:
|
||||
dfa.setPrecedenceDfa(True)
|
||||
|
||||
fullCtx = False
|
||||
s0_closure = self.computeStartState(dfa.atnStartState, ParserRuleContext.EMPTY, fullCtx)
|
||||
|
||||
|
@ -360,6 +351,7 @@ class ParserATNSimulator(ATNSimulator):
|
|||
# appropriate start state for the precedence level rather
|
||||
# than simply setting DFA.s0.
|
||||
#
|
||||
dfa.s0.configs = s0_closure # not used for prediction but useful to know start configs anyway
|
||||
s0_closure = self.applyPrecedenceFilter(s0_closure)
|
||||
s0 = self.addDFAState(dfa, DFAState(configs=s0_closure))
|
||||
dfa.setPrecedenceStartState(self.parser.getPrecedence(), s0)
|
||||
|
@ -1168,7 +1160,13 @@ class ParserATNSimulator(ATNSimulator):
|
|||
# make sure to not return here, because EOF transitions can act as
|
||||
# both epsilon transitions and non-epsilon transitions.
|
||||
|
||||
first = True
|
||||
for t in p.transitions:
|
||||
if first:
|
||||
first = False
|
||||
if self.canDropLoopEntryEdgeInLeftRecursiveRule(config):
|
||||
continue
|
||||
|
||||
continueCollecting = collectPredicates and not isinstance(t, ActionTransition)
|
||||
c = self.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon)
|
||||
if c is not None:
|
||||
|
@ -1205,6 +1203,161 @@ class ParserATNSimulator(ATNSimulator):
|
|||
|
||||
self.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon)
|
||||
|
||||
|
||||
|
||||
# Implements first-edge (loop entry) elimination as an optimization
|
||||
# during closure operations. See antlr/antlr4#1398.
|
||||
#
|
||||
# The optimization is to avoid adding the loop entry config when
|
||||
# the exit path can only lead back to the same
|
||||
# StarLoopEntryState after popping context at the rule end state
|
||||
# (traversing only epsilon edges, so we're still in closure, in
|
||||
# this same rule).
|
||||
#
|
||||
# We need to detect any state that can reach loop entry on
|
||||
# epsilon w/o exiting rule. We don't have to look at FOLLOW
|
||||
# links, just ensure that all stack tops for config refer to key
|
||||
# states in LR rule.
|
||||
#
|
||||
# To verify we are in the right situation we must first check
|
||||
# closure is at a StarLoopEntryState generated during LR removal.
|
||||
# Then we check that each stack top of context is a return state
|
||||
# from one of these cases:
|
||||
#
|
||||
# 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state
|
||||
# 2. expr op expr. The return state is the block end of internal block of (...)*
|
||||
# 3. 'between' expr 'and' expr. The return state of 2nd expr reference.
|
||||
# That state points at block end of internal block of (...)*.
|
||||
# 4. expr '?' expr ':' expr. The return state points at block end,
|
||||
# which points at loop entry state.
|
||||
#
|
||||
# If any is true for each stack top, then closure does not add a
|
||||
# config to the current config set for edge[0], the loop entry branch.
|
||||
#
|
||||
# Conditions fail if any context for the current config is:
|
||||
#
|
||||
# a. empty (we'd fall out of expr to do a global FOLLOW which could
|
||||
# even be to some weird spot in expr) or,
|
||||
# b. lies outside of expr or,
|
||||
# c. lies within expr but at a state not the BlockEndState
|
||||
# generated during LR removal
|
||||
#
|
||||
# Do we need to evaluate predicates ever in closure for this case?
|
||||
#
|
||||
# No. Predicates, including precedence predicates, are only
|
||||
# evaluated when computing a DFA start state. I.e., only before
|
||||
# the lookahead (but not parser) consumes a token.
|
||||
#
|
||||
# There are no epsilon edges allowed in LR rule alt blocks or in
|
||||
# the "primary" part (ID here). If closure is in
|
||||
# StarLoopEntryState any lookahead operation will have consumed a
|
||||
# token as there are no epsilon-paths that lead to
|
||||
# StarLoopEntryState. We do not have to evaluate predicates
|
||||
# therefore if we are in the generated StarLoopEntryState of a LR
|
||||
# rule. Note that when making a prediction starting at that
|
||||
# decision point, decision d=2, compute-start-state performs
|
||||
# closure starting at edges[0], edges[1] emanating from
|
||||
# StarLoopEntryState. That means it is not performing closure on
|
||||
# StarLoopEntryState during compute-start-state.
|
||||
#
|
||||
# How do we know this always gives same prediction answer?
|
||||
#
|
||||
# Without predicates, loop entry and exit paths are ambiguous
|
||||
# upon remaining input +b (in, say, a+b). Either path leads to
|
||||
# valid parses. Closure can lead to consuming + immediately or by
|
||||
# falling out of this call to expr back into expr and loop back
|
||||
# again to StarLoopEntryState to match +b. In this special case,
|
||||
# we choose the more efficient path, which is to take the bypass
|
||||
# path.
|
||||
#
|
||||
# The lookahead language has not changed because closure chooses
|
||||
# one path over the other. Both paths lead to consuming the same
|
||||
# remaining input during a lookahead operation. If the next token
|
||||
# is an operator, lookahead will enter the choice block with
|
||||
# operators. If it is not, lookahead will exit expr. Same as if
|
||||
# closure had chosen to enter the choice block immediately.
|
||||
#
|
||||
# Closure is examining one config (some loopentrystate, some alt,
|
||||
# context) which means it is considering exactly one alt. Closure
|
||||
# always copies the same alt to any derived configs.
|
||||
#
|
||||
# How do we know this optimization doesn't mess up precedence in
|
||||
# our parse trees?
|
||||
#
|
||||
# Looking through expr from left edge of stat only has to confirm
|
||||
# that an input, say, a+b+c; begins with any valid interpretation
|
||||
# of an expression. The precedence actually doesn't matter when
|
||||
# making a decision in stat seeing through expr. It is only when
|
||||
# parsing rule expr that we must use the precedence to get the
|
||||
# right interpretation and, hence, parse tree.
|
||||
#
|
||||
# @since 4.6
|
||||
#
|
||||
def canDropLoopEntryEdgeInLeftRecursiveRule(self, config):
    # Returns True when closure may skip transition 0 (the loop entry edge)
    # of config.state, False otherwise.
    p = config.state

    # Only a StarLoopEntryState flagged as a precedence decision (generated
    # during left-recursion elimination) qualifies.
    if p.stateType != ATNState.STAR_LOOP_ENTRY or not p.isPrecedenceDecision:
        return False

    # An empty stack or an empty path would mean global FOLLOW, so the
    # optimization cannot be performed.
    context = config.context
    if context.isEmpty() or context.hasEmptyPath():
        return False

    # Every return state on the stack must belong to the same rule as p.
    depth = len(context)
    if any(self.atn.states[context.getReturnState(i)].ruleIndex != p.ruleIndex
           for i in range(depth)):
        return False

    # Block end of the (...)* internal block reached through transition 0 of p,
    # looked up through the ATN by state number.
    loopBlockEnd = self.atn.states[p.transitions[0].target.endState.stateNumber]

    # Each stack top must lead back to the loop entry/exit state through
    # epsilon edges without leaving the rule.
    for i in range(depth):
        top = self.atn.states[context.getReturnState(i)]
        edges = top.transitions

        # all states must have single outgoing epsilon edge
        if len(edges) != 1 or not edges[0].isEpsilon:
            return False
        successor = edges[0].target

        # prefix op case like 'not' expr, '(' type ')' expr
        if top.stateType == ATNState.BLOCK_END and successor is p:
            continue

        # 'expr op expr': the return state is the block end of the (...)*
        # internal block (it points to loop back, which points to p)
        if top is loopBlockEnd:
            continue

        # ternary expr ? expr : expr: the return state points at block end,
        # which points at loop entry state
        if successor is loopBlockEnd:
            continue

        # complex prefix 'between expr and expr': the 2nd expr's return state
        # points at a block end whose only epsilon edge targets p; anything
        # else is not conforming
        if not (successor.stateType == ATNState.BLOCK_END
                and len(successor.transitions) == 1
                and successor.transitions[0].isEpsilon
                and successor.transitions[0].target is p):
            return False

    return True
|
||||
|
||||
|
||||
def getRuleName(self, index):
|
||||
if self.parser is not None and index>=0:
|
||||
return self.parser.ruleNames[index]
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
from antlr4.atn.ATNState import StarLoopEntryState
|
||||
|
||||
from antlr4.atn.ATNConfigSet import ATNConfigSet
|
||||
from antlr4.dfa.DFAState import DFAState
|
||||
|
@ -48,6 +49,15 @@ class DFA(object):
|
|||
# {@link #setPrecedenceDfa}.
|
||||
self.precedenceDfa = False
|
||||
|
||||
if isinstance(atnStartState, StarLoopEntryState):
|
||||
if atnStartState.isPrecedenceDecision:
|
||||
self.precedenceDfa = True
|
||||
precedenceState = DFAState(configs=ATNConfigSet())
|
||||
precedenceState.edges = []
|
||||
precedenceState.isAcceptState = False
|
||||
precedenceState.requiresFullContext = False
|
||||
self.s0 = precedenceState
|
||||
|
||||
|
||||
# Get the start state for a specific precedence value.
|
||||
#
|
||||
|
@ -111,7 +121,7 @@ class DFA(object):
|
|||
if self.precedenceDfa != precedenceDfa:
|
||||
self._states = dict()
|
||||
if precedenceDfa:
|
||||
precedenceState = DFAState(ATNConfigSet())
|
||||
precedenceState = DFAState(configs=ATNConfigSet())
|
||||
precedenceState.edges = []
|
||||
precedenceState.isAcceptState = False
|
||||
precedenceState.requiresFullContext = False
|
||||
|
|
|
@ -71,7 +71,7 @@ class ParserInterpreter(Parser):
|
|||
for state in atn.states:
|
||||
if not isinstance(state, StarLoopEntryState):
|
||||
continue
|
||||
if state.precedenceRuleDecision:
|
||||
if state.isPrecedenceDecision:
|
||||
self.pushRecursionContextStates.add(state.stateNumber)
|
||||
# get atn simulator that knows how to do predictions
|
||||
self._interp = ParserATNSimulator(self, atn, self.decisionToDFA, self.sharedContextCache)
|
||||
|
|
|
@ -368,7 +368,7 @@ class ATNDeserializer (object):
|
|||
|
||||
#
|
||||
# Analyze the {@link StarLoopEntryState} states in the specified ATN to set
|
||||
# the {@link StarLoopEntryState#precedenceRuleDecision} field to the
|
||||
# the {@link StarLoopEntryState#isPrecedenceDecision} field to the
|
||||
# correct value.
|
||||
#
|
||||
# @param atn The ATN.
|
||||
|
@ -387,7 +387,7 @@ class ATNDeserializer (object):
|
|||
if isinstance(maybeLoopEndState, LoopEndState):
|
||||
if maybeLoopEndState.epsilonOnlyTransitions and \
|
||||
isinstance(maybeLoopEndState.transitions[0].target, RuleStopState):
|
||||
state.precedenceRuleDecision = True
|
||||
state.isPrecedenceDecision = True
|
||||
|
||||
def verifyATN(self, atn:ATN):
|
||||
if not self.deserializationOptions.verifyATN:
|
||||
|
|
|
@ -37,7 +37,7 @@ from antlr4.dfa.DFAState import DFAState
|
|||
class ATNSimulator(object):
|
||||
|
||||
# Must distinguish between missing edge and edge we know leads nowhere#/
|
||||
ERROR = DFAState(ATNConfigSet())
|
||||
ERROR = DFAState(configs=ATNConfigSet())
|
||||
ERROR.stateNumber = 0x7FFFFFFF
|
||||
|
||||
# The context cache maps all PredictionContext objects that are ==
|
||||
|
|
|
@ -261,7 +261,7 @@ class StarLoopEntryState(DecisionState):
|
|||
self.stateType = self.STAR_LOOP_ENTRY
|
||||
self.loopBackState = None
|
||||
# Indicates whether this state can benefit from a precedence DFA during SLL decision making.
|
||||
self.precedenceRuleDecision = None
|
||||
self.isPrecedenceDecision = None
|
||||
|
||||
# Mark the end of a * or + loop.
|
||||
class LoopEndState(ATNState):
|
||||
|
|
|
@ -346,15 +346,6 @@ class ParserATNSimulator(ATNSimulator):
|
|||
" exec LA(1)==" + self.getLookaheadName(input) +
|
||||
", outerContext=" + outerContext.toString(self.parser.literalNames, None))
|
||||
|
||||
# If this is not a precedence DFA, we check the ATN start state
|
||||
# to determine if this ATN start state is the decision for the
|
||||
# closure block that determines whether a precedence rule
|
||||
# should continue or complete.
|
||||
#
|
||||
if not dfa.precedenceDfa and isinstance(dfa.atnStartState, StarLoopEntryState):
|
||||
if dfa.atnStartState.precedenceRuleDecision:
|
||||
dfa.setPrecedenceDfa(True)
|
||||
|
||||
fullCtx = False
|
||||
s0_closure = self.computeStartState(dfa.atnStartState, ParserRuleContext.EMPTY, fullCtx)
|
||||
|
||||
|
@ -365,6 +356,7 @@ class ParserATNSimulator(ATNSimulator):
|
|||
# appropriate start state for the precedence level rather
|
||||
# than simply setting DFA.s0.
|
||||
#
|
||||
dfa.s0.configs = s0_closure # not used for prediction but useful to know start configs anyway
|
||||
s0_closure = self.applyPrecedenceFilter(s0_closure)
|
||||
s0 = self.addDFAState(dfa, DFAState(configs=s0_closure))
|
||||
dfa.setPrecedenceStartState(self.parser.getPrecedence(), s0)
|
||||
|
@ -1173,7 +1165,13 @@ class ParserATNSimulator(ATNSimulator):
|
|||
# make sure to not return here, because EOF transitions can act as
|
||||
# both epsilon transitions and non-epsilon transitions.
|
||||
|
||||
first = True
|
||||
for t in p.transitions:
|
||||
if first:
|
||||
first = False
|
||||
if self.canDropLoopEntryEdgeInLeftRecursiveRule(config):
|
||||
continue
|
||||
|
||||
continueCollecting = collectPredicates and not isinstance(t, ActionTransition)
|
||||
c = self.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon)
|
||||
if c is not None:
|
||||
|
@ -1210,6 +1208,161 @@ class ParserATNSimulator(ATNSimulator):
|
|||
|
||||
self.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon)
|
||||
|
||||
|
||||
|
||||
# Implements first-edge (loop entry) elimination as an optimization
|
||||
# during closure operations. See antlr/antlr4#1398.
|
||||
#
|
||||
# The optimization is to avoid adding the loop entry config when
|
||||
# the exit path can only lead back to the same
|
||||
# StarLoopEntryState after popping context at the rule end state
|
||||
# (traversing only epsilon edges, so we're still in closure, in
|
||||
# this same rule).
|
||||
#
|
||||
# We need to detect any state that can reach loop entry on
|
||||
# epsilon w/o exiting rule. We don't have to look at FOLLOW
|
||||
# links, just ensure that all stack tops for config refer to key
|
||||
# states in LR rule.
|
||||
#
|
||||
# To verify we are in the right situation we must first check
|
||||
# closure is at a StarLoopEntryState generated during LR removal.
|
||||
# Then we check that each stack top of context is a return state
|
||||
# from one of these cases:
|
||||
#
|
||||
# 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state
|
||||
# 2. expr op expr. The return state is the block end of internal block of (...)*
|
||||
# 3. 'between' expr 'and' expr. The return state of 2nd expr reference.
|
||||
# That state points at block end of internal block of (...)*.
|
||||
# 4. expr '?' expr ':' expr. The return state points at block end,
|
||||
# which points at loop entry state.
|
||||
#
|
||||
# If any is true for each stack top, then closure does not add a
|
||||
# config to the current config set for edge[0], the loop entry branch.
|
||||
#
|
||||
# Conditions fail if any context for the current config is:
|
||||
#
|
||||
# a. empty (we'd fall out of expr to do a global FOLLOW which could
|
||||
# even be to some weird spot in expr) or,
|
||||
# b. lies outside of expr or,
|
||||
# c. lies within expr but at a state not the BlockEndState
|
||||
# generated during LR removal
|
||||
#
|
||||
# Do we need to evaluate predicates ever in closure for this case?
|
||||
#
|
||||
# No. Predicates, including precedence predicates, are only
|
||||
# evaluated when computing a DFA start state. I.e., only before
|
||||
# the lookahead (but not parser) consumes a token.
|
||||
#
|
||||
# There are no epsilon edges allowed in LR rule alt blocks or in
|
||||
# the "primary" part (ID here). If closure is in
|
||||
# StarLoopEntryState any lookahead operation will have consumed a
|
||||
# token as there are no epsilon-paths that lead to
|
||||
# StarLoopEntryState. We do not have to evaluate predicates
|
||||
# therefore if we are in the generated StarLoopEntryState of a LR
|
||||
# rule. Note that when making a prediction starting at that
|
||||
# decision point, decision d=2, compute-start-state performs
|
||||
# closure starting at edges[0], edges[1] emanating from
|
||||
# StarLoopEntryState. That means it is not performing closure on
|
||||
# StarLoopEntryState during compute-start-state.
|
||||
#
|
||||
# How do we know this always gives same prediction answer?
|
||||
#
|
||||
# Without predicates, loop entry and exit paths are ambiguous
|
||||
# upon remaining input +b (in, say, a+b). Either path leads to
|
||||
# valid parses. Closure can lead to consuming + immediately or by
|
||||
# falling out of this call to expr back into expr and loop back
|
||||
# again to StarLoopEntryState to match +b. In this special case,
|
||||
# we choose the more efficient path, which is to take the bypass
|
||||
# path.
|
||||
#
|
||||
# The lookahead language has not changed because closure chooses
|
||||
# one path over the other. Both paths lead to consuming the same
|
||||
# remaining input during a lookahead operation. If the next token
|
||||
# is an operator, lookahead will enter the choice block with
|
||||
# operators. If it is not, lookahead will exit expr. Same as if
|
||||
# closure had chosen to enter the choice block immediately.
|
||||
#
|
||||
# Closure is examining one config (some loopentrystate, some alt,
|
||||
# context) which means it is considering exactly one alt. Closure
|
||||
# always copies the same alt to any derived configs.
|
||||
#
|
||||
# How do we know this optimization doesn't mess up precedence in
|
||||
# our parse trees?
|
||||
#
|
||||
# Looking through expr from left edge of stat only has to confirm
|
||||
# that an input, say, a+b+c; begins with any valid interpretation
|
||||
# of an expression. The precedence actually doesn't matter when
|
||||
# making a decision in stat seeing through expr. It is only when
|
||||
# parsing rule expr that we must use the precedence to get the
|
||||
# right interpretation and, hence, parse tree.
|
||||
#
|
||||
# @since 4.6
|
||||
#
|
||||
def canDropLoopEntryEdgeInLeftRecursiveRule(self, config):
    # Returns True when closure may skip transition 0 (the loop entry edge)
    # of config.state, False otherwise.
    p = config.state

    # Only a StarLoopEntryState flagged as a precedence decision (generated
    # during left-recursion elimination) qualifies.
    if p.stateType != ATNState.STAR_LOOP_ENTRY or not p.isPrecedenceDecision:
        return False

    # An empty stack or an empty path would mean global FOLLOW, so the
    # optimization cannot be performed.
    context = config.context
    if context.isEmpty() or context.hasEmptyPath():
        return False

    # Every return state on the stack must belong to the same rule as p.
    depth = len(context)
    if any(self.atn.states[context.getReturnState(i)].ruleIndex != p.ruleIndex
           for i in range(depth)):
        return False

    # Block end of the (...)* internal block reached through transition 0 of p,
    # looked up through the ATN by state number.
    loopBlockEnd = self.atn.states[p.transitions[0].target.endState.stateNumber]

    # Each stack top must lead back to the loop entry/exit state through
    # epsilon edges without leaving the rule.
    for i in range(depth):
        top = self.atn.states[context.getReturnState(i)]
        edges = top.transitions

        # all states must have single outgoing epsilon edge
        if len(edges) != 1 or not edges[0].isEpsilon:
            return False
        successor = edges[0].target

        # prefix op case like 'not' expr, '(' type ')' expr
        if top.stateType == ATNState.BLOCK_END and successor is p:
            continue

        # 'expr op expr': the return state is the block end of the (...)*
        # internal block (it points to loop back, which points to p)
        if top is loopBlockEnd:
            continue

        # ternary expr ? expr : expr: the return state points at block end,
        # which points at loop entry state
        if successor is loopBlockEnd:
            continue

        # complex prefix 'between expr and expr': the 2nd expr's return state
        # points at a block end whose only epsilon edge targets p; anything
        # else is not conforming
        if not (successor.stateType == ATNState.BLOCK_END
                and len(successor.transitions) == 1
                and successor.transitions[0].isEpsilon
                and successor.transitions[0].target is p):
            return False

    return True
|
||||
|
||||
|
||||
def getRuleName(self, index:int):
|
||||
if self.parser is not None and index>=0:
|
||||
return self.parser.ruleNames[index]
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
from antlr4.atn.ATNState import StarLoopEntryState
|
||||
|
||||
from antlr4.atn.ATNConfigSet import ATNConfigSet
|
||||
from antlr4.atn.ATNState import DecisionState
|
||||
|
@ -49,6 +50,15 @@ class DFA(object):
|
|||
# {@link #setPrecedenceDfa}.
|
||||
self.precedenceDfa = False
|
||||
|
||||
if isinstance(atnStartState, StarLoopEntryState):
|
||||
if atnStartState.isPrecedenceDecision:
|
||||
self.precedenceDfa = True
|
||||
precedenceState = DFAState(configs=ATNConfigSet())
|
||||
precedenceState.edges = []
|
||||
precedenceState.isAcceptState = False
|
||||
precedenceState.requiresFullContext = False
|
||||
self.s0 = precedenceState
|
||||
|
||||
|
||||
# Get the start state for a specific precedence value.
|
||||
#
|
||||
|
@ -112,7 +122,7 @@ class DFA(object):
|
|||
if self.precedenceDfa != precedenceDfa:
|
||||
self._states = dict()
|
||||
if precedenceDfa:
|
||||
precedenceState = DFAState(ATNConfigSet())
|
||||
precedenceState = DFAState(configs=ATNConfigSet())
|
||||
precedenceState.edges = []
|
||||
precedenceState.isAcceptState = False
|
||||
precedenceState.requiresFullContext = False
|
||||
|
|
Loading…
Reference in New Issue