implement LR optimisation in Python3 + align naming + DFAState ctor bugs

This commit is contained in:
Eric Vergnaud 2016-11-30 00:40:42 +08:00
parent ccde4051bd
commit 6e071f4950
6 changed files with 178 additions and 15 deletions

View File

@ -71,7 +71,7 @@ class ParserInterpreter(Parser):
for state in atn.states:
if not isinstance(state, StarLoopEntryState):
continue
if state.precedenceRuleDecision:
if state.isPrecedenceDecision:
self.pushRecursionContextStates.add(state.stateNumber)
# get atn simulator that knows how to do predictions
self._interp = ParserATNSimulator(self, atn, self.decisionToDFA, self.sharedContextCache)

View File

@ -368,7 +368,7 @@ class ATNDeserializer (object):
#
# Analyze the {@link StarLoopEntryState} states in the specified ATN to set
# the {@link StarLoopEntryState#precedenceRuleDecision} field to the
# the {@link StarLoopEntryState#isPrecedenceDecision} field to the
# correct value.
#
# @param atn The ATN.
@ -387,7 +387,7 @@ class ATNDeserializer (object):
if isinstance(maybeLoopEndState, LoopEndState):
if maybeLoopEndState.epsilonOnlyTransitions and \
isinstance(maybeLoopEndState.transitions[0].target, RuleStopState):
state.precedenceRuleDecision = True
state.isPrecedenceDecision = True
def verifyATN(self, atn:ATN):
if not self.deserializationOptions.verifyATN:

View File

@ -37,7 +37,7 @@ from antlr4.dfa.DFAState import DFAState
class ATNSimulator(object):
# Must distinguish between missing edge and edge we know leads nowhere
ERROR = DFAState(ATNConfigSet())
ERROR = DFAState(configs=ATNConfigSet())
ERROR.stateNumber = 0x7FFFFFFF
# The context cache maps all PredictionContext objects that are ==

View File

@ -261,7 +261,7 @@ class StarLoopEntryState(DecisionState):
self.stateType = self.STAR_LOOP_ENTRY
self.loopBackState = None
# Indicates whether this state can benefit from a precedence DFA during SLL decision making.
self.precedenceRuleDecision = None
self.isPrecedenceDecision = None
# Mark the end of a * or + loop.
class LoopEndState(ATNState):

View File

@ -346,15 +346,6 @@ class ParserATNSimulator(ATNSimulator):
" exec LA(1)==" + self.getLookaheadName(input) +
", outerContext=" + outerContext.toString(self.parser.literalNames, None))
# If this is not a precedence DFA, we check the ATN start state
# to determine if this ATN start state is the decision for the
# closure block that determines whether a precedence rule
# should continue or complete.
#
if not dfa.precedenceDfa and isinstance(dfa.atnStartState, StarLoopEntryState):
if dfa.atnStartState.precedenceRuleDecision:
dfa.setPrecedenceDfa(True)
fullCtx = False
s0_closure = self.computeStartState(dfa.atnStartState, ParserRuleContext.EMPTY, fullCtx)
@ -365,6 +356,7 @@ class ParserATNSimulator(ATNSimulator):
# appropriate start state for the precedence level rather
# than simply setting DFA.s0.
#
dfa.s0.configs = s0_closure # not used for prediction but useful to know start configs anyway
s0_closure = self.applyPrecedenceFilter(s0_closure)
s0 = self.addDFAState(dfa, DFAState(configs=s0_closure))
dfa.setPrecedenceStartState(self.parser.getPrecedence(), s0)
@ -1173,7 +1165,13 @@ class ParserATNSimulator(ATNSimulator):
# make sure to not return here, because EOF transitions can act as
# both epsilon transitions and non-epsilon transitions.
first = True
for t in p.transitions:
if first:
first = False
if self.canDropLoopEntryEdgeInLeftRecursiveRule(config):
continue
continueCollecting = collectPredicates and not isinstance(t, ActionTransition)
c = self.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon)
if c is not None:
@ -1210,6 +1208,161 @@ class ParserATNSimulator(ATNSimulator):
self.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon)
# Implements first-edge (loop entry) elimination as an optimization
# during closure operations. See antlr/antlr4#1398.
#
# The optimization is to avoid adding the loop entry config when
# the exit path can only lead back to the same
# StarLoopEntryState after popping context at the rule end state
# (traversing only epsilon edges, so we're still in closure, in
# this same rule).
#
# We need to detect any state that can reach loop entry on
# epsilon w/o exiting rule. We don't have to look at FOLLOW
# links, just ensure that all stack tops for config refer to key
# states in LR rule.
#
# To verify we are in the right situation we must first check
# closure is at a StarLoopEntryState generated during LR removal.
# Then we check that each stack top of context is a return state
# from one of these cases:
#
# 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state
# 2. expr op expr. The return state is the block end of internal block of (...)*
# 3. 'between' expr 'and' expr. The return state of 2nd expr reference.
# That state points at block end of internal block of (...)*.
# 4. expr '?' expr ':' expr. The return state points at block end,
# which points at loop entry state.
#
# If any is true for each stack top, then closure does not add a
# config to the current config set for edge[0], the loop entry branch.
#
# Conditions fail if any context for the current config is:
#
# a. empty (we'd fall out of expr to do a global FOLLOW which could
# even be to some weird spot in expr) or,
# b. lies outside of expr or,
# c. lies within expr but at a state not the BlockEndState
# generated during LR removal
#
# Do we need to evaluate predicates ever in closure for this case?
#
# No. Predicates, including precedence predicates, are only
# evaluated when computing a DFA start state. I.e., only before
# the lookahead (but not parser) consumes a token.
#
# There are no epsilon edges allowed in LR rule alt blocks or in
# the "primary" part (ID here). If closure is in
# StarLoopEntryState any lookahead operation will have consumed a
# token as there are no epsilon-paths that lead to
# StarLoopEntryState. We do not have to evaluate predicates
# therefore if we are in the generated StarLoopEntryState of a LR
# rule. Note that when making a prediction starting at that
# decision point, decision d=2, compute-start-state performs
# closure starting at edges[0], edges[1] emanating from
# StarLoopEntryState. That means it is not performing closure on
# StarLoopEntryState during compute-start-state.
#
# How do we know this always gives same prediction answer?
#
# Without predicates, loop entry and exit paths are ambiguous
# upon remaining input +b (in, say, a+b). Either path leads to a
# valid parse. Closure can lead to consuming + immediately or by
# falling out of this call to expr back into expr and loop back
# again to StarLoopEntryState to match +b. In this special case,
# we choose the more efficient path, which is to take the bypass
# path.
#
# The lookahead language has not changed because closure chooses
# one path over the other. Both paths lead to consuming the same
# remaining input during a lookahead operation. If the next token
# is an operator, lookahead will enter the choice block with
# operators. If it is not, lookahead will exit expr. Same as if
# closure had chosen to enter the choice block immediately.
#
# Closure is examining one config (some loopentrystate, some alt,
# context) which means it is considering exactly one alt. Closure
# always copies the same alt to any derived configs.
#
# How do we know this optimization doesn't mess up precedence in
# our parse trees?
#
# Looking through expr from left edge of stat only has to confirm
# that an input, say, a+b+c; begins with any valid interpretation
# of an expression. The precedence actually doesn't matter when
# making a decision in stat seeing through expr. It is only when
# parsing rule expr that we must use the precedence to get the
# right interpretation and, hence, parse tree.
#
# @since 4.6
#
def canDropLoopEntryEdgeInLeftRecursiveRule(self, config):
    """Report whether the loop-entry edge of ``config.state`` can be skipped.

    Returns True only when all of the following hold:

    * ``config.state`` is a STAR_LOOP_ENTRY state flagged as a precedence
      decision;
    * ``config.context`` is neither empty nor contains an empty path;
    * every return state on the context stack belongs to the same rule as
      ``config.state``;
    * every return state has exactly one outgoing epsilon transition and
      matches one of the accepted shapes checked in the loop below.

    Any other situation returns False.
    """
    loopEntry = config.state
    stack = config.context

    # Cheap rejections first, evaluated in the same order as a single
    # short-circuiting ``or`` chain would evaluate them.
    if loopEntry.stateType != ATNState.STAR_LOOP_ENTRY:
        return False
    if not loopEntry.isPrecedenceDecision:
        return False
    if stack.isEmpty() or stack.hasEmptyPath():
        return False

    atnStates = self.atn.states
    stackSize = len(stack)

    # Every return state must sit in the same rule as the loop entry.
    if any(atnStates[stack.getReturnState(i)].ruleIndex != loopEntry.ruleIndex
           for i in range(stackSize)):
        return False

    # Block end state paired with the block start reached by the loop
    # entry's first transition.
    innerBlockStart = loopEntry.transitions[0].target
    innerBlockEnd = atnStates[innerBlockStart.endState.stateNumber]

    # Each stack top must lead back to the loop entry via epsilon edges.
    for i in range(stackSize):
        returnState = atnStates[stack.getReturnState(i)]
        outgoing = returnState.transitions

        # A conforming return state has a single outgoing epsilon edge.
        if len(outgoing) != 1 or not outgoing[0].isEpsilon:
            return False
        successor = outgoing[0].target

        # Prefix operator shape: a block end that points straight at the
        # loop entry.
        if returnState.stateType == ATNState.BLOCK_END and successor is loopEntry:
            continue

        # Binary operator shape (the return state *is* the inner block end)
        # or ternary shape (the return state points at the inner block end).
        if returnState is innerBlockEnd or successor is innerBlockEnd:
            continue

        # Complex prefix shape: the successor is a block end whose only
        # edge is an epsilon back to the loop entry.
        if successor.stateType != ATNState.BLOCK_END:
            return False
        onward = successor.transitions
        if len(onward) != 1 or not onward[0].isEpsilon or onward[0].target is not loopEntry:
            return False

    return True
def getRuleName(self, index:int):
if self.parser is not None and index>=0:
return self.parser.ruleNames[index]

View File

@ -27,6 +27,7 @@
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from antlr4.atn.ATNState import StarLoopEntryState
from antlr4.atn.ATNConfigSet import ATNConfigSet
from antlr4.atn.ATNState import DecisionState
@ -49,6 +50,15 @@ class DFA(object):
# {@link #setPrecedenceDfa}.
self.precedenceDfa = False
if isinstance(atnStartState, StarLoopEntryState):
if atnStartState.isPrecedenceDecision:
self.precedenceDfa = True
precedenceState = DFAState(configs=ATNConfigSet())
precedenceState.edges = []
precedenceState.isAcceptState = False
precedenceState.requiresFullContext = False
self.s0 = precedenceState
# Get the start state for a specific precedence value.
#
@ -112,7 +122,7 @@ class DFA(object):
if self.precedenceDfa != precedenceDfa:
self._states = dict()
if precedenceDfa:
precedenceState = DFAState(ATNConfigSet())
precedenceState = DFAState(configs=ATNConfigSet())
precedenceState.edges = []
precedenceState.isAcceptState = False
precedenceState.requiresFullContext = False