implement LR optimisation in Python3 + align naming + DFAState ctor bugs

This commit is contained in:
Eric Vergnaud 2016-11-30 00:40:42 +08:00
parent ccde4051bd
commit 6e071f4950
6 changed files with 178 additions and 15 deletions

View File

@ -71,7 +71,7 @@ class ParserInterpreter(Parser):
for state in atn.states: for state in atn.states:
if not isinstance(state, StarLoopEntryState): if not isinstance(state, StarLoopEntryState):
continue continue
if state.precedenceRuleDecision: if state.isPrecedenceDecision:
self.pushRecursionContextStates.add(state.stateNumber) self.pushRecursionContextStates.add(state.stateNumber)
# get atn simulator that knows how to do predictions # get atn simulator that knows how to do predictions
self._interp = ParserATNSimulator(self, atn, self.decisionToDFA, self.sharedContextCache) self._interp = ParserATNSimulator(self, atn, self.decisionToDFA, self.sharedContextCache)

View File

@ -368,7 +368,7 @@ class ATNDeserializer (object):
# #
# Analyze the {@link StarLoopEntryState} states in the specified ATN to set # Analyze the {@link StarLoopEntryState} states in the specified ATN to set
# the {@link StarLoopEntryState#precedenceRuleDecision} field to the # the {@link StarLoopEntryState#isPrecedenceDecision} field to the
# correct value. # correct value.
# #
# @param atn The ATN. # @param atn The ATN.
@ -387,7 +387,7 @@ class ATNDeserializer (object):
if isinstance(maybeLoopEndState, LoopEndState): if isinstance(maybeLoopEndState, LoopEndState):
if maybeLoopEndState.epsilonOnlyTransitions and \ if maybeLoopEndState.epsilonOnlyTransitions and \
isinstance(maybeLoopEndState.transitions[0].target, RuleStopState): isinstance(maybeLoopEndState.transitions[0].target, RuleStopState):
state.precedenceRuleDecision = True state.isPrecedenceDecision = True
def verifyATN(self, atn:ATN): def verifyATN(self, atn:ATN):
if not self.deserializationOptions.verifyATN: if not self.deserializationOptions.verifyATN:

View File

@ -37,7 +37,7 @@ from antlr4.dfa.DFAState import DFAState
class ATNSimulator(object): class ATNSimulator(object):
# Must distinguish between missing edge and edge we know leads nowhere#/ # Must distinguish between missing edge and edge we know leads nowhere#/
ERROR = DFAState(ATNConfigSet()) ERROR = DFAState(configs=ATNConfigSet())
ERROR.stateNumber = 0x7FFFFFFF ERROR.stateNumber = 0x7FFFFFFF
# The context cache maps all PredictionContext objects that are == # The context cache maps all PredictionContext objects that are ==

View File

@ -261,7 +261,7 @@ class StarLoopEntryState(DecisionState):
self.stateType = self.STAR_LOOP_ENTRY self.stateType = self.STAR_LOOP_ENTRY
self.loopBackState = None self.loopBackState = None
# Indicates whether this state can benefit from a precedence DFA during SLL decision making. # Indicates whether this state can benefit from a precedence DFA during SLL decision making.
self.precedenceRuleDecision = None self.isPrecedenceDecision = None
# Mark the end of a * or + loop. # Mark the end of a * or + loop.
class LoopEndState(ATNState): class LoopEndState(ATNState):

View File

@ -346,15 +346,6 @@ class ParserATNSimulator(ATNSimulator):
" exec LA(1)==" + self.getLookaheadName(input) + " exec LA(1)==" + self.getLookaheadName(input) +
", outerContext=" + outerContext.toString(self.parser.literalNames, None)) ", outerContext=" + outerContext.toString(self.parser.literalNames, None))
# If this is not a precedence DFA, we check the ATN start state
# to determine if this ATN start state is the decision for the
# closure block that determines whether a precedence rule
# should continue or complete.
#
if not dfa.precedenceDfa and isinstance(dfa.atnStartState, StarLoopEntryState):
if dfa.atnStartState.precedenceRuleDecision:
dfa.setPrecedenceDfa(True)
fullCtx = False fullCtx = False
s0_closure = self.computeStartState(dfa.atnStartState, ParserRuleContext.EMPTY, fullCtx) s0_closure = self.computeStartState(dfa.atnStartState, ParserRuleContext.EMPTY, fullCtx)
@ -365,6 +356,7 @@ class ParserATNSimulator(ATNSimulator):
# appropriate start state for the precedence level rather # appropriate start state for the precedence level rather
# than simply setting DFA.s0. # than simply setting DFA.s0.
# #
dfa.s0.configs = s0_closure # not used for prediction but useful to know start configs anyway
s0_closure = self.applyPrecedenceFilter(s0_closure) s0_closure = self.applyPrecedenceFilter(s0_closure)
s0 = self.addDFAState(dfa, DFAState(configs=s0_closure)) s0 = self.addDFAState(dfa, DFAState(configs=s0_closure))
dfa.setPrecedenceStartState(self.parser.getPrecedence(), s0) dfa.setPrecedenceStartState(self.parser.getPrecedence(), s0)
@ -1173,7 +1165,13 @@ class ParserATNSimulator(ATNSimulator):
# make sure to not return here, because EOF transitions can act as # make sure to not return here, because EOF transitions can act as
# both epsilon transitions and non-epsilon transitions. # both epsilon transitions and non-epsilon transitions.
first = True
for t in p.transitions: for t in p.transitions:
if first:
first = False
if self.canDropLoopEntryEdgeInLeftRecursiveRule(config):
continue
continueCollecting = collectPredicates and not isinstance(t, ActionTransition) continueCollecting = collectPredicates and not isinstance(t, ActionTransition)
c = self.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon) c = self.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon)
if c is not None: if c is not None:
@ -1210,6 +1208,161 @@ class ParserATNSimulator(ATNSimulator):
self.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon) self.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon)
# Implements first-edge (loop entry) elimination as an optimization
# during closure operations. See antlr/antlr4#1398.
#
# The optimization is to avoid adding the loop entry config when
# the exit path can only lead back to the same
# StarLoopEntryState after popping context at the rule end state
# (traversing only epsilon edges, so we're still in closure, in
# this same rule).
#
# We need to detect any state that can reach loop entry on
# epsilon w/o exiting rule. We don't have to look at FOLLOW
# links, just ensure that all stack tops for config refer to key
# states in LR rule.
#
# To verify we are in the right situation we must first check
# closure is at a StarLoopEntryState generated during LR removal.
# Then we check that each stack top of context is a return state
# from one of these cases:
#
# 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state
# 2. expr op expr. The return state is the block end of internal block of (...)*
# 3. 'between' expr 'and' expr. The return state of 2nd expr reference.
# That state points at block end of internal block of (...)*.
# 4. expr '?' expr ':' expr. The return state points at block end,
# which points at loop entry state.
#
# If any is true for each stack top, then closure does not add a
# config to the current config set for edge[0], the loop entry branch.
#
# Conditions fail if any context for the current config is:
#
# a. empty (we'd fall out of expr to do a global FOLLOW which could
# even be to some weird spot in expr) or,
# b. lies outside of expr or,
# c. lies within expr but at a state not the BlockEndState
# generated during LR removal
#
# Do we need to evaluate predicates ever in closure for this case?
#
# No. Predicates, including precedence predicates, are only
# evaluated when computing a DFA start state. I.e., only before
# the lookahead (but not parser) consumes a token.
#
# There are no epsilon edges allowed in LR rule alt blocks or in
# the "primary" part (ID here). If closure is in
# StarLoopEntryState any lookahead operation will have consumed a
# token as there are no epsilon-paths that lead to
# StarLoopEntryState. We do not have to evaluate predicates
# therefore if we are in the generated StarLoopEntryState of a LR
# rule. Note that when making a prediction starting at that
# decision point, decision d=2, compute-start-state performs
# closure starting at edges[0], edges[1] emanating from
# StarLoopEntryState. That means it is not performing closure on
# StarLoopEntryState during compute-start-state.
#
# How do we know this always gives same prediction answer?
#
# Without predicates, loop entry and exit paths are ambiguous
# upon remaining input +b (in, say, a+b). Either path leads to
# a valid parse. Closure can lead to consuming + immediately or by
# falling out of this call to expr back into expr and loop back
# again to StarLoopEntryState to match +b. In this special case,
# we choose the more efficient path, which is to take the bypass
# path.
#
# The lookahead language has not changed because closure chooses
# one path over the other. Both paths lead to consuming the same
# remaining input during a lookahead operation. If the next token
# is an operator, lookahead will enter the choice block with
# operators. If it is not, lookahead will exit expr. Same as if
# closure had chosen to enter the choice block immediately.
#
# Closure is examining one config (some loopentrystate, some alt,
# context) which means it is considering exactly one alt. Closure
# always copies the same alt to any derived configs.
#
# How do we know this optimization doesn't mess up precedence in
# our parse trees?
#
# Looking through expr from left edge of stat only has to confirm
# that an input, say, a+b+c; begins with any valid interpretation
# of an expression. The precedence actually doesn't matter when
# making a decision in stat seeing through expr. It is only when
# parsing rule expr that we must use the precedence to get the
# right interpretation and, hence, parse tree.
#
# @since 4.6
#
def canDropLoopEntryEdgeInLeftRecursiveRule(self, config):
    # Decide whether closure may skip transition 0 (the loop entry branch)
    # of the state held by config.
    #
    # Returns True only when all of the following hold:
    #   * config.state is a STAR_LOOP_ENTRY state flagged isPrecedenceDecision;
    #   * config.context is neither empty nor contains an empty path;
    #   * every return state on the context stack is in the same rule as
    #     config.state;
    #   * every return state has exactly one outgoing epsilon transition and
    #     matches one of the accepted shapes checked below.
    # Returns False in every other case.
    loopEntry = config.state

    # Cheap rejections first: wrong kind of state, or not the loop entry
    # produced for a precedence (left-recursive) rule.
    if loopEntry.stateType != ATNState.STAR_LOOP_ENTRY:
        return False
    if not loopEntry.isPrecedenceDecision:
        return False

    # An empty stack (or an empty path in it) would mean global FOLLOW,
    # for which the optimization cannot be applied.
    ctx = config.context
    if ctx.isEmpty() or ctx.hasEmptyPath():
        return False

    depth = len(ctx)
    states = self.atn.states

    # Every stack top must return into the same rule that loopEntry is in.
    staysInRule = all(
        states[ctx.getReturnState(k)].ruleIndex == loopEntry.ruleIndex
        for k in range(depth)
    )
    if not staysInRule:
        return False

    # Locate the block end state paired with the block start state reached
    # through the loop entry's first transition.
    blockStart = loopEntry.transitions[0].target
    blockEnd = states[blockStart.endState.stateNumber]

    # Each stack top must lead back to the loop entry/exit state through
    # epsilon edges only, without leaving the rule.
    for k in range(depth):
        returnState = states[ctx.getReturnState(k)]
        edges = returnState.transitions

        # A conforming return state has a single outgoing epsilon edge.
        if len(edges) != 1 or not edges[0].isEpsilon:
            return False
        target = edges[0].target

        conforms = (
            # prefix op case like 'not' expr, '(' type ')' expr
            (returnState.stateType == ATNState.BLOCK_END and target is loopEntry)
            # 'expr op expr': the return state is the block end of the
            # (...)* internal block; that block end points to the loop back
            # state, which points to loopEntry (not re-checked here)
            or returnState is blockEnd
            # ternary expr ? expr : expr: the return state points at the
            # block end, which points at the loop entry state
            or target is blockEnd
            # complex prefix 'between' expr 'and' expr: the 2nd expr's return
            # state points at a block end whose only edge is an epsilon
            # edge back to loopEntry
            or (
                target.stateType == ATNState.BLOCK_END
                and len(target.transitions) == 1
                and target.transitions[0].isEpsilon
                and target.transitions[0].target is loopEntry
            )
        )
        if not conforms:
            # anything else does not conform
            return False

    return True
def getRuleName(self, index:int): def getRuleName(self, index:int):
if self.parser is not None and index>=0: if self.parser is not None and index>=0:
return self.parser.ruleNames[index] return self.parser.ruleNames[index]

View File

@ -27,6 +27,7 @@
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from antlr4.atn.ATNState import StarLoopEntryState
from antlr4.atn.ATNConfigSet import ATNConfigSet from antlr4.atn.ATNConfigSet import ATNConfigSet
from antlr4.atn.ATNState import DecisionState from antlr4.atn.ATNState import DecisionState
@ -49,6 +50,15 @@ class DFA(object):
# {@link #setPrecedenceDfa}. # {@link #setPrecedenceDfa}.
self.precedenceDfa = False self.precedenceDfa = False
if isinstance(atnStartState, StarLoopEntryState):
if atnStartState.isPrecedenceDecision:
self.precedenceDfa = True
precedenceState = DFAState(configs=ATNConfigSet())
precedenceState.edges = []
precedenceState.isAcceptState = False
precedenceState.requiresFullContext = False
self.s0 = precedenceState
# Get the start state for a specific precedence value. # Get the start state for a specific precedence value.
# #
@ -112,7 +122,7 @@ class DFA(object):
if self.precedenceDfa != precedenceDfa: if self.precedenceDfa != precedenceDfa:
self._states = dict() self._states = dict()
if precedenceDfa: if precedenceDfa:
precedenceState = DFAState(ATNConfigSet()) precedenceState = DFAState(configs=ATNConfigSet())
precedenceState.edges = [] precedenceState.edges = []
precedenceState.isAcceptState = False precedenceState.isAcceptState = False
precedenceState.requiresFullContext = False precedenceState.requiresFullContext = False