start migrating

This commit is contained in:
Eric Vergnaud 2016-11-28 23:05:43 +08:00
parent 66ea9891aa
commit 3026c3cc94
1 changed files with 180 additions and 1 deletions

View File

@ -266,7 +266,7 @@ from antlr4.atn.ATN import ATN
from antlr4.atn.ATNConfig import ATNConfig from antlr4.atn.ATNConfig import ATNConfig
from antlr4.atn.ATNConfigSet import ATNConfigSet from antlr4.atn.ATNConfigSet import ATNConfigSet
from antlr4.atn.ATNSimulator import ATNSimulator from antlr4.atn.ATNSimulator import ATNSimulator
from antlr4.atn.ATNState import StarLoopEntryState, RuleStopState from antlr4.atn.ATNState import StarLoopEntryState, RuleStopState, ATNState
from antlr4.atn.PredictionMode import PredictionMode from antlr4.atn.PredictionMode import PredictionMode
from antlr4.atn.SemanticContext import SemanticContext, AND, andContext, orContext from antlr4.atn.SemanticContext import SemanticContext, AND, andContext, orContext
from antlr4.atn.Transition import Transition, RuleTransition, ActionTransition, AtomTransition, SetTransition, NotSetTransition from antlr4.atn.Transition import Transition, RuleTransition, ActionTransition, AtomTransition, SetTransition, NotSetTransition
@ -1168,7 +1168,11 @@ class ParserATNSimulator(ATNSimulator):
# make sure to not return here, because EOF transitions can act as # make sure to not return here, because EOF transitions can act as
# both epsilon transitions and non-epsilon transitions. # both epsilon transitions and non-epsilon transitions.
first = True
for t in p.transitions: for t in p.transitions:
if first and self.canDropLoopEntryEdgeInLeftRecursiveRule(config):
continue
continueCollecting = collectPredicates and not isinstance(t, ActionTransition) continueCollecting = collectPredicates and not isinstance(t, ActionTransition)
c = self.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon) c = self.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon)
if c is not None: if c is not None:
@ -1205,6 +1209,181 @@ class ParserATNSimulator(ATNSimulator):
self.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon) self.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon)
# Implements first-edge (loop entry) elimination as an optimization
# during closure operations. See antlr/antlr4#1398.
#
# The optimization is to avoid adding the loop entry config when
# the exit path can only lead back to the same
# StarLoopEntryState after popping context at the rule end state
# (traversing only epsilon edges, so we're still in closure, in
# this same rule).
#
# We need to detect any state that can reach loop entry on
# epsilon w/o exiting rule. We don't have to look at FOLLOW
# links, just ensure that all stack tops for config refer to key
# states in LR rule.
#
# To verify we are in the right situation we must first check
# closure is at a StarLoopEntryState generated during LR removal.
# Then we check that each stack top of context is a return state
# from one of these cases:
#
# 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state
# 2. expr op expr. The return state is the block end of internal block of (...)*
# 3. 'between' expr 'and' expr. The return state of 2nd expr reference.
# That state points at block end of internal block of (...)*.
# 4. expr '?' expr ':' expr. The return state points at block end,
# which points at loop entry state.
#
# If any is true for each stack top, then closure does not add a
# config to the current config set for edge[0], the loop entry branch.
#
# Conditions fail if any context for the current config is:
#
# a. empty (we'd fall out of expr to do a global FOLLOW which could
# even be to some weird spot in expr) or,
# b. lies outside of expr or,
# c. lies within expr but at a state not the BlockEndState
# generated during LR removal
#
# Do we need to evaluate predicates ever in closure for this case?
#
# No. Predicates, including precedence predicates, are only
# evaluated when computing a DFA start state. I.e., only before
# the lookahead (but not parser) consumes a token.
#
# There are no epsilon edges allowed in LR rule alt blocks or in
# the "primary" part (ID here). If closure is in
# StarLoopEntryState any lookahead operation will have consumed a
# token as there are no epsilon-paths that lead to
# StarLoopEntryState. We do not have to evaluate predicates
# therefore if we are in the generated StarLoopEntryState of a LR
# rule. Note that when making a prediction starting at that
# decision point, decision d=2, compute-start-state performs
# closure starting at edges[0], edges[1] emanating from
# StarLoopEntryState. That means it is not performing closure on
# StarLoopEntryState during compute-start-state.
#
# How do we know this always gives same prediction answer?
#
# Without predicates, loop entry and exit paths are ambiguous
# upon remaining input +b (in, say, a+b). Either path leads to a
# valid parse. Closure can lead to consuming + immediately or by
# falling out of this call to expr back into expr and loop back
# again to StarLoopEntryState to match +b. In this special case,
# we choose the more efficient path, which is to take the bypass
# path.
#
# The lookahead language has not changed because closure chooses
# one path over the other. Both paths lead to consuming the same
# remaining input during a lookahead operation. If the next token
# is an operator, lookahead will enter the choice block with
# operators. If it is not, lookahead will exit expr. Same as if
# closure had chosen to enter the choice block immediately.
#
# Closure is examining one config (some loopentrystate, some alt,
# context) which means it is considering exactly one alt. Closure
# always copies the same alt to any derived configs.
#
# How do we know this optimization doesn't mess up precedence in
# our parse trees?
#
# Looking through expr from left edge of stat only has to confirm
# that an input, say, a+b+c; begins with any valid interpretation
# of an expression. The precedence actually doesn't matter when
# making a decision in stat seeing through expr. It is only when
# parsing rule expr that we must use the precedence to get the
# right interpretation and, hence, parse tree.
#
# @since 4.6
#
def canDropLoopEntryEdgeInLeftRecursiveRule(self, config):
    """Decide whether closure may skip the loop-entry edge for ``config``.

    Returns True only when ``config.state`` is a precedence-decision
    star-loop-entry state and every return state on ``config.context``
    stays in the same rule and leads back to that loop entry (or to the
    block end of its inner block) through a single epsilon edge.
    Returns False in every other case, including an empty context or a
    context with an empty path.
    """
    p = config.state
    # First check to see if we are in StarLoopEntryState generated during
    # left-recursion elimination. For efficiency, also check if
    # the context has an empty stack case. If so, it would mean
    # global FOLLOW so we can't perform optimization
    # Are we the special loop entry/exit state? or SLL wildcard
    # NOTE(review): state objects are assumed to expose `stateType` and
    # `transitions`, and transitions `isEpsilon`/`target`, as plain
    # attributes (no Java-style getters) -- confirm against ATNState and
    # Transition.
    if p.stateType != ATNState.STAR_LOOP_ENTRY \
            or not p.isPrecedenceDecision \
            or config.context.isEmpty() \
            or config.context.hasEmptyPath():
        return False

    states = self.atn.states
    context = config.context
    # Resolve every stack top once; both checks below look at the same states.
    returnStates = [states[context.getReturnState(i)]
                    for i in range(len(context))]

    # Require all return states to return back to the same rule
    # that p is in.
    for returnState in returnStates:
        if returnState.ruleIndex != p.ruleIndex:
            return False

    # Edge 0 of the loop entry enters the inner (...)* block; fetch the
    # block end that was generated for that block.
    decisionStartState = p.transitions[0].target
    blockEndState = states[decisionStartState.endState.stateNumber]

    # Verify that the top of each stack context leads to loop entry/exit
    # state through epsilon edges and w/o leaving rule.
    for returnState in returnStates:
        # all states must have single outgoing epsilon edge
        if len(returnState.transitions) != 1 \
                or not returnState.transitions[0].isEpsilon:
            return False

        # Look for prefix op case like 'not expr', '(' type ')' expr
        returnStateTarget = returnState.transitions[0].target
        if returnState.stateType == ATNState.BLOCK_END \
                and returnStateTarget is p:
            continue

        # Look for 'expr op expr' or case where expr's return state is block end
        # of (...)* internal block; the block end points to loop back
        # which points to p but we don't need to check that
        if returnState is blockEndState:
            continue

        # Look for ternary expr ? expr : expr. The return state points at block end,
        # which points at loop entry state
        if returnStateTarget is blockEndState:
            continue

        # Look for complex prefix 'between expr and expr' case where 2nd expr's
        # return state points at block end state of (...)* internal block
        if returnStateTarget.stateType == ATNState.BLOCK_END \
                and len(returnStateTarget.transitions) == 1 \
                and returnStateTarget.transitions[0].isEpsilon \
                and returnStateTarget.transitions[0].target is p:
            continue

        # anything else ain't conforming
        return False

    return True
def getRuleName(self, index): def getRuleName(self, index):
if self.parser is not None and index>=0: if self.parser is not None and index>=0:
return self.parser.ruleNames[index] return self.parser.ruleNames[index]