start migrating

2016-11-28 23:05:43 +08:00 · 2016-11-28 23:05:43 +08:00 · 3026c3cc94
parent 66ea9891aa
commit 3026c3cc94
1 changed files with 180 additions and 1 deletions
--- a/runtime/Python2/src/antlr4/atn/ParserATNSimulator.py
+++ b/runtime/Python2/src/antlr4/atn/ParserATNSimulator.py
@ -266,7 +266,7 @@ from antlr4.atn.ATN import ATN
 from antlr4.atn.ATNConfig import ATNConfig
 from antlr4.atn.ATNConfigSet import ATNConfigSet
 from antlr4.atn.ATNSimulator import ATNSimulator
-from antlr4.atn.ATNState import StarLoopEntryState, RuleStopState
+from antlr4.atn.ATNState import StarLoopEntryState, RuleStopState, ATNState
 from antlr4.atn.PredictionMode import PredictionMode
 from antlr4.atn.SemanticContext import SemanticContext, AND, andContext, orContext
 from antlr4.atn.Transition import Transition, RuleTransition, ActionTransition, AtomTransition, SetTransition, NotSetTransition
@ -1168,7 +1168,11 @@ class ParserATNSimulator(ATNSimulator):
            # make sure to not return here, because EOF transitions can act as
            # both epsilon transitions and non-epsilon transitions.

+        first = True
        for t in p.transitions:
+            if first and self.canDropLoopEntryEdgeInLeftRecursiveRule(config):
+                continue
+
            continueCollecting = collectPredicates and not isinstance(t, ActionTransition)
            c = self.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon)
            if c is not None:
@ -1205,6 +1209,181 @@ class ParserATNSimulator(ATNSimulator):

                self.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon)

+
+
+    # Implements first-edge (loop entry) elimination as an optimization
+    #  during closure operations.  See antlr/antlr4#1398.
+    #
+    # The optimization is to avoid adding the loop entry config when
+    # the exit path can only lead back to the same
+    # StarLoopEntryState after popping context at the rule end state
+    # (traversing only epsilon edges, so we're still in closure, in
+    # this same rule).
+    #
+    # We need to detect any state that can reach loop entry on
+    # epsilon w/o exiting rule. We don't have to look at FOLLOW
+    # links, just ensure that all stack tops for config refer to key
+    # states in LR rule.
+    #
+    # To verify we are in the right situation we must first check
+    # closure is at a StarLoopEntryState generated during LR removal.
+    # Then we check that each stack top of context is a return state
+    # from one of these cases:
+    #
+    #   1. 'not' expr, '(' type ')' expr. The return state points at loop entry state
+    #   2. expr op expr. The return state is the block end of internal block of (...)*
+    #   3. 'between' expr 'and' expr. The return state of 2nd expr reference.
+    #      That state points at block end of internal block of (...)*.
+    #   4. expr '?' expr ':' expr. The return state points at block end,
+    #      which points at loop entry state.
+    #
+    # If any is true for each stack top, then closure does not add a
+    # config to the current config set for edge[0], the loop entry branch.
+    #
+    #  Conditions fail if any context for the current config is:
+    #
+    #   a. empty (we'd fall out of expr to do a global FOLLOW which could
+    #      even be to some weird spot in expr) or,
+    #   b. lies outside of expr or,
+    #   c. lies within expr but at a state not the BlockEndState
+    #   generated during LR removal
+    #
+    # Do we need to evaluate predicates ever in closure for this case?
+    #
+    # No. Predicates, including precedence predicates, are only
+    # evaluated when computing a DFA start state. I.e., only before
+    # the lookahead (but not parser) consumes a token.
+    #
+    # There are no epsilon edges allowed in LR rule alt blocks or in
+    # the "primary" part (ID here). If closure is in
+    # StarLoopEntryState any lookahead operation will have consumed a
+    # token as there are no epsilon-paths that lead to
+    # StarLoopEntryState. We do not have to evaluate predicates
+    # therefore if we are in the generated StarLoopEntryState of a LR
+    # rule. Note that when making a prediction starting at that
+    # decision point, decision d=2, compute-start-state performs
+    # closure starting at edges[0], edges[1] emanating from
+    # StarLoopEntryState. That means it is not performing closure on
+    # StarLoopEntryState during compute-start-state.
+    #
+    # How do we know this always gives same prediction answer?
+    #
+    # Without predicates, loop entry and exit paths are ambiguous
+    # upon remaining input +b (in, say, a+b). Either paths lead to
+    # valid parses. Closure can lead to consuming + immediately or by
+    # falling out of this call to expr back into expr and loop back
+    # again to StarLoopEntryState to match +b. In this special case,
+    # we choose the more efficient path, which is to take the bypass
+    # path.
+    #
+    # The lookahead language has not changed because closure chooses
+    # one path over the other. Both paths lead to consuming the same
+    # remaining input during a lookahead operation. If the next token
+    # is an operator, lookahead will enter the choice block with
+    # operators. If it is not, lookahead will exit expr. Same as if
+    # closure had chosen to enter the choice block immediately.
+    #
+    # Closure is examining one config (some loopentrystate, some alt,
+    # context) which means it is considering exactly one alt. Closure
+    # always copies the same alt to any derived configs.
+    #
+    # How do we know this optimization doesn't mess up precedence in
+    # our parse trees?
+    #
+    # Looking through expr from left edge of stat only has to confirm
+    # that an input, say, a+b+c; begins with any valid interpretation
+    # of an expression. The precedence actually doesn't matter when
+    # making a decision in stat seeing through expr. It is only when
+    # parsing rule expr that we must use the precedence to get the
+    # right interpretation and, hence, parse tree.
+    #
+    # @since 4.6
+    #
+    def canDropLoopEntryEdgeInLeftRecursiveRule(self, config):
+        p = config.state
+        # First check to see if we are in StarLoopEntryState generated during
+        # left-recursion elimination. For efficiency, also check if
+        # the context has an empty stack case. If so, it would mean
+        # global FOLLOW so we can't perform optimization
+        # Are we the special loop entry/exit state? or SLL wildcard
+        if p.getStateType() != ATNState.STAR_LOOP_ENTRY \
+                or not p.isPrecedenceDecision \
+                or config.context.isEmpty()   \
+                or config.context.hasEmptyPath():
+            return False
+
+        # Require all return states to return back to the same rule
+        # that p is in.
+        numCtxs = len(config.context)
+        for i in range(0, numCtxs):  # for each stack context
+            returnState = atn.states.get(config.context.getReturnState(i));
+    if (returnState.ruleIndex != p.ruleIndex) return false;
+
+
+
+}
+
+BlockStartState
+decisionStartState = (BlockStartState)
+p.transition(0).target;
+int
+blockEndStateNum = decisionStartState.endState.stateNumber;
+BlockEndState
+blockEndState = (BlockEndState)
+atn.states.get(blockEndStateNum);
+
+# Verify that the top of each stack context leads to loop entry/exit
+# state through epsilon edges and w/o leaving rule.
+for (int
+i = 0;
+i < numCtxs;
+i + +) {  # for each stack context
+int
+returnStateNumber = config.context.getReturnState(i);
+ATNState
+returnState = atn.states.get(returnStateNumber);
+# all states must have single outgoing epsilon edge
+if (returnState.getNumberOfTransitions() != 1 | |
+!returnState.transition(0).isEpsilon() )
+{
+return false;
+}
+# Look for prefix op case like 'not expr', (' type ')' expr
+ATNState
+returnStateTarget = returnState.transition(0).target;
+if (returnState.getStateType() == BLOCK_END & & returnStateTarget == p)
+{
+continue;
+}
+# Look for 'expr op expr' or case where expr's return state is block end
+# of (...)* internal block; the block end points to loop back
+# which points to p but we don't need to check that
+if (returnState == blockEndState) {
+continue;
+}
+# Look for ternary expr ? expr : expr. The return state points at block end,
+# which points at loop entry state
+if (returnStateTarget == blockEndState) {
+continue;
+}
+# Look for complex prefix 'between expr and expr' case where 2nd expr's
+# return state points at block end state of (...)* internal block
+if (returnStateTarget.getStateType() == BLOCK_END & &
+    returnStateTarget.getNumberOfTransitions() == 1 & &
+    returnStateTarget.transition(0).isEpsilon() & &
+    returnStateTarget.transition(0).target == p )
+    {
+continue;
+}
+
+# anything else ain't conforming
+return false;
+}
+
+return true;
+}
+
+
    def getRuleName(self, index):
        if self.parser is not None and index>=0:
            return self.parser.ruleNames[index]