diff --git a/runtime/Python2/src/antlr4/atn/ParserATNSimulator.py b/runtime/Python2/src/antlr4/atn/ParserATNSimulator.py index ee18bad5e..17e3722dd 100755 --- a/runtime/Python2/src/antlr4/atn/ParserATNSimulator.py +++ b/runtime/Python2/src/antlr4/atn/ParserATNSimulator.py @@ -266,7 +266,7 @@ from antlr4.atn.ATN import ATN from antlr4.atn.ATNConfig import ATNConfig from antlr4.atn.ATNConfigSet import ATNConfigSet from antlr4.atn.ATNSimulator import ATNSimulator -from antlr4.atn.ATNState import StarLoopEntryState, RuleStopState +from antlr4.atn.ATNState import StarLoopEntryState, RuleStopState, ATNState from antlr4.atn.PredictionMode import PredictionMode from antlr4.atn.SemanticContext import SemanticContext, AND, andContext, orContext from antlr4.atn.Transition import Transition, RuleTransition, ActionTransition, AtomTransition, SetTransition, NotSetTransition @@ -1168,7 +1168,11 @@ class ParserATNSimulator(ATNSimulator): # make sure to not return here, because EOF transitions can act as # both epsilon transitions and non-epsilon transitions. + first = True for t in p.transitions: + if first and self.canDropLoopEntryEdgeInLeftRecursiveRule(config): + continue + continueCollecting = collectPredicates and not isinstance(t, ActionTransition) c = self.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon) if c is not None: @@ -1205,6 +1209,181 @@ class ParserATNSimulator(ATNSimulator): self.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon) + + + # Implements first-edge (loop entry) elimination as an optimization + # during closure operations. See antlr/antlr4#1398. + # + # The optimization is to avoid adding the loop entry config when + # the exit path can only lead back to the same + # StarLoopEntryState after popping context at the rule end state + # (traversing only epsilon edges, so we're still in closure, in + # this same rule). 
+ # + # We need to detect any state that can reach loop entry on + # epsilon w/o exiting rule. We don't have to look at FOLLOW + # links, just ensure that all stack tops for config refer to key + # states in LR rule. + # + # To verify we are in the right situation we must first check + # closure is at a StarLoopEntryState generated during LR removal. + # Then we check that each stack top of context is a return state + # from one of these cases: + # + # 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state + # 2. expr op expr. The return state is the block end of internal block of (...)* + # 3. 'between' expr 'and' expr. The return state of 2nd expr reference. + # That state points at block end of internal block of (...)*. + # 4. expr '?' expr ':' expr. The return state points at block end, + # which points at loop entry state. + # + # If any is true for each stack top, then closure does not add a + # config to the current config set for edge[0], the loop entry branch. + # + # Conditions fail if any context for the current config is: + # + # a. empty (we'd fall out of expr to do a global FOLLOW which could + # even be to some weird spot in expr) or, + # b. lies outside of expr or, + # c. lies within expr but at a state not the BlockEndState + # generated during LR removal + # + # Do we need to evaluate predicates ever in closure for this case? + # + # No. Predicates, including precedence predicates, are only + # evaluated when computing a DFA start state. I.e., only before + # the lookahead (but not parser) consumes a token. + # + # There are no epsilon edges allowed in LR rule alt blocks or in + # the "primary" part (ID here). If closure is in + # StarLoopEntryState any lookahead operation will have consumed a + # token as there are no epsilon-paths that lead to + # StarLoopEntryState. We do not have to evaluate predicates + # therefore if we are in the generated StarLoopEntryState of a LR + # rule. 
Note that when making a prediction starting at that + # decision point, decision d=2, compute-start-state performs + # closure starting at edges[0], edges[1] emanating from + # StarLoopEntryState. That means it is not performing closure on + # StarLoopEntryState during compute-start-state. + # + # How do we know this always gives the same prediction answer? + # + # Without predicates, loop entry and exit paths are ambiguous + # upon remaining input +b (in, say, a+b). Either path leads to + # valid parses. Closure can lead to consuming + immediately or by + # falling out of this call to expr back into expr and loop back + # again to StarLoopEntryState to match +b. In this special case, + # we choose the more efficient path, which is to take the bypass + # path. + # + # The lookahead language has not changed because closure chooses + # one path over the other. Both paths lead to consuming the same + # remaining input during a lookahead operation. If the next token + # is an operator, lookahead will enter the choice block with + # operators. If it is not, lookahead will exit expr. Same as if + # closure had chosen to enter the choice block immediately. + # + # Closure is examining one config (some loopentrystate, some alt, + # context) which means it is considering exactly one alt. Closure + # always copies the same alt to any derived configs. + # + # How do we know this optimization doesn't mess up precedence in + # our parse trees? + # + # Looking through expr from left edge of stat only has to confirm + # that an input, say, a+b+c; begins with any valid interpretation + # of an expression. The precedence actually doesn't matter when + # making a decision in stat seeing through expr. It is only when + # parsing rule expr that we must use the precedence to get the + # right interpretation and, hence, parse tree. 
#
# @since 4.6
#
def canDropLoopEntryEdgeInLeftRecursiveRule(self, config):
    """Decide whether closure may skip the loop-entry edge for ``config``.

    The answer is True only when ``config.state`` is a precedence-decision
    star-loop-entry state, ``config.context`` is neither empty nor contains
    an empty path, every return state on the context belongs to the same
    rule as ``config.state``, and every return state matches one of the
    return-state shapes checked in the loop below.

    :param config: the configuration whose state and context are inspected
    :return: True if the loop-entry edge can be dropped, otherwise False
    """
    # NOTE(review): this port reads stateType / transitions / isEpsilon as
    # plain attributes (the way this file already reads ``p.transitions``)
    # instead of Java-style getters -- confirm against ATNState and
    # Transition in this runtime.
    p = config.state
    # First check to see if we are in StarLoopEntryState generated during
    # left-recursion elimination. For efficiency, also check if
    # the context has an empty stack case. If so, it would mean
    # global FOLLOW so we can't perform optimization
    # Are we the special loop entry/exit state? or SLL wildcard
    if (p.stateType != ATNState.STAR_LOOP_ENTRY
            or not p.isPrecedenceDecision
            or config.context.isEmpty()
            or config.context.hasEmptyPath()):
        return False

    # Resolve each stack top to its ATN state once; both passes below
    # walk the same list.
    states = self.atn.states
    returnStates = [states[config.context.getReturnState(i)]
                    for i in range(len(config.context))]

    # Require all return states to return back to the same rule
    # that p is in.
    if any(rs.ruleIndex != p.ruleIndex for rs in returnStates):
        return False

    decisionStartState = p.transitions[0].target
    blockEndState = states[decisionStartState.endState.stateNumber]

    # Verify that the top of each stack context leads to loop entry/exit
    # state through epsilon edges and w/o leaving rule.
    for returnState in returnStates:
        # all states must have single outgoing epsilon edge
        if (len(returnState.transitions) != 1
                or not returnState.transitions[0].isEpsilon):
            return False

        # Identity (``is``) mirrors the reference comparisons of the
        # Java code this was ported from.
        returnStateTarget = returnState.transitions[0].target

        # Look for prefix op case like 'not expr', '(' type ')' expr
        if (returnState.stateType == ATNState.BLOCK_END
                and returnStateTarget is p):
            continue

        # Look for 'expr op expr' or case where expr's return state is
        # block end of (...)* internal block; the block end points to loop
        # back which points to p but we don't need to check that
        if returnState is blockEndState:
            continue

        # Look for ternary expr ? expr : expr. The return state points at
        # block end, which points at loop entry state
        if returnStateTarget is blockEndState:
            continue

        # Look for complex prefix 'between expr and expr' case where 2nd
        # expr's return state points at block end state of (...)* internal
        # block
        if (returnStateTarget.stateType == ATNState.BLOCK_END
                and len(returnStateTarget.transitions) == 1
                and returnStateTarget.transitions[0].isEpsilon
                and returnStateTarget.transitions[0].target is p):
            continue

        # anything else ain't conforming
        return False

    return True


# Trailing context carried over unchanged from the patch; the remainder of
# this method lies beyond the visible chunk.
def getRuleName(self, index):
    if self.parser is not None and index>=0:
        return self.parser.ruleNames[index]