diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java
index c0a676690..c499af7b8 100644
--- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java
+++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java
@@ -3,6 +3,8 @@ package org.antlr.v4.test.runtime.descriptors;
import org.antlr.v4.test.runtime.BaseParserTestDescriptor;
import org.antlr.v4.test.runtime.CommentHasStringValue;
+import java.util.Arrays;
+
public class PerformanceDescriptors {
/*
* This is a regression test for antlr/antlr4#192 "Poor performance of
@@ -105,7 +107,7 @@ public class PerformanceDescriptors {
@Override
public boolean ignore(String targetName) {
- return !targetName.equals("Java");
+ return !Arrays.asList("Java", "Python2", "Python3", "Node").contains(targetName);
}
}
diff --git a/runtime/JavaScript/src/antlr4/atn/ATNDeserializer.js b/runtime/JavaScript/src/antlr4/atn/ATNDeserializer.js
index 548549826..4fb9fa9f2 100644
--- a/runtime/JavaScript/src/antlr4/atn/ATNDeserializer.js
+++ b/runtime/JavaScript/src/antlr4/atn/ATNDeserializer.js
@@ -485,7 +485,7 @@ ATNDeserializer.prototype.stateIsEndStateFor = function(state, idx) {
//
// Analyze the {@link StarLoopEntryState} states in the specified ATN to set
-// the {@link StarLoopEntryState//precedenceRuleDecision} field to the
+// the {@link StarLoopEntryState//isPrecedenceDecision} field to the
// correct value.
//
// @param atn The ATN.
@@ -505,7 +505,7 @@ ATNDeserializer.prototype.markPrecedenceDecisions = function(atn) {
if (maybeLoopEndState instanceof LoopEndState) {
if ( maybeLoopEndState.epsilonOnlyTransitions &&
(maybeLoopEndState.transitions[0].target instanceof RuleStopState)) {
- state.precedenceRuleDecision = true;
+ state.isPrecedenceDecision = true;
}
}
}
diff --git a/runtime/JavaScript/src/antlr4/atn/ATNState.js b/runtime/JavaScript/src/antlr4/atn/ATNState.js
index e311b120c..04fb32894 100644
--- a/runtime/JavaScript/src/antlr4/atn/ATNState.js
+++ b/runtime/JavaScript/src/antlr4/atn/ATNState.js
@@ -303,7 +303,7 @@ function StarLoopEntryState() {
this.stateType = ATNState.STAR_LOOP_ENTRY;
this.loopBackState = null;
// Indicates whether this state can benefit from a precedence DFA during SLL decision making.
- this.precedenceRuleDecision = null;
+ this.isPrecedenceDecision = null;
return this;
}
diff --git a/runtime/JavaScript/src/antlr4/atn/ParserATNSimulator.js b/runtime/JavaScript/src/antlr4/atn/ParserATNSimulator.js
index b740ed92a..3bb12eaea 100644
--- a/runtime/JavaScript/src/antlr4/atn/ParserATNSimulator.js
+++ b/runtime/JavaScript/src/antlr4/atn/ParserATNSimulator.js
@@ -262,6 +262,7 @@ var Set = Utils.Set;
var BitSet = Utils.BitSet;
var DoubleDict = Utils.DoubleDict;
var ATN = require('./ATN').ATN;
+var ATNState = require('./ATNState').ATNState;
var ATNConfig = require('./ATNConfig').ATNConfig;
var ATNConfigSet = require('./ATNConfigSet').ATNConfigSet;
var Token = require('./../Token').Token;
@@ -359,16 +360,7 @@ ParserATNSimulator.prototype.adaptivePredict = function(input, decision, outerCo
" exec LA(1)==" + this.getLookaheadName(input) +
", outerContext=" + outerContext.toString(this.parser.ruleNames));
}
- // If this is not a precedence DFA, we check the ATN start state
- // to determine if this ATN start state is the decision for the
- // closure block that determines whether a precedence rule
- // should continue or complete.
- //
- if (!dfa.precedenceDfa && (dfa.atnStartState instanceof StarLoopEntryState)) {
- if (dfa.atnStartState.precedenceRuleDecision) {
- dfa.setPrecedenceDfa(true);
- }
- }
+
var fullCtx = false;
var s0_closure = this.computeStartState(dfa.atnStartState, RuleContext.EMPTY, fullCtx);
@@ -379,6 +371,7 @@ ParserATNSimulator.prototype.adaptivePredict = function(input, decision, outerCo
// appropriate start state for the precedence level rather
// than simply setting DFA.s0.
//
+ dfa.s0.configs = s0_closure; // not used for prediction but useful to know start configs anyway
s0_closure = this.applyPrecedenceFilter(s0_closure);
s0 = this.addDFAState(dfa, new DFAState(null, s0_closure));
dfa.setPrecedenceStartState(this.parser.getPrecedence(), s0);
@@ -1293,6 +1286,9 @@ ParserATNSimulator.prototype.closure_ = function(config, configs, closureBusy, c
// both epsilon transitions and non-epsilon transitions.
}
for(var i = 0;i
=0) {
return this.parser.ruleNames[index];
diff --git a/runtime/JavaScript/src/antlr4/dfa/DFA.js b/runtime/JavaScript/src/antlr4/dfa/DFA.js
index d93d9c5ab..1ffae26e6 100644
--- a/runtime/JavaScript/src/antlr4/dfa/DFA.js
+++ b/runtime/JavaScript/src/antlr4/dfa/DFA.js
@@ -29,6 +29,7 @@
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
var DFAState = require('./DFAState').DFAState;
+var StarLoopEntryState = require('../atn/ATNState').StarLoopEntryState;
var ATNConfigSet = require('./../atn/ATNConfigSet').ATNConfigSet;
var DFASerializer = require('./DFASerializer').DFASerializer;
var LexerDFASerializer = require('./DFASerializer').LexerDFASerializer;
@@ -58,6 +59,17 @@ function DFA(atnStartState, decision) {
// {@code false}. This is the backing field for {@link //isPrecedenceDfa},
// {@link //setPrecedenceDfa}.
this.precedenceDfa = false;
+ if (atnStartState instanceof StarLoopEntryState) // only star-loop entry states can be precedence decisions
+ {
+ if (atnStartState.isPrecedenceDecision) {
+ this.precedenceDfa = true; // mark as precedence DFA at construction time
+ var precedenceState = new DFAState(null, new ATNConfigSet()); // `var` keeps this local; a bare assignment would create an implicit global
+ precedenceState.edges = [];
+ precedenceState.isAcceptState = false;
+ precedenceState.requiresFullContext = false;
+ this.s0 = precedenceState; // placeholder start state, non-accepting, with an empty edge list
+ }
+ }
return this;
}
@@ -125,7 +137,7 @@ DFA.prototype.setPrecedenceDfa = function(precedenceDfa) {
if (this.precedenceDfa!==precedenceDfa) {
this._states = new DFAStatesSet();
if (precedenceDfa) {
- var precedenceState = new DFAState(new ATNConfigSet());
+ var precedenceState = new DFAState(null, new ATNConfigSet());
precedenceState.edges = [];
precedenceState.isAcceptState = false;
precedenceState.requiresFullContext = false;
diff --git a/runtime/Python2/src/antlr4/ParserInterpreter.py b/runtime/Python2/src/antlr4/ParserInterpreter.py
index ed059f9d5..f3b473032 100644
--- a/runtime/Python2/src/antlr4/ParserInterpreter.py
+++ b/runtime/Python2/src/antlr4/ParserInterpreter.py
@@ -69,7 +69,7 @@ class ParserInterpreter(Parser):
for state in atn.states:
if not isinstance(state, StarLoopEntryState):
continue
- if state.precedenceRuleDecision:
+ if state.isPrecedenceDecision:
self.pushRecursionContextStates.add(state.stateNumber)
# get atn simulator that knows how to do predictions
self._interp = ParserATNSimulator(self, atn, self.decisionToDFA, self.sharedContextCache)
diff --git a/runtime/Python2/src/antlr4/atn/ATNDeserializer.py b/runtime/Python2/src/antlr4/atn/ATNDeserializer.py
index 8745806be..20c561f41 100644
--- a/runtime/Python2/src/antlr4/atn/ATNDeserializer.py
+++ b/runtime/Python2/src/antlr4/atn/ATNDeserializer.py
@@ -368,7 +368,7 @@ class ATNDeserializer (object):
#
# Analyze the {@link StarLoopEntryState} states in the specified ATN to set
- # the {@link StarLoopEntryState#precedenceRuleDecision} field to the
+ # the {@link StarLoopEntryState#isPrecedenceDecision} field to the
# correct value.
#
# @param atn The ATN.
@@ -387,7 +387,7 @@ class ATNDeserializer (object):
if isinstance(maybeLoopEndState, LoopEndState):
if maybeLoopEndState.epsilonOnlyTransitions and \
isinstance(maybeLoopEndState.transitions[0].target, RuleStopState):
- state.precedenceRuleDecision = True
+ state.isPrecedenceDecision = True
def verifyATN(self, atn):
if not self.deserializationOptions.verifyATN:
diff --git a/runtime/Python2/src/antlr4/atn/ATNState.py b/runtime/Python2/src/antlr4/atn/ATNState.py
index d10e22ddb..038377704 100644
--- a/runtime/Python2/src/antlr4/atn/ATNState.py
+++ b/runtime/Python2/src/antlr4/atn/ATNState.py
@@ -262,7 +262,7 @@ class StarLoopEntryState(DecisionState):
self.stateType = self.STAR_LOOP_ENTRY
self.loopBackState = None
# Indicates whether this state can benefit from a precedence DFA during SLL decision making.
- self.precedenceRuleDecision = None
+ self.isPrecedenceDecision = None
# Mark the end of a * or + loop.
class LoopEndState(ATNState):
diff --git a/runtime/Python2/src/antlr4/atn/ParserATNSimulator.py b/runtime/Python2/src/antlr4/atn/ParserATNSimulator.py
index ee18bad5e..d48610249 100755
--- a/runtime/Python2/src/antlr4/atn/ParserATNSimulator.py
+++ b/runtime/Python2/src/antlr4/atn/ParserATNSimulator.py
@@ -266,7 +266,7 @@ from antlr4.atn.ATN import ATN
from antlr4.atn.ATNConfig import ATNConfig
from antlr4.atn.ATNConfigSet import ATNConfigSet
from antlr4.atn.ATNSimulator import ATNSimulator
-from antlr4.atn.ATNState import StarLoopEntryState, RuleStopState
+from antlr4.atn.ATNState import StarLoopEntryState, RuleStopState, ATNState
from antlr4.atn.PredictionMode import PredictionMode
from antlr4.atn.SemanticContext import SemanticContext, AND, andContext, orContext
from antlr4.atn.Transition import Transition, RuleTransition, ActionTransition, AtomTransition, SetTransition, NotSetTransition
@@ -341,15 +341,6 @@ class ParserATNSimulator(ATNSimulator):
" exec LA(1)==" + self.getLookaheadName(input) +
", outerContext=" + outerContext.toString(self.parser.literalNames, None))
- # If this is not a precedence DFA, we check the ATN start state
- # to determine if this ATN start state is the decision for the
- # closure block that determines whether a precedence rule
- # should continue or complete.
- #
- if not dfa.precedenceDfa and isinstance(dfa.atnStartState, StarLoopEntryState):
- if dfa.atnStartState.precedenceRuleDecision:
- dfa.setPrecedenceDfa(True)
-
fullCtx = False
s0_closure = self.computeStartState(dfa.atnStartState, ParserRuleContext.EMPTY, fullCtx)
@@ -360,6 +351,7 @@ class ParserATNSimulator(ATNSimulator):
# appropriate start state for the precedence level rather
# than simply setting DFA.s0.
#
+ dfa.s0.configs = s0_closure # not used for prediction but useful to know start configs anyway
s0_closure = self.applyPrecedenceFilter(s0_closure)
s0 = self.addDFAState(dfa, DFAState(configs=s0_closure))
dfa.setPrecedenceStartState(self.parser.getPrecedence(), s0)
@@ -1168,7 +1160,13 @@ class ParserATNSimulator(ATNSimulator):
# make sure to not return here, because EOF transitions can act as
# both epsilon transitions and non-epsilon transitions.
+ first = True
for t in p.transitions:
+ if first:
+ first = False
+ if self.canDropLoopEntryEdgeInLeftRecursiveRule(config):
+ continue
+
continueCollecting = collectPredicates and not isinstance(t, ActionTransition)
c = self.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon)
if c is not None:
@@ -1205,6 +1203,161 @@ class ParserATNSimulator(ATNSimulator):
self.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon)
+
+
+ # Implements first-edge (loop entry) elimination as an optimization
+ # during closure operations. See antlr/antlr4#1398.
+ #
+ # The optimization is to avoid adding the loop entry config when
+ # the exit path can only lead back to the same
+ # StarLoopEntryState after popping context at the rule end state
+ # (traversing only epsilon edges, so we're still in closure, in
+ # this same rule).
+ #
+ # We need to detect any state that can reach loop entry on
+ # epsilon w/o exiting rule. We don't have to look at FOLLOW
+ # links, just ensure that all stack tops for config refer to key
+ # states in LR rule.
+ #
+ # To verify we are in the right situation we must first check
+ # closure is at a StarLoopEntryState generated during LR removal.
+ # Then we check that each stack top of context is a return state
+ # from one of these cases:
+ #
+ # 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state
+ # 2. expr op expr. The return state is the block end of internal block of (...)*
+ # 3. 'between' expr 'and' expr. The return state of 2nd expr reference.
+ # That state points at block end of internal block of (...)*.
+ # 4. expr '?' expr ':' expr. The return state points at block end,
+ # which points at loop entry state.
+ #
+ # If any is true for each stack top, then closure does not add a
+ # config to the current config set for edge[0], the loop entry branch.
+ #
+ # Conditions fail if any context for the current config is:
+ #
+ # a. empty (we'd fall out of expr to do a global FOLLOW which could
+ # even be to some weird spot in expr) or,
+ # b. lies outside of expr or,
+ # c. lies within expr but at a state not the BlockEndState
+ # generated during LR removal
+ #
+ # Do we need to evaluate predicates ever in closure for this case?
+ #
+ # No. Predicates, including precedence predicates, are only
+ # evaluated when computing a DFA start state. I.e., only before
+ # the lookahead (but not parser) consumes a token.
+ #
+ # There are no epsilon edges allowed in LR rule alt blocks or in
+ # the "primary" part (ID here). If closure is in
+ # StarLoopEntryState any lookahead operation will have consumed a
+ # token as there are no epsilon-paths that lead to
+ # StarLoopEntryState. We do not have to evaluate predicates
+ # therefore if we are in the generated StarLoopEntryState of a LR
+ # rule. Note that when making a prediction starting at that
+ # decision point, decision d=2, compute-start-state performs
+ # closure starting at edges[0], edges[1] emanating from
+ # StarLoopEntryState. That means it is not performing closure on
+ # StarLoopEntryState during compute-start-state.
+ #
+ # How do we know this always gives same prediction answer?
+ #
+ # Without predicates, loop entry and exit paths are ambiguous
+ # upon remaining input +b (in, say, a+b). Either path leads to a
+ # valid parse. Closure can lead to consuming + immediately or by
+ # falling out of this call to expr back into expr and loop back
+ # again to StarLoopEntryState to match +b. In this special case,
+ # we choose the more efficient path, which is to take the bypass
+ # path.
+ #
+ # The lookahead language has not changed because closure chooses
+ # one path over the other. Both paths lead to consuming the same
+ # remaining input during a lookahead operation. If the next token
+ # is an operator, lookahead will enter the choice block with
+ # operators. If it is not, lookahead will exit expr. Same as if
+ # closure had chosen to enter the choice block immediately.
+ #
+ # Closure is examining one config (some loopentrystate, some alt,
+ # context) which means it is considering exactly one alt. Closure
+ # always copies the same alt to any derived configs.
+ #
+ # How do we know this optimization doesn't mess up precedence in
+ # our parse trees?
+ #
+ # Looking through expr from left edge of stat only has to confirm
+ # that an input, say, a+b+c; begins with any valid interpretation
+ # of an expression. The precedence actually doesn't matter when
+ # making a decision in stat seeing through expr. It is only when
+ # parsing rule expr that we must use the precedence to get the
+ # right interpretation and, hence, parse tree.
+ #
+ # @since 4.6
+ #
+ def canDropLoopEntryEdgeInLeftRecursiveRule(self, config):
+ # return False
+ p = config.state
+ # First check to see if we are in StarLoopEntryState generated during
+ # left-recursion elimination. For efficiency, also check if
+ # the context has an empty stack case. If so, it would mean
+ # global FOLLOW so we can't perform optimization
+ # Are we the special loop entry/exit state? or SLL wildcard
+ if p.stateType != ATNState.STAR_LOOP_ENTRY \
+ or not p.isPrecedenceDecision \
+ or config.context.isEmpty() \
+ or config.context.hasEmptyPath():
+ return False
+
+ # Require all return states to return back to the same rule
+ # that p is in.
+ numCtxs = len(config.context)
+ for i in range(0, numCtxs): # for each stack context
+ returnState = self.atn.states[config.context.getReturnState(i)]
+ if returnState.ruleIndex != p.ruleIndex:
+ return False
+
+ decisionStartState = p.transitions[0].target
+ blockEndStateNum = decisionStartState.endState.stateNumber
+ blockEndState = self.atn.states[blockEndStateNum]
+
+ # Verify that the top of each stack context leads to loop entry/exit
+ # state through epsilon edges and w/o leaving rule.
+ for i in range(0, numCtxs): # for each stack context
+ returnStateNumber = config.context.getReturnState(i)
+ returnState = self.atn.states[returnStateNumber]
+ # all states must have single outgoing epsilon edge
+ if len(returnState.transitions) != 1 or not returnState.transitions[0].isEpsilon:
+ return False
+
+ # Look for prefix op case like 'not expr', '(' type ')' expr
+ returnStateTarget = returnState.transitions[0].target
+ if returnState.stateType == ATNState.BLOCK_END and returnStateTarget is p:
+ continue
+
+ # Look for 'expr op expr' or case where expr's return state is block end
+ # of (...)* internal block; the block end points to loop back
+ # which points to p but we don't need to check that
+ if returnState is blockEndState:
+ continue
+
+ # Look for ternary expr ? expr : expr. The return state points at block end,
+ # which points at loop entry state
+ if returnStateTarget is blockEndState:
+ continue
+
+ # Look for complex prefix 'between expr and expr' case where 2nd expr's
+ # return state points at block end state of (...)* internal block
+ if returnStateTarget.stateType == ATNState.BLOCK_END \
+ and len(returnStateTarget.transitions) == 1 \
+ and returnStateTarget.transitions[0].isEpsilon \
+ and returnStateTarget.transitions[0].target is p:
+ continue
+
+ # anything else ain't conforming
+ return False
+
+ return True
+
+
def getRuleName(self, index):
if self.parser is not None and index>=0:
return self.parser.ruleNames[index]
diff --git a/runtime/Python2/src/antlr4/dfa/DFA.py b/runtime/Python2/src/antlr4/dfa/DFA.py
index 4ed5fb6e9..014dd18f9 100644
--- a/runtime/Python2/src/antlr4/dfa/DFA.py
+++ b/runtime/Python2/src/antlr4/dfa/DFA.py
@@ -27,6 +27,7 @@
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+from antlr4.atn.ATNState import StarLoopEntryState
from antlr4.atn.ATNConfigSet import ATNConfigSet
from antlr4.dfa.DFAState import DFAState
@@ -48,6 +49,15 @@ class DFA(object):
# {@link #setPrecedenceDfa}.
self.precedenceDfa = False
+ if isinstance(atnStartState, StarLoopEntryState):
+ if atnStartState.isPrecedenceDecision:
+ self.precedenceDfa = True
+ precedenceState = DFAState(configs=ATNConfigSet())
+ precedenceState.edges = []
+ precedenceState.isAcceptState = False
+ precedenceState.requiresFullContext = False
+ self.s0 = precedenceState
+
# Get the start state for a specific precedence value.
#
@@ -111,7 +121,7 @@ class DFA(object):
if self.precedenceDfa != precedenceDfa:
self._states = dict()
if precedenceDfa:
- precedenceState = DFAState(ATNConfigSet())
+ precedenceState = DFAState(configs=ATNConfigSet())
precedenceState.edges = []
precedenceState.isAcceptState = False
precedenceState.requiresFullContext = False
diff --git a/runtime/Python3/src/antlr4/ParserInterpreter.py b/runtime/Python3/src/antlr4/ParserInterpreter.py
index 1b6a990ef..91b9d56b2 100644
--- a/runtime/Python3/src/antlr4/ParserInterpreter.py
+++ b/runtime/Python3/src/antlr4/ParserInterpreter.py
@@ -71,7 +71,7 @@ class ParserInterpreter(Parser):
for state in atn.states:
if not isinstance(state, StarLoopEntryState):
continue
- if state.precedenceRuleDecision:
+ if state.isPrecedenceDecision:
self.pushRecursionContextStates.add(state.stateNumber)
# get atn simulator that knows how to do predictions
self._interp = ParserATNSimulator(self, atn, self.decisionToDFA, self.sharedContextCache)
diff --git a/runtime/Python3/src/antlr4/atn/ATNDeserializer.py b/runtime/Python3/src/antlr4/atn/ATNDeserializer.py
index 2c225c3ac..ded137c73 100644
--- a/runtime/Python3/src/antlr4/atn/ATNDeserializer.py
+++ b/runtime/Python3/src/antlr4/atn/ATNDeserializer.py
@@ -368,7 +368,7 @@ class ATNDeserializer (object):
#
# Analyze the {@link StarLoopEntryState} states in the specified ATN to set
- # the {@link StarLoopEntryState#precedenceRuleDecision} field to the
+ # the {@link StarLoopEntryState#isPrecedenceDecision} field to the
# correct value.
#
# @param atn The ATN.
@@ -387,7 +387,7 @@ class ATNDeserializer (object):
if isinstance(maybeLoopEndState, LoopEndState):
if maybeLoopEndState.epsilonOnlyTransitions and \
isinstance(maybeLoopEndState.transitions[0].target, RuleStopState):
- state.precedenceRuleDecision = True
+ state.isPrecedenceDecision = True
def verifyATN(self, atn:ATN):
if not self.deserializationOptions.verifyATN:
diff --git a/runtime/Python3/src/antlr4/atn/ATNSimulator.py b/runtime/Python3/src/antlr4/atn/ATNSimulator.py
index ac5328757..bdde71a0d 100644
--- a/runtime/Python3/src/antlr4/atn/ATNSimulator.py
+++ b/runtime/Python3/src/antlr4/atn/ATNSimulator.py
@@ -37,7 +37,7 @@ from antlr4.dfa.DFAState import DFAState
class ATNSimulator(object):
# Must distinguish between missing edge and edge we know leads nowhere#/
- ERROR = DFAState(ATNConfigSet())
+ ERROR = DFAState(configs=ATNConfigSet())
ERROR.stateNumber = 0x7FFFFFFF
# The context cache maps all PredictionContext objects that are ==
diff --git a/runtime/Python3/src/antlr4/atn/ATNState.py b/runtime/Python3/src/antlr4/atn/ATNState.py
index 6dd89f020..ae31e374d 100644
--- a/runtime/Python3/src/antlr4/atn/ATNState.py
+++ b/runtime/Python3/src/antlr4/atn/ATNState.py
@@ -261,7 +261,7 @@ class StarLoopEntryState(DecisionState):
self.stateType = self.STAR_LOOP_ENTRY
self.loopBackState = None
# Indicates whether this state can benefit from a precedence DFA during SLL decision making.
- self.precedenceRuleDecision = None
+ self.isPrecedenceDecision = None
# Mark the end of a * or + loop.
class LoopEndState(ATNState):
diff --git a/runtime/Python3/src/antlr4/atn/ParserATNSimulator.py b/runtime/Python3/src/antlr4/atn/ParserATNSimulator.py
index 7cb5867b8..b085530b4 100644
--- a/runtime/Python3/src/antlr4/atn/ParserATNSimulator.py
+++ b/runtime/Python3/src/antlr4/atn/ParserATNSimulator.py
@@ -346,15 +346,6 @@ class ParserATNSimulator(ATNSimulator):
" exec LA(1)==" + self.getLookaheadName(input) +
", outerContext=" + outerContext.toString(self.parser.literalNames, None))
- # If this is not a precedence DFA, we check the ATN start state
- # to determine if this ATN start state is the decision for the
- # closure block that determines whether a precedence rule
- # should continue or complete.
- #
- if not dfa.precedenceDfa and isinstance(dfa.atnStartState, StarLoopEntryState):
- if dfa.atnStartState.precedenceRuleDecision:
- dfa.setPrecedenceDfa(True)
-
fullCtx = False
s0_closure = self.computeStartState(dfa.atnStartState, ParserRuleContext.EMPTY, fullCtx)
@@ -365,6 +356,7 @@ class ParserATNSimulator(ATNSimulator):
# appropriate start state for the precedence level rather
# than simply setting DFA.s0.
#
+ dfa.s0.configs = s0_closure # not used for prediction but useful to know start configs anyway
s0_closure = self.applyPrecedenceFilter(s0_closure)
s0 = self.addDFAState(dfa, DFAState(configs=s0_closure))
dfa.setPrecedenceStartState(self.parser.getPrecedence(), s0)
@@ -1173,7 +1165,13 @@ class ParserATNSimulator(ATNSimulator):
# make sure to not return here, because EOF transitions can act as
# both epsilon transitions and non-epsilon transitions.
+ first = True
for t in p.transitions:
+ if first:
+ first = False
+ if self.canDropLoopEntryEdgeInLeftRecursiveRule(config):
+ continue
+
continueCollecting = collectPredicates and not isinstance(t, ActionTransition)
c = self.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon)
if c is not None:
@@ -1210,6 +1208,161 @@ class ParserATNSimulator(ATNSimulator):
self.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon)
+
+
+ # Implements first-edge (loop entry) elimination as an optimization
+ # during closure operations. See antlr/antlr4#1398.
+ #
+ # The optimization is to avoid adding the loop entry config when
+ # the exit path can only lead back to the same
+ # StarLoopEntryState after popping context at the rule end state
+ # (traversing only epsilon edges, so we're still in closure, in
+ # this same rule).
+ #
+ # We need to detect any state that can reach loop entry on
+ # epsilon w/o exiting rule. We don't have to look at FOLLOW
+ # links, just ensure that all stack tops for config refer to key
+ # states in LR rule.
+ #
+ # To verify we are in the right situation we must first check
+ # closure is at a StarLoopEntryState generated during LR removal.
+ # Then we check that each stack top of context is a return state
+ # from one of these cases:
+ #
+ # 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state
+ # 2. expr op expr. The return state is the block end of internal block of (...)*
+ # 3. 'between' expr 'and' expr. The return state of 2nd expr reference.
+ # That state points at block end of internal block of (...)*.
+ # 4. expr '?' expr ':' expr. The return state points at block end,
+ # which points at loop entry state.
+ #
+ # If any is true for each stack top, then closure does not add a
+ # config to the current config set for edge[0], the loop entry branch.
+ #
+ # Conditions fail if any context for the current config is:
+ #
+ # a. empty (we'd fall out of expr to do a global FOLLOW which could
+ # even be to some weird spot in expr) or,
+ # b. lies outside of expr or,
+ # c. lies within expr but at a state not the BlockEndState
+ # generated during LR removal
+ #
+ # Do we need to evaluate predicates ever in closure for this case?
+ #
+ # No. Predicates, including precedence predicates, are only
+ # evaluated when computing a DFA start state. I.e., only before
+ # the lookahead (but not parser) consumes a token.
+ #
+ # There are no epsilon edges allowed in LR rule alt blocks or in
+ # the "primary" part (ID here). If closure is in
+ # StarLoopEntryState any lookahead operation will have consumed a
+ # token as there are no epsilon-paths that lead to
+ # StarLoopEntryState. We do not have to evaluate predicates
+ # therefore if we are in the generated StarLoopEntryState of a LR
+ # rule. Note that when making a prediction starting at that
+ # decision point, decision d=2, compute-start-state performs
+ # closure starting at edges[0], edges[1] emanating from
+ # StarLoopEntryState. That means it is not performing closure on
+ # StarLoopEntryState during compute-start-state.
+ #
+ # How do we know this always gives same prediction answer?
+ #
+ # Without predicates, loop entry and exit paths are ambiguous
+ # upon remaining input +b (in, say, a+b). Either path leads to a
+ # valid parse. Closure can lead to consuming + immediately or by
+ # falling out of this call to expr back into expr and loop back
+ # again to StarLoopEntryState to match +b. In this special case,
+ # we choose the more efficient path, which is to take the bypass
+ # path.
+ #
+ # The lookahead language has not changed because closure chooses
+ # one path over the other. Both paths lead to consuming the same
+ # remaining input during a lookahead operation. If the next token
+ # is an operator, lookahead will enter the choice block with
+ # operators. If it is not, lookahead will exit expr. Same as if
+ # closure had chosen to enter the choice block immediately.
+ #
+ # Closure is examining one config (some loopentrystate, some alt,
+ # context) which means it is considering exactly one alt. Closure
+ # always copies the same alt to any derived configs.
+ #
+ # How do we know this optimization doesn't mess up precedence in
+ # our parse trees?
+ #
+ # Looking through expr from left edge of stat only has to confirm
+ # that an input, say, a+b+c; begins with any valid interpretation
+ # of an expression. The precedence actually doesn't matter when
+ # making a decision in stat seeing through expr. It is only when
+ # parsing rule expr that we must use the precedence to get the
+ # right interpretation and, hence, parse tree.
+ #
+ # @since 4.6
+ #
+ def canDropLoopEntryEdgeInLeftRecursiveRule(self, config):
+ # return False
+ p = config.state
+ # First check to see if we are in StarLoopEntryState generated during
+ # left-recursion elimination. For efficiency, also check if
+ # the context has an empty stack case. If so, it would mean
+ # global FOLLOW so we can't perform optimization
+ # Are we the special loop entry/exit state? or SLL wildcard
+ if p.stateType != ATNState.STAR_LOOP_ENTRY \
+ or not p.isPrecedenceDecision \
+ or config.context.isEmpty() \
+ or config.context.hasEmptyPath():
+ return False
+
+ # Require all return states to return back to the same rule
+ # that p is in.
+ numCtxs = len(config.context)
+ for i in range(0, numCtxs): # for each stack context
+ returnState = self.atn.states[config.context.getReturnState(i)]
+ if returnState.ruleIndex != p.ruleIndex:
+ return False
+
+ decisionStartState = p.transitions[0].target
+ blockEndStateNum = decisionStartState.endState.stateNumber
+ blockEndState = self.atn.states[blockEndStateNum]
+
+ # Verify that the top of each stack context leads to loop entry/exit
+ # state through epsilon edges and w/o leaving rule.
+ for i in range(0, numCtxs): # for each stack context
+ returnStateNumber = config.context.getReturnState(i)
+ returnState = self.atn.states[returnStateNumber]
+ # all states must have single outgoing epsilon edge
+ if len(returnState.transitions) != 1 or not returnState.transitions[0].isEpsilon:
+ return False
+
+ # Look for prefix op case like 'not expr', '(' type ')' expr
+ returnStateTarget = returnState.transitions[0].target
+ if returnState.stateType == ATNState.BLOCK_END and returnStateTarget is p:
+ continue
+
+ # Look for 'expr op expr' or case where expr's return state is block end
+ # of (...)* internal block; the block end points to loop back
+ # which points to p but we don't need to check that
+ if returnState is blockEndState:
+ continue
+
+ # Look for ternary expr ? expr : expr. The return state points at block end,
+ # which points at loop entry state
+ if returnStateTarget is blockEndState:
+ continue
+
+ # Look for complex prefix 'between expr and expr' case where 2nd expr's
+ # return state points at block end state of (...)* internal block
+ if returnStateTarget.stateType == ATNState.BLOCK_END \
+ and len(returnStateTarget.transitions) == 1 \
+ and returnStateTarget.transitions[0].isEpsilon \
+ and returnStateTarget.transitions[0].target is p:
+ continue
+
+ # anything else ain't conforming
+ return False
+
+ return True
+
+
def getRuleName(self, index:int):
if self.parser is not None and index>=0:
return self.parser.ruleNames[index]
diff --git a/runtime/Python3/src/antlr4/dfa/DFA.py b/runtime/Python3/src/antlr4/dfa/DFA.py
index 80a48b56e..19881b3fd 100644
--- a/runtime/Python3/src/antlr4/dfa/DFA.py
+++ b/runtime/Python3/src/antlr4/dfa/DFA.py
@@ -27,6 +27,7 @@
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+from antlr4.atn.ATNState import StarLoopEntryState
from antlr4.atn.ATNConfigSet import ATNConfigSet
from antlr4.atn.ATNState import DecisionState
@@ -49,6 +50,15 @@ class DFA(object):
# {@link #setPrecedenceDfa}.
self.precedenceDfa = False
+ if isinstance(atnStartState, StarLoopEntryState):
+ if atnStartState.isPrecedenceDecision:
+ self.precedenceDfa = True
+ precedenceState = DFAState(configs=ATNConfigSet())
+ precedenceState.edges = []
+ precedenceState.isAcceptState = False
+ precedenceState.requiresFullContext = False
+ self.s0 = precedenceState
+
# Get the start state for a specific precedence value.
#
@@ -112,7 +122,7 @@ class DFA(object):
if self.precedenceDfa != precedenceDfa:
self._states = dict()
if precedenceDfa:
- precedenceState = DFAState(ATNConfigSet())
+ precedenceState = DFAState(configs=ATNConfigSet())
precedenceState.edges = []
precedenceState.isAcceptState = False
precedenceState.requiresFullContext = False