Merge pull request #1427 from ericvergnaud/poor-left-recursive-rule-performance

Fix poor left recursive rule performance in python + javascript
This commit is contained in:
Terence Parr 2016-11-29 10:10:24 -08:00 committed by GitHub
commit 595fb089b0
16 changed files with 444 additions and 45 deletions

View File

@ -3,6 +3,8 @@ package org.antlr.v4.test.runtime.descriptors;
import org.antlr.v4.test.runtime.BaseParserTestDescriptor;
import org.antlr.v4.test.runtime.CommentHasStringValue;
import java.util.Arrays;
public class PerformanceDescriptors {
/*
* This is a regression test for antlr/antlr4#192 "Poor performance of
@ -105,7 +107,7 @@ public class PerformanceDescriptors {
@Override
public boolean ignore(String targetName) {
return !targetName.equals("Java");
return !Arrays.asList("Java", "Python2", "Python3", "Node").contains(targetName);
}
}

View File

@ -485,7 +485,7 @@ ATNDeserializer.prototype.stateIsEndStateFor = function(state, idx) {
//
// Analyze the {@link StarLoopEntryState} states in the specified ATN to set
// the {@link StarLoopEntryState//precedenceRuleDecision} field to the
// the {@link StarLoopEntryState//isPrecedenceDecision} field to the
// correct value.
//
// @param atn The ATN.
@ -505,7 +505,7 @@ ATNDeserializer.prototype.markPrecedenceDecisions = function(atn) {
if (maybeLoopEndState instanceof LoopEndState) {
if ( maybeLoopEndState.epsilonOnlyTransitions &&
(maybeLoopEndState.transitions[0].target instanceof RuleStopState)) {
state.precedenceRuleDecision = true;
state.isPrecedenceDecision = true;
}
}
}

View File

@ -303,7 +303,7 @@ function StarLoopEntryState() {
this.stateType = ATNState.STAR_LOOP_ENTRY;
this.loopBackState = null;
// Indicates whether this state can benefit from a precedence DFA during SLL decision making.
this.precedenceRuleDecision = null;
this.isPrecedenceDecision = null;
return this;
}

View File

@ -262,6 +262,7 @@ var Set = Utils.Set;
var BitSet = Utils.BitSet;
var DoubleDict = Utils.DoubleDict;
var ATN = require('./ATN').ATN;
var ATNState = require('./ATNState').ATNState;
var ATNConfig = require('./ATNConfig').ATNConfig;
var ATNConfigSet = require('./ATNConfigSet').ATNConfigSet;
var Token = require('./../Token').Token;
@ -359,16 +360,7 @@ ParserATNSimulator.prototype.adaptivePredict = function(input, decision, outerCo
" exec LA(1)==" + this.getLookaheadName(input) +
", outerContext=" + outerContext.toString(this.parser.ruleNames));
}
// If this is not a precedence DFA, we check the ATN start state
// to determine if this ATN start state is the decision for the
// closure block that determines whether a precedence rule
// should continue or complete.
//
if (!dfa.precedenceDfa && (dfa.atnStartState instanceof StarLoopEntryState)) {
if (dfa.atnStartState.precedenceRuleDecision) {
dfa.setPrecedenceDfa(true);
}
}
var fullCtx = false;
var s0_closure = this.computeStartState(dfa.atnStartState, RuleContext.EMPTY, fullCtx);
@ -379,6 +371,7 @@ ParserATNSimulator.prototype.adaptivePredict = function(input, decision, outerCo
// appropriate start state for the precedence level rather
// than simply setting DFA.s0.
//
dfa.s0.configs = s0_closure; // not used for prediction but useful to know start configs anyway
s0_closure = this.applyPrecedenceFilter(s0_closure);
s0 = this.addDFAState(dfa, new DFAState(null, s0_closure));
dfa.setPrecedenceStartState(this.parser.getPrecedence(), s0);
@ -1293,6 +1286,9 @@ ParserATNSimulator.prototype.closure_ = function(config, configs, closureBusy, c
// both epsilon transitions and non-epsilon transitions.
}
for(var i = 0;i<p.transitions.length; i++) {
if(i==0 && this.canDropLoopEntryEdgeInLeftRecursiveRule(config))
continue;
var t = p.transitions[i];
var continueCollecting = collectPredicates && !(t instanceof ActionTransition);
var c = this.getEpsilonTarget(config, t, continueCollecting, depth === 0, fullCtx, treatEofAsEpsilon);
@ -1337,6 +1333,69 @@ ParserATNSimulator.prototype.closure_ = function(config, configs, closureBusy, c
}
};
//
// Implements first-edge (loop entry) elimination as an optimization during
// closure operations: decides whether closure may skip transition 0 of the
// state held by {@code config}.
//
// The answer is {@code true} only when all of the following hold:
//
// <ul>
// <li>{@code config.state} is a {@code STAR_LOOP_ENTRY} state flagged
// {@code isPrecedenceDecision};</li>
// <li>{@code config.context} is not empty and has no empty path;</li>
// <li>every return state of the context has the same rule index as
// {@code config.state};</li>
// <li>every return state has exactly one outgoing epsilon transition and
// matches one of the four shapes checked in the second loop below.</li>
// </ul>
//
// @param config The configuration whose state and context are examined.
// @return {boolean} {@code true} if the loop entry edge can be dropped,
// {@code false} otherwise.
//
ParserATNSimulator.prototype.canDropLoopEntryEdgeInLeftRecursiveRule = function(config) {
    var p = config.state;
    // First check to see if we are in StarLoopEntryState generated during
    // left-recursion elimination. For efficiency, also check if
    // the context has an empty stack case. If so, it would mean
    // global FOLLOW so we can't perform optimization
    // Are we the special loop entry/exit state? or SLL wildcard
    if (p.stateType !== ATNState.STAR_LOOP_ENTRY || !p.isPrecedenceDecision ||
            config.context.isEmpty() || config.context.hasEmptyPath()) {
        return false;
    }

    var numCtxs = config.context.length;
    var i, returnState, returnStateTarget;

    // Require all return states to return back to the same rule that p is in.
    for (i = 0; i < numCtxs; i++) { // for each stack context
        returnState = this.atn.states[config.context.getReturnState(i)];
        if (returnState.ruleIndex !== p.ruleIndex) {
            return false;
        }
    }

    var decisionStartState = p.transitions[0].target;
    var blockEndStateNum = decisionStartState.endState.stateNumber;
    var blockEndState = this.atn.states[blockEndStateNum];

    // Verify that the top of each stack context leads to loop entry/exit
    // state through epsilon edges and w/o leaving rule.
    for (i = 0; i < numCtxs; i++) { // for each stack context
        returnState = this.atn.states[config.context.getReturnState(i)];
        // all states must have single outgoing epsilon edge
        if (returnState.transitions.length !== 1 || !returnState.transitions[0].isEpsilon) {
            return false;
        }
        returnStateTarget = returnState.transitions[0].target;

        // Look for prefix op case like 'not' expr, '(' type ')' expr
        if (returnState.stateType === ATNState.BLOCK_END && returnStateTarget === p) {
            continue;
        }
        // Look for 'expr op expr' or case where expr's return state is block end
        // of (...)* internal block; the block end points to loop back
        // which points to p but we don't need to check that
        if (returnState === blockEndState) {
            continue;
        }
        // Look for ternary expr ? expr : expr. The return state points at block end,
        // which points at loop entry state
        if (returnStateTarget === blockEndState) {
            continue;
        }
        // Look for complex prefix 'between expr and expr' case where 2nd expr's
        // return state points at block end state of (...)* internal block.
        // The count must be read from transitions.length; a misspelled property
        // would be undefined and make this case unreachable.
        if (returnStateTarget.stateType === ATNState.BLOCK_END &&
                returnStateTarget.transitions.length === 1 &&
                returnStateTarget.transitions[0].isEpsilon &&
                returnStateTarget.transitions[0].target === p) {
            continue;
        }
        // anything else ain't conforming
        return false;
    }
    return true;
};
ParserATNSimulator.prototype.getRuleName = function( index) {
if (this.parser!==null && index>=0) {
return this.parser.ruleNames[index];

View File

@ -29,6 +29,7 @@
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
var DFAState = require('./DFAState').DFAState;
var StarLoopEntryState = require('../atn/ATNState').StarLoopEntryState;
var ATNConfigSet = require('./../atn/ATNConfigSet').ATNConfigSet;
var DFASerializer = require('./DFASerializer').DFASerializer;
var LexerDFASerializer = require('./DFASerializer').LexerDFASerializer;
@ -58,6 +59,17 @@ function DFA(atnStartState, decision) {
// {@code false}. This is the backing field for {@link //isPrecedenceDfa},
// {@link //setPrecedenceDfa}.
this.precedenceDfa = false;
// When the ATN start state is a StarLoopEntryState flagged as a precedence
// decision, this DFA is a precedence DFA from construction on, and s0 is
// initialized to a non-accepting placeholder state with an empty
// ATNConfigSet and no edges yet.
if (atnStartState instanceof StarLoopEntryState)
{
    if (atnStartState.isPrecedenceDecision) {
        this.precedenceDfa = true;
        // Declared with var: a bare assignment would create an implicit
        // global (and throw a ReferenceError under strict mode).
        var precedenceState = new DFAState(null, new ATNConfigSet());
        precedenceState.edges = [];
        precedenceState.isAcceptState = false;
        precedenceState.requiresFullContext = false;
        this.s0 = precedenceState;
    }
}
return this;
}
@ -125,7 +137,7 @@ DFA.prototype.setPrecedenceDfa = function(precedenceDfa) {
if (this.precedenceDfa!==precedenceDfa) {
this._states = new DFAStatesSet();
if (precedenceDfa) {
var precedenceState = new DFAState(new ATNConfigSet());
var precedenceState = new DFAState(null, new ATNConfigSet());
precedenceState.edges = [];
precedenceState.isAcceptState = false;
precedenceState.requiresFullContext = false;

View File

@ -69,7 +69,7 @@ class ParserInterpreter(Parser):
for state in atn.states:
if not isinstance(state, StarLoopEntryState):
continue
if state.precedenceRuleDecision:
if state.isPrecedenceDecision:
self.pushRecursionContextStates.add(state.stateNumber)
# get atn simulator that knows how to do predictions
self._interp = ParserATNSimulator(self, atn, self.decisionToDFA, self.sharedContextCache)

View File

@ -368,7 +368,7 @@ class ATNDeserializer (object):
#
# Analyze the {@link StarLoopEntryState} states in the specified ATN to set
# the {@link StarLoopEntryState#precedenceRuleDecision} field to the
# the {@link StarLoopEntryState#isPrecedenceDecision} field to the
# correct value.
#
# @param atn The ATN.
@ -387,7 +387,7 @@ class ATNDeserializer (object):
if isinstance(maybeLoopEndState, LoopEndState):
if maybeLoopEndState.epsilonOnlyTransitions and \
isinstance(maybeLoopEndState.transitions[0].target, RuleStopState):
state.precedenceRuleDecision = True
state.isPrecedenceDecision = True
def verifyATN(self, atn):
if not self.deserializationOptions.verifyATN:

View File

@ -262,7 +262,7 @@ class StarLoopEntryState(DecisionState):
self.stateType = self.STAR_LOOP_ENTRY
self.loopBackState = None
# Indicates whether this state can benefit from a precedence DFA during SLL decision making.
self.precedenceRuleDecision = None
self.isPrecedenceDecision = None
# Mark the end of a * or + loop.
class LoopEndState(ATNState):

View File

@ -266,7 +266,7 @@ from antlr4.atn.ATN import ATN
from antlr4.atn.ATNConfig import ATNConfig
from antlr4.atn.ATNConfigSet import ATNConfigSet
from antlr4.atn.ATNSimulator import ATNSimulator
from antlr4.atn.ATNState import StarLoopEntryState, RuleStopState
from antlr4.atn.ATNState import StarLoopEntryState, RuleStopState, ATNState
from antlr4.atn.PredictionMode import PredictionMode
from antlr4.atn.SemanticContext import SemanticContext, AND, andContext, orContext
from antlr4.atn.Transition import Transition, RuleTransition, ActionTransition, AtomTransition, SetTransition, NotSetTransition
@ -341,15 +341,6 @@ class ParserATNSimulator(ATNSimulator):
" exec LA(1)==" + self.getLookaheadName(input) +
", outerContext=" + outerContext.toString(self.parser.literalNames, None))
# If this is not a precedence DFA, we check the ATN start state
# to determine if this ATN start state is the decision for the
# closure block that determines whether a precedence rule
# should continue or complete.
#
if not dfa.precedenceDfa and isinstance(dfa.atnStartState, StarLoopEntryState):
if dfa.atnStartState.precedenceRuleDecision:
dfa.setPrecedenceDfa(True)
fullCtx = False
s0_closure = self.computeStartState(dfa.atnStartState, ParserRuleContext.EMPTY, fullCtx)
@ -360,6 +351,7 @@ class ParserATNSimulator(ATNSimulator):
# appropriate start state for the precedence level rather
# than simply setting DFA.s0.
#
dfa.s0.configs = s0_closure # not used for prediction but useful to know start configs anyway
s0_closure = self.applyPrecedenceFilter(s0_closure)
s0 = self.addDFAState(dfa, DFAState(configs=s0_closure))
dfa.setPrecedenceStartState(self.parser.getPrecedence(), s0)
@ -1168,7 +1160,13 @@ class ParserATNSimulator(ATNSimulator):
# make sure to not return here, because EOF transitions can act as
# both epsilon transitions and non-epsilon transitions.
first = True
for t in p.transitions:
if first:
first = False
if self.canDropLoopEntryEdgeInLeftRecursiveRule(config):
continue
continueCollecting = collectPredicates and not isinstance(t, ActionTransition)
c = self.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon)
if c is not None:
@ -1205,6 +1203,161 @@ class ParserATNSimulator(ATNSimulator):
self.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon)
# Implements first-edge (loop entry) elimination as an optimization
# during closure operations. See antlr/antlr4#1398.
#
# The optimization is to avoid adding the loop entry config when
# the exit path can only lead back to the same
# StarLoopEntryState after popping context at the rule end state
# (traversing only epsilon edges, so we're still in closure, in
# this same rule).
#
# We need to detect any state that can reach loop entry on
# epsilon w/o exiting rule. We don't have to look at FOLLOW
# links, just ensure that all stack tops for config refer to key
# states in LR rule.
#
# To verify we are in the right situation we must first check
# closure is at a StarLoopEntryState generated during LR removal.
# Then we check that each stack top of context is a return state
# from one of these cases:
#
# 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state
# 2. expr op expr. The return state is the block end of internal block of (...)*
# 3. 'between' expr 'and' expr. The return state of 2nd expr reference.
# That state points at block end of internal block of (...)*.
# 4. expr '?' expr ':' expr. The return state points at block end,
# which points at loop entry state.
#
# If any is true for each stack top, then closure does not add a
# config to the current config set for edge[0], the loop entry branch.
#
# Conditions fail if any context for the current config is:
#
# a. empty (we'd fall out of expr to do a global FOLLOW which could
# even be to some weird spot in expr) or,
# b. lies outside of expr or,
# c. lies within expr but at a state not the BlockEndState
# generated during LR removal
#
# Do we need to evaluate predicates ever in closure for this case?
#
# No. Predicates, including precedence predicates, are only
# evaluated when computing a DFA start state. I.e., only before
# the lookahead (but not parser) consumes a token.
#
# There are no epsilon edges allowed in LR rule alt blocks or in
# the "primary" part (ID here). If closure is in
# StarLoopEntryState any lookahead operation will have consumed a
# token as there are no epsilon-paths that lead to
# StarLoopEntryState. We do not have to evaluate predicates
# therefore if we are in the generated StarLoopEntryState of a LR
# rule. Note that when making a prediction starting at that
# decision point, decision d=2, compute-start-state performs
# closure starting at edges[0], edges[1] emanating from
# StarLoopEntryState. That means it is not performing closure on
# StarLoopEntryState during compute-start-state.
#
# How do we know this always gives same prediction answer?
#
# Without predicates, loop entry and exit paths are ambiguous
# upon remaining input +b (in, say, a+b). Either path leads to a
# valid parse. Closure can lead to consuming + immediately or by
# falling out of this call to expr back into expr and loop back
# again to StarLoopEntryState to match +b. In this special case,
# we choose the more efficient path, which is to take the bypass
# path.
#
# The lookahead language has not changed because closure chooses
# one path over the other. Both paths lead to consuming the same
# remaining input during a lookahead operation. If the next token
# is an operator, lookahead will enter the choice block with
# operators. If it is not, lookahead will exit expr. Same as if
# closure had chosen to enter the choice block immediately.
#
# Closure is examining one config (some loopentrystate, some alt,
# context) which means it is considering exactly one alt. Closure
# always copies the same alt to any derived configs.
#
# How do we know this optimization doesn't mess up precedence in
# our parse trees?
#
# Looking through expr from left edge of stat only has to confirm
# that an input, say, a+b+c; begins with any valid interpretation
# of an expression. The precedence actually doesn't matter when
# making a decision in stat seeing through expr. It is only when
# parsing rule expr that we must use the precedence to get the
# right interpretation and, hence, parse tree.
#
# @since 4.6
#
def canDropLoopEntryEdgeInLeftRecursiveRule(self, config):
    # Tells closure whether edge 0 (the loop entry branch) of config.state
    # may be skipped. Returns True only if config.state is the precedence
    # StarLoopEntryState of a left-recursive rule and every stack top of
    # config.context is a conforming return state inside that same rule.
    loopEntry = config.state
    ctx = config.context

    # Must be the loop entry state generated by left-recursion elimination.
    # An empty stack (or an empty path) would mean global FOLLOW, in which
    # case the optimization cannot be applied.
    eligible = loopEntry.stateType == ATNState.STAR_LOOP_ENTRY \
        and loopEntry.isPrecedenceDecision \
        and not ctx.isEmpty() \
        and not ctx.hasEmptyPath()
    if not eligible:
        return False

    numCtxs = len(ctx)

    # Every stack context has to return into the rule that owns loopEntry.
    if any(self.atn.states[ctx.getReturnState(i)].ruleIndex != loopEntry.ruleIndex
           for i in range(numCtxs)):
        return False

    # Block end of the (...)* internal block reached through edge 0.
    blockEndState = self.atn.states[loopEntry.transitions[0].target.endState.stateNumber]

    # Each stack top must lead back to the loop entry/exit state through
    # epsilon edges only, without leaving the rule.
    for i in range(numCtxs):
        returnState = self.atn.states[ctx.getReturnState(i)]
        edges = returnState.transitions
        # exactly one outgoing edge, and it must be epsilon
        if len(edges) != 1 or not edges[0].isEpsilon:
            return False
        target = edges[0].target

        # prefix op case like 'not' expr, '(' type ')' expr
        isPrefixOp = returnState.stateType == ATNState.BLOCK_END and target is loopEntry
        # 'expr op expr': the return state is the block end itself
        # ternary expr ? expr : expr: the return state points at the block end
        if isPrefixOp or returnState is blockEndState or target is blockEndState:
            continue

        # complex prefix 'between expr and expr': the 2nd expr's return state
        # points at a block end whose single epsilon edge leads to loopEntry
        isComplexPrefix = target.stateType == ATNState.BLOCK_END \
            and len(target.transitions) == 1 \
            and target.transitions[0].isEpsilon \
            and target.transitions[0].target is loopEntry
        if not isComplexPrefix:
            # anything else is not conforming
            return False

    return True
def getRuleName(self, index):
if self.parser is not None and index>=0:
return self.parser.ruleNames[index]

View File

@ -27,6 +27,7 @@
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from antlr4.atn.ATNState import StarLoopEntryState
from antlr4.atn.ATNConfigSet import ATNConfigSet
from antlr4.dfa.DFAState import DFAState
@ -48,6 +49,15 @@ class DFA(object):
# {@link #setPrecedenceDfa}.
self.precedenceDfa = False
if isinstance(atnStartState, StarLoopEntryState):
if atnStartState.isPrecedenceDecision:
self.precedenceDfa = True
precedenceState = DFAState(configs=ATNConfigSet())
precedenceState.edges = []
precedenceState.isAcceptState = False
precedenceState.requiresFullContext = False
self.s0 = precedenceState
# Get the start state for a specific precedence value.
#
@ -111,7 +121,7 @@ class DFA(object):
if self.precedenceDfa != precedenceDfa:
self._states = dict()
if precedenceDfa:
precedenceState = DFAState(ATNConfigSet())
precedenceState = DFAState(configs=ATNConfigSet())
precedenceState.edges = []
precedenceState.isAcceptState = False
precedenceState.requiresFullContext = False

View File

@ -71,7 +71,7 @@ class ParserInterpreter(Parser):
for state in atn.states:
if not isinstance(state, StarLoopEntryState):
continue
if state.precedenceRuleDecision:
if state.isPrecedenceDecision:
self.pushRecursionContextStates.add(state.stateNumber)
# get atn simulator that knows how to do predictions
self._interp = ParserATNSimulator(self, atn, self.decisionToDFA, self.sharedContextCache)

View File

@ -368,7 +368,7 @@ class ATNDeserializer (object):
#
# Analyze the {@link StarLoopEntryState} states in the specified ATN to set
# the {@link StarLoopEntryState#precedenceRuleDecision} field to the
# the {@link StarLoopEntryState#isPrecedenceDecision} field to the
# correct value.
#
# @param atn The ATN.
@ -387,7 +387,7 @@ class ATNDeserializer (object):
if isinstance(maybeLoopEndState, LoopEndState):
if maybeLoopEndState.epsilonOnlyTransitions and \
isinstance(maybeLoopEndState.transitions[0].target, RuleStopState):
state.precedenceRuleDecision = True
state.isPrecedenceDecision = True
def verifyATN(self, atn:ATN):
if not self.deserializationOptions.verifyATN:

View File

@ -37,7 +37,7 @@ from antlr4.dfa.DFAState import DFAState
class ATNSimulator(object):
# Must distinguish between missing edge and edge we know leads nowhere#/
ERROR = DFAState(ATNConfigSet())
ERROR = DFAState(configs=ATNConfigSet())
ERROR.stateNumber = 0x7FFFFFFF
# The context cache maps all PredictionContext objects that are ==

View File

@ -261,7 +261,7 @@ class StarLoopEntryState(DecisionState):
self.stateType = self.STAR_LOOP_ENTRY
self.loopBackState = None
# Indicates whether this state can benefit from a precedence DFA during SLL decision making.
self.precedenceRuleDecision = None
self.isPrecedenceDecision = None
# Mark the end of a * or + loop.
class LoopEndState(ATNState):

View File

@ -346,15 +346,6 @@ class ParserATNSimulator(ATNSimulator):
" exec LA(1)==" + self.getLookaheadName(input) +
", outerContext=" + outerContext.toString(self.parser.literalNames, None))
# If this is not a precedence DFA, we check the ATN start state
# to determine if this ATN start state is the decision for the
# closure block that determines whether a precedence rule
# should continue or complete.
#
if not dfa.precedenceDfa and isinstance(dfa.atnStartState, StarLoopEntryState):
if dfa.atnStartState.precedenceRuleDecision:
dfa.setPrecedenceDfa(True)
fullCtx = False
s0_closure = self.computeStartState(dfa.atnStartState, ParserRuleContext.EMPTY, fullCtx)
@ -365,6 +356,7 @@ class ParserATNSimulator(ATNSimulator):
# appropriate start state for the precedence level rather
# than simply setting DFA.s0.
#
dfa.s0.configs = s0_closure # not used for prediction but useful to know start configs anyway
s0_closure = self.applyPrecedenceFilter(s0_closure)
s0 = self.addDFAState(dfa, DFAState(configs=s0_closure))
dfa.setPrecedenceStartState(self.parser.getPrecedence(), s0)
@ -1173,7 +1165,13 @@ class ParserATNSimulator(ATNSimulator):
# make sure to not return here, because EOF transitions can act as
# both epsilon transitions and non-epsilon transitions.
first = True
for t in p.transitions:
if first:
first = False
if self.canDropLoopEntryEdgeInLeftRecursiveRule(config):
continue
continueCollecting = collectPredicates and not isinstance(t, ActionTransition)
c = self.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon)
if c is not None:
@ -1210,6 +1208,161 @@ class ParserATNSimulator(ATNSimulator):
self.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon)
# Implements first-edge (loop entry) elimination as an optimization
# during closure operations. See antlr/antlr4#1398.
#
# The optimization is to avoid adding the loop entry config when
# the exit path can only lead back to the same
# StarLoopEntryState after popping context at the rule end state
# (traversing only epsilon edges, so we're still in closure, in
# this same rule).
#
# We need to detect any state that can reach loop entry on
# epsilon w/o exiting rule. We don't have to look at FOLLOW
# links, just ensure that all stack tops for config refer to key
# states in LR rule.
#
# To verify we are in the right situation we must first check
# closure is at a StarLoopEntryState generated during LR removal.
# Then we check that each stack top of context is a return state
# from one of these cases:
#
# 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state
# 2. expr op expr. The return state is the block end of internal block of (...)*
# 3. 'between' expr 'and' expr. The return state of 2nd expr reference.
# That state points at block end of internal block of (...)*.
# 4. expr '?' expr ':' expr. The return state points at block end,
# which points at loop entry state.
#
# If any is true for each stack top, then closure does not add a
# config to the current config set for edge[0], the loop entry branch.
#
# Conditions fail if any context for the current config is:
#
# a. empty (we'd fall out of expr to do a global FOLLOW which could
# even be to some weird spot in expr) or,
# b. lies outside of expr or,
# c. lies within expr but at a state not the BlockEndState
# generated during LR removal
#
# Do we need to evaluate predicates ever in closure for this case?
#
# No. Predicates, including precedence predicates, are only
# evaluated when computing a DFA start state. I.e., only before
# the lookahead (but not parser) consumes a token.
#
# There are no epsilon edges allowed in LR rule alt blocks or in
# the "primary" part (ID here). If closure is in
# StarLoopEntryState any lookahead operation will have consumed a
# token as there are no epsilon-paths that lead to
# StarLoopEntryState. We do not have to evaluate predicates
# therefore if we are in the generated StarLoopEntryState of a LR
# rule. Note that when making a prediction starting at that
# decision point, decision d=2, compute-start-state performs
# closure starting at edges[0], edges[1] emanating from
# StarLoopEntryState. That means it is not performing closure on
# StarLoopEntryState during compute-start-state.
#
# How do we know this always gives same prediction answer?
#
# Without predicates, loop entry and exit paths are ambiguous
# upon remaining input +b (in, say, a+b). Either path leads to a
# valid parse. Closure can lead to consuming + immediately or by
# falling out of this call to expr back into expr and loop back
# again to StarLoopEntryState to match +b. In this special case,
# we choose the more efficient path, which is to take the bypass
# path.
#
# The lookahead language has not changed because closure chooses
# one path over the other. Both paths lead to consuming the same
# remaining input during a lookahead operation. If the next token
# is an operator, lookahead will enter the choice block with
# operators. If it is not, lookahead will exit expr. Same as if
# closure had chosen to enter the choice block immediately.
#
# Closure is examining one config (some loopentrystate, some alt,
# context) which means it is considering exactly one alt. Closure
# always copies the same alt to any derived configs.
#
# How do we know this optimization doesn't mess up precedence in
# our parse trees?
#
# Looking through expr from left edge of stat only has to confirm
# that an input, say, a+b+c; begins with any valid interpretation
# of an expression. The precedence actually doesn't matter when
# making a decision in stat seeing through expr. It is only when
# parsing rule expr that we must use the precedence to get the
# right interpretation and, hence, parse tree.
#
# @since 4.6
#
def canDropLoopEntryEdgeInLeftRecursiveRule(self, config):
    # Tells closure whether edge 0 (the loop entry branch) of config.state
    # may be skipped. Returns True only if config.state is the precedence
    # StarLoopEntryState of a left-recursive rule and every stack top of
    # config.context is a conforming return state inside that same rule.
    loopEntry = config.state
    ctx = config.context

    # Must be the loop entry state generated by left-recursion elimination.
    # An empty stack (or an empty path) would mean global FOLLOW, in which
    # case the optimization cannot be applied.
    eligible = loopEntry.stateType == ATNState.STAR_LOOP_ENTRY \
        and loopEntry.isPrecedenceDecision \
        and not ctx.isEmpty() \
        and not ctx.hasEmptyPath()
    if not eligible:
        return False

    numCtxs = len(ctx)

    # Every stack context has to return into the rule that owns loopEntry.
    if any(self.atn.states[ctx.getReturnState(i)].ruleIndex != loopEntry.ruleIndex
           for i in range(numCtxs)):
        return False

    # Block end of the (...)* internal block reached through edge 0.
    blockEndState = self.atn.states[loopEntry.transitions[0].target.endState.stateNumber]

    # Each stack top must lead back to the loop entry/exit state through
    # epsilon edges only, without leaving the rule.
    for i in range(numCtxs):
        returnState = self.atn.states[ctx.getReturnState(i)]
        edges = returnState.transitions
        # exactly one outgoing edge, and it must be epsilon
        if len(edges) != 1 or not edges[0].isEpsilon:
            return False
        target = edges[0].target

        # prefix op case like 'not' expr, '(' type ')' expr
        isPrefixOp = returnState.stateType == ATNState.BLOCK_END and target is loopEntry
        # 'expr op expr': the return state is the block end itself
        # ternary expr ? expr : expr: the return state points at the block end
        if isPrefixOp or returnState is blockEndState or target is blockEndState:
            continue

        # complex prefix 'between expr and expr': the 2nd expr's return state
        # points at a block end whose single epsilon edge leads to loopEntry
        isComplexPrefix = target.stateType == ATNState.BLOCK_END \
            and len(target.transitions) == 1 \
            and target.transitions[0].isEpsilon \
            and target.transitions[0].target is loopEntry
        if not isComplexPrefix:
            # anything else is not conforming
            return False

    return True
def getRuleName(self, index:int):
if self.parser is not None and index>=0:
return self.parser.ruleNames[index]

View File

@ -27,6 +27,7 @@
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from antlr4.atn.ATNState import StarLoopEntryState
from antlr4.atn.ATNConfigSet import ATNConfigSet
from antlr4.atn.ATNState import DecisionState
@ -49,6 +50,15 @@ class DFA(object):
# {@link #setPrecedenceDfa}.
self.precedenceDfa = False
if isinstance(atnStartState, StarLoopEntryState):
if atnStartState.isPrecedenceDecision:
self.precedenceDfa = True
precedenceState = DFAState(configs=ATNConfigSet())
precedenceState.edges = []
precedenceState.isAcceptState = False
precedenceState.requiresFullContext = False
self.s0 = precedenceState
# Get the start state for a specific precedence value.
#
@ -112,7 +122,7 @@ class DFA(object):
if self.precedenceDfa != precedenceDfa:
self._states = dict()
if precedenceDfa:
precedenceState = DFAState(ATNConfigSet())
precedenceState = DFAState(configs=ATNConfigSet())
precedenceState.edges = []
precedenceState.isAcceptState = False
precedenceState.requiresFullContext = False