Fixes #994: poor performance in left-recursive expressions with very large expression input phrases; builds on @sharwell's solution, which explicitly checks for key return states in expr rules
This commit is contained in:
parent
fca5e458d3
commit
c182e3d5bf
|
@ -33,7 +33,6 @@ public class PerformanceDescriptors {
|
|||
*/
|
||||
@CommentHasStringValue
|
||||
public String grammar;
|
||||
|
||||
}
|
||||
|
||||
public static class ExpressionGrammar_1 extends ExpressionGrammar {
|
||||
|
@ -72,4 +71,158 @@ public class PerformanceDescriptors {
|
|||
@CommentHasStringValue
|
||||
public String input;
|
||||
}
|
||||
|
||||
/** Test for https://github.com/antlr/antlr4/issues/1398.
|
||||
* Seeing through a large expression takes 5 _minutes_ on
|
||||
* my fast box to complete. After fix, it's instantaneous.
|
||||
*/
|
||||
public static abstract class DropLoopEntryBranchInLRRule extends BaseParserTestDescriptor {
|
||||
public String grammarName = "Expr";
|
||||
public String startRule = "stat";
|
||||
|
||||
/**
|
||||
grammar Expr;
|
||||
|
||||
stat : expr ';'
|
||||
| expr '.'
|
||||
;
|
||||
|
||||
expr
|
||||
: ID
|
||||
| 'not' expr
|
||||
| expr 'and' expr
|
||||
| expr 'or' expr
|
||||
| '(' ID ')' expr
|
||||
| expr '?' expr ':' expr
|
||||
| 'between' expr 'and' expr
|
||||
;
|
||||
|
||||
ID: [a-zA-Z_][a-zA-Z_0-9]*;
|
||||
WS: [ \t\n\r\f]+ -> skip;
|
||||
*/
|
||||
@CommentHasStringValue
|
||||
public String grammar;
|
||||
|
||||
@Override
|
||||
public boolean ignore(String targetName) {
|
||||
return !targetName.equals("Java");
|
||||
}
|
||||
}
|
||||
|
||||
public static class DropLoopEntryBranchInLRRule_1 extends DropLoopEntryBranchInLRRule {
|
||||
/**
|
||||
X1 and X2 and X3 and X4 and X5 and X6 and X7 or
|
||||
X1 and X2 and X3 and X4 and X5 and X6 and X7 or
|
||||
X1 and X2 and X3 and X4 and X5 and X6 and X7 or
|
||||
X1 and X2 and X3 and X4 and X5 and X6 and X7 or
|
||||
X1 and X2 and X3 and X4 and X5 and X6 and X7 or
|
||||
X1 and X2 and X3 and X4 and X5 and X6 and X7 or
|
||||
X1 and X2 and X3 and X4 and X5 and X6 and X7 or
|
||||
X1 and X2 and X3 and X4 and X5 and X6 and X7
|
||||
;
|
||||
*/
|
||||
@CommentHasStringValue
|
||||
public String input;
|
||||
}
|
||||
|
||||
public static class DropLoopEntryBranchInLRRule_2 extends DropLoopEntryBranchInLRRule {
|
||||
/**
|
||||
X1 and X2 and X3 and X4 and X5 and X6 and X7 or
|
||||
X1 and X2 and X3 and X4 and X5 and X6 and X7 or
|
||||
X1 and X2 and X3 and X4 and X5 and X6 and X7 or
|
||||
X1 and X2 and X3 and X4 and X5 and X6 and X7 or
|
||||
X1 and X2 and X3 and X4 and X5 and X6 and X7 or
|
||||
X1 and X2 and X3 and X4 and X5 and X6 and X7 or
|
||||
X1 and X2 and X3 and X4 and X5 and X6 and X7
|
||||
.
|
||||
*/ // Different in final token
|
||||
@CommentHasStringValue
|
||||
public String input;
|
||||
}
|
||||
|
||||
public static class DropLoopEntryBranchInLRRule_3 extends DropLoopEntryBranchInLRRule {
|
||||
/**
|
||||
not X1 and not X2 and not X3 and not X4 and not X5 and not X6 and not X7 or
|
||||
not X1 and not X2 and not X3 and not X4 and not X5 and not X6 and not X7 or
|
||||
not X1 and not X2 and not X3 and not X4 and not X5 and not X6 and not X7 or
|
||||
not X1 and not X2 and not X3 and not X4 and not X5 and not X6 and not X7 or
|
||||
not X1 and not X2 and not X3 and not X4 and not X5 and not X6 and not X7 or
|
||||
not X1 and not X2 and not X3 and not X4 and not X5 and not X6 and not X7 or
|
||||
not X1 and not X2 and not X3 and not X4 and not X5 and not X6 and not X7 or
|
||||
not X1 and not X2 and not X3 and not X4 and not X5 and not X6 and not X7 or
|
||||
not X1 and not X2 and not X3 and not X4 and not X5 and not X6 and not X7 or
|
||||
not X1 and not X2 and not X3 and not X4 and not X5 and not X6 and not X7 or
|
||||
not X1 and not X2 and not X3 and not X4 and not X5 and not X6 and not X7 or
|
||||
not X1 and not X2 and not X3 and not X4 and not X5 and not X6 and not X7 or
|
||||
not X1 and not X2 and not X3 and not X4 and not X5 and not X6 and not X7 or
|
||||
not X1 and not X2 and not X3 and not X4 and not X5 and not X6 and not X7 or
|
||||
not X1 and not X2 and not X3 and not X4 and not X5 and not X6 and not X7 or
|
||||
not X1 and not X2 and not X3 and not X4 and not X5 and not X6 and not X7 or
|
||||
not X1 and not X2 and not X3 and not X4 and not X5 and not X6 and not X7 or
|
||||
not X1 and not X2 and not X3 and not X4 and not X5 and not X6 and not X7
|
||||
;
|
||||
*/
|
||||
@CommentHasStringValue
|
||||
public String input;
|
||||
}
|
||||
|
||||
public static class DropLoopEntryBranchInLRRule_4 extends DropLoopEntryBranchInLRRule {
|
||||
/**
|
||||
between X1 and X2 or between X3 and X4 and
|
||||
between X1 and X2 or between X3 and X4 and
|
||||
between X1 and X2 or between X3 and X4 and
|
||||
between X1 and X2 or between X3 and X4 and
|
||||
between X1 and X2 or between X3 and X4 and
|
||||
between X1 and X2 or between X3 and X4 and
|
||||
between X1 and X2 or between X3 and X4 and
|
||||
between X1 and X2 or between X3 and X4 and
|
||||
between X1 and X2 or between X3 and X4 and
|
||||
between X1 and X2 or between X3 and X4 and
|
||||
between X1 and X2 or between X3 and X4 and
|
||||
between X1 and X2 or between X3 and X4 and
|
||||
between X1 and X2 or between X3 and X4 and
|
||||
between X1 and X2 or between X3 and X4 and
|
||||
between X1 and X2 or between X3 and X4
|
||||
;
|
||||
*/
|
||||
@CommentHasStringValue
|
||||
public String input;
|
||||
}
|
||||
|
||||
public static class DropLoopEntryBranchInLRRule_5 extends DropLoopEntryBranchInLRRule {
|
||||
/**
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z or
|
||||
X ? Y : Z
|
||||
;
|
||||
*/
|
||||
@CommentHasStringValue
|
||||
public String input;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -58,6 +58,8 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.antlr.v4.runtime.atn.ATNState.BLOCK_END;
|
||||
|
||||
/**
|
||||
* The embodiment of the adaptive LL(*), ALL(*), parsing strategy.
|
||||
*
|
||||
|
@ -291,6 +293,8 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
public static final boolean dfa_debug = false;
|
||||
public static final boolean retry_debug = false;
|
||||
|
||||
public static final boolean TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT = Boolean.parseBoolean(System.getProperty("antlr.no_lr_loop_entry_opt"));
|
||||
|
||||
protected final Parser parser;
|
||||
|
||||
public final DFA[] decisionToDFA;
|
||||
|
@ -788,7 +792,7 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
protected ATNConfigSet computeReachSet(ATNConfigSet closure, int t,
|
||||
boolean fullCtx)
|
||||
{
|
||||
if ( debug )
|
||||
if ( debug )
|
||||
System.out.println("in computeReachSet, starting closure: " + closure);
|
||||
|
||||
if (mergeCache == null) {
|
||||
|
@ -1552,59 +1556,7 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
}
|
||||
|
||||
for (int i=0; i<p.getNumberOfTransitions(); i++) {
|
||||
// This block implements first-edge elimination of ambiguous LR
|
||||
// alternatives as part of dynamic disambiguation during prediction.
|
||||
// See antlr/antlr4#1398.
|
||||
if (i == 0
|
||||
&& p.getStateType() == ATNState.STAR_LOOP_ENTRY
|
||||
&& ((StarLoopEntryState)p).isPrecedenceDecision
|
||||
&& !config.context.hasEmptyPath()) {
|
||||
|
||||
// When suppress is true, it means the outgoing edge i==0 is
|
||||
// ambiguous with the outgoing edge i==1, and thus the closure
|
||||
// operation can dynamically disambiguate by suppressing this
|
||||
// edge during the closure operation.
|
||||
boolean suppress = true;
|
||||
|
||||
// Require all return states to return back to the same rule
|
||||
// that p is in.
|
||||
int limit = config.context.size();
|
||||
for (int j = 0; j < limit; j++) {
|
||||
ATNState returnState = atn.states.get(config.context.getReturnState(j));
|
||||
suppress = suppress && returnState.ruleIndex == p.ruleIndex;
|
||||
}
|
||||
|
||||
// Further check to make sure this isn't a 0-precedence entry.
|
||||
// See antlr/antlr4#679.
|
||||
if (suppress) {
|
||||
RuleStopState ruleStopState = atn.ruleToStopState[p.ruleIndex];
|
||||
for (int j = 0; j < limit; j++) {
|
||||
for (Transition transition : ruleStopState.transitions) {
|
||||
if (transition.getSerializationType() != Transition.EPSILON) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (((EpsilonTransition)transition).outermostPrecedenceReturn() != p.ruleIndex) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int returnStateNumber = config.context.getReturnState(j);
|
||||
suppress = returnStateNumber != transition.target.stateNumber;
|
||||
if (!suppress) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!suppress) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (suppress) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if ( i==0 && canDropLoopEntryEdgeInLeftRecursiveRule(config) ) continue;
|
||||
|
||||
Transition t = p.transition(i);
|
||||
boolean continueCollecting =
|
||||
|
@ -1657,6 +1609,163 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
}
|
||||
}
|
||||
|
||||
/** Implements first-edge (loop entry) elimination as an optimization
|
||||
* during closure operations. See antlr/antlr4#1398.
|
||||
*
|
||||
* The optimization is to avoid adding the loop entry config when
|
||||
* the exit path can only lead back to the same
|
||||
* StarLoopEntryState after popping context at the rule end state
|
||||
* (traversing only epsilon edges, so we're still in closure, in
|
||||
* this same rule).
|
||||
*
|
||||
* We need to detect any state that can reach loop entry on
|
||||
* epsilon w/o exiting rule. We don't have to look at FOLLOW
|
||||
* links, just ensure that all stack tops for config refer to key
|
||||
* states in LR rule.
|
||||
*
|
||||
* To verify we are in the right situation we must first check
|
||||
* closure is at a StarLoopEntryState generated during LR removal.
|
||||
* Then we check that each stack top of context is a return state
|
||||
* from one of these cases:
|
||||
*
|
||||
* 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state
|
||||
* 2. expr op expr. The return state is the block end of internal block of (...)*
|
||||
* 3. 'between' expr 'and' expr. The return state of 2nd expr reference.
|
||||
* That state points at block end of internal block of (...)*.
|
||||
* 4. expr '?' expr ':' expr. The return state points at block end,
|
||||
* which points at loop entry state.
|
||||
*
|
||||
* If any is true for each stack top, then closure does not add a
|
||||
* config to the current config set for edge[0], the loop entry branch.
|
||||
*
|
||||
* Conditions fail if any context for the current config is:
|
||||
*
|
||||
* a. empty (we'd fall out of expr to do a global FOLLOW which could
|
||||
* even be to some weird spot in expr) or,
|
||||
* b. lies outside of expr or,
|
||||
* c. lies within expr but at a state not the BlockEndState
|
||||
* generated during LR removal
|
||||
*
|
||||
* Do we need to evaluate predicates ever in closure for this case?
|
||||
*
|
||||
* No. Predicates, including precedence predicates, are only
|
||||
* evaluated when computing a DFA start state. I.e., only before
|
||||
* the lookahead (but not parser) consumes a token.
|
||||
*
|
||||
* There are no epsilon edges allowed in LR rule alt blocks or in
|
||||
* the "primary" part (ID here). If closure is in
|
||||
* StarLoopEntryState any lookahead operation will have consumed a
|
||||
* token as there are no epsilon-paths that lead to
|
||||
* StarLoopEntryState. We do not have to evaluate predicates
|
||||
* therefore if we are in the generated StarLoopEntryState of a LR
|
||||
* rule. Note that when making a prediction starting at that
|
||||
* decision point, decision d=2, compute-start-state performs
|
||||
* closure starting at edges[0], edges[1] emanating from
|
||||
* StarLoopEntryState. That means it is not performing closure on
|
||||
* StarLoopEntryState during compute-start-state.
|
||||
*
|
||||
* How do we know this always gives same prediction answer?
|
||||
*
|
||||
* Without predicates, loop entry and exit paths are ambiguous
|
||||
* upon remaining input +b (in, say, a+b). Both paths lead to
|
||||
* valid parses. Closure can lead to consuming + immediately or by
|
||||
* falling out of this call to expr back into expr and loop back
|
||||
* again to StarLoopEntryState to match +b. In this special case,
|
||||
* we choose the more efficient path, which is to take the bypass
|
||||
* path.
|
||||
*
|
||||
* The lookahead language has not changed because closure chooses
|
||||
* one path over the other. Both paths lead to consuming the same
|
||||
* remaining input during a lookahead operation. If the next token
|
||||
* is an operator, lookahead will enter the choice block with
|
||||
* operators. If it is not, lookahead will exit expr. Same as if
|
||||
* closure had chosen to enter the choice block immediately.
|
||||
*
|
||||
* Closure is examining one config (some loopentrystate, some alt,
|
||||
* context) which means it is considering exactly one alt. Closure
|
||||
* always copies the same alt to any derived configs.
|
||||
*
|
||||
* How do we know this optimization doesn't mess up precedence in
|
||||
* our parse trees?
|
||||
*
|
||||
* Looking through expr from left edge of stat only has to confirm
|
||||
* that an input, say, a+b+c; begins with any valid interpretation
|
||||
* of an expression. The precedence actually doesn't matter when
|
||||
* making a decision in stat seeing through expr. It is only when
|
||||
* parsing rule expr that we must use the precedence to get the
|
||||
* right interpretation and, hence, parse tree.
|
||||
*/
|
||||
protected boolean canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig config) {
|
||||
if ( TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT ) return false;
|
||||
ATNState p = config.state;
|
||||
// First check to see if we are in StarLoopEntryState generated during
|
||||
// left-recursion elimination. For efficiency, also check if
|
||||
// the context has an empty stack case. If so, it would mean
|
||||
// global FOLLOW so we can't perform optimization
|
||||
if ( p.getStateType() != ATNState.STAR_LOOP_ENTRY ||
|
||||
!((StarLoopEntryState)p).isPrecedenceDecision || // Are we the special loop entry/exit state?
|
||||
config.context.isEmpty() || // If SLL wildcard
|
||||
config.context.hasEmptyPath())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Require all return states to return back to the same rule
|
||||
// that p is in.
|
||||
int numCtxs = config.context.size();
|
||||
for (int i = 0; i < numCtxs; i++) { // for each stack context
|
||||
ATNState returnState = atn.states.get(config.context.getReturnState(i));
|
||||
if ( returnState.ruleIndex != p.ruleIndex ) return false;
|
||||
}
|
||||
|
||||
BlockStartState decisionStartState = (BlockStartState)p.transition(0).target;
|
||||
int blockEndStateNum = decisionStartState.endState.stateNumber;
|
||||
BlockEndState blockEndState = (BlockEndState)atn.states.get(blockEndStateNum);
|
||||
|
||||
// Verify that the top of each stack context leads to loop entry/exit
|
||||
// state through epsilon edges and w/o leaving rule.
|
||||
for (int i = 0; i < numCtxs; i++) { // for each stack context
|
||||
int returnStateNumber = config.context.getReturnState(i);
|
||||
ATNState returnState = atn.states.get(returnStateNumber);
|
||||
// all states must have single outgoing epsilon edge
|
||||
if ( returnState.getNumberOfTransitions()!=1 ||
|
||||
!returnState.transition(0).isEpsilon() )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// Look for prefix op case like 'not' expr, '(' type ')' expr
|
||||
ATNState returnStateTarget = returnState.transition(0).target;
|
||||
if ( returnState.getStateType()==BLOCK_END && returnStateTarget==p ) {
|
||||
continue;
|
||||
}
|
||||
// Look for 'expr op expr' or case where expr's return state is block end
|
||||
// of (...)* internal block; the block end points to loop back
|
||||
// which points to p but we don't need to check that
|
||||
if ( returnState==blockEndState ) {
|
||||
continue;
|
||||
}
|
||||
// Look for ternary expr ? expr : expr. The return state points at block end,
|
||||
// of the (...)* internal block, which reaches the loop entry state via loop back
|
||||
if ( returnStateTarget==blockEndState ) {
|
||||
continue;
|
||||
}
|
||||
// Look for complex prefix 'between expr and expr' case where 2nd expr's
|
||||
// return state points at a block end state whose single epsilon edge leads to p
|
||||
if ( returnStateTarget.getStateType() == BLOCK_END &&
|
||||
returnStateTarget.getNumberOfTransitions()==1 &&
|
||||
returnStateTarget.transition(0).isEpsilon() &&
|
||||
returnStateTarget.transition(0).target == p )
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// anything else ain't conforming
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
public String getRuleName(int index) {
|
||||
if ( parser!=null && index>=0 ) return parser.getRuleNames()[index];
|
||||
|
@ -2090,4 +2199,4 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
public Parser getParser() {
|
||||
return parser;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -117,12 +117,13 @@ public abstract class PredictionContext {
|
|||
|
||||
public abstract int getReturnState(int index);
|
||||
|
||||
/** This means only the {@link #EMPTY} context is in set. */
|
||||
/** True only when this context is exactly {@link #EMPTY}, i.e. {@code this == EMPTY}; no other context is in the set. */
|
||||
public boolean isEmpty() {
|
||||
return this == EMPTY;
|
||||
}
|
||||
|
||||
public boolean hasEmptyPath() {
|
||||
// since EMPTY_RETURN_STATE can only appear in the last position, we check last one
|
||||
return getReturnState(size() - 1) == EMPTY_RETURN_STATE;
|
||||
}
|
||||
|
||||
|
|
|
@ -127,7 +127,7 @@ binary
|
|||
|
||||
prefix
|
||||
: ^( ALT elementOptions?
|
||||
({!((CommonTree)input.LT(1)).getText().equals(ruleName)}? element)+
|
||||
element+
|
||||
recurse epsilonElement*
|
||||
)
|
||||
{setAltAssoc((AltAST)$ALT,currentOuterAltNumber);}
|
||||
|
|
Loading…
Reference in New Issue