diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java
index cec4c94d7..d44fdcd3a 100644
--- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java
+++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java
@@ -113,7 +113,7 @@ public class PerformanceDescriptors {
 
 		@Override
 		public boolean ignore(String targetName) {
-			return !Arrays.asList("Java", "CSharp", "Python2", "Python3", "Node").contains(targetName);
+			return !Arrays.asList("Java", "CSharp", "Python2", "Python3", "Node", "Cpp").contains(targetName);
 		}
 	}
 
diff --git a/runtime/Cpp/runtime/src/atn/ParserATNSimulator.cpp b/runtime/Cpp/runtime/src/atn/ParserATNSimulator.cpp
index 8041c3734..09cd2584d 100755
--- a/runtime/Cpp/runtime/src/atn/ParserATNSimulator.cpp
+++ b/runtime/Cpp/runtime/src/atn/ParserATNSimulator.cpp
@@ -21,6 +21,11 @@
 #include "atn/RuleStopState.h"
 #include "atn/ATNConfigSet.h"
 #include "atn/ATNConfig.h"
+
+#include "atn/StarLoopEntryState.h"
+#include "atn/BlockStartState.h"
+#include "atn/BlockEndState.h"
+
 #include "misc/Interval.h"
 #include "ANTLRErrorListener.h"
 
@@ -39,6 +44,8 @@ using namespace antlr4::atn;
 
 using namespace antlrcpp;
 
+const bool ParserATNSimulator::TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT = ParserATNSimulator::getLrLoopSetting();
+
 ParserATNSimulator::ParserATNSimulator(const ATN &atn, std::vector &decisionToDFA,
                                        PredictionContextCache &sharedContextCache)
 : ParserATNSimulator(nullptr, atn, decisionToDFA, sharedContextCache) {
@@ -875,6 +882,9 @@ void ParserATNSimulator::closure_(Ref const& config, ATNConfigSet *co
   }
 
   for (size_t i = 0; i < p->transitions.size(); i++) {
+    if (i == 0 && canDropLoopEntryEdgeInLeftRecursiveRule(config.get()))
+      continue;
+
     Transition *t = p->transitions[i];
     bool continueCollecting = !is(t) && collectPredicates;
     Ref c = getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon);
@@ -932,6 +942,84 @@ void ParserATNSimulator::closure_(Ref const& config, ATNConfigSet *co
   }
 }
 
+bool ParserATNSimulator::canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig *config) const {
+  if (TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT)
+    return false;
+
+  ATNState *p = config->state;
+
+  // First check to see if we are in StarLoopEntryState generated during
+  // left-recursion elimination. For efficiency, also check if
+  // the context has an empty stack case. If so, it would mean
+  // global FOLLOW so we can't perform optimization
+  if (p->getStateType() != ATNState::STAR_LOOP_ENTRY ||
+      !static_cast<StarLoopEntryState *>(p)->isPrecedenceDecision || // Are we the special loop entry/exit state?
+      config->context->isEmpty() || // If SLL wildcard
+      config->context->hasEmptyPath())
+  {
+    return false;
+  }
+
+  // Require all return states to return back to the same rule
+  // that p is in.
+  size_t numCtxs = config->context->size();
+  for (size_t i = 0; i < numCtxs; i++) { // for each stack context
+    ATNState *returnState = atn.states[config->context->getReturnState(i)];
+    if (returnState->ruleIndex != p->ruleIndex)
+      return false;
+  }
+
+  BlockStartState *decisionStartState = static_cast<BlockStartState *>(p->transitions[0]->target);
+  size_t blockEndStateNum = decisionStartState->endState->stateNumber;
+  BlockEndState *blockEndState = static_cast<BlockEndState *>(atn.states[blockEndStateNum]);
+
+  // Verify that the top of each stack context leads to loop entry/exit
+  // state through epsilon edges and w/o leaving rule.
+  for (size_t i = 0; i < numCtxs; i++) { // for each stack context
+    size_t returnStateNumber = config->context->getReturnState(i);
+    ATNState *returnState = atn.states[returnStateNumber];
+    // All states must have single outgoing epsilon edge.
+    if (returnState->transitions.size() != 1 || !returnState->transitions[0]->isEpsilon())
+    {
+      return false;
+    }
+
+    // Look for prefix op case like 'not expr', (' type ')' expr
+    ATNState *returnStateTarget = returnState->transitions[0]->target;
+    if (returnState->getStateType() == ATNState::BLOCK_END && returnStateTarget == p) {
+      continue;
+    }
+
+    // Look for 'expr op expr' or case where expr's return state is block end
+    // of (...)* internal block; the block end points to loop back
+    // which points to p but we don't need to check that
+    if (returnState == blockEndState) {
+      continue;
+    }
+
+    // Look for ternary expr ? expr : expr. The return state points at block end,
+    // which points at loop entry state
+    if (returnStateTarget == blockEndState) {
+      continue;
+    }
+
+    // Look for complex prefix 'between expr and expr' case where 2nd expr's
+    // return state points at block end state of (...)* internal block
+    if (returnStateTarget->getStateType() == ATNState::BLOCK_END &&
+        returnStateTarget->transitions.size() == 1 &&
+        returnStateTarget->transitions[0]->isEpsilon() &&
+        returnStateTarget->transitions[0]->target == p)
+    {
+      continue;
+    }
+
+    // Anything else ain't conforming.
+    return false;
+  }
+
+  return true;
+}
+
 std::string ParserATNSimulator::getRuleName(size_t index) {
   if (parser != nullptr) {
     return parser->getRuleNames()[index];
@@ -1253,6 +1341,16 @@ Parser* ParserATNSimulator::getParser() {
   return parser;
 }
 
+// Reads the TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT environment variable and returns true only when it is
+// set to "true" or "1". Used to initialize the static flag of the same name.
+bool ParserATNSimulator::getLrLoopSetting() {
+  const char *var = std::getenv("TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT");
+  if (var == nullptr)
+    return false;
+  std::string value(var);
+  return value == "true" || value == "1";
+}
+
 void ParserATNSimulator::InitializeInstanceFields() {
   mode = PredictionMode::LL;
   _startIndex = 0;
diff --git a/runtime/Cpp/runtime/src/atn/ParserATNSimulator.h b/runtime/Cpp/runtime/src/atn/ParserATNSimulator.h
index cfa65f17b..e501077f4 100755
--- a/runtime/Cpp/runtime/src/atn/ParserATNSimulator.h
+++ b/runtime/Cpp/runtime/src/atn/ParserATNSimulator.h
@@ -247,6 +247,8 @@ namespace atn {
     Parser *const parser;
 
   public:
+    static const bool TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT;
+
     std::vector &decisionToDFA;
 
     ///
@@ -676,6 +678,93 @@ namespace atn {
                           bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon);
 
   public:
+    /** Implements first-edge (loop entry) elimination as an optimization
+     *  during closure operations. See antlr/antlr4#1398.
+     *
+     * The optimization is to avoid adding the loop entry config when
+     * the exit path can only lead back to the same
+     * StarLoopEntryState after popping context at the rule end state
+     * (traversing only epsilon edges, so we're still in closure, in
+     * this same rule).
+     *
+     * We need to detect any state that can reach loop entry on
+     * epsilon w/o exiting rule. We don't have to look at FOLLOW
+     * links, just ensure that all stack tops for config refer to key
+     * states in LR rule.
+     *
+     * To verify we are in the right situation we must first check
+     * closure is at a StarLoopEntryState generated during LR removal.
+     * Then we check that each stack top of context is a return state
+     * from one of these cases:
+     *
+     *   1. 'not' expr, '(' type ')' expr. The return state points at loop entry state
+     *   2. expr op expr. The return state is the block end of internal block of (...)*
+     *   3. 'between' expr 'and' expr. The return state of 2nd expr reference.
+     *      That state points at block end of internal block of (...)*.
+     *   4. expr '?' expr ':' expr. The return state points at block end,
+     *      which points at loop entry state.
+     *
+     * If any is true for each stack top, then closure does not add a
+     * config to the current config set for edge[0], the loop entry branch.
+     *
+     *  Conditions fail if any context for the current config is:
+     *
+     *   a. empty (we'd fall out of expr to do a global FOLLOW which could
+     *      even be to some weird spot in expr) or,
+     *   b. lies outside of expr or,
+     *   c. lies within expr but at a state not the BlockEndState
+     *   generated during LR removal
+     *
+     * Do we need to evaluate predicates ever in closure for this case?
+     *
+     * No. Predicates, including precedence predicates, are only
+     * evaluated when computing a DFA start state. I.e., only before
+     * the lookahead (but not parser) consumes a token.
+     *
+     * There are no epsilon edges allowed in LR rule alt blocks or in
+     * the "primary" part (ID here). If closure is in
+     * StarLoopEntryState any lookahead operation will have consumed a
+     * token as there are no epsilon-paths that lead to
+     * StarLoopEntryState. We do not have to evaluate predicates
+     * therefore if we are in the generated StarLoopEntryState of an LR
+     * rule. Note that when making a prediction starting at that
+     * decision point, decision d=2, compute-start-state performs
+     * closure starting at edges[0], edges[1] emanating from
+     * StarLoopEntryState. That means it is not performing closure on
+     * StarLoopEntryState during compute-start-state.
+     *
+     * How do we know this always gives same prediction answer?
+     *
+     * Without predicates, loop entry and exit paths are ambiguous
+     * upon remaining input +b (in, say, a+b). Either path leads to
+     * a valid parse. Closure can lead to consuming + immediately or by
+     * falling out of this call to expr back into expr and loop back
+     * again to StarLoopEntryState to match +b. In this special case,
+     * we choose the more efficient path, which is to take the bypass
+     * path.
+     *
+     * The lookahead language has not changed because closure chooses
+     * one path over the other. Both paths lead to consuming the same
+     * remaining input during a lookahead operation. If the next token
+     * is an operator, lookahead will enter the choice block with
+     * operators. If it is not, lookahead will exit expr. Same as if
+     * closure had chosen to enter the choice block immediately.
+     *
+     * Closure is examining one config (some loopentrystate, some alt,
+     * context) which means it is considering exactly one alt. Closure
+     * always copies the same alt to any derived configs.
+     *
+     * How do we know this optimization doesn't mess up precedence in
+     * our parse trees?
+     *
+     * Looking through expr from left edge of stat only has to confirm
+     * that an input, say, a+b+c; begins with any valid interpretation
+     * of an expression. The precedence actually doesn't matter when
+     * making a decision in stat seeing through expr. It is only when
+     * parsing rule expr that we must use the precedence to get the
+     * right interpretation and, hence, parse tree.
+     */
+    bool canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig *config) const;
     virtual std::string getRuleName(size_t index);
 
   protected:
@@ -819,6 +908,7 @@ namespace atn {
     Parser* getParser();
 
   private:
+    static bool getLrLoopSetting();
     void InitializeInstanceFields();
   };
 
diff --git a/runtime/Cpp/runtime/src/atn/PredictionContext.cpp b/runtime/Cpp/runtime/src/atn/PredictionContext.cpp
index ddb7516cf..df62ef73a 100755
--- a/runtime/Cpp/runtime/src/atn/PredictionContext.cpp
+++ b/runtime/Cpp/runtime/src/atn/PredictionContext.cpp
@@ -57,6 +57,7 @@ bool PredictionContext::isEmpty() const {
 }
 
 bool PredictionContext::hasEmptyPath() const {
+  // since EMPTY_RETURN_STATE can only appear in the last position, we check last one
   return getReturnState(size() - 1) == EMPTY_RETURN_STATE;
 }
 
@@ -504,11 +505,12 @@ Ref PredictionContext::getCachedContext(const RefgetReturnState(0));
+    contextCache.insert(updated);
   } else {
     updated = std::make_shared(parents, std::dynamic_pointer_cast(context)->returnStates);
+    contextCache.insert(updated);
   }
-  contextCache.insert(updated);
   visited[updated] = updated;
   visited[context] = updated;
 
diff --git a/runtime/Cpp/runtime/src/atn/PredictionContext.h b/runtime/Cpp/runtime/src/atn/PredictionContext.h
index f956c57c0..bde7f6d4d 100755
--- a/runtime/Cpp/runtime/src/atn/PredictionContext.h
+++ b/runtime/Cpp/runtime/src/atn/PredictionContext.h
@@ -78,7 +78,7 @@ namespace atn {
     virtual bool operator == (const PredictionContext &o) const = 0;
     virtual bool operator != (const PredictionContext &o) const;
 
-    /// This means only the EMPTY context is in set.
+    /// This means only the EMPTY (wildcard? not sure) context is in set.
     virtual bool isEmpty() const;
     virtual bool hasEmptyPath() const;
     virtual size_t hashCode() const;