forked from jasder/antlr
Initial implementation of the LR loop optimization in the C++ target.
There is an issue with the tests: some of them hang forever. This still needs to be investigated.
This commit is contained in:
parent
0ea8ede5b9
commit
1eeaf20aa0
|
@ -113,7 +113,7 @@ public class PerformanceDescriptors {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean ignore(String targetName) {
|
public boolean ignore(String targetName) {
|
||||||
return !Arrays.asList("Java", "CSharp", "Python2", "Python3", "Node").contains(targetName);
|
return !Arrays.asList("Java", "CSharp", "Python2", "Python3", "Node", "Cpp").contains(targetName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,11 @@
|
||||||
#include "atn/RuleStopState.h"
|
#include "atn/RuleStopState.h"
|
||||||
#include "atn/ATNConfigSet.h"
|
#include "atn/ATNConfigSet.h"
|
||||||
#include "atn/ATNConfig.h"
|
#include "atn/ATNConfig.h"
|
||||||
|
|
||||||
|
#include "atn/StarLoopEntryState.h"
|
||||||
|
#include "atn/BlockStartState.h"
|
||||||
|
#include "atn/BlockEndState.h"
|
||||||
|
|
||||||
#include "misc/Interval.h"
|
#include "misc/Interval.h"
|
||||||
#include "ANTLRErrorListener.h"
|
#include "ANTLRErrorListener.h"
|
||||||
|
|
||||||
|
@ -39,6 +44,8 @@ using namespace antlr4::atn;
|
||||||
|
|
||||||
using namespace antlrcpp;
|
using namespace antlrcpp;
|
||||||
|
|
||||||
|
const bool ParserATNSimulator::TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT = ParserATNSimulator::getLrLoopSetting();
|
||||||
|
|
||||||
ParserATNSimulator::ParserATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA,
|
ParserATNSimulator::ParserATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA,
|
||||||
PredictionContextCache &sharedContextCache)
|
PredictionContextCache &sharedContextCache)
|
||||||
: ParserATNSimulator(nullptr, atn, decisionToDFA, sharedContextCache) {
|
: ParserATNSimulator(nullptr, atn, decisionToDFA, sharedContextCache) {
|
||||||
|
@ -875,6 +882,9 @@ void ParserATNSimulator::closure_(Ref<ATNConfig> const& config, ATNConfigSet *co
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < p->transitions.size(); i++) {
|
for (size_t i = 0; i < p->transitions.size(); i++) {
|
||||||
|
if (i == 0 && canDropLoopEntryEdgeInLeftRecursiveRule(config.get()))
|
||||||
|
continue;
|
||||||
|
|
||||||
Transition *t = p->transitions[i];
|
Transition *t = p->transitions[i];
|
||||||
bool continueCollecting = !is<ActionTransition*>(t) && collectPredicates;
|
bool continueCollecting = !is<ActionTransition*>(t) && collectPredicates;
|
||||||
Ref<ATNConfig> c = getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon);
|
Ref<ATNConfig> c = getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon);
|
||||||
|
@ -932,6 +942,84 @@ void ParserATNSimulator::closure_(Ref<ATNConfig> const& config, ATNConfigSet *co
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Decides whether closure may skip edge 0 (the loop entry branch) of the state
// held by `config`.
//
// Returns true only when the state is a precedence StarLoopEntryState, the
// context is neither empty nor contains an empty path, and every stack top of
// the context is a return state in the same rule that matches one of the four
// accepted shapes checked below. Returns false otherwise, and always returns
// false when TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT is set.
bool ParserATNSimulator::canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig *config) const {
  if (TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT) {
    return false;
  }

  ATNState *loopEntry = config->state;

  // Only the special loop entry/exit state generated during left-recursion
  // elimination qualifies. The type check must come first so the downcast
  // below is only applied to a STAR_LOOP_ENTRY state.
  if (loopEntry->getStateType() != ATNState::STAR_LOOP_ENTRY) {
    return false;
  }
  if (!static_cast<StarLoopEntryState *>(loopEntry)->isPrecedenceDecision) {
    return false;
  }

  // An empty context (SLL wildcard) or a context with an empty path would mean
  // global FOLLOW, so the optimization cannot be performed.
  if (config->context->isEmpty() || config->context->hasEmptyPath()) {
    return false;
  }

  const size_t stackCount = config->context->size();

  // Every return state has to lead back into the rule that loopEntry is in.
  for (size_t idx = 0; idx < stackCount; ++idx) {
    ATNState *candidate = atn.states[config->context->getReturnState(idx)];
    if (candidate->ruleIndex != loopEntry->ruleIndex) {
      return false;
    }
  }

  BlockStartState *innerBlockStart = static_cast<BlockStartState *>(loopEntry->transitions[0]->target);
  const size_t innerBlockEndNumber = innerBlockStart->endState->stateNumber;
  BlockEndState *innerBlockEnd = static_cast<BlockEndState *>(atn.states[innerBlockEndNumber]);

  // The top of each stack context must lead to the loop entry/exit state
  // through epsilon edges and without leaving the rule.
  for (size_t idx = 0; idx < stackCount; ++idx) {
    ATNState *candidate = atn.states[config->context->getReturnState(idx)];

    // The return state needs exactly one outgoing edge, and it must be epsilon.
    if (candidate->transitions.size() != 1) {
      return false;
    }
    if (!candidate->transitions[0]->isEpsilon()) {
      return false;
    }

    ATNState *next = candidate->transitions[0]->target;

    // Accepted shapes, evaluated in this order with short-circuiting:
    //  - prefix op such as 'not expr' or '(' type ')' expr: the return state
    //    is a block end whose epsilon edge targets the loop entry;
    //  - 'expr op expr': the return state is the block end of the (...)*
    //    internal block (it reaches the loop entry via the loop back state,
    //    which is not checked here);
    //  - ternary expr ? expr : expr: the return state points at that block end;
    //  - complex prefix such as 'between expr and expr': the return state
    //    points at a block end whose single epsilon edge targets the loop entry.
    const bool conforming =
      (candidate->getStateType() == ATNState::BLOCK_END && next == loopEntry) ||
      candidate == innerBlockEnd ||
      next == innerBlockEnd ||
      (next->getStateType() == ATNState::BLOCK_END &&
       next->transitions.size() == 1 &&
       next->transitions[0]->isEpsilon() &&
       next->transitions[0]->target == loopEntry);

    // Any other shape does not conform.
    if (!conforming) {
      return false;
    }
  }

  return true;
}
|
||||||
|
|
||||||
std::string ParserATNSimulator::getRuleName(size_t index) {
|
std::string ParserATNSimulator::getRuleName(size_t index) {
|
||||||
if (parser != nullptr) {
|
if (parser != nullptr) {
|
||||||
return parser->getRuleNames()[index];
|
return parser->getRuleNames()[index];
|
||||||
|
@ -1253,6 +1341,14 @@ Parser* ParserATNSimulator::getParser() {
|
||||||
return parser;
|
return parser;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reads the TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT environment variable.
// Returns true only when the variable is set to exactly "true" or "1";
// an unset variable or any other value yields false.
bool ParserATNSimulator::getLrLoopSetting() {
  const char *raw = std::getenv("TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT");
  if (raw == nullptr) {
    return false;
  }

  const std::string setting(raw);
  if (setting == "true") {
    return true;
  }
  return setting == "1";
}
|
||||||
|
|
||||||
void ParserATNSimulator::InitializeInstanceFields() {
|
void ParserATNSimulator::InitializeInstanceFields() {
|
||||||
mode = PredictionMode::LL;
|
mode = PredictionMode::LL;
|
||||||
_startIndex = 0;
|
_startIndex = 0;
|
||||||
|
|
|
@ -247,6 +247,8 @@ namespace atn {
|
||||||
Parser *const parser;
|
Parser *const parser;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
static const bool TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT;
|
||||||
|
|
||||||
std::vector<dfa::DFA> &decisionToDFA;
|
std::vector<dfa::DFA> &decisionToDFA;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
|
@ -676,6 +678,93 @@ namespace atn {
|
||||||
bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon);
|
bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
/** Implements first-edge (loop entry) elimination as an optimization
|
||||||
|
* during closure operations. See antlr/antlr4#1398.
|
||||||
|
*
|
||||||
|
* The optimization is to avoid adding the loop entry config when
|
||||||
|
* the exit path can only lead back to the same
|
||||||
|
* StarLoopEntryState after popping context at the rule end state
|
||||||
|
* (traversing only epsilon edges, so we're still in closure, in
|
||||||
|
* this same rule).
|
||||||
|
*
|
||||||
|
* We need to detect any state that can reach loop entry on
|
||||||
|
* epsilon w/o exiting rule. We don't have to look at FOLLOW
|
||||||
|
* links, just ensure that all stack tops for config refer to key
|
||||||
|
* states in LR rule.
|
||||||
|
*
|
||||||
|
* To verify we are in the right situation we must first check
|
||||||
|
* closure is at a StarLoopEntryState generated during LR removal.
|
||||||
|
* Then we check that each stack top of context is a return state
|
||||||
|
* from one of these cases:
|
||||||
|
*
|
||||||
|
* 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state
|
||||||
|
* 2. expr op expr. The return state is the block end of internal block of (...)*
|
||||||
|
* 3. 'between' expr 'and' expr. The return state of 2nd expr reference.
|
||||||
|
* That state points at block end of internal block of (...)*.
|
||||||
|
* 4. expr '?' expr ':' expr. The return state points at block end,
|
||||||
|
* which points at loop entry state.
|
||||||
|
*
|
||||||
|
* If any is true for each stack top, then closure does not add a
|
||||||
|
* config to the current config set for edge[0], the loop entry branch.
|
||||||
|
*
|
||||||
|
* Conditions fail if any context for the current config is:
|
||||||
|
*
|
||||||
|
* a. empty (we'd fall out of expr to do a global FOLLOW which could
|
||||||
|
* even be to some weird spot in expr) or,
|
||||||
|
* b. lies outside of expr or,
|
||||||
|
* c. lies within expr but at a state not the BlockEndState
|
||||||
|
* generated during LR removal
|
||||||
|
*
|
||||||
|
* Do we need to evaluate predicates ever in closure for this case?
|
||||||
|
*
|
||||||
|
* No. Predicates, including precedence predicates, are only
|
||||||
|
* evaluated when computing a DFA start state. I.e., only before
|
||||||
|
* the lookahead (but not parser) consumes a token.
|
||||||
|
*
|
||||||
|
* There are no epsilon edges allowed in LR rule alt blocks or in
|
||||||
|
* the "primary" part (ID here). If closure is in
|
||||||
|
* StarLoopEntryState any lookahead operation will have consumed a
|
||||||
|
* token as there are no epsilon-paths that lead to
|
||||||
|
* StarLoopEntryState. We do not have to evaluate predicates
|
||||||
|
* therefore if we are in the generated StarLoopEntryState of a LR
|
||||||
|
* rule. Note that when making a prediction starting at that
|
||||||
|
* decision point, decision d=2, compute-start-state performs
|
||||||
|
* closure starting at edges[0], edges[1] emanating from
|
||||||
|
* StarLoopEntryState. That means it is not performing closure on
|
||||||
|
* StarLoopEntryState during compute-start-state.
|
||||||
|
*
|
||||||
|
* How do we know this always gives same prediction answer?
|
||||||
|
*
|
||||||
|
* Without predicates, loop entry and exit paths are ambiguous
|
||||||
|
* upon remaining input +b (in, say, a+b). Both paths lead to
|
||||||
|
* valid parses. Closure can lead to consuming + immediately or by
|
||||||
|
* falling out of this call to expr back into expr and loop back
|
||||||
|
* again to StarLoopEntryState to match +b. In this special case,
|
||||||
|
* we choose the more efficient path, which is to take the bypass
|
||||||
|
* path.
|
||||||
|
*
|
||||||
|
* The lookahead language has not changed because closure chooses
|
||||||
|
* one path over the other. Both paths lead to consuming the same
|
||||||
|
* remaining input during a lookahead operation. If the next token
|
||||||
|
* is an operator, lookahead will enter the choice block with
|
||||||
|
* operators. If it is not, lookahead will exit expr. Same as if
|
||||||
|
* closure had chosen to enter the choice block immediately.
|
||||||
|
*
|
||||||
|
* Closure is examining one config (some loopentrystate, some alt,
|
||||||
|
* context) which means it is considering exactly one alt. Closure
|
||||||
|
* always copies the same alt to any derived configs.
|
||||||
|
*
|
||||||
|
* How do we know this optimization doesn't mess up precedence in
|
||||||
|
* our parse trees?
|
||||||
|
*
|
||||||
|
* Looking through expr from left edge of stat only has to confirm
|
||||||
|
* that an input, say, a+b+c; begins with any valid interpretation
|
||||||
|
* of an expression. The precedence actually doesn't matter when
|
||||||
|
* making a decision in stat seeing through expr. It is only when
|
||||||
|
* parsing rule expr that we must use the precedence to get the
|
||||||
|
* right interpretation and, hence, parse tree.
|
||||||
|
*/
|
||||||
|
bool canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig *config) const;
|
||||||
virtual std::string getRuleName(size_t index);
|
virtual std::string getRuleName(size_t index);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -819,6 +908,7 @@ namespace atn {
|
||||||
Parser* getParser();
|
Parser* getParser();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
static bool getLrLoopSetting();
|
||||||
void InitializeInstanceFields();
|
void InitializeInstanceFields();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -57,6 +57,7 @@ bool PredictionContext::isEmpty() const {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool PredictionContext::hasEmptyPath() const {
|
bool PredictionContext::hasEmptyPath() const {
|
||||||
|
// since EMPTY_RETURN_STATE can only appear in the last position, we check last one
|
||||||
return getReturnState(size() - 1) == EMPTY_RETURN_STATE;
|
return getReturnState(size() - 1) == EMPTY_RETURN_STATE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -504,11 +505,12 @@ Ref<PredictionContext> PredictionContext::getCachedContext(const Ref<PredictionC
|
||||||
updated = EMPTY;
|
updated = EMPTY;
|
||||||
} else if (parents.size() == 1) {
|
} else if (parents.size() == 1) {
|
||||||
updated = SingletonPredictionContext::create(parents[0], context->getReturnState(0));
|
updated = SingletonPredictionContext::create(parents[0], context->getReturnState(0));
|
||||||
|
contextCache.insert(updated);
|
||||||
} else {
|
} else {
|
||||||
updated = std::make_shared<ArrayPredictionContext>(parents, std::dynamic_pointer_cast<ArrayPredictionContext>(context)->returnStates);
|
updated = std::make_shared<ArrayPredictionContext>(parents, std::dynamic_pointer_cast<ArrayPredictionContext>(context)->returnStates);
|
||||||
|
contextCache.insert(updated);
|
||||||
}
|
}
|
||||||
|
|
||||||
contextCache.insert(updated);
|
|
||||||
visited[updated] = updated;
|
visited[updated] = updated;
|
||||||
visited[context] = updated;
|
visited[context] = updated;
|
||||||
|
|
||||||
|
|
|
@ -78,7 +78,7 @@ namespace atn {
|
||||||
virtual bool operator == (const PredictionContext &o) const = 0;
|
virtual bool operator == (const PredictionContext &o) const = 0;
|
||||||
virtual bool operator != (const PredictionContext &o) const;
|
virtual bool operator != (const PredictionContext &o) const;
|
||||||
|
|
||||||
/// This means only the EMPTY context is in set.
|
/// This means only the EMPTY context (possibly the wildcard context; unconfirmed) is in the set.
|
||||||
virtual bool isEmpty() const;
|
virtual bool isEmpty() const;
|
||||||
virtual bool hasEmptyPath() const;
|
virtual bool hasEmptyPath() const;
|
||||||
virtual size_t hashCode() const;
|
virtual size_t hashCode() const;
|
||||||
|
|
Loading…
Reference in New Issue