From 126112fbbc5a4544bc3f81f05f2bec2bee50a8fc Mon Sep 17 00:00:00 2001 From: Mike Lischke Date: Fri, 24 Feb 2017 08:44:21 +0100 Subject: [PATCH] C++ implementation of issue #1665 and issue #1674 --- runtime/Cpp/runtime/src/ANTLRErrorStrategy.h | 5 +-- runtime/Cpp/runtime/src/Parser.cpp | 19 +++++++--- runtime/Cpp/runtime/src/Parser.h | 35 ++++++++++++++++--- runtime/Cpp/runtime/src/ParserInterpreter.cpp | 5 +-- runtime/Cpp/runtime/src/ParserRuleContext.cpp | 34 ++++++++++-------- runtime/Cpp/runtime/src/ParserRuleContext.h | 12 +++---- runtime/Cpp/runtime/src/tree/TerminalNode.h | 11 ++++++ .../Cpp/runtime/src/tree/TerminalNodeImpl.cpp | 5 +++ .../Cpp/runtime/src/tree/TerminalNodeImpl.h | 1 + 9 files changed, 92 insertions(+), 35 deletions(-) diff --git a/runtime/Cpp/runtime/src/ANTLRErrorStrategy.h b/runtime/Cpp/runtime/src/ANTLRErrorStrategy.h index 393dbcc15..f4367d024 100755 --- a/runtime/Cpp/runtime/src/ANTLRErrorStrategy.h +++ b/runtime/Cpp/runtime/src/ANTLRErrorStrategy.h @@ -93,8 +93,9 @@ namespace antlr4 { /// Tests whether or not {@code recognizer} is in the process of recovering /// from an error. In error recovery mode, adds /// symbols to the parse tree by calling - /// instead of - /// . + /// {@link Parser#createErrorNode(ParserRuleContext, Token)} then + /// {@link ParserRuleContext#addErrorNode(ErrorNode)} instead of + /// {@link Parser#createTerminalNode(ParserRuleContext, Token)}. /// /// the parser instance /// {@code true} if the parser is currently recovering from a parse diff --git a/runtime/Cpp/runtime/src/Parser.cpp b/runtime/Cpp/runtime/src/Parser.cpp index 1215127e4..1f6f752f5 100755 --- a/runtime/Cpp/runtime/src/Parser.cpp +++ b/runtime/Cpp/runtime/src/Parser.cpp @@ -8,6 +8,7 @@ #include "dfa/DFA.h" #include "ParserRuleContext.h" #include "tree/TerminalNode.h" +#include "tree/ErrorNodeImpl.h" #include "Lexer.h" #include "atn/ParserATNSimulator.h" #include "misc/IntervalSet.h" @@ -111,7 +112,7 @@ Token* Parser::match(size_t ttype) { if (_buildParseTrees && t->getTokenIndex() == INVALID_INDEX) { // we must have conjured up a new token during single token insertion // if it's not the current symbol - _ctx->addErrorNode(_tracker, t); + _ctx->addChild(createErrorNode(t)); } } return t; @@ -127,7 +128,7 @@ Token* Parser::matchWildcard() { if (_buildParseTrees && t->getTokenIndex() == INVALID_INDEX) { // we must have conjured up a new token during single token insertion // if it's not the current symbol - _ctx->addErrorNode(_tracker, t); + _ctx->addChild(createErrorNode(t)); } } @@ -293,17 +294,19 @@ Token* Parser::consume() { if (o->getType() != EOF) { getInputStream()->consume(); } + bool hasListener = _parseListeners.size() > 0 && !_parseListeners.empty(); if (_buildParseTrees || hasListener) { if (_errHandler->inErrorRecoveryMode(this)) { - tree::ErrorNode* node = _ctx->addErrorNode(_tracker, o); + tree::ErrorNode *node = createErrorNode(o); + _ctx->addChild(node); if (_parseListeners.size() > 0) { for (auto listener : _parseListeners) { listener->visitErrorNode(node); } } } else { - tree::TerminalNode *node = _ctx->addChild(_tracker, o); + tree::TerminalNode *node = _ctx->addChild(createTerminalNode(o)); if (_parseListeners.size() > 0) { for (auto listener : _parseListeners) { listener->visitTerminal(node); @@ -617,6 +620,14 @@ bool Parser::isTrace() const { return _tracer != nullptr; } +tree::TerminalNode *Parser::createTerminalNode(Token *t) { + return _tracker.createInstance(t); +} + +tree::ErrorNode *Parser::createErrorNode(Token *t) { + return _tracker.createInstance(t); +} + void Parser::InitializeInstanceFields() { _errHandler = std::make_shared(); _precedenceStack.clear(); diff --git a/runtime/Cpp/runtime/src/Parser.h b/runtime/Cpp/runtime/src/Parser.h index 47883c716..5da4e2146 100755 --- a/runtime/Cpp/runtime/src/Parser.h +++ b/runtime/Cpp/runtime/src/Parser.h @@ -54,13 +54,14 @@ namespace antlr4 { /// Match current input symbol against {@code ttype}. If the symbol type /// matches, and are /// called to complete the match process. - ///

+ /// /// If the symbol type does not match, /// is called on the current error /// strategy to attempt recovery. If is /// {@code true} and the token index of the symbol returned by /// is -1, the symbol is added to - /// the parse tree by calling . + /// the parse tree by calling {@link #createErrorNode(ParserRuleContext, Token)} then + /// {@link ParserRuleContext#addErrorNode(ErrorNode)}. /// /// the token type to match /// the matched symbol @@ -258,11 +259,11 @@ namespace antlr4 { /// /// /// If the parser is not in error recovery mode, the consumed symbol is added - /// to the parse tree using , and + /// to the parse tree using , and /// is called on any parse listeners. /// If the parser is in error recovery mode, the consumed symbol is - /// added to the parse tree using - /// , and + /// added to the parse tree using {@link #createErrorNode(ParserRuleContext, Token)} then + /// {@link ParserRuleContext#addErrorNode(ErrorNode)} and /// is called on any parse /// listeners. virtual Token* consume(); @@ -376,6 +377,30 @@ namespace antlr4 { tree::ParseTreeTracker& getTreeTracker() { return _tracker; }; + /** How to create a token leaf node associated with a parent. + * Typically, the terminal node to create is not a function of the parent + * but this method must still set the parent pointer of the terminal node + * returned. I would prefer having {@link ParserRuleContext#addAnyChild(ParseTree)} + * set the parent pointer, but the parent pointer is implementation dependent + * and currently there is no setParent() in {@link TerminalNode} (and can't + * add method in Java 1.7 without breaking backward compatibility). + * + * @since 4.6.1 + */ + tree::TerminalNode *createTerminalNode(Token *t); + + /** How to create an error node, given a token, associated with a parent. + * Typically, the error node to create is not a function of the parent + * but this method must still set the parent pointer of the terminal node + * returned. I would prefer having {@link ParserRuleContext#addAnyChild(ParseTree)} + * set the parent pointer, but the parent pointer is implementation dependent + * and currently there is no setParent() in {@link ErrorNode} (and can't + * add method in Java 1.7 without breaking backward compatibility). + * + * @since 4.6.1 + */ + tree::ErrorNode *createErrorNode(Token *t); + protected: /// The ParserRuleContext object for the currently executing rule. /// This is always non-null during the parsing process. diff --git a/runtime/Cpp/runtime/src/ParserInterpreter.cpp b/runtime/Cpp/runtime/src/ParserInterpreter.cpp index cfb3b0f40..91c6fe106 100755 --- a/runtime/Cpp/runtime/src/ParserInterpreter.cpp +++ b/runtime/Cpp/runtime/src/ParserInterpreter.cpp @@ -23,6 +23,7 @@ #include "Vocabulary.h" #include "InputMismatchException.h" #include "CommonToken.h" +#include "tree/ErrorNode.h" #include "support/CPPUtils.h" @@ -288,14 +289,14 @@ void ParserInterpreter::recover(RecognitionException &e) { _errorToken = getTokenFactory()->create({ tok->getTokenSource(), tok->getTokenSource()->getInputStream() }, expectedTokenType, tok->getText(), Token::DEFAULT_CHANNEL, INVALID_INDEX, INVALID_INDEX, // invalid start/stop tok->getLine(), tok->getCharPositionInLine()); - _ctx->addErrorNode(_tracker, _errorToken.get()); + _ctx->addChild(createErrorNode(_errorToken.get())); } else { // NoViableAlt Token *tok = e.getOffendingToken(); _errorToken = getTokenFactory()->create({ tok->getTokenSource(), tok->getTokenSource()->getInputStream() }, Token::INVALID_TYPE, tok->getText(), Token::DEFAULT_CHANNEL, INVALID_INDEX, INVALID_INDEX, // invalid start/stop tok->getLine(), tok->getCharPositionInLine()); - _ctx->addErrorNode(_tracker, _errorToken.get()); + _ctx->addChild(createErrorNode(_errorToken.get())); } } } diff --git a/runtime/Cpp/runtime/src/ParserRuleContext.cpp b/runtime/Cpp/runtime/src/ParserRuleContext.cpp index bfa77172f..cdb172c67 100755 --- a/runtime/Cpp/runtime/src/ParserRuleContext.cpp +++ b/runtime/Cpp/runtime/src/ParserRuleContext.cpp @@ -3,7 +3,8 @@ * can be found in the LICENSE.txt file in the project root. */ -#include "tree/ErrorNodeImpl.h" +#include "tree/TerminalNode.h" +#include "tree/ErrorNode.h" #include "misc/Interval.h" #include "Parser.h" #include "Token.h" @@ -34,6 +35,22 @@ void ParserRuleContext::copyFrom(ParserRuleContext *ctx) { this->start = ctx->start; this->stop = ctx->stop; + + // copy any error nodes to alt label node + if (!ctx->children.empty()) { + for (auto child : ctx->children) { + auto errorNode = dynamic_cast(child); + if (errorNode != nullptr) { + errorNode->setParent(this); + children.push_back(errorNode); + } + } + + // Remove the just reparented error nodes from the source context. + ctx->children.erase(std::remove_if(ctx->children.begin(), ctx->children.end(), [this](tree::ParseTree *e) -> bool { + return std::find(children.begin(), children.end(), e) != children.end(); + }), ctx->children.end()); + } } void ParserRuleContext::enterRule(tree::ParseTreeListener * /*listener*/) { @@ -43,6 +60,7 @@ void ParserRuleContext::exitRule(tree::ParseTreeListener * /*listener*/) { } tree::TerminalNode* ParserRuleContext::addChild(tree::TerminalNode *t) { + t->setParent(this); children.push_back(t); return t; } @@ -58,20 +76,6 @@ void ParserRuleContext::removeLastChild() { } } -tree::TerminalNode* ParserRuleContext::addChild(ParseTreeTracker &tracker, Token *matchedToken) { - auto t = tracker.createInstance(matchedToken); - addChild(t); - t->parent = this; - return t; -} - -tree::ErrorNode* ParserRuleContext::addErrorNode(ParseTreeTracker &tracker, Token *badToken) { - auto t = tracker.createInstance(badToken); - addChild(t); - t->parent = this; - return t; -} - tree::TerminalNode* ParserRuleContext::getToken(size_t ttype, size_t i) { if (i >= children.size()) { return nullptr; diff --git a/runtime/Cpp/runtime/src/ParserRuleContext.h b/runtime/Cpp/runtime/src/ParserRuleContext.h index f8e3d856d..dec5ed995 100755 --- a/runtime/Cpp/runtime/src/ParserRuleContext.h +++ b/runtime/Cpp/runtime/src/ParserRuleContext.h @@ -70,7 +70,8 @@ namespace antlr4 { virtual ~ParserRuleContext() {} /** COPY a ctx (I'm deliberately not using copy constructor) to avoid - * confusion with creating node with parent. Does not copy children. + * confusion with creating node with parent. Does not copy children + * (except error leaves). */ virtual void copyFrom(ParserRuleContext *ctx); @@ -80,7 +81,7 @@ namespace antlr4 { virtual void enterRule(tree::ParseTreeListener *listener); virtual void exitRule(tree::ParseTreeListener *listener); - /// Does not set parent link; other add methods do that. + /** Add a token leaf node child and force its parent to be this node. */ tree::TerminalNode* addChild(tree::TerminalNode *t); RuleContext* addChild(RuleContext *ruleInvocation); @@ -89,9 +90,6 @@ namespace antlr4 { /// generic ruleContext object. virtual void removeLastChild(); - virtual tree::TerminalNode* addChild(tree::ParseTreeTracker &tracker, Token *matchedToken); - virtual tree::ErrorNode* addErrorNode(tree::ParseTreeTracker &tracker, Token *badToken); - virtual tree::TerminalNode* getToken(size_t ttype, std::size_t i); virtual std::vector getTokens(size_t ttype); @@ -132,14 +130,14 @@ namespace antlr4 { * Note that the range from start to stop is inclusive, so for rules that do not consume anything * (for example, zero length or error productions) this token may exceed stop. */ - virtual Token*getStart(); + virtual Token *getStart(); /** * Get the final token in this context. * Note that the range from start to stop is inclusive, so for rules that do not consume anything * (for example, zero length or error productions) this token may precede start. */ - virtual Token* getStop(); + virtual Token *getStop(); ///

/// Used for rule context info debugging during parse-time, not so much for ATN debugging diff --git a/runtime/Cpp/runtime/src/tree/TerminalNode.h b/runtime/Cpp/runtime/src/tree/TerminalNode.h index 90415ac3a..98341feb9 100755 --- a/runtime/Cpp/runtime/src/tree/TerminalNode.h +++ b/runtime/Cpp/runtime/src/tree/TerminalNode.h @@ -13,6 +13,17 @@ namespace tree { class ANTLR4CPP_PUBLIC TerminalNode : public ParseTree { public: virtual Token* getSymbol() = 0; + + /** Set the parent for this leaf node. + * + * Technically, this is not backward compatible as it changes + * the interface but no one was able to create custom + * TerminalNodes anyway so I'm adding as it improves internal + * code quality. + * + * @since 4.6.1 + */ + virtual void setParent(RuleContext *parent) = 0; }; } // namespace tree diff --git a/runtime/Cpp/runtime/src/tree/TerminalNodeImpl.cpp b/runtime/Cpp/runtime/src/tree/TerminalNodeImpl.cpp index 32b9bae0b..3e92be4e1 100755 --- a/runtime/Cpp/runtime/src/tree/TerminalNodeImpl.cpp +++ b/runtime/Cpp/runtime/src/tree/TerminalNodeImpl.cpp @@ -5,6 +5,7 @@ #include "misc/Interval.h" #include "Token.h" +#include "RuleContext.h" #include "tree/ParseTreeVisitor.h" #include "tree/TerminalNodeImpl.h" @@ -19,6 +20,10 @@ Token* TerminalNodeImpl::getSymbol() { return symbol; } +void TerminalNodeImpl::setParent(RuleContext *parent) { + this->parent = parent; +} + misc::Interval TerminalNodeImpl::getSourceInterval() { if (symbol == nullptr) { return misc::Interval::INVALID; diff --git a/runtime/Cpp/runtime/src/tree/TerminalNodeImpl.h b/runtime/Cpp/runtime/src/tree/TerminalNodeImpl.h index 1b3116021..905a98bd0 100755 --- a/runtime/Cpp/runtime/src/tree/TerminalNodeImpl.h +++ b/runtime/Cpp/runtime/src/tree/TerminalNodeImpl.h @@ -17,6 +17,7 @@ namespace tree { TerminalNodeImpl(Token *symbol); virtual Token* getSymbol() override; + virtual void setParent(RuleContext *parent) override; virtual misc::Interval getSourceInterval() override; virtual antlrcpp::Any accept(ParseTreeVisitor *visitor) override;