Switching to current ANTLR revision, final part.

This commit is contained in:
Mike Lischke 2016-05-01 12:41:32 +02:00
parent d4ebdfa138
commit bfcb0a71cb
47 changed files with 1364 additions and 516 deletions

View File

@ -1,6 +1,6 @@
# C++ target for ANTLR 4
This fork provides C++ runtime support for C++. See [the canonical antlr4 repository](https://github.com/antlr/antlr4) for in depth detail about how to use Antlr4.
This folder contains the C++ runtime support for ANTLR. See [the canonical antlr4 repository](https://github.com/antlr/antlr4) for in-depth detail about how to use ANTLR 4.
## Authors and major contributors

View File

@ -17,14 +17,6 @@
using namespace antlrcpptest;
using namespace org::antlr::v4::runtime;
class A {
public:
static void doit(const A &a) {
size_t i = a.counter;
}
private:
size_t counter;
};
int main(int argc, const char * argv[]) {
ANTLRInputStream input(L"divideŴ and conquer");

View File

@ -57,6 +57,10 @@
278A66FC1C95838E002D667E /* ANTLRErrorListener.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 278A66FA1C95838E002D667E /* ANTLRErrorListener.cpp */; };
27A23EA31CC2A8D60036D8A3 /* TLexer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27A23EA11CC2A8D60036D8A3 /* TLexer.cpp */; };
27A23EA41CC2A8D60036D8A3 /* TLexer.h in Headers */ = {isa = PBXBuildFile; fileRef = 27A23EA21CC2A8D60036D8A3 /* TLexer.h */; };
27B4A79A1CD605BB00FCCD3E /* Predicate.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27B4A7981CD605BB00FCCD3E /* Predicate.cpp */; };
27B4A79B1CD605BB00FCCD3E /* Predicate.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27B4A7981CD605BB00FCCD3E /* Predicate.cpp */; };
27B4A79C1CD605BB00FCCD3E /* Predicate.h in Headers */ = {isa = PBXBuildFile; fileRef = 27B4A7991CD605BB00FCCD3E /* Predicate.h */; };
27B4A79D1CD605BB00FCCD3E /* Predicate.h in Headers */ = {isa = PBXBuildFile; fileRef = 27B4A7991CD605BB00FCCD3E /* Predicate.h */; };
27C62E261CD269C90088721B /* ParseInfo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27C62E241CD269C90088721B /* ParseInfo.cpp */; };
27C62E271CD269C90088721B /* ParseInfo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27C62E241CD269C90088721B /* ParseInfo.cpp */; };
27C62E281CD269C90088721B /* ParseInfo.h in Headers */ = {isa = PBXBuildFile; fileRef = 27C62E251CD269C90088721B /* ParseInfo.h */; };
@ -649,6 +653,8 @@
278A66FA1C95838E002D667E /* ANTLRErrorListener.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ANTLRErrorListener.cpp; path = ../../runtime/ANTLRErrorListener.cpp; sourceTree = SOURCE_ROOT; };
27A23EA11CC2A8D60036D8A3 /* TLexer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TLexer.cpp; path = ../generated/TLexer.cpp; sourceTree = "<group>"; wrapsLines = 0; };
27A23EA21CC2A8D60036D8A3 /* TLexer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TLexer.h; path = ../generated/TLexer.h; sourceTree = "<group>"; };
27B4A7981CD605BB00FCCD3E /* Predicate.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Predicate.cpp; sourceTree = "<group>"; };
27B4A7991CD605BB00FCCD3E /* Predicate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Predicate.h; sourceTree = "<group>"; };
27C62E241CD269C90088721B /* ParseInfo.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ParseInfo.cpp; sourceTree = "<group>"; };
27C62E251CD269C90088721B /* ParseInfo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ParseInfo.h; sourceTree = "<group>"; };
27C62E2A1CD26C780088721B /* ProfilingATNSimulator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ProfilingATNSimulator.cpp; sourceTree = "<group>"; wrapsLines = 0; };
@ -889,7 +895,7 @@
27C669861C9585B80021E494 /* TerminalNodeImpl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TerminalNodeImpl.h; sourceTree = "<group>"; };
27C669871C9585B80021E494 /* Tree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Tree.cpp; sourceTree = "<group>"; };
27C669881C9585B80021E494 /* Tree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Tree.h; sourceTree = "<group>"; };
27C669891C9585B80021E494 /* Trees.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Trees.cpp; sourceTree = "<group>"; };
27C669891C9585B80021E494 /* Trees.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Trees.cpp; sourceTree = "<group>"; wrapsLines = 0; };
27C6698A1C9585B80021E494 /* Trees.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Trees.h; sourceTree = "<group>"; wrapsLines = 0; };
27C669F01C958AB30021E494 /* Chunk.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Chunk.cpp; path = pattern/Chunk.cpp; sourceTree = "<group>"; };
27C669F11C958AB30021E494 /* ParseTreeMatch.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ParseTreeMatch.cpp; path = pattern/ParseTreeMatch.cpp; sourceTree = "<group>"; wrapsLines = 0; };
@ -1136,6 +1142,8 @@
27C6687D1C9584E90021E494 /* misc */ = {
isa = PBXGroup;
children = (
27B4A7981CD605BB00FCCD3E /* Predicate.cpp */,
27B4A7991CD605BB00FCCD3E /* Predicate.h */,
27C668881C9584FA0021E494 /* Interval.cpp */,
27C668891C9584FA0021E494 /* Interval.h */,
27C6688A1C9584FA0021E494 /* IntervalSet.cpp */,
@ -1512,6 +1520,7 @@
27C668631C95846E0021E494 /* WildcardTransition.h in Headers */,
27C667C11C95846E0021E494 /* ATNDeserializationOptions.h in Headers */,
27C667BD1C95846E0021E494 /* ATNConfigSet.h in Headers */,
27B4A79D1CD605BB00FCCD3E /* Predicate.h in Headers */,
27C667F51C95846E0021E494 /* EpsilonTransition.h in Headers */,
27C667F91C95846E0021E494 /* LexerATNConfig.h in Headers */,
27C669031C9585230021E494 /* BitSet.h in Headers */,
@ -1596,6 +1605,7 @@
27C6683E1C95846E0021E494 /* RuleTransition.h in Headers */,
27C667BC1C95846E0021E494 /* ATNConfigSet.h in Headers */,
275ECC4E1CCCD95B00E79E2A /* LexerAction.h in Headers */,
27B4A79C1CD605BB00FCCD3E /* Predicate.h in Headers */,
275DB3E91CCD23C000D8C543 /* LexerModeAction.h in Headers */,
27C666F21C9584050021E494 /* InterpreterRuleContext.h in Headers */,
27C667F41C95846E0021E494 /* EpsilonTransition.h in Headers */,
@ -1892,6 +1902,7 @@
27C62E3F1CD272480088721B /* DecisionEventInfo.cpp in Sources */,
27C667311C9584050021E494 /* TokenSource.cpp in Sources */,
27C668CB1C9584FA0021E494 /* MurmurHash.cpp in Sources */,
27B4A79B1CD605BB00FCCD3E /* Predicate.cpp in Sources */,
27C669A81C9585B80021E494 /* ParseTreeWalker.cpp in Sources */,
27C62E511CD275C50088721B /* ContextSensitivityInfo.cpp in Sources */,
27C666ED1C9584050021E494 /* InputMismatchException.cpp in Sources */,
@ -2123,6 +2134,7 @@
27C66A0C1C958AB30021E494 /* TextChunk.cpp in Sources */,
27C6682C1C95846E0021E494 /* PredictionMode.cpp in Sources */,
27C667C61C95846E0021E494 /* ATNSerializer.cpp in Sources */,
27B4A79A1CD605BB00FCCD3E /* Predicate.cpp in Sources */,
27C668341C95846E0021E494 /* RuleStartState.cpp in Sources */,
27C668481C95846E0021E494 /* SingletonPredictionContext.cpp in Sources */,
27C668751C9584B60021E494 /* DFAState.cpp in Sources */,

View File

@ -0,0 +1,12 @@
# Demo application for the ANTLR 4 C++ target
This demo app shows how to build the ANTLR runtime as both a dynamic and a static library, and how to use a parser generated from a simple demo grammar.
A few steps are necessary to get this to work:
- Download the current ANTLR jar and place it in this folder.
- Open the generation script for your platform (generate.cmd for Windows, generate.sh for *nix/OSX) and update the LOCATION var to the actual name of the jar you downloaded.
- Run the generation script. This will generate a test parser + lexer, along with listener + visitor classes in a subfolder named "generated". This is where the demo application looks for these files.
- Open the project in the folder that matches your system.
- Compile and run.

View File

@ -9,8 +9,8 @@ set -o errexit
# There are 2 ways of running the ANTLR generator here.
# 1) Running from jar. Use the given jar (or replace it by another one you built or downloaded) for generation.
#LOCATION=antlr-4.1.1-dev-complete.jar
#java -jar $LOCATION -Dlanguage=Cpp -listener -visitor -o generated/ -package antlrcpptest TLexer.g4 TParser.g4
LOCATION=antlr-4.1.1-dev-complete.jar
java -jar $LOCATION -Dlanguage=Cpp -listener -visitor -o generated/ -package antlrcpptest TLexer.g4 TParser.g4
#java -jar $LOCATION -Dlanguage=Cpp -listener -visitor -o generated/ -package antlrcpptest -XdbgST TLexer.g4 TParser.g4
#java -jar $LOCATION -Dlanguage=Java -listener -visitor -o generated/ -package antlrcpptest TLexer.g4 TParser.g4
@ -21,9 +21,9 @@ set -o errexit
# Furthermore is assumed that the antlr3 folder is located side-by-side with the antlr4 folder. Adjust CLASSPATH if not.
# This approach is especially useful if you are working on a target stg file, as it doesn't require to regenerate the
# antlr jar over and over again.
CLASSPATH=../../../tool/resources/:ST-4.0.8.jar:../../../tool/target/classes:../../../runtime/Java/target/classes:../../../../antlr3/runtime/Java/target/classes
#CLASSPATH=../../../tool/resources/:ST-4.0.8.jar:../../../tool/target/classes:../../../runtime/Java/target/classes:../../../../antlr3/runtime/Java/target/classes
java -cp $CLASSPATH org.antlr.v4.Tool -Dlanguage=Cpp -listener -visitor -o generated/ -package antlrcpptest TLexer.g4 TParser.g4
#java -cp $CLASSPATH org.antlr.v4.Tool -Dlanguage=Cpp -listener -visitor -o generated/ -package antlrcpptest TLexer.g4 TParser.g4
#java -cp $CLASSPATH org.antlr.v4.Tool -Dlanguage=Cpp -listener -visitor -o generated/ -package antlrcpptest -XdbgST TLexer.g4 TParser.g4
#java -cp $CLASSPATH org.antlr.v4.Tool -Dlanguage=Java -listener -visitor -o generated/ TLexer.g4 TParser.g4

View File

@ -80,86 +80,113 @@ namespace runtime {
virtual void syntaxError(IRecognizer *recognizer, Ref<Token> offendingSymbol, size_t line, int charPositionInLine,
const std::wstring &msg, std::exception_ptr e) = 0;
/// <summary>
/// This method is called by the parser when a full-context prediction
/// results in an ambiguity.
/// <p/>
/// When {@code exact} is {@code true}, <em>all</em> of the alternatives in
/// {@code ambigAlts} are viable, i.e. this is reporting an exact ambiguity.
/// When {@code exact} is {@code false}, <em>at least two</em> of the
/// alternatives in {@code ambigAlts} are viable for the current input, but
/// the prediction algorithm terminated as soon as it determined that at
/// least the <em>minimum</em> alternative in {@code ambigAlts} is viable.
/// <p/>
/// When the <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/> prediction mode
/// is used, the parser is required to identify exact ambiguities so
/// {@code exact} will always be {@code true}.
/// <p/>
/// This method is not used by lexers.
/// </summary>
/// <param name="recognizer"> the parser instance </param>
/// <param name="dfa"> the DFA for the current decision </param>
/// <param name="startIndex"> the input index where the decision started </param>
/// <param name="stopIndex"> the input input where the ambiguity is reported </param>
/// <param name="exact"> {@code true} if the ambiguity is exactly known, otherwise
/// {@code false}. This is always {@code true} when
/// <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/> is used. </param>
/// <param name="ambigAlts"> the potentially ambiguous alternatives </param>
/// <param name="configs"> the ATN configuration set where the ambiguity was
/// determined </param>
/**
* This method is called by the parser when a full-context prediction
* results in an ambiguity.
*
* <p>Each full-context prediction which does not result in a syntax error
* will call either {@link #reportContextSensitivity} or
* {@link #reportAmbiguity}.</p>
*
* <p>When {@code ambigAlts} is not null, it contains the set of potentially
* viable alternatives identified by the prediction algorithm. When
* {@code ambigAlts} is null, use {@link ATNConfigSet#getAlts} to obtain the
* represented alternatives from the {@code configs} argument.</p>
*
* <p>When {@code exact} is {@code true}, <em>all</em> of the potentially
* viable alternatives are truly viable, i.e. this is reporting an exact
* ambiguity. When {@code exact} is {@code false}, <em>at least two</em> of
* the potentially viable alternatives are viable for the current input, but
* the prediction algorithm terminated as soon as it determined that at
* least the <em>minimum</em> potentially viable alternative is truly
* viable.</p>
*
* <p>When the {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} prediction
* mode is used, the parser is required to identify exact ambiguities so
* {@code exact} will always be {@code true}.</p>
*
* <p>This method is not used by lexers.</p>
*
* @param recognizer the parser instance
* @param dfa the DFA for the current decision
* @param startIndex the input index where the decision started
* @param stopIndex the input index where the ambiguity was identified
* @param exact {@code true} if the ambiguity is exactly known, otherwise
* {@code false}. This is always {@code true} when
* {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} is used.
* @param ambigAlts the potentially ambiguous alternatives, or {@code null}
* to indicate that the potentially ambiguous alternatives are the complete
* set of represented alternatives in {@code configs}
* @param configs the ATN configuration set where the ambiguity was
* identified
*/
virtual void reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact,
const antlrcpp::BitSet &ambigAlts, Ref<atn::ATNConfigSet> configs) = 0;
/// <summary>
/// This method is called when an SLL conflict occurs and the parser is about
/// to use the full context information to make an LL decision.
/// <p/>
/// If one or more configurations in {@code configs} contains a semantic
/// predicate, the predicates are evaluated before this method is called. The
/// subset of alternatives which are still viable after predicates are
/// evaluated is reported in {@code conflictingAlts}.
/// <p/>
/// This method is not used by lexers.
/// </summary>
/// <param name="recognizer"> the parser instance </param>
/// <param name="dfa"> the DFA for the current decision </param>
/// <param name="startIndex"> the input index where the decision started </param>
/// <param name="stopIndex"> the input index where the SLL conflict occurred </param>
/// <param name="conflictingAlts"> The specific conflicting alternatives. If this is
/// {@code null}, the conflicting alternatives are all alternatives
/// represented in {@code configs}. </param>
/// <param name="configs"> the ATN configuration set where the SLL conflict was
/// detected </param>
/**
* This method is called when an SLL conflict occurs and the parser is about
* to use the full context information to make an LL decision.
*
* <p>If one or more configurations in {@code configs} contains a semantic
* predicate, the predicates are evaluated before this method is called. The
* subset of alternatives which are still viable after predicates are
* evaluated is reported in {@code conflictingAlts}.</p>
*
* <p>This method is not used by lexers.</p>
*
* @param recognizer the parser instance
* @param dfa the DFA for the current decision
* @param startIndex the input index where the decision started
* @param stopIndex the input index where the SLL conflict occurred
* @param conflictingAlts The specific conflicting alternatives. If this is
* {@code null}, the conflicting alternatives are all alternatives
* represented in {@code configs}. At the moment, conflictingAlts is non-null
* (for the reference implementation, but Sam's optimized version can see this
* as null).
* @param configs the ATN configuration set where the SLL conflict was
* detected
*/
virtual void reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex,
const antlrcpp::BitSet &conflictingAlts, Ref<atn::ATNConfigSet> configs) = 0;
/// <summary>
/// This method is called by the parser when a full-context prediction has a
/// unique result.
/// <p/>
/// For prediction implementations that only evaluate full-context
/// predictions when an SLL conflict is found (including the default
/// <seealso cref="ParserATNSimulator"/> implementation), this method reports cases
/// where SLL conflicts were resolved to unique full-context predictions,
/// i.e. the decision was context-sensitive. This report does not necessarily
/// indicate a problem, and it may appear even in completely unambiguous
/// grammars.
/// <p/>
/// {@code configs} may have more than one represented alternative if the
/// full-context prediction algorithm does not evaluate predicates before
/// beginning the full-context prediction. In all cases, the final prediction
/// is passed as the {@code prediction} argument.
/// <p/>
/// This method is not used by lexers.
/// </summary>
/// <param name="recognizer"> the parser instance </param>
/// <param name="dfa"> the DFA for the current decision </param>
/// <param name="startIndex"> the input index where the decision started </param>
/// <param name="stopIndex"> the input index where the context sensitivity was
/// finally determined </param>
/// <param name="prediction"> the unambiguous result of the full-context prediction </param>
/// <param name="configs"> the ATN configuration set where the unambiguous prediction
/// was determined </param>
/**
* This method is called by the parser when a full-context prediction has a
* unique result.
*
* <p>Each full-context prediction which does not result in a syntax error
* will call either {@link #reportContextSensitivity} or
* {@link #reportAmbiguity}.</p>
*
* <p>For prediction implementations that only evaluate full-context
* predictions when an SLL conflict is found (including the default
* {@link ParserATNSimulator} implementation), this method reports cases
* where SLL conflicts were resolved to unique full-context predictions,
* i.e. the decision was context-sensitive. This report does not necessarily
* indicate a problem, and it may appear even in completely unambiguous
* grammars.</p>
*
* <p>{@code configs} may have more than one represented alternative if the
* full-context prediction algorithm does not evaluate predicates before
* beginning the full-context prediction. In all cases, the final prediction
* is passed as the {@code prediction} argument.</p>
*
* <p>Note that the definition of "context sensitivity" in this method
* differs from the concept in {@link DecisionInfo#contextSensitivities}.
* This method reports all instances where an SLL conflict occurred but LL
* parsing produced a unique result, whether or not that unique result
* matches the minimum alternative in the SLL conflicting set.</p>
*
* <p>This method is not used by lexers.</p>
*
* @param recognizer the parser instance
* @param dfa the DFA for the current decision
* @param startIndex the input index where the decision started
* @param stopIndex the input index where the context sensitivity was
* finally determined
* @param prediction the unambiguous result of the full-context prediction
* @param configs the ATN configuration set where the unambiguous prediction
* was determined
*/
virtual void reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex,
int prediction, Ref<atn::ATNConfigSet> configs) = 0;
};

View File

@ -65,20 +65,24 @@ namespace runtime {
virtual void reset(Parser *recognizer) = 0;
/// <summary>
/// This method is called when an unexpected symbol is encountered during an
/// inline match operation, such as <seealso cref="Parser#match"/>. If the error
/// strategy successfully recovers from the match failure, this method
/// returns the <seealso cref="Token"/> instance which should be treated as the
/// successful result of the match.
/// <p/>
/// Note that the calling code will not report an error if this method
/// returns successfully. The error strategy implementation is responsible
/// for calling <seealso cref="Parser#notifyErrorListeners"/> as appropriate.
/// </summary>
/// <param name="recognizer"> the parser instance </param>
/// <exception cref="RecognitionException"> if the error strategy was not able to
/// recover from the unexpected input symbol </exception>
/**
* This method is called when an unexpected symbol is encountered during an
* inline match operation, such as {@link Parser#match}. If the error
* strategy successfully recovers from the match failure, this method
* returns the {@link Token} instance which should be treated as the
* successful result of the match.
*
* <p>This method handles the consumption of any tokens - the caller should
* <b>not</b> call {@link Parser#consume} after a successful recovery.</p>
*
* <p>Note that the calling code will not report an error if this method
* returns successfully. The error strategy implementation is responsible
* for calling {@link Parser#notifyErrorListeners} as appropriate.</p>
*
* @param recognizer the parser instance
* @throws RecognitionException if the error strategy was not able to
* recover from the unexpected input symbol
*/
virtual Ref<Token> recoverInline(Parser *recognizer) = 0;
/// <summary>

View File

@ -29,9 +29,10 @@
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <algorithm>
#include "Exceptions.h"
#include "Interval.h"
#include "IntStream.h"
#include "Arrays.h"
#include "CPPUtils.h"
@ -139,8 +140,9 @@ void ANTLRInputStream::seek(size_t index) {
p = index; // just jump; don't update stream state (line, ...)
return;
}
// seek forward, consume until p hits index
while (p < index && index < data.size()) {
// seek forward, consume until p hits index or n (whichever comes first)
index = std::min(index, data.size());
while (p < index) {
consume();
}
}
@ -162,6 +164,9 @@ std::wstring ANTLRInputStream::getText(const Interval &interval) {
}
std::string ANTLRInputStream::getSourceName() const {
if (name.empty()) {
return IntStream::UNKNOWN_SOURCE_NAME;
}
return name;
}

View File

@ -38,7 +38,34 @@ namespace antlr {
namespace v4 {
namespace runtime {
/// Bail out of parser at first syntax error. Use myparser.setErrorHandler(..) to set a different strategy.
/**
* This implementation of {@link ANTLRErrorStrategy} responds to syntax errors
* by immediately canceling the parse operation with a
* {@link ParseCancellationException}. The implementation ensures that the
* {@link ParserRuleContext#exception} field is set for all parse tree nodes
* that were not completed prior to encountering the error.
*
* <p>
* This error strategy is useful in the following scenarios.</p>
*
* <ul>
* <li><strong>Two-stage parsing:</strong> This error strategy allows the first
* stage of two-stage parsing to immediately terminate if an error is
* encountered, and immediately fall back to the second stage. In addition to
* avoiding wasted work by attempting to recover from errors here, the empty
* implementation of {@link BailErrorStrategy#sync} improves the performance of
* the first stage.</li>
* <li><strong>Silent validation:</strong> When syntax errors are not being
* reported or logged, and the parse result is simply ignored if errors occur,
* the {@link BailErrorStrategy} avoids wasting work on recovering from errors
* when the result will be ignored either way.</li>
* </ul>
*
* <p>
* {@code myparser.setErrorHandler(new BailErrorStrategy());}</p>
*
* @see Parser#setErrorHandler(ANTLRErrorStrategy)
*/
class BailErrorStrategy : public DefaultErrorStrategy {
/// <summary>
/// Instead of recovering from exception {@code e}, re-throw it wrapped

View File

@ -42,6 +42,11 @@ namespace antlr {
namespace v4 {
namespace runtime {
/**
* Provides an empty default implementation of {@link ANTLRErrorListener}. The
* default implementation of each method does nothing, but can be overridden as
* necessary.
*/
class BaseErrorListener : public ANTLRErrorListener {
virtual void syntaxError(IRecognizer *recognizer, Ref<Token> offendingSymbol, size_t line, int charPositionInLine,

View File

@ -75,7 +75,22 @@ size_t BufferedTokenStream::size() {
}
void BufferedTokenStream::consume() {
if (LA(1) == EOF) {
bool skipEofCheck = false;
if (!_needSetup) {
if (_fetchedEOF) {
// the last token in tokens is EOF. skip check if p indexes any
// fetched token except the last.
skipEofCheck = _p < _tokens.size() - 1;
} else {
// no EOF token in tokens. skip check if p indexes a fetched token.
skipEofCheck = _p < _tokens.size();
}
} else {
// not yet initialized
skipEofCheck = false;
}
if (!skipEofCheck && LA(1) == EOF) {
throw IllegalStateException("cannot consume EOF");
}
@ -246,13 +261,13 @@ std::vector<Ref<Token>> BufferedTokenStream::getTokens(int start, int stop, int
ssize_t BufferedTokenStream::nextTokenOnChannel(size_t i, int channel) {
sync(i);
if (i >= size()) {
return -1;
return size() - 1;
}
Ref<Token> token = _tokens[i];
while (token->getChannel() != channel) {
if (token->getType() == EOF) {
return -1;
return i;
}
i++;
sync(i);
@ -261,15 +276,24 @@ ssize_t BufferedTokenStream::nextTokenOnChannel(size_t i, int channel) {
return i;
}
ssize_t BufferedTokenStream::previousTokenOnChannel(ssize_t i, int channel) const {
do {
if (_tokens[(size_t)i]->getChannel() == channel)
ssize_t BufferedTokenStream::previousTokenOnChannel(size_t i, int channel) {
sync(i);
if (i >= size()) {
// the EOF token is on every channel
return size() - 1;
}
while (true) {
Ref<Token> token = _tokens[i];
if (token->getType() == EOF || token->getChannel() == channel) {
return i;
}
if (i == 0)
return -1;
return i;
i--;
} while (true);
return -1;
}
return i;
}
std::vector<Ref<Token>> BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex, int channel) {
@ -301,7 +325,12 @@ std::vector<Ref<Token>> BufferedTokenStream::getHiddenTokensToLeft(size_t tokenI
throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1));
}
ssize_t prevOnChannel = previousTokenOnChannel((ssize_t)tokenIndex - 1, Lexer::DEFAULT_TOKEN_CHANNEL);
if (tokenIndex == 0) {
// Obviously no tokens can appear before the first token.
return { };
}
ssize_t prevOnChannel = previousTokenOnChannel(tokenIndex - 1, Lexer::DEFAULT_TOKEN_CHANNEL);
if (prevOnChannel == (ssize_t)tokenIndex - 1) {
return { };
}
@ -334,6 +363,10 @@ std::vector<Ref<Token>> BufferedTokenStream::filterForChannel(size_t from, size_
return hidden;
}
bool BufferedTokenStream::isInitialized() const {
return !_needSetup;
}
/**
* Get the text of all tokens in this buffer.
*/

View File

@ -38,16 +38,18 @@ namespace antlr {
namespace v4 {
namespace runtime {
/// Buffer all input tokens but do on-demand fetching of new tokens from lexer.
/// Useful when the parser or lexer has to set context/mode info before proper
/// lexing of future tokens. The ST template parser needs this, for example,
/// because it has to constantly flip back and forth between inside/output
/// templates. E.g., <names:{hi, <it>}> has to parse names as part of an
/// expression but "hi, <it>" as a nested template.
///
/// You can't use this stream if you pass whitespace or other off-channel tokens
/// to the parser. The stream can't ignore off-channel tokens.
/// (UnbufferedTokenStream is the same way.) Use CommonTokenStream.
/**
* This implementation of {@link TokenStream} loads tokens from a
* {@link TokenSource} on-demand, and places the tokens in a buffer to provide
* access to any previous token by index.
*
* <p>
* This token stream ignores the value of {@link Token#getChannel}. If your
* parser requires the token stream to filter tokens to only those on a particular
* channel, such as {@link Token#DEFAULT_CHANNEL} or
* {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as
* {@link CommonTokenStream}.</p>
*/
class BufferedTokenStream : public TokenStream {
public:
BufferedTokenStream(TokenSource *tokenSource);
@ -120,30 +122,46 @@ namespace runtime {
virtual void fill();
protected:
/**
* The {@link TokenSource} from which tokens for this stream are fetched.
*/
TokenSource *_tokenSource;
/// Record every single token pulled from the source so we can reproduce
/// chunks of it later. This list captures everything so we can access
/// complete input text.
// ml: we own the tokens produced by the token factory.
/**
* A collection of all tokens fetched from the token source. The list is
* considered a complete view of the input once {@link #fetchedEOF} is set
* to {@code true}.
*/
std::vector<Ref<Token>> _tokens;
/// <summary>
/// The index into <seealso cref="#tokens"/> of the current token (next token to
/// consume). <seealso cref="#tokens"/>{@code [}<seealso cref="#p"/>{@code ]} should be
/// <seealso cref="#LT LT(1)"/>. <seealso cref="#p"/>{@code =-1} indicates need to initialize
/// with first token. The constructor doesn't get a token. First call to
/// <seealso cref="#LT LT(1)"/> or whatever gets the first token and sets
/// <seealso cref="#p"/>{@code =0;}.
/// </summary>
/**
* The index into {@link #tokens} of the current token (next token to
* {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be
* {@link #LT LT(1)}.
*
* <p>This field is set to -1 when the stream is first constructed or when
* {@link #setTokenSource} is called, indicating that the first token has
* not yet been fetched from the token source. For additional information,
* see the documentation of {@link IntStream} for a description of
* Initializing Methods.</p>
*/
// ml: since -1 requires to make this member signed for just this single aspect we use a member _needSetup instead.
// Use bool isInitialized() to find out if this stream has started reading.
size_t _p;
/// <summary>
/// Set to {@code true} when the EOF token is fetched. Do not continue fetching
/// tokens after that point, or multiple EOF tokens could end up in the
/// <seealso cref="#tokens"/> array.
/// </summary>
/// <seealso cref= #fetch </seealso>
/**
* Indicates whether the {@link Token#EOF} token has been fetched from
* {@link #tokenSource} and added to {@link #tokens}. This field improves
* performance for the following cases:
*
* <ul>
* <li>{@link #consume}: The lookahead check in {@link #consume} to prevent
* consuming the EOF symbol is optimized by checking the values of
* {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.</li>
* <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into
* {@link #tokens} is trivial with this field.</li>
* </ul>
*/
bool _fetchedEOF;
/// <summary>
@ -177,17 +195,30 @@ namespace runtime {
void lazyInit();
virtual void setup();
/// Given a starting index, return the index of the next token on channel.
/// Return i if tokens[i] is on channel. Return -1 if there are no tokens
/// on channel between i and EOF.
/**
* Given a starting index, return the index of the next token on channel.
* Return {@code i} if {@code tokens[i]} is on channel. Return the index of
* the EOF token if there are no tokens on channel between {@code i} and
* EOF.
*/
virtual ssize_t nextTokenOnChannel(size_t i, int channel);
/// Given a starting index, return the index of the previous token on channel.
/// Return i if tokens[i] is on channel. Return -1 if there are no tokens
/// on channel between i and 0.
virtual ssize_t previousTokenOnChannel(ssize_t i, int channel) const;
/**
* Given a starting index, return the index of the previous token on
* channel. Return {@code i} if {@code tokens[i]} is on channel. Return -1
* if there are no tokens on channel between {@code i} and 0.
*
* <p>
* If {@code i} specifies an index at or after the EOF token, the EOF token
* index is returned. This is due to the fact that the EOF token is treated
* as though it were on every channel.</p>
*/
virtual ssize_t previousTokenOnChannel(size_t i, int channel);
virtual std::vector<Ref<Token>> filterForChannel(size_t from, size_t to, int channel);
bool isInitialized() const;
private:
bool _needSetup;
void InitializeInstanceFields();

View File

@ -70,7 +70,6 @@ CommonToken::CommonToken(int type, const std::wstring &text) {
CommonToken::CommonToken(Token *oldToken) {
InitializeInstanceFields();
_text = oldToken->getText();
_type = oldToken->getType();
_line = oldToken->getLine();
_index = oldToken->getTokenIndex();
@ -79,9 +78,11 @@ CommonToken::CommonToken(Token *oldToken) {
_start = oldToken->getStartIndex();
_stop = oldToken->getStopIndex();
if (is<CommonToken*>(oldToken)) {
_source = (static_cast<CommonToken*>(oldToken))->_source;
if (is<CommonToken *>(oldToken)) {
_text = (static_cast<CommonToken *>(oldToken))->_text;
_source = (static_cast<CommonToken *>(oldToken))->_source;
} else {
_text = oldToken->getText();
_source = { oldToken->getTokenSource(), oldToken->getInputStream() };
}
}
@ -179,4 +180,5 @@ void CommonToken::InitializeInstanceFields() {
_index = -1;
_start = 0;
_stop = 0;
_source = EMPTY_SOURCE;
}

View File

@ -40,50 +40,121 @@ namespace runtime {
class CommonToken : public WritableToken {
protected:
/**
* An empty {@link Pair} which is used as the default value of
* {@link #source} for tokens that do not have a source.
*/
static const std::pair<TokenSource*, CharStream*> EMPTY_SOURCE;
/**
* This is the backing field for {@link #getType} and {@link #setType}.
*/
int _type;
int _line;
int _charPositionInLine; // set to invalid position
int _channel;
std::pair<TokenSource*, CharStream*> _source; // Pure references, usually from statically allocated classes.
/// We need to be able to change the text once in a while. If
/// this is non-empty, then getText should return this. Note that
/// start/stop are not affected by changing this.
///
// TO_DO: can store these in map in token stream rather than as field here
/**
* This is the backing field for {@link #getLine} and {@link #setLine}.
*/
int _line;
/**
* This is the backing field for {@link #getCharPositionInLine} and
* {@link #setCharPositionInLine}.
*/
int _charPositionInLine; // set to invalid position
/**
* This is the backing field for {@link #getChannel} and
* {@link #setChannel}.
*/
int _channel;
/**
* This is the backing field for {@link #getTokenSource} and
* {@link #getInputStream}.
*
* <p>
* These properties share a field to reduce the memory footprint of
* {@link CommonToken}. Tokens created by a {@link CommonTokenFactory} from
* the same source and input stream share a reference to the same
* {@link Pair} containing these values.</p>
*/
std::pair<TokenSource*, CharStream*> _source; // ml: pure references, usually from statically allocated classes.
/**
* This is the backing field for {@link #getText} when the token text is
* explicitly set in the constructor or via {@link #setText}.
*
* @see #getText()
*/
std::wstring _text;
/// <summary>
/// What token number is this from 0..n-1 tokens; < 0 implies invalid index </summary>
/**
* This is the backing field for {@link #getTokenIndex} and
* {@link #setTokenIndex}.
*/
int _index;
/// <summary>
/// The char position into the input buffer where this token starts </summary>
/**
* This is the backing field for {@link #getStartIndex} and
* {@link #setStartIndex}.
*/
int _start;
/// <summary>
/// The char position into the input buffer where this token stops </summary>
/**
* This is the backing field for {@link #getStopIndex} and
* {@link #setStopIndex}.
*/
int _stop;
public:
/**
* Constructs a new {@link CommonToken} with the specified token type.
*
* @param type The token type.
*/
CommonToken(int type);
CommonToken(std::pair<TokenSource*, CharStream*> source, int type, int channel, int start, int stop);
/**
* Constructs a new {@link CommonToken} with the specified token type and
* text.
*
* @param type The token type.
* @param text The text of the token.
*/
CommonToken(int type, const std::wstring &text);
/**
* Constructs a new {@link CommonToken} as a copy of another {@link Token}.
*
* <p>
* If {@code oldToken} is also a {@link CommonToken} instance, the newly
* constructed token will share a reference to the {@link #text} field and
* the {@link Pair} stored in {@link #source}. Otherwise, {@link #text} will
* be assigned the result of calling {@link #getText}, and {@link #source}
* will be constructed from the result of {@link Token#getTokenSource} and
* {@link Token#getInputStream}.</p>
*
* @param oldToken The token to copy.
*/
CommonToken(Token *oldToken);
virtual int getType() const override;
virtual void setLine(int line) override;
/**
 * Explicitly set the text for this token. If {@code text} is not
* {@code null}, then {@link #getText} will return this value rather than
* extracting the text from the input.
*
* @param text The explicit text of the token, or {@code null} if the text
* should be obtained from the input along with the start and stop indexes
* of the token.
*/
virtual void setText(const std::wstring &text) override;
virtual std::wstring getText() override;
/// <summary>
/// Override the text for this token. getText() will return this text
/// rather than pulling from the buffer. Note that this does not mean
/// that start/stop indexes are not valid. It means that the input
/// was converted to a new string in the token object.
/// </summary>
virtual void setText(const std::wstring &text) override;
virtual void setLine(int line) override;
virtual int getLine() override;
virtual int getCharPositionInLine() override;

View File

@ -38,22 +38,60 @@ namespace antlr {
namespace v4 {
namespace runtime {
/**
* This default implementation of {@link TokenFactory} creates
* {@link CommonToken} objects.
*/
class CommonTokenFactory : public TokenFactory<CommonToken> {
public:
/**
* The default {@link CommonTokenFactory} instance.
*
* <p>
* This token factory does not explicitly copy token text when constructing
* tokens.</p>
*/
static const Ref<TokenFactory<CommonToken>> DEFAULT;
/// <summary>
/// Copy text for token out of input char stream. Useful when input
/// stream is unbuffered. </summary>
/// <seealso cref= UnbufferedCharStream </seealso>
protected:
/**
* Indicates whether {@link CommonToken#setText} should be called after
* constructing tokens to explicitly set the text. This is useful for cases
* where the input stream might not be able to provide arbitrary substrings
* of text from the input after the lexer creates a token (e.g. the
* implementation of {@link CharStream#getText} in
* {@link UnbufferedCharStream} throws an
* {@link UnsupportedOperationException}). Explicitly setting the token text
* allows {@link Token#getText} to be called at any time regardless of the
* input stream implementation.
*
* <p>
* The default value is {@code false} to avoid the performance and memory
* overhead of copying text for every token unless explicitly requested.</p>
*/
const bool copyText;
public:
/// Create factory and indicate whether or not the factory copy
/// text out of the char stream.
/**
* Constructs a {@link CommonTokenFactory} with the specified value for
* {@link #copyText}.
*
* <p>
* When {@code copyText} is {@code false}, the {@link #DEFAULT} instance
* should be used instead of constructing a new instance.</p>
*
* @param copyText The value for {@link #copyText}.
*/
CommonTokenFactory(bool copyText);
/**
* Constructs a {@link CommonTokenFactory} with {@link #copyText} set to
* {@code false}.
*
* <p>
* The {@link #DEFAULT} instance should be used instead of calling this
* directly.</p>
*/
CommonTokenFactory();
virtual Ref<CommonToken> create(std::pair<TokenSource*, CharStream*> source, int type,

View File

@ -38,31 +38,60 @@ namespace antlr {
namespace v4 {
namespace runtime {
/// <summary>
/// The most common stream of tokens where every token is buffered up
/// and tokens are filtered for a certain channel (the parser will only
/// see these tokens).
///
/// Even though it buffers all of the tokens, this token stream pulls tokens
/// from the tokens source on demand. In other words, until you ask for a
/// token using consume(), LT(), etc. the stream does not pull from the lexer.
///
/// The only difference between this stream and <seealso cref="BufferedTokenStream"/> superclass
/// is that this stream knows how to ignore off channel tokens. There may be
/// a performance advantage to using the superclass if you don't pass
/// whitespace and comments etc. to the parser on a hidden channel (i.e.,
/// you set {@code $channel} instead of calling {@code skip()} in lexer rules.)
/// </summary>
/// <seealso cref= UnbufferedTokenStream </seealso>
/// <seealso cref= BufferedTokenStream </seealso>
/**
* This class extends {@link BufferedTokenStream} with functionality to filter
* token streams to tokens on a particular channel (tokens where
* {@link Token#getChannel} returns a particular value).
*
* <p>
* This token stream provides access to all tokens by index or when calling
* methods like {@link #getText}. The channel filtering is only used for code
* accessing tokens via the lookahead methods {@link #LA}, {@link #LT}, and
* {@link #LB}.</p>
*
* <p>
* By default, tokens are placed on the default channel
* ({@link Token#DEFAULT_CHANNEL}), but may be reassigned by using the
* {@code ->channel(HIDDEN)} lexer command, or by using an embedded action to
* call {@link Lexer#setChannel}.
* </p>
*
* <p>
* Note: lexer rules which use the {@code ->skip} lexer command or call
* {@link Lexer#skip} do not produce tokens at all, so input text matched by
* such a rule will not be available as part of the token stream, regardless of
* channel.</p>
*/
class CommonTokenStream : public BufferedTokenStream {
/// <summary>
/// Skip tokens on any channel but this one; this is how we skip whitespace... </summary>
protected:
/**
* Specifies the channel to use for filtering tokens.
*
* <p>
* The default value is {@link Token#DEFAULT_CHANNEL}, which matches the
* default channel assigned to tokens created by the lexer.</p>
*/
int channel;
public:
/**
* Constructs a new {@link CommonTokenStream} using the specified token
* source and the default token channel ({@link Token#DEFAULT_CHANNEL}).
*
* @param tokenSource The token source.
*/
CommonTokenStream(TokenSource *tokenSource);
/**
* Constructs a new {@link CommonTokenStream} using the specified token
* source and filtering tokens to the specified channel. Only tokens whose
* {@link Token#getChannel} matches {@code channel} or have the
* {@link Token#getType} equal to {@link Token#EOF} will be returned by the
* token stream lookahead methods.
*
* @param tokenSource The token source.
* @param channel The channel to use for filtering tokens.
*/
CommonTokenStream(TokenSource *tokenSource, int channel);
protected:

View File

@ -40,8 +40,23 @@ namespace runtime {
/// A default ANTLRErrorListener implementation that writes syntax-error
/// messages to the console. Attach it to a recognizer to get simple
/// line/column/message diagnostics without writing a custom listener.
class ConsoleErrorListener : public BaseErrorListener {
public:
/**
* Provides a default instance of {@link ConsoleErrorListener}.
*/
static ConsoleErrorListener INSTANCE;
/**
* {@inheritDoc}
*
* <p>
* This implementation prints messages to {@link System#err} containing the
* values of {@code line}, {@code charPositionInLine}, and {@code msg} using
* the following format.</p>
*
* <pre>
* line <em>line</em>:<em>charPositionInLine</em> <em>msg</em>
* </pre>
*/
virtual void syntaxError(IRecognizer *recognizer, Ref<Token> offendingSymbol, size_t line, int charPositionInLine,
const std::wstring &msg, std::exception_ptr e) override;
};

View File

@ -41,6 +41,7 @@
#include "Parser.h"
#include "Strings.h"
#include "CommonToken.h"
#include "Vocabulary.h"
#include "DefaultErrorStrategy.h"
@ -174,7 +175,7 @@ void DefaultErrorStrategy::reportNoViableAlternative(Parser *recognizer, const N
void DefaultErrorStrategy::reportInputMismatch(Parser *recognizer, const InputMismatchException &e) {
std::wstring msg = std::wstring(L"mismatched input ") + getTokenErrorDisplay(e.getOffendingToken()) +
std::wstring(L" expecting ") + e.getExpectedTokens().toString(recognizer->getTokenNames());
std::wstring(L" expecting ") + e.getExpectedTokens().toString(recognizer->getVocabulary());
recognizer->notifyErrorListeners(e.getOffendingToken(), msg, std::make_exception_ptr(e));
}
@ -195,7 +196,8 @@ void DefaultErrorStrategy::reportUnwantedToken(Parser *recognizer) {
std::wstring tokenName = getTokenErrorDisplay(t);
misc::IntervalSet expecting = getExpectedTokens(recognizer);
std::wstring msg = std::wstring(L"extraneous input ") + tokenName + std::wstring(L" expecting ") + expecting.toString(recognizer->getTokenNames());
std::wstring msg = std::wstring(L"extraneous input ") + tokenName + std::wstring(L" expecting ") +
expecting.toString(recognizer->getVocabulary());
recognizer->notifyErrorListeners(t, msg, nullptr);
}
@ -208,7 +210,7 @@ void DefaultErrorStrategy::reportMissingToken(Parser *recognizer) {
Ref<Token> t = recognizer->getCurrentToken();
misc::IntervalSet expecting = getExpectedTokens(recognizer);
std::wstring msg = std::wstring(L"missing ") + expecting.toString(recognizer->getTokenNames()) + std::wstring(L" at ") + getTokenErrorDisplay(t);
std::wstring msg = L"missing " + expecting.toString(recognizer->getVocabulary()) + L" at " + getTokenErrorDisplay(t);
recognizer->notifyErrorListeners(t, msg, nullptr);
}
@ -271,7 +273,7 @@ Ref<Token> DefaultErrorStrategy::getMissingSymbol(Parser *recognizer) {
if (expectedTokenType == EOF) {
tokenText = L"<missing EOF>";
} else {
tokenText = std::wstring(L"<missing ") + recognizer->getTokenNames()[(size_t)expectedTokenType] + std::wstring(L">");
tokenText = L"<missing " + recognizer->getVocabulary()->getDisplayName(expectedTokenType) + L">";
}
Ref<Token> current = currentSymbol;
Ref<Token> lookback = recognizer->getTokenStream()->LT(-1);

View File

@ -39,32 +39,32 @@ namespace antlr {
namespace v4 {
namespace runtime {
/// <summary>
/// This is the default error handling mechanism for ANTLR parsers
/// and tree parsers.
/// </summary>
/**
* This is the default implementation of {@link ANTLRErrorStrategy} used for
* error reporting and recovery in ANTLR parsers.
*/
class DefaultErrorStrategy : public ANTLRErrorStrategy {
public:
DefaultErrorStrategy() {
InitializeInstanceFields();
}
/// <summary>
/// This is true after we see an error and before having successfully
/// matched a token. Prevents generation of more than one error message
/// per error.
/// </summary>
/// <seealso cref= #inErrorRecoveryMode </seealso>
protected:
/**
* Indicates whether the error strategy is currently "recovering from an
* error". This is used to suppress reporting multiple error messages while
* attempting to recover from a detected syntax error.
*
* @see #inErrorRecoveryMode
*/
bool errorRecoveryMode;
/// <summary>
/// The index into the input stream where the last error occurred.
/// This is used to prevent infinite loops where an error is found
/// but no token is consumed during recovery...another error is found,
/// ad nauseam. This is a failsafe mechanism to guarantee that at least
/// one token/tree node is consumed for two errors.
/// </summary>
/** The index into the input stream where the last error occurred.
* This is used to prevent infinite loops where an error is found
* but no token is consumed during recovery...another error is found,
 * ad nauseam. This is a failsafe mechanism to guarantee that at least
* one token/tree node is consumed for two errors.
*/
int lastErrorIndex;
misc::IntervalSet lastErrorStates;
@ -138,52 +138,52 @@ namespace runtime {
/// </summary>
virtual void recover(Parser *recognizer, const RecognitionException &e) override;
/// <summary>
/// The default implementation of <seealso cref="ANTLRErrorStrategy#sync"/> makes sure
/// that the current lookahead symbol is consistent with what we were expecting
/// at this point in the ATN. You can call this anytime but ANTLR only
/// generates code to check before subrules/loops and each iteration.
/// <p/>
/// Implements Jim Idle's magic sync mechanism in closures and optional
/// subrules. E.g.,
///
/// <pre>
/// a : sync ( stuff sync )* ;
/// sync : {consume to what can follow sync} ;
/// </pre>
///
/// At the start of a sub rule upon error, <seealso cref="#sync"/> performs single
/// token deletion, if possible. If it can't do that, it bails on the current
/// rule and uses the default error recovery, which consumes until the
/// resynchronization set of the current rule.
/// <p/>
/// If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
/// with an empty alternative), then the expected set includes what follows
/// the subrule.
/// <p/>
/// During loop iteration, it consumes until it sees a token that can start a
/// sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
/// stay in the loop as long as possible.
/// <p/>
/// <strong>ORIGINS</strong>
/// <p/>
/// Previous versions of ANTLR did a poor job of their recovery within loops.
/// A single mismatch token or missing token would force the parser to bail
/// out of the entire rules surrounding the loop. So, for rule
///
/// <pre>
/// classDef : 'class' ID '{' member* '}'
/// </pre>
///
/// input with an extra token between members would force the parser to
/// consume until it found the next class definition rather than the next
/// member definition of the current class.
/// <p/>
/// This functionality cost a little bit of effort because the parser has to
/// compare token set at the start of the loop and at each iteration. If for
/// some reason speed is suffering for you, you can turn off this
/// functionality by simply overriding this method as a blank { }.
/// </summary>
/**
* The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
 * that the current lookahead symbol is consistent with what we were expecting
* at this point in the ATN. You can call this anytime but ANTLR only
* generates code to check before subrules/loops and each iteration.
*
* <p>Implements Jim Idle's magic sync mechanism in closures and optional
* subrules. E.g.,</p>
*
* <pre>
* a : sync ( stuff sync )* ;
* sync : {consume to what can follow sync} ;
* </pre>
*
* At the start of a sub rule upon error, {@link #sync} performs single
* token deletion, if possible. If it can't do that, it bails on the current
* rule and uses the default error recovery, which consumes until the
* resynchronization set of the current rule.
*
* <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
* with an empty alternative), then the expected set includes what follows
* the subrule.</p>
*
* <p>During loop iteration, it consumes until it sees a token that can start a
* sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
* stay in the loop as long as possible.</p>
*
* <p><strong>ORIGINS</strong></p>
*
* <p>Previous versions of ANTLR did a poor job of their recovery within loops.
* A single mismatch token or missing token would force the parser to bail
* out of the entire rules surrounding the loop. So, for rule</p>
*
* <pre>
* classDef : 'class' ID '{' member* '}'
* </pre>
*
* input with an extra token between members would force the parser to
* consume until it found the next class definition rather than the next
* member definition of the current class.
*
* <p>This functionality cost a little bit of effort because the parser has to
* compare token set at the start of the loop and at each iteration. If for
* some reason speed is suffering for you, you can turn off this
* functionality by simply overriding this method as a blank { }.</p>
*/
virtual void sync(Parser *recognizer) override;
/// <summary>
@ -217,94 +217,96 @@ namespace runtime {
/// <param name="e"> the recognition exception </param>
virtual void reportFailedPredicate(Parser *recognizer, const FailedPredicateException &e);
/// <summary>
/// This method is called to report a syntax error which requires the removal
/// of a token from the input stream. At the time this method is called, the
/// erroneous symbol is current {@code LT(1)} symbol and has not yet been
/// removed from the input stream. When this method returns,
/// {@code recognizer} is in error recovery mode.
/// <p/>
/// This method is called when <seealso cref="#singleTokenDeletion"/> identifies
/// single-token deletion as a viable recovery strategy for a mismatched
/// input error.
/// <p/>
/// The default implementation simply returns if the handler is already in
/// error recovery mode. Otherwise, it calls <seealso cref="#beginErrorCondition"/> to
/// enter error recovery mode, followed by calling
/// <seealso cref="Parser#notifyErrorListeners"/>.
/// </summary>
/// <param name="recognizer"> the parser instance </param>
/**
* This method is called to report a syntax error which requires the removal
* of a token from the input stream. At the time this method is called, the
* erroneous symbol is current {@code LT(1)} symbol and has not yet been
* removed from the input stream. When this method returns,
* {@code recognizer} is in error recovery mode.
*
* <p>This method is called when {@link #singleTokenDeletion} identifies
* single-token deletion as a viable recovery strategy for a mismatched
* input error.</p>
*
* <p>The default implementation simply returns if the handler is already in
* error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
* enter error recovery mode, followed by calling
* {@link Parser#notifyErrorListeners}.</p>
*
* @param recognizer the parser instance
*/
virtual void reportUnwantedToken(Parser *recognizer);
/// <summary>
/// This method is called to report a syntax error which requires the
/// insertion of a missing token into the input stream. At the time this
/// method is called, the missing token has not yet been inserted. When this
/// method returns, {@code recognizer} is in error recovery mode.
/// <p/>
/// This method is called when <seealso cref="#singleTokenInsertion"/> identifies
/// single-token insertion as a viable recovery strategy for a mismatched
/// input error.
/// <p/>
/// The default implementation simply returns if the handler is already in
/// error recovery mode. Otherwise, it calls <seealso cref="#beginErrorCondition"/> to
/// enter error recovery mode, followed by calling
/// <seealso cref="Parser#notifyErrorListeners"/>.
/// </summary>
/// <param name="recognizer"> the parser instance </param>
/**
* This method is called to report a syntax error which requires the
* insertion of a missing token into the input stream. At the time this
* method is called, the missing token has not yet been inserted. When this
* method returns, {@code recognizer} is in error recovery mode.
*
* <p>This method is called when {@link #singleTokenInsertion} identifies
* single-token insertion as a viable recovery strategy for a mismatched
* input error.</p>
*
* <p>The default implementation simply returns if the handler is already in
* error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
* enter error recovery mode, followed by calling
* {@link Parser#notifyErrorListeners}.</p>
*
* @param recognizer the parser instance
*/
virtual void reportMissingToken(Parser *recognizer);
/// <summary>
/// {@inheritDoc}
/// <p/>
/// The default implementation attempts to recover from the mismatched input
/// by using single token insertion and deletion as described below. If the
/// recovery attempt fails, this method throws an
/// <seealso cref="InputMismatchException"/>.
/// <p/>
/// <strong>EXTRA TOKEN</strong> (single token deletion)
/// <p/>
/// {@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
/// right token, however, then assume {@code LA(1)} is some extra spurious
/// token and delete it. Then consume and return the next token (which was
/// the {@code LA(2)} token) as the successful result of the match operation.
/// <p/>
/// This recovery strategy is implemented by <seealso cref="#singleTokenDeletion"/>.
/// <p/>
/// <strong>MISSING TOKEN</strong> (single token insertion)
/// <p/>
/// If current token (at {@code LA(1)}) is consistent with what could come
/// after the expected {@code LA(1)} token, then assume the token is missing
/// and use the parser's <seealso cref="TokenFactory"/> to create it on the fly. The
/// "insertion" is performed by returning the created token as the successful
/// result of the match operation.
/// <p/>
/// This recovery strategy is implemented by <seealso cref="#singleTokenInsertion"/>.
/// <p/>
/// <strong>EXAMPLE</strong>
/// <p/>
/// For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When
/// the parser returns from the nested call to {@code expr}, it will have
/// call chain:
///
/// <pre>
/// stat -> expr -> atom
/// </pre>
///
/// and it will be trying to match the {@code ')'} at this point in the
/// derivation:
///
/// <pre>
/// => ID '=' '(' INT ')' ('+' atom)* ';'
/// ^
/// </pre>
///
/// The attempt to match {@code ')'} will fail when it sees {@code ';'} and
/// call <seealso cref="#recoverInline"/>. To recover, it sees that {@code LA(1)==';'}
/// is in the set of tokens that can follow the {@code ')'} token reference
/// in rule {@code atom}. It can assume that you forgot the {@code ')'}.
/// </summary>
public:
/**
* {@inheritDoc}
*
* <p>The default implementation attempts to recover from the mismatched input
* by using single token insertion and deletion as described below. If the
* recovery attempt fails, this method throws an
* {@link InputMismatchException}.</p>
*
* <p><strong>EXTRA TOKEN</strong> (single token deletion)</p>
*
* <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
* right token, however, then assume {@code LA(1)} is some extra spurious
* token and delete it. Then consume and return the next token (which was
* the {@code LA(2)} token) as the successful result of the match operation.</p>
*
* <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p>
*
* <p><strong>MISSING TOKEN</strong> (single token insertion)</p>
*
* <p>If current token (at {@code LA(1)}) is consistent with what could come
* after the expected {@code LA(1)} token, then assume the token is missing
* and use the parser's {@link TokenFactory} to create it on the fly. The
* "insertion" is performed by returning the created token as the successful
* result of the match operation.</p>
*
* <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p>
*
* <p><strong>EXAMPLE</strong></p>
*
* <p>For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When
* the parser returns from the nested call to {@code expr}, it will have
* call chain:</p>
*
* <pre>
* stat &rarr; expr &rarr; atom
* </pre>
*
* and it will be trying to match the {@code ')'} at this point in the
* derivation:
*
* <pre>
* =&gt; ID '=' '(' INT ')' ('+' atom)* ';'
* ^
* </pre>
*
* The attempt to match {@code ')'} will fail when it sees {@code ';'} and
* call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'}
* is in the set of tokens that can follow the {@code ')'} token reference
* in rule {@code atom}. It can assume that you forgot the {@code ')'}.
*/
virtual Ref<Token> recoverInline(Parser *recognizer) override;
/// <summary>

View File

@ -32,8 +32,7 @@
using namespace org::antlr::v4::runtime;
RuntimeException::RuntimeException(const std::string &msg)
: std::exception(), _message(msg) {
RuntimeException::RuntimeException(const std::string &msg) : std::exception(), _message(msg) {
}
const char* RuntimeException::what() const NOEXCEPT {

View File

@ -33,4 +33,4 @@
using namespace org::antlr::v4::runtime;
const std::wstring IntStream::UNKNOWN_SOURCE_NAME = L"<unknown>";
const std::string IntStream::UNKNOWN_SOURCE_NAME = "<unknown>";

View File

@ -63,7 +63,7 @@ namespace runtime {
/// The value returned by <seealso cref="#getSourceName"/> when the actual name of the
/// underlying source is not known.
/// </summary>
static const std::wstring UNKNOWN_SOURCE_NAME;
static const std::string UNKNOWN_SOURCE_NAME;
/// <summary>
/// Consumes the current symbol in the stream. This method has the following

View File

@ -33,10 +33,13 @@
using namespace org::antlr::v4::runtime;
InterpreterRuleContext::InterpreterRuleContext() {
}
InterpreterRuleContext::InterpreterRuleContext(std::weak_ptr<ParserRuleContext> parent, int invokingStateNumber, ssize_t ruleIndex)
: ParserRuleContext(parent, invokingStateNumber), ruleIndex(ruleIndex) {
: ParserRuleContext(parent, invokingStateNumber), _ruleIndex(ruleIndex) {
}
ssize_t InterpreterRuleContext::getRuleIndex() const {
return ruleIndex;
return _ruleIndex;
}

View File

@ -38,20 +38,38 @@ namespace antlr {
namespace v4 {
namespace runtime {
/// <summary>
/// This object is used by the ParserInterpreter and is the same as a regular
/// ParserRuleContext except that we need to track the rule index of the
/// current context so that we can build parse trees.
/// </summary>
/**
* This class extends {@link ParserRuleContext} by allowing the value of
* {@link #getRuleIndex} to be explicitly set for the context.
*
* <p>
* {@link ParserRuleContext} does not include field storage for the rule index
* since the context classes created by the code generator override the
* {@link #getRuleIndex} method to return the correct value for that context.
* Since the parser interpreter does not use the context classes generated for a
* parser, this class (with slightly more memory overhead per node) is used to
* provide equivalent functionality.</p>
*/
class InterpreterRuleContext : public ParserRuleContext {
private:
const ssize_t ruleIndex;
public:
InterpreterRuleContext();
/**
* Constructs a new {@link InterpreterRuleContext} with the specified
* parent, invoking state, and rule index.
*
* @param parent The parent context.
* @param invokingStateNumber The invoking state number.
* @param ruleIndex The rule index for the current context.
*/
InterpreterRuleContext(std::weak_ptr<ParserRuleContext> parent, int invokingStateNumber, ssize_t ruleIndex);
virtual ssize_t getRuleIndex() const override;
};
protected:
/** This is the backing field for {@link #getRuleIndex}. */
const ssize_t _ruleIndex = -1;
};
} // namespace runtime
} // namespace v4

View File

@ -34,6 +34,7 @@
#include "DFA.h"
#include "EmptyPredictionContext.h"
#include "Exceptions.h"
#include "VocabularyImpl.h"
#include "LexerInterpreter.h"
@ -42,13 +43,23 @@ using namespace org::antlr::v4::runtime;
LexerInterpreter::LexerInterpreter(const std::wstring &grammarFileName, const std::vector<std::wstring> &tokenNames,
const std::vector<std::wstring> &ruleNames, const std::vector<std::wstring> &modeNames, const atn::ATN &atn,
CharStream *input)
: Lexer(input), grammarFileName(grammarFileName), _tokenNames(tokenNames), _ruleNames(ruleNames), _modeNames(modeNames),
_atn(atn) {
: LexerInterpreter(grammarFileName, dfa::VocabularyImpl::fromTokenNames(tokenNames), ruleNames, modeNames, atn, input) {
}
LexerInterpreter::LexerInterpreter(const std::wstring &grammarFileName, Ref<dfa::Vocabulary> vocabulary,
const std::vector<std::wstring> &ruleNames, const std::vector<std::wstring> &modeNames, const atn::ATN &atn,
CharStream *input)
: Lexer(input), _grammarFileName(grammarFileName), _atn(atn), _ruleNames(ruleNames), _modeNames(modeNames),
_vocabulary(vocabulary) {
if (_atn.grammarType != atn::ATNType::LEXER) {
throw IllegalArgumentException("The ATN must be a lexer ATN.");
}
for (size_t i = 0; i < atn.maxTokenType; i++) {
_tokenNames.push_back(vocabulary->getDisplayName(i));
}
_sharedContextCache = std::make_shared<atn::PredictionContextCache>();
for (size_t i = 0; i < (size_t)atn.getNumberOfDecisions(); ++i) {
_decisionToDFA.push_back(dfa::DFA(_atn.getDecisionState((int)i), (int)i));
@ -66,7 +77,7 @@ const atn::ATN& LexerInterpreter::getATN() const {
}
std::wstring LexerInterpreter::getGrammarFileName() const {
return grammarFileName;
return _grammarFileName;
}
const std::vector<std::wstring>& LexerInterpreter::getTokenNames() const {
@ -80,3 +91,11 @@ const std::vector<std::wstring>& LexerInterpreter::getRuleNames() const {
const std::vector<std::wstring>& LexerInterpreter::getModeNames() const {
return _modeNames;
}
Ref<dfa::Vocabulary> LexerInterpreter::getVocabulary() const {
  // Prefer the vocabulary supplied at construction time; otherwise fall back
  // to the base class' token-name based default.
  if (_vocabulary == nullptr) {
    return Lexer::getVocabulary();
  }
  return _vocabulary;
}

View File

@ -40,9 +40,14 @@ namespace runtime {
class LexerInterpreter : public Lexer {
public:
// @deprecated
LexerInterpreter(const std::wstring &grammarFileName, const std::vector<std::wstring> &tokenNames,
const std::vector<std::wstring> &ruleNames, const std::vector<std::wstring> &modeNames,
const atn::ATN &atn, CharStream *input);
LexerInterpreter(const std::wstring &grammarFileName, Ref<dfa::Vocabulary> vocabulary,
const std::vector<std::wstring> &ruleNames, const std::vector<std::wstring> &modeNames,
const atn::ATN &atn, CharStream *input);
~LexerInterpreter();
virtual const atn::ATN& getATN() const override;
@ -51,16 +56,22 @@ namespace runtime {
virtual const std::vector<std::wstring>& getRuleNames() const override;
virtual const std::vector<std::wstring>& getModeNames() const override;
Ref<dfa::Vocabulary> getVocabulary() const;
protected:
const std::wstring grammarFileName;
const std::wstring _grammarFileName;
const atn::ATN &_atn;
const std::vector<std::wstring> &_tokenNames;
// @deprecated
std::vector<std::wstring> _tokenNames;
const std::vector<std::wstring> &_ruleNames;
const std::vector<std::wstring> &_modeNames;
std::vector<dfa::DFA> _decisionToDFA;
Ref<atn::PredictionContextCache> _sharedContextCache;
private:
Ref<dfa::Vocabulary> _vocabulary;
};
} // namespace runtime

View File

@ -45,40 +45,36 @@
#include "ATN.h"
#include "RuleStopState.h"
#include "Token.h"
#include "VocabularyImpl.h"
#include "InputMismatchException.h"
#include "CommonToken.h"
#include "CPPUtils.h"
#include "ParserInterpreter.h"
using namespace org::antlr::v4::runtime;
using namespace org::antlr::v4::runtime::atn;
using namespace antlrcpp;
ParserInterpreter::ParserInterpreter(const std::wstring &grammarFileName, const std::vector<std::wstring>& tokenNames,
const std::vector<std::wstring>& ruleNames, const atn::ATN &atn, TokenStream *input)
: Parser(input), _grammarFileName(grammarFileName), _tokenNames(tokenNames), _atn(atn), _ruleNames(ruleNames) {
: ParserInterpreter(grammarFileName, dfa::VocabularyImpl::fromTokenNames(tokenNames), ruleNames, atn, input) {
}
for (int i = 0; i < _atn.getNumberOfDecisions(); i++) {
_decisionToDFA.push_back(dfa::DFA(_atn.getDecisionState(i), i));
ParserInterpreter::ParserInterpreter(const std::wstring &grammarFileName, Ref<dfa::Vocabulary> vocabulary,
const std::vector<std::wstring> &ruleNames, const atn::ATN &atn, TokenStream *input)
: Parser(input), _grammarFileName(grammarFileName), _atn(atn), _ruleNames(ruleNames), _vocabulary(vocabulary) {
_sharedContextCache = std::make_shared<atn::PredictionContextCache>();
for (size_t i = 0; i < atn.maxTokenType; ++i) {
_tokenNames.push_back(vocabulary->getDisplayName(i));
}
// identify the ATN states where pushNewRecursionContext must be called
for (auto state : _atn.states) {
if (!is<atn::StarLoopEntryState*>(state)) {
continue;
}
atn::RuleStartState *ruleStartState = _atn.ruleToStartState[(size_t)state->ruleIndex];
if (!ruleStartState->isLeftRecursiveRule) {
continue;
}
atn::ATNState *maybeLoopEndState = state->transition(state->getNumberOfTransitions() - 1)->target;
if (!is<atn::LoopEndState*>(maybeLoopEndState)) {
continue;
}
if (maybeLoopEndState->epsilonOnlyTransitions && is<atn::RuleStopState*>(maybeLoopEndState->transition(0)->target)) {
_pushRecursionContextStates.set((size_t)state->stateNumber);
}
// init decision DFA
for (int i = 0; i < atn.getNumberOfDecisions(); ++i) {
atn::DecisionState *decisionState = atn.getDecisionState(i);
_decisionToDFA.push_back(dfa::DFA(decisionState, i));
}
// get atn simulator that knows how to do predictions
@ -89,6 +85,12 @@ ParserInterpreter::~ParserInterpreter() {
delete _interpreter;
}
void ParserInterpreter::reset() {
  Parser::reset();

  // Clear the one-shot decision-override latch so a fresh parse starts clean.
  _overrideDecisionRoot = nullptr;
  _overrideDecisionReached = false;
}
const atn::ATN& ParserInterpreter::getATN() const {
return _atn;
}
@ -97,6 +99,10 @@ const std::vector<std::wstring>& ParserInterpreter::getTokenNames() const {
return _tokenNames;
}
Ref<dfa::Vocabulary> ParserInterpreter::getVocabulary() const {
return _vocabulary;
}
const std::vector<std::wstring>& ParserInterpreter::getRuleNames() const {
return _ruleNames;
}
@ -108,13 +114,12 @@ std::wstring ParserInterpreter::getGrammarFileName() const {
Ref<ParserRuleContext> ParserInterpreter::parse(int startRuleIndex) {
atn::RuleStartState *startRuleStartState = _atn.ruleToStartState[(size_t)startRuleIndex];
Ref<InterpreterRuleContext> rootContext =
std::make_shared<InterpreterRuleContext>(std::weak_ptr<ParserRuleContext>(), atn::ATNState::INVALID_STATE_NUMBER, startRuleIndex);
_rootContext = createInterpreterRuleContext(std::weak_ptr<ParserRuleContext>(), atn::ATNState::INVALID_STATE_NUMBER, startRuleIndex);
if (startRuleStartState->isLeftRecursiveRule) {
enterRecursionRule(rootContext, startRuleStartState->stateNumber, startRuleIndex, 0);
enterRecursionRule(_rootContext, startRuleStartState->stateNumber, startRuleIndex, 0);
} else {
enterRule(rootContext, startRuleStartState->stateNumber, startRuleIndex);
enterRule(_rootContext, startRuleStartState->stateNumber, startRuleIndex);
}
while (true) {
@ -123,15 +128,32 @@ Ref<ParserRuleContext> ParserInterpreter::parse(int startRuleIndex) {
case atn::ATNState::RULE_STOP :
// pop; return from rule
if (_ctx->isEmpty()) {
exitRule();
return rootContext;
if (startRuleStartState->isLeftRecursiveRule) {
Ref<ParserRuleContext> result = _ctx;
auto parentContext = _parentContextStack.top();
_parentContextStack.pop();
unrollRecursionContexts(parentContext.first);
return result;
} else {
exitRule();
return _rootContext;
}
}
visitRuleStopState(p);
break;
default :
visitState(p);
try {
visitState(p);
}
catch (RecognitionException &e) {
setState(_atn.ruleToStopState[p->ruleIndex]->stateNumber);
getContext()->exception = std::make_exception_ptr(e);
getErrorHandler()->reportError(this, e);
recover(e);
}
break;
}
}
@ -142,26 +164,41 @@ void ParserInterpreter::enterRecursionRule(Ref<ParserRuleContext> localctx, int
Parser::enterRecursionRule(localctx, state, ruleIndex, precedence);
}
atn::ATNState *ParserInterpreter::getATNState() {
void ParserInterpreter::addDecisionOverride(int decision, int tokenIndex, int forcedAlt) {
  // Record the single supported (decision, input index) -> forced-alternative
  // override; it takes effect when visitDecisionState() reaches that decision
  // at that token index.
  _overrideDecisionAlt = forcedAlt;
  _overrideDecisionInputIndex = tokenIndex;
  _overrideDecision = decision;
}
Ref<InterpreterRuleContext> ParserInterpreter::getOverrideDecisionRoot() const {
  // Context that was current when the forced decision fired (empty if the
  // override never triggered).
  Ref<InterpreterRuleContext> root = _overrideDecisionRoot;
  return root;
}
Ref<InterpreterRuleContext> ParserInterpreter::getRootContext() {
  // Root of the current parse; remains accessible even if the parser bails out.
  Ref<InterpreterRuleContext> root = _rootContext;
  return root;
}
atn::ATNState* ParserInterpreter::getATNState() {
  // Map the parser's current state number to its ATN state object.
  size_t stateNumber = (size_t)getState();
  return _atn.states[stateNumber];
}
void ParserInterpreter::visitState(atn::ATNState *p) {
int edge;
if (p->getNumberOfTransitions() > 1) {
edge = getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(_input, ((atn::DecisionState*)p)->decision, _ctx);
} else {
edge = 1;
int predictedAlt = 1;
if (is<DecisionState *>(p)) {
predictedAlt = visitDecisionState(dynamic_cast<DecisionState *>(p));
}
atn::Transition *transition = p->transition((size_t)edge - 1);
atn::Transition *transition = p->transition(predictedAlt - 1);
switch (transition->getSerializationType()) {
case atn::Transition::EPSILON:
if (_pushRecursionContextStates[(size_t)p->stateNumber] == 1 && is<atn::LoopEndState*>(transition->target)) {
Ref<InterpreterRuleContext> ruleContext = std::make_shared<InterpreterRuleContext>(_parentContextStack.top().first,
_parentContextStack.top().second, _ctx->getRuleIndex());
pushNewRecursionContext(ruleContext, _atn.ruleToStartState[(size_t)p->ruleIndex]->stateNumber,
(int)ruleContext->getRuleIndex());
if (p->getStateType() == ATNState::STAR_LOOP_ENTRY &&
(dynamic_cast<StarLoopEntryState *>(p))->isPrecedenceDecision &&
!is<LoopEndState *>(transition->target)) {
// We are at the start of a left recursive rule's (...)* loop
// and we're not taking the exit branch of loop.
Ref<InterpreterRuleContext> localctx = createInterpreterRuleContext(_parentContextStack.top().first,
_parentContextStack.top().second, (int)_ctx->getRuleIndex());
pushNewRecursionContext(localctx, _atn.ruleToStartState[p->ruleIndex]->stateNumber, (int)_ctx->getRuleIndex());
}
break;
@ -173,7 +210,7 @@ void ParserInterpreter::visitState(atn::ATNState *p) {
case atn::Transition::SET:
case atn::Transition::NOT_SET:
if (!transition->matches((int)_input->LA(1), Token::MIN_USER_TOKEN_TYPE, 65535)) {
_errHandler->recoverInline(this);
recoverInline();
}
matchWildcard();
break;
@ -186,11 +223,11 @@ void ParserInterpreter::visitState(atn::ATNState *p) {
{
atn::RuleStartState *ruleStartState = (atn::RuleStartState*)(transition->target);
int ruleIndex = ruleStartState->ruleIndex;
Ref<InterpreterRuleContext> ruleContext = std::make_shared<InterpreterRuleContext>(_ctx, p->stateNumber, ruleIndex);
Ref<InterpreterRuleContext> newctx = createInterpreterRuleContext(_ctx, p->stateNumber, ruleIndex);
if (ruleStartState->isLeftRecursiveRule) {
enterRecursionRule(ruleContext, ruleStartState->stateNumber, ruleIndex, ((atn::RuleTransition*)(transition))->precedence);
enterRecursionRule(newctx, ruleStartState->stateNumber, ruleIndex, ((atn::RuleTransition*)(transition))->precedence);
} else {
enterRule(_ctx, transition->target->stateNumber, ruleIndex);
enterRule(newctx, transition->target->stateNumber, ruleIndex);
}
}
break;
@ -226,6 +263,26 @@ void ParserInterpreter::visitState(atn::ATNState *p) {
setState(transition->target->stateNumber);
}
int ParserInterpreter::visitDecisionState(DecisionState *p) {
  // A state with at most one outgoing transition requires no prediction:
  // alternative 1 is the only choice.
  if (p->getNumberOfTransitions() <= 1) {
    return 1;
  }

  getErrorHandler()->sync(this);
  int decision = p->decision;

  // If a one-shot override is registered for exactly this decision at the
  // current input position, take the forced alternative instead of predicting.
  bool takeOverride = decision == _overrideDecision &&
    (int)_input->index() == _overrideDecisionInputIndex && !_overrideDecisionReached;
  if (takeOverride) {
    _overrideDecisionReached = true;  // latch: only override once per parse
    return _overrideDecisionAlt;
  }

  return getInterpreter<ParserATNSimulator>()->adaptivePredict(_input, decision, _ctx);
}
Ref<InterpreterRuleContext> ParserInterpreter::createInterpreterRuleContext(std::weak_ptr<ParserRuleContext> parent,
  int invokingStateNumber, int ruleIndex) {
  // Central creation point for the rule contexts used by this interpreter.
  Ref<InterpreterRuleContext> ctx = std::make_shared<InterpreterRuleContext>(parent, invokingStateNumber, ruleIndex);
  return ctx;
}
void ParserInterpreter::visitRuleStopState(atn::ATNState *p) {
atn::RuleStartState *ruleStartState = _atn.ruleToStartState[(size_t)p->ruleIndex];
if (ruleStartState->isLeftRecursiveRule) {
@ -241,3 +298,32 @@ void ParserInterpreter::visitRuleStopState(atn::ATNState *p) {
atn::RuleTransition *ruleTransition = static_cast<atn::RuleTransition*>(_atn.states[(size_t)getState()]->transition(0));
setState(ruleTransition->followState->stateNumber);
}
// Rely on the installed error strategy to recover; if recovery consumed no
// input, synthesize an error token and add it as an error node so the
// mismatch is still visible in the parse tree.
void ParserInterpreter::recover(RecognitionException &e) {
// Remember where we were so we can detect whether recovery consumed anything.
size_t i = _input->index();
getErrorHandler()->recover(this, e);
if (_input->index() == i) {
// no input consumed, better add an error node
if (is<InputMismatchException>(e)) {
InputMismatchException &ime = (InputMismatchException&)e;
Ref<Token> tok = e.getOffendingToken();
// Use one of the token types the parser actually expected for the fake token.
int expectedTokenType = ime.getExpectedTokens().getMinElement(); // get any element
auto errToken = getTokenFactory()->create({ tok->getTokenSource(), tok->getTokenSource()->getInputStream() },
expectedTokenType, tok->getText(), Token::DEFAULT_CHANNEL, -1, -1, // invalid start/stop
tok->getLine(), tok->getCharPositionInLine());
_ctx->addErrorNode(std::dynamic_pointer_cast<Token>(errToken));
}
else { // NoViableAlt
// No expected-token info available; mark the synthesized token as invalid.
Ref<Token> tok = e.getOffendingToken();
auto errToken = getTokenFactory()->create({ tok->getTokenSource(), tok->getTokenSource()->getInputStream() },
Token::INVALID_TYPE, tok->getText(), Token::DEFAULT_CHANNEL, -1, -1, // invalid start/stop
tok->getLine(), tok->getCharPositionInLine());
_ctx->addErrorNode(std::dynamic_pointer_cast<Token>(errToken));
}
}
}
Ref<Token> ParserInterpreter::recoverInline() {
  // Delegate single-token recovery to the installed error strategy.
  Ref<Token> matched = _errHandler->recoverInline(this);
  return matched;
}

View File

@ -57,12 +57,22 @@ namespace runtime {
/// </summary>
class ParserInterpreter : public Parser {
public:
// @deprecated
ParserInterpreter(const std::wstring &grammarFileName, const std::vector<std::wstring>& tokenNames,
const std::vector<std::wstring>& ruleNames, const atn::ATN &atn, TokenStream *input);
ParserInterpreter(const std::wstring &grammarFileName, Ref<dfa::Vocabulary> vocabulary,
const std::vector<std::wstring> &ruleNames, const atn::ATN &atn, TokenStream *input);
~ParserInterpreter();
virtual void reset() override;
virtual const atn::ATN& getATN() const override;
// @deprecated
virtual const std::vector<std::wstring>& getTokenNames() const override;
Ref<dfa::Vocabulary> getVocabulary() const;
virtual const std::vector<std::wstring>& getRuleNames() const override;
virtual std::wstring getGrammarFileName() const override;
@ -71,23 +81,127 @@ namespace runtime {
virtual void enterRecursionRule(Ref<ParserRuleContext> localctx, int state, int ruleIndex, int precedence) override;
/** Override this parser interpreter's normal decision-making process
* at a particular decision and input token index. Instead of
* allowing the adaptive prediction mechanism to choose the
* first alternative within a block that leads to a successful parse,
* force it to take the alternative, 1..n for n alternatives.
*
* As an implementation limitation right now, you can only specify one
* override. This is sufficient to allow construction of different
* parse trees for ambiguous input. It means re-parsing the entire input
* in general because you're never sure where an ambiguous sequence would
* live in the various parse trees. For example, in one interpretation,
* an ambiguous input sequence would be matched completely in expression
* but in another it could match all the way back to the root.
*
* s : e '!'? ;
* e : ID
* | ID '!'
* ;
*
* Here, x! can be matched as (s (e ID) !) or (s (e ID !)). In the first
* case, the ambiguous sequence is fully contained only by the root.
* In the second case, the ambiguous sequence is fully contained within just
* e, as in: (e ID !).
*
* Rather than trying to optimize this and make
* some intelligent decisions for optimization purposes, I settled on
* just re-parsing the whole input and then using
* {@link Trees#getRootOfSubtreeEnclosingRegion} to find the minimal
* subtree that contains the ambiguous sequence. I originally tried to
* record the call stack at the point the parser detected an ambiguity but
* left recursive rules create a parse tree stack that does not reflect
* the actual call stack. That impedance mismatch was enough to make
* it challenging to restart the parser at a deeply nested rule
* invocation.
*
* Only parser interpreters can override decisions so as to avoid inserting
* override checking code in the critical ALL(*) prediction execution path.
*
* @since 4.5.1
*/
void addDecisionOverride(int decision, int tokenIndex, int forcedAlt);
Ref<InterpreterRuleContext> getOverrideDecisionRoot() const;
/** Return the root of the parse, which can be useful if the parser
* bails out. You still can access the top node. Note that,
* because of the way left recursive rules add children, it's possible
* that the root will not have any children if the start rule immediately
* called a left recursive rule that fails.
*
* @since 4.5.1
*/
Ref<InterpreterRuleContext> getRootContext();
protected:
const std::wstring _grammarFileName;
std::vector<std::wstring> _tokenNames;
const atn::ATN &_atn;
std::vector<std::wstring> _ruleNames;
antlrcpp::BitSet _pushRecursionContextStates;
std::vector<dfa::DFA> _decisionToDFA; // not shared like it is for generated parsers
Ref<atn::PredictionContextCache> _sharedContextCache;
/** This stack corresponds to the _parentctx, _parentState pair of locals
* that would exist on call stack frames with a recursive descent parser;
* in the generated function for a left-recursive rule you'd see:
*
* private EContext e(int _p) throws RecognitionException {
* ParserRuleContext _parentctx = _ctx; // Pair.a
* int _parentState = getState(); // Pair.b
* ...
* }
*
* Those values are used to create new recursive rule invocation contexts
* associated with left operand of an alt like "expr '*' expr".
*/
std::stack<std::pair<Ref<ParserRuleContext>, int>> _parentContextStack;
/** We need a map from (decision,inputIndex)->forced alt for computing ambiguous
* parse trees. For now, we allow exactly one override.
*/
int _overrideDecision = -1;
int _overrideDecisionInputIndex = -1;
int _overrideDecisionAlt = -1;
bool _overrideDecisionReached = false; // latch and only override once; error might trigger infinite loop
/** What is the current context when we override a decision? This tells
* us what the root of the parse tree is when using override
* for an ambiguity/lookahead check.
*/
Ref<InterpreterRuleContext> _overrideDecisionRoot;
Ref<InterpreterRuleContext> _rootContext;
virtual atn::ATNState *getATNState();
virtual void visitState(atn::ATNState *p);
/** Method visitDecisionState() is called when the interpreter reaches
* a decision state (instance of DecisionState). It gives an opportunity
* for subclasses to track interesting things.
*/
int visitDecisionState(atn::DecisionState *p);
/** Provide simple "factory" for InterpreterRuleContext's.
* @since 4.5.1
*/
Ref<InterpreterRuleContext> createInterpreterRuleContext(std::weak_ptr<ParserRuleContext> parent, int invokingStateNumber,
int ruleIndex);
virtual void visitRuleStopState(atn::ATNState *p);
/** Rely on the error handler for this parser but, if no tokens are consumed
* to recover, add an error node. Otherwise, nothing is seen in the parse
* tree.
*/
void recover(RecognitionException &e);
Ref<Token> recoverInline();
private:
Ref<dfa::Vocabulary> _vocabulary;
};
} // namespace runtime

View File

@ -73,6 +73,19 @@ namespace runtime {
/// </summary>
static const size_t HIDDEN_CHANNEL = 1;
/**
* This is the minimum constant value which can be assigned to a
* user-defined token channel.
*
* <p>
* The non-negative numbers less than {@link #MIN_USER_CHANNEL_VALUE} are
* assigned to the predefined channels {@link #DEFAULT_CHANNEL} and
* {@link #HIDDEN_CHANNEL}.</p>
*
* @see Token#getChannel()
*/
static const size_t MIN_USER_CHANNEL_VALUE = 2;
/// <summary>
/// Get the text of the token.
/// </summary>

View File

@ -269,12 +269,16 @@ std::wstring TokenStreamRewriter::getText() {
return getText(DEFAULT_PROGRAM_NAME, Interval(0, (int)tokens->size() - 1));
}
std::wstring TokenStreamRewriter::getText(std::wstring programName) {
return getText(programName, Interval(0, (int)tokens->size() - 1));
}
std::wstring TokenStreamRewriter::getText(const Interval &interval) {
return getText(DEFAULT_PROGRAM_NAME, interval);
}
std::wstring TokenStreamRewriter::getText(const std::wstring &programName, const Interval &interval) {
std::vector<TokenStreamRewriter::RewriteOperation*> rewrites = _programs.at(programName);
std::vector<TokenStreamRewriter::RewriteOperation*> rewrites = _programs[programName];
int start = interval.a;
int stop = interval.b;

View File

@ -36,68 +36,82 @@ namespace antlr {
namespace v4 {
namespace runtime {
/// <summary>
/// Useful for rewriting out a buffered input token stream after doing some
/// augmentation or other manipulations on it.
///
/// You can insert stuff, replace, and delete chunks. Note that the
/// operations are done lazily--only if you convert the buffer to a
/// String with getText(). This is very efficient because you are not moving
/// data around all the time. As the buffer of tokens is converted to strings,
/// the getText() method(s) scan the input token stream and check
/// to see if there is an operation at the current index.
/// If so, the operation is done and then normal String
/// rendering continues on the buffer. This is like having multiple Turing
/// machine instruction streams (programs) operating on a single input tape. :)
///
/// This rewriter makes no modifications to the token stream. It does not
/// ask the stream to fill itself up nor does it advance the input cursor.
/// The token stream index() will return the same value before and after
/// any getText() call.
///
/// The rewriter only works on tokens that you have in the buffer and
/// ignores the current input cursor. If you are buffering tokens on-demand,
/// calling getText() halfway through the input will only do rewrites
/// for those tokens in the first half of the file.
///
/// Since the operations are done lazily at getText-time, operations do not
/// screw up the token index values. That is, an insert operation at token
/// index i does not change the index values for tokens i+1..n-1.
///
/// Because operations never actually alter the buffer, you may always get
/// the original token stream back without undoing anything. Since
/// the instructions are queued up, you can easily simulate transactions and
/// roll back any changes if there is an error just by removing instructions.
/// For example,
///
/// CharStream input = new ANTLRFileStream("input");
/// TLexer lex = new TLexer(input);
/// CommonTokenStream tokens = new CommonTokenStream(lex);
/// T parser = new T(tokens);
/// TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
/// parser.startRule();
///
/// Then in the rules, you can execute (assuming rewriter is visible):
/// Token t,u;
/// ...
/// rewriter.insertAfter(t, "text to put after t");}
/// rewriter.insertAfter(u, "text after u");}
/// System.out.println(tokens.toString());
///
/// You can also have multiple "instruction streams" and get multiple
/// rewrites from a single pass over the input. Just name the instruction
/// streams and use that name again when printing the buffer. This could be
/// useful for generating a C file and also its header file--all from the
/// same buffer:
///
/// tokens.insertAfter("pass1", t, "text to put after t");}
/// tokens.insertAfter("pass2", u, "text after u");}
/// System.out.println(tokens.toString("pass1"));
/// System.out.println(tokens.toString("pass2"));
///
/// If you don't use named rewrite streams, a "default" stream is used as
/// the first example shows.
/// </summary>
/**
* Useful for rewriting out a buffered input token stream after doing some
* augmentation or other manipulations on it.
*
* <p>
* You can insert stuff, replace, and delete chunks. Note that the operations
* are done lazily--only if you convert the buffer to a {@link String} with
* {@link TokenStream#getText()}. This is very efficient because you are not
* moving data around all the time. As the buffer of tokens is converted to
* strings, the {@link #getText()} method(s) scan the input token stream and
* check to see if there is an operation at the current index. If so, the
* operation is done and then normal {@link String} rendering continues on the
* buffer. This is like having multiple Turing machine instruction streams
* (programs) operating on a single input tape. :)</p>
*
* <p>
* This rewriter makes no modifications to the token stream. It does not ask the
* stream to fill itself up nor does it advance the input cursor. The token
* stream {@link TokenStream#index()} will return the same value before and
* after any {@link #getText()} call.</p>
*
* <p>
* The rewriter only works on tokens that you have in the buffer and ignores the
* current input cursor. If you are buffering tokens on-demand, calling
* {@link #getText()} halfway through the input will only do rewrites for those
* tokens in the first half of the file.</p>
*
* <p>
* Since the operations are done lazily at {@link #getText}-time, operations do
* not screw up the token index values. That is, an insert operation at token
* index {@code i} does not change the index values for tokens
* {@code i}+1..n-1.</p>
*
* <p>
* Because operations never actually alter the buffer, you may always get the
* original token stream back without undoing anything. Since the instructions
* are queued up, you can easily simulate transactions and roll back any changes
* if there is an error just by removing instructions. For example,</p>
*
* <pre>
* CharStream input = new ANTLRFileStream("input");
* TLexer lex = new TLexer(input);
* CommonTokenStream tokens = new CommonTokenStream(lex);
* T parser = new T(tokens);
* TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
* parser.startRule();
* </pre>
*
* <p>
* Then in the rules, you can execute (assuming rewriter is visible):</p>
*
* <pre>
* Token t,u;
* ...
* rewriter.insertAfter(t, "text to put after t");}
* rewriter.insertAfter(u, "text after u");}
* System.out.println(rewriter.getText());
* </pre>
*
* <p>
* You can also have multiple "instruction streams" and get multiple rewrites
* from a single pass over the input. Just name the instruction streams and use
* that name again when printing the buffer. This could be useful for generating
* a C file and also its header file--all from the same buffer:</p>
*
* <pre>
* rewriter.insertAfter("pass1", t, "text to put after t");}
* rewriter.insertAfter("pass2", u, "text after u");}
* System.out.println(rewriter.getText("pass1"));
* System.out.println(rewriter.getText("pass2"));
* </pre>
*
* <p>
* If you don't use named rewrite streams, a "default" stream is used as the
* first example shows.</p>
*/
class TokenStreamRewriter {
public:
static const std::wstring DEFAULT_PROGRAM_NAME;
@ -153,6 +167,11 @@ namespace runtime {
/// instructions given to this rewriter.
virtual std::wstring getText();
/** Return the text from the original tokens altered per the
* instructions given to this rewriter in programName.
*/
std::wstring getText(std::wstring programName);
/// <summary>
/// Return the text associated with the tokens in the interval from the
/// original token stream but with the alterations given to this rewriter.

View File

@ -188,6 +188,10 @@ size_t UnbufferedCharStream::size() {
}
std::string UnbufferedCharStream::getSourceName() const {
  // Fall back to the shared placeholder when no explicit name was set.
  if (!name.empty()) {
    return name;
  }
  return UNKNOWN_SOURCE_NAME;
}

View File

@ -141,7 +141,7 @@ int ATN::defineDecisionState(DecisionState *s) {
DecisionState *ATN::getDecisionState(int decision) const {
if (!decisionToState.empty()) {
return decisionToState.at((size_t)decision);
return decisionToState[(size_t)decision];
}
return nullptr;
}

View File

@ -0,0 +1,32 @@
/*
* [The "BSD license"]
* Copyright (c) 2016 Mike Lischke
* Copyright (c) 2014 Terence Parr
* Copyright (c) 2014 Dan McLaughlin
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "Predicate.h"

View File

@ -0,0 +1,50 @@
/*
* [The "BSD license"]
* Copyright (c) 2016 Mike Lischke
* Copyright (c) 2014 Terence Parr
* Copyright (c) 2014 Dan McLaughlin
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
namespace org {
namespace antlr {
namespace v4 {
namespace runtime {
namespace misc {

  /// Generic single-argument predicate interface, mirroring Java's
  /// java.util.function.Predicate. Implementations decide whether a given
  /// element satisfies some condition.
  template<typename T>
  class Predicate {
  public:
    /// Virtual destructor: this is an abstract base meant to be used
    /// polymorphically, so deleting through a Predicate<T>* must be safe.
    virtual ~Predicate() = default;

    /// Evaluates this predicate on the given argument.
    /// @param t the element to test.
    /// @returns true if the argument matches the predicate.
    virtual bool test(Ref<T> t) = 0;
  };

} // namespace misc
} // namespace runtime
} // namespace v4
} // namespace antlr
} // namespace org

View File

@ -97,6 +97,7 @@ namespace org {
class MurmurHash;
class ParseCancellationException;
class Utils;
template <typename T> class Predicate;
}
namespace atn {
class ATN;

View File

@ -53,17 +53,19 @@ namespace tree {
return tree->accept(this);
}
/// <summary>
/// {@inheritDoc}
/// <p/>
/// The default implementation initializes the aggregate result to
/// <seealso cref="#defaultResult defaultResult()"/>. Before visiting each child, it
/// calls <seealso cref="#shouldVisitNextChild shouldVisitNextChild"/>; if the result
/// is {@code false} no more children are visited and the current aggregate
/// result is returned. After visiting a child, the aggregate result is
/// updated by calling <seealso cref="#aggregateResult aggregateResult"/> with the
/// previous aggregate result and the result of visiting the child.
/// </summary>
/**
* <p>The default implementation initializes the aggregate result to
* {@link #defaultResult defaultResult()}. Before visiting each child, it
* calls {@link #shouldVisitNextChild shouldVisitNextChild}; if the result
* is {@code false} no more children are visited and the current aggregate
* result is returned. After visiting a child, the aggregate result is
* updated by calling {@link #aggregateResult aggregateResult} with the
* previous aggregate result and the result of visiting the child.</p>
*
* <p>The default implementation is not safe for use in visitors that modify
* the tree structure. Visitors that modify the tree should override this
* method to behave properly in respect to the specific algorithm in use.</p>
*/
virtual T* visitChildren(RuleNode *node) override {
T* result = defaultResult();
size_t n = node->getChildCount();

View File

@ -37,6 +37,17 @@ namespace v4 {
namespace runtime {
namespace tree {
/** This interface describes the minimal core of methods triggered
* by {@link ParseTreeWalker}. E.g.,
*
* ParseTreeWalker walker = new ParseTreeWalker();
* walker.walk(myParseTreeListener, myParseTree); <-- triggers events in your listener
*
* If you want to trigger events in multiple listeners during a single
* tree walk, you can use the ParseTreeDispatcher object available at
*
* https://github.com/antlr/antlr4/issues/841
*/
class ParseTreeListener {
public:
virtual void visitTerminal(Ref<TerminalNode> node) = 0;

View File

@ -45,15 +45,23 @@ namespace tree {
/// between parse trees and other kinds of syntax trees we might want to create.
/// </summary>
class SyntaxTree : public Tree {
/// <summary>
/// Return an <seealso cref="Interval"/> indicating the index in the
/// <seealso cref="TokenStream"/> of the first and last token associated with this
/// subtree. If this node is a leaf, then the interval represents a single
/// token.
/// <p/>
/// If source interval is unknown, this returns <seealso cref="Interval#INVALID"/>.
/// </summary>
public:
/**
* Return an {@link Interval} indicating the index in the
* {@link TokenStream} of the first and last token associated with this
* subtree. If this node is a leaf, then the interval represents a single
* token and has interval i..i for token index i.
*
* <p>An interval of i..i-1 indicates an empty interval at position
* i in the input stream, where 0 &lt;= i &lt;= the size of the input
* token stream. Currently, the code base can only have i=0..n-1 but
* in concept one could have an empty interval after EOF. </p>
*
* <p>If source interval is unknown, this returns {@link Interval#INVALID}.</p>
*
* <p>As a weird special case, the source interval for rules matched after
* EOF is unspecified.</p>
*/
virtual misc::Interval getSourceInterval() = 0;
};

View File

@ -32,3 +32,7 @@
#include "Tree.h"
using namespace org::antlr::v4::runtime::tree;
// Default equality for trees is identity: two Tree nodes compare equal only
// if they are the very same object. Declared virtual, so subclasses may
// override with structural equality if they need it.
bool Tree::operator == (const Tree &other) const {
return &other == this;
}

View File

@ -78,6 +78,8 @@ namespace tree {
virtual std::wstring toString() = 0;
virtual bool operator == (const Tree &other) const;
protected:
virtual std::weak_ptr<Tree> getParentReference() = 0;
virtual Ref<Tree> getChildReference(size_t i) = 0;

View File

@ -34,14 +34,22 @@
#include "ParserRuleContext.h"
#include "CPPUtils.h"
#include "TerminalNodeImpl.h"
#include "ATN.h"
#include "Interval.h"
#include "CommonToken.h"
#include "Predicate.h"
#include "Trees.h"
using namespace org::antlr::v4::runtime;
using namespace org::antlr::v4::runtime::misc;
using namespace org::antlr::v4::runtime::tree;
using namespace antlrcpp;
// Trees is a collection of static helpers only; the constructor is private
// (see Trees.h) to prevent instantiation.
Trees::Trees() {
}
// Convenience overload: render the tree without rule-name resolution by
// delegating to the two-argument overload with a null recognizer.
std::wstring Trees::toStringTree(Ref<Tree> t) {
return toStringTree(t, nullptr);
}
@ -76,11 +84,15 @@ std::wstring Trees::getNodeText(Ref<Tree> t, Parser *recog) {
std::wstring Trees::getNodeText(Ref<Tree> t, const std::vector<std::wstring> &ruleNames) {
if (ruleNames.size() > 0) {
if (is<RuleNode>(t)) {
ssize_t ruleIndex = (std::static_pointer_cast<RuleNode>(t))->getRuleContext()->getRuleIndex();
if (is<RuleContext>(t)) {
ssize_t ruleIndex = std::static_pointer_cast<RuleContext>(t)->getRuleContext()->getRuleIndex();
if (ruleIndex < 0)
return L"Invalid Rule Index";
std::wstring ruleName = ruleNames[(size_t)ruleIndex];
int altNumber = std::static_pointer_cast<RuleContext>(t)->getAltNumber();
if (altNumber != atn::ATN::INVALID_ALT_NUMBER) {
return ruleName + L":" + std::to_wstring(altNumber);
}
return ruleName;
} else if (is<ErrorNode>(t)) {
return t->toString();
@ -141,6 +153,21 @@ static void _findAllNodes(Ref<ParseTree> t, int index, bool findTokens, std::vec
}
}
/// Return true if t is u's parent or lies anywhere on the path from u up to
/// the root. Comparison is by identity (shared pointer equality), not by
/// structural equality. Returns false when either argument is null or when
/// t has no (live) parent.
bool Trees::isAncestorOf(Ref<Tree> t, Ref<Tree> u) {
  if (t == nullptr || u == nullptr || t->getParent().expired()) {
    return false;
  }

  // Walk the parent chain starting at u's parent; stop at the root.
  for (Ref<Tree> ancestor = u->getParent().lock(); ancestor != nullptr; ancestor = ancestor->getParent().lock()) {
    if (ancestor == t) {
      return true;
    }
  }
  return false;
}
// Collect every token (leaf) node in t whose token type is ttype; delegates
// to findAllNodes with findTokens = true.
std::vector<Ref<ParseTree>> Trees::findAllTokenNodes(Ref<ParseTree> t, int ttype) {
return findAllNodes(t, ttype, true);
}
@ -155,12 +182,12 @@ std::vector<Ref<ParseTree>> Trees::findAllNodes(Ref<ParseTree> t, int index, boo
return nodes;
}
std::vector<Ref<ParseTree>> Trees::descendants(Ref<ParseTree> t) {
std::vector<Ref<ParseTree>> Trees::getDescendants(Ref<ParseTree> t) {
std::vector<Ref<ParseTree>> nodes;
nodes.push_back(t);
std::size_t n = t->getChildCount();
for (size_t i = 0 ; i < n ; i++) {
auto descentants = descendants(t->getChild(i));
auto descentants = getDescendants(t->getChild(i));
for (auto entry: descentants) {
nodes.push_back(entry);
}
@ -168,5 +195,62 @@ std::vector<Ref<ParseTree>> Trees::descendants(Ref<ParseTree> t) {
return nodes;
}
Trees::Trees() {
// Deprecated alias kept for backward compatibility; new code should call
// getDescendants() directly (see the @deprecated note in Trees.h).
std::vector<Ref<ParseTree>> Trees::descendants(Ref<ParseTree> t) {
return getDescendants(t);
}
// Find the smallest ParserRuleContext subtree of t that fully encloses the
// token range startTokenIndex..stopTokenIndex (both inclusive). Recursive
// depth-first search in which children are examined before t itself
// (postorder), so the deepest enclosing context is returned. Returns
// nullptr if no context within t encloses the range.
Ref<ParserRuleContext> Trees::getRootOfSubtreeEnclosingRegion(Ref<ParseTree> t, size_t startTokenIndex,
size_t stopTokenIndex) {
size_t n = t->getChildCount();
for (size_t i = 0; i<n; i++) {
Ref<ParseTree> child = t->getChild(i);
Ref<ParserRuleContext> r = getRootOfSubtreeEnclosingRegion(child, startTokenIndex, stopTokenIndex);
if (r != nullptr) {
return r;
}
}
// No child encloses the whole range; check whether t itself does.
// The casts to int match getTokenIndex()'s signed return type.
if (is<ParserRuleContext>(t)) {
Ref<ParserRuleContext> r = std::static_pointer_cast<ParserRuleContext>(t);
if ((int)startTokenIndex >= r->getStart()->getTokenIndex() && // is range fully contained in t?
(r->getStop() == nullptr || (int)stopTokenIndex <= r->getStop()->getTokenIndex())) {
// note: r.getStop()==null likely implies that we bailed out of parser and there's nothing to the right
return r;
}
}
return nullptr;
}
// Replace each ParserRuleContext child of t whose source interval lies
// entirely outside startIndex..stopIndex with a single "..." terminal node
// (CommonToken of INVALID_TYPE). WARNING: destructive — t's children vector
// is mutated in place, and t's own source interval is not adjusted.
void Trees::stripChildrenOutOfRange(Ref<ParserRuleContext> t, Ref<ParserRuleContext> root, size_t startIndex, size_t stopIndex) {
if (t == nullptr) {
return;
}
for (size_t i = 0; i < t->getChildCount(); ++i) {
Ref<ParseTree> child = t->getChild(i);
// range.a/range.b are the first/last token indices covered by child.
Interval range = child->getSourceInterval();
if (is<ParserRuleContext>(child) && (range.b < (int)startIndex || range.a > (int)stopIndex)) {
if (isAncestorOf(child, root)) { // replace only if subtree doesn't have displayed root
Ref<CommonToken> abbrev = std::make_shared<CommonToken>(Token::INVALID_TYPE, L"...");
t->children[i] = std::make_shared<TerminalNodeImpl>(abbrev);
}
}
}
}
/// Preorder depth-first search: return the first node (t itself included)
/// for which pred->test() yields true, or nullptr when no node in the
/// subtree satisfies the predicate.
Ref<Tree> Trees::findNodeSuchThat(Ref<Tree> t, Ref<Predicate<Tree>> pred) {
  // The node itself is tested before any of its children.
  if (pred->test(t)) {
    return t;
  }

  size_t childCount = t->getChildCount();
  for (size_t childIndex = 0; childIndex < childCount; ++childIndex) {
    Ref<Tree> match = findNodeSuchThat(t->getChild(childIndex), pred);
    if (match != nullptr) {
      return match;
    }
  }
  return nullptr;
}

View File

@ -68,12 +68,52 @@ namespace tree {
/// Return a list of all ancestors of this node. The first node of
/// list is the root and the last is the parent of this node.
static std::vector<std::weak_ptr<Tree>> getAncestors(Ref<Tree> t);
/** Return true if t is u's parent or a node on path to root from u.
* Use == not equals().
*
* @since 4.5.1
*/
static bool isAncestorOf(Ref<Tree> t, Ref<Tree> u);
static std::vector<Ref<ParseTree>> findAllTokenNodes(Ref<ParseTree> t, int ttype);
static std::vector<Ref<ParseTree>> findAllRuleNodes(Ref<ParseTree> t, int ruleIndex);
static std::vector<Ref<ParseTree>> findAllNodes(Ref<ParseTree> t, int index, bool findTokens);
static std::vector<Ref<ParseTree>> descendants(Ref<ParseTree> t);
/** Get all descendants; includes t itself.
*
* @since 4.5.1
*/
static std::vector<Ref<ParseTree>> getDescendants(Ref<ParseTree> t);
/** @deprecated */
static std::vector<Ref<ParseTree>> descendants(Ref<ParseTree> t);
/** Find smallest subtree of t enclosing range startTokenIndex..stopTokenIndex
* inclusively using postorder traversal. Recursive depth-first-search.
*
* @since 4.5.1
*/
static Ref<ParserRuleContext> getRootOfSubtreeEnclosingRegion(Ref<ParseTree> t,
size_t startTokenIndex, // inclusive
size_t stopTokenIndex); // inclusive
/** Replace any subtree siblings of root that are completely to left
* or right of lookahead range with a CommonToken(Token.INVALID_TYPE,"...")
* node. The source interval for t is not altered to suit smaller range!
*
* WARNING: destructive to t.
*
* @since 4.5.1
*/
static void stripChildrenOutOfRange(Ref<ParserRuleContext> t, Ref<ParserRuleContext> root, size_t startIndex,
size_t stopIndex);
/** Return first node satisfying the pred
*
* @since 4.5.1
*/
static Ref<Tree> findNodeSuchThat(Ref<Tree> t, Ref<misc::Predicate<Tree>> pred);
private:
Trees();
};

View File

@ -47,7 +47,7 @@ ParseTreeMatch::ParseTreeMatch(Ref<ParseTree> tree, const ParseTreePattern &patt
Ref<ParseTree> ParseTreeMatch::get(const std::wstring &label) {
auto iterator = _labels.find(label);
if (iterator == _labels.end()) {
if (iterator == _labels.end() || iterator->second.empty()) {
return nullptr;
}

View File

@ -40,6 +40,7 @@
#include "TagChunk.h"
#include "ATN.h"
#include "Lexer.h"
#include "BailErrorStrategy.h"
#include "ListTokenSource.h"
#include "TextChunk.h"
@ -56,7 +57,7 @@ using namespace org::antlr::v4::runtime::tree;
using namespace org::antlr::v4::runtime::tree::pattern;
using namespace antlrcpp;
ParseTreePatternMatcher::CannotInvokeStartRule::CannotInvokeStartRule(std::exception e) {
ParseTreePatternMatcher::CannotInvokeStartRule::CannotInvokeStartRule(const RuntimeException &e) : RuntimeException(e.what()) {
}
ParseTreePatternMatcher::ParseTreePatternMatcher(Lexer *lexer, Parser *parser) : _lexer(lexer), _parser(parser) {
@ -109,12 +110,17 @@ ParseTreePattern ParseTreePatternMatcher::compile(const std::wstring &pattern, i
delete tokens;
});
ParserInterpreter parserInterp(_parser->getGrammarFileName(), _parser->getTokenNames(),
ParserInterpreter parserInterp(_parser->getGrammarFileName(), _parser->getVocabulary(),
_parser->getRuleNames(), _parser->getATNWithBypassAlts(), tokens);
Ref<ParserRuleContext> tree;
try {
Ref<ParserRuleContext> context = parserInterp.parse(patternRuleIndex);
return ParseTreePattern(this, pattern, patternRuleIndex, context);
parserInterp.setErrorHandler(std::make_shared<BailErrorStrategy>());
tree = parserInterp.parse(patternRuleIndex);
} catch (ParseCancellationException &e) {
std::rethrow_if_nested(e);
} catch (RecognitionException &re) {
throw re;
} catch (std::exception &e) {
#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026
// throw_with_nested is not available before VS 2015.
@ -124,6 +130,12 @@ ParseTreePattern ParseTreePatternMatcher::compile(const std::wstring &pattern, i
#endif
}
// Make sure tree pattern compilation checks for a complete parse
if (tokens->LA(1) != EOF) {
throw StartRuleDoesNotConsumeFullPattern();
}
return ParseTreePattern(this, pattern, patternRuleIndex, tree);
}
Lexer* ParseTreePatternMatcher::getLexer() {

View File

@ -31,7 +31,7 @@
#pragma once
#include "Token.h"
#include "Exceptions.h"
namespace org {
namespace antlr {
@ -99,9 +99,14 @@ namespace pattern {
/// </summary>
class ParseTreePatternMatcher {
public:
class CannotInvokeStartRule : public std::exception {
class CannotInvokeStartRule : public RuntimeException {
public:
CannotInvokeStartRule(std::exception e);
CannotInvokeStartRule(const RuntimeException &e);
};
// Fixes https://github.com/antlr/antlr4/issues/413
// "Tree pattern compilation doesn't check for a complete parse"
class StartRuleDoesNotConsumeFullPattern : public RuntimeException {
};
/// Constructs a <seealso cref="ParseTreePatternMatcher"/> or from a <seealso cref="Lexer"/> and