forked from jasder/antlr
Switching to current ANTLR revision, final part.
This commit is contained in:
parent
d4ebdfa138
commit
bfcb0a71cb
|
@ -1,6 +1,6 @@
|
|||
# C++ target for ANTLR 4
|
||||
|
||||
This fork provides C++ runtime support for C++. See [the canonical antlr4 repository](https://github.com/antlr/antlr4) for in depth detail about how to use Antlr4.
|
||||
This folder contains the C++ runtime support for ANTLR. See [the canonical antlr4 repository](https://github.com/antlr/antlr4) for in depth detail about how to use ANTLR 4.
|
||||
|
||||
## Authors and major contributors
|
||||
|
||||
|
|
|
@ -17,14 +17,6 @@
|
|||
using namespace antlrcpptest;
|
||||
using namespace org::antlr::v4::runtime;
|
||||
|
||||
class A {
|
||||
public:
|
||||
static void doit(const A &a) {
|
||||
size_t i = a.counter;
|
||||
}
|
||||
private:
|
||||
size_t counter;
|
||||
};
|
||||
int main(int argc, const char * argv[]) {
|
||||
|
||||
ANTLRInputStream input(L"divideŴ and conquer");
|
||||
|
|
|
@ -57,6 +57,10 @@
|
|||
278A66FC1C95838E002D667E /* ANTLRErrorListener.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 278A66FA1C95838E002D667E /* ANTLRErrorListener.cpp */; };
|
||||
27A23EA31CC2A8D60036D8A3 /* TLexer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27A23EA11CC2A8D60036D8A3 /* TLexer.cpp */; };
|
||||
27A23EA41CC2A8D60036D8A3 /* TLexer.h in Headers */ = {isa = PBXBuildFile; fileRef = 27A23EA21CC2A8D60036D8A3 /* TLexer.h */; };
|
||||
27B4A79A1CD605BB00FCCD3E /* Predicate.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27B4A7981CD605BB00FCCD3E /* Predicate.cpp */; };
|
||||
27B4A79B1CD605BB00FCCD3E /* Predicate.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27B4A7981CD605BB00FCCD3E /* Predicate.cpp */; };
|
||||
27B4A79C1CD605BB00FCCD3E /* Predicate.h in Headers */ = {isa = PBXBuildFile; fileRef = 27B4A7991CD605BB00FCCD3E /* Predicate.h */; };
|
||||
27B4A79D1CD605BB00FCCD3E /* Predicate.h in Headers */ = {isa = PBXBuildFile; fileRef = 27B4A7991CD605BB00FCCD3E /* Predicate.h */; };
|
||||
27C62E261CD269C90088721B /* ParseInfo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27C62E241CD269C90088721B /* ParseInfo.cpp */; };
|
||||
27C62E271CD269C90088721B /* ParseInfo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27C62E241CD269C90088721B /* ParseInfo.cpp */; };
|
||||
27C62E281CD269C90088721B /* ParseInfo.h in Headers */ = {isa = PBXBuildFile; fileRef = 27C62E251CD269C90088721B /* ParseInfo.h */; };
|
||||
|
@ -649,6 +653,8 @@
|
|||
278A66FA1C95838E002D667E /* ANTLRErrorListener.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ANTLRErrorListener.cpp; path = ../../runtime/ANTLRErrorListener.cpp; sourceTree = SOURCE_ROOT; };
|
||||
27A23EA11CC2A8D60036D8A3 /* TLexer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TLexer.cpp; path = ../generated/TLexer.cpp; sourceTree = "<group>"; wrapsLines = 0; };
|
||||
27A23EA21CC2A8D60036D8A3 /* TLexer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TLexer.h; path = ../generated/TLexer.h; sourceTree = "<group>"; };
|
||||
27B4A7981CD605BB00FCCD3E /* Predicate.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Predicate.cpp; sourceTree = "<group>"; };
|
||||
27B4A7991CD605BB00FCCD3E /* Predicate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Predicate.h; sourceTree = "<group>"; };
|
||||
27C62E241CD269C90088721B /* ParseInfo.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ParseInfo.cpp; sourceTree = "<group>"; };
|
||||
27C62E251CD269C90088721B /* ParseInfo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ParseInfo.h; sourceTree = "<group>"; };
|
||||
27C62E2A1CD26C780088721B /* ProfilingATNSimulator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ProfilingATNSimulator.cpp; sourceTree = "<group>"; wrapsLines = 0; };
|
||||
|
@ -889,7 +895,7 @@
|
|||
27C669861C9585B80021E494 /* TerminalNodeImpl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TerminalNodeImpl.h; sourceTree = "<group>"; };
|
||||
27C669871C9585B80021E494 /* Tree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Tree.cpp; sourceTree = "<group>"; };
|
||||
27C669881C9585B80021E494 /* Tree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Tree.h; sourceTree = "<group>"; };
|
||||
27C669891C9585B80021E494 /* Trees.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Trees.cpp; sourceTree = "<group>"; };
|
||||
27C669891C9585B80021E494 /* Trees.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Trees.cpp; sourceTree = "<group>"; wrapsLines = 0; };
|
||||
27C6698A1C9585B80021E494 /* Trees.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Trees.h; sourceTree = "<group>"; wrapsLines = 0; };
|
||||
27C669F01C958AB30021E494 /* Chunk.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Chunk.cpp; path = pattern/Chunk.cpp; sourceTree = "<group>"; };
|
||||
27C669F11C958AB30021E494 /* ParseTreeMatch.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ParseTreeMatch.cpp; path = pattern/ParseTreeMatch.cpp; sourceTree = "<group>"; wrapsLines = 0; };
|
||||
|
@ -1136,6 +1142,8 @@
|
|||
27C6687D1C9584E90021E494 /* misc */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
27B4A7981CD605BB00FCCD3E /* Predicate.cpp */,
|
||||
27B4A7991CD605BB00FCCD3E /* Predicate.h */,
|
||||
27C668881C9584FA0021E494 /* Interval.cpp */,
|
||||
27C668891C9584FA0021E494 /* Interval.h */,
|
||||
27C6688A1C9584FA0021E494 /* IntervalSet.cpp */,
|
||||
|
@ -1512,6 +1520,7 @@
|
|||
27C668631C95846E0021E494 /* WildcardTransition.h in Headers */,
|
||||
27C667C11C95846E0021E494 /* ATNDeserializationOptions.h in Headers */,
|
||||
27C667BD1C95846E0021E494 /* ATNConfigSet.h in Headers */,
|
||||
27B4A79D1CD605BB00FCCD3E /* Predicate.h in Headers */,
|
||||
27C667F51C95846E0021E494 /* EpsilonTransition.h in Headers */,
|
||||
27C667F91C95846E0021E494 /* LexerATNConfig.h in Headers */,
|
||||
27C669031C9585230021E494 /* BitSet.h in Headers */,
|
||||
|
@ -1596,6 +1605,7 @@
|
|||
27C6683E1C95846E0021E494 /* RuleTransition.h in Headers */,
|
||||
27C667BC1C95846E0021E494 /* ATNConfigSet.h in Headers */,
|
||||
275ECC4E1CCCD95B00E79E2A /* LexerAction.h in Headers */,
|
||||
27B4A79C1CD605BB00FCCD3E /* Predicate.h in Headers */,
|
||||
275DB3E91CCD23C000D8C543 /* LexerModeAction.h in Headers */,
|
||||
27C666F21C9584050021E494 /* InterpreterRuleContext.h in Headers */,
|
||||
27C667F41C95846E0021E494 /* EpsilonTransition.h in Headers */,
|
||||
|
@ -1892,6 +1902,7 @@
|
|||
27C62E3F1CD272480088721B /* DecisionEventInfo.cpp in Sources */,
|
||||
27C667311C9584050021E494 /* TokenSource.cpp in Sources */,
|
||||
27C668CB1C9584FA0021E494 /* MurmurHash.cpp in Sources */,
|
||||
27B4A79B1CD605BB00FCCD3E /* Predicate.cpp in Sources */,
|
||||
27C669A81C9585B80021E494 /* ParseTreeWalker.cpp in Sources */,
|
||||
27C62E511CD275C50088721B /* ContextSensitivityInfo.cpp in Sources */,
|
||||
27C666ED1C9584050021E494 /* InputMismatchException.cpp in Sources */,
|
||||
|
@ -2123,6 +2134,7 @@
|
|||
27C66A0C1C958AB30021E494 /* TextChunk.cpp in Sources */,
|
||||
27C6682C1C95846E0021E494 /* PredictionMode.cpp in Sources */,
|
||||
27C667C61C95846E0021E494 /* ATNSerializer.cpp in Sources */,
|
||||
27B4A79A1CD605BB00FCCD3E /* Predicate.cpp in Sources */,
|
||||
27C668341C95846E0021E494 /* RuleStartState.cpp in Sources */,
|
||||
27C668481C95846E0021E494 /* SingletonPredictionContext.cpp in Sources */,
|
||||
27C668751C9584B60021E494 /* DFAState.cpp in Sources */,
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
# Demo application for the ANTLR 4 C++ target
|
||||
|
||||
This demo app shows how to build the ANTLR runtime both as a dynamic and as a static library, and how to use a parser generated from a simple demo grammar (a minimal usage sketch follows the steps below).
|
||||
|
||||
A few steps are necessary to get this to work:
|
||||
|
||||
- Download the current ANTLR jar and place it in this folder.
|
||||
- Open the generation script for your platform (generate.cmd for Windows, generate.sh for *nix/OSX) and update the LOCATION var to the actual name of the jar you downloaded.
|
||||
- Run the generation script. This will generate a test parser + lexer, along with listener + visitor classes in a subfolder named "generated". This is where the demo application looks for these files.
|
||||
- Open the project in the folder that matches your system.
|
||||
- Compile and run.
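
For orientation, here is a minimal sketch of a driver that uses the generated classes. It is an illustration only: the individual runtime headers, the entry rule name (`main`) and its context type are assumptions, so check the generated TParser.h for the real names.

```cpp
#include <iostream>

#include "ANTLRInputStream.h"
#include "CommonTokenStream.h"
#include "TLexer.h"   // produced by the generation script into the "generated" folder
#include "TParser.h"  // produced by the generation script into the "generated" folder

using namespace antlrcpptest;
using namespace org::antlr::v4::runtime;

int main(int /*argc*/, const char * /*argv*/[]) {
  ANTLRInputStream input(L"divide and conquer");  // any input your grammar accepts
  TLexer lexer(&input);
  CommonTokenStream tokens(&lexer);
  TParser parser(&tokens);

  // "main" is an assumed name for the grammar's entry rule; adjust as needed.
  Ref<TParser::MainContext> tree = parser.main();
  std::wcout << tree->toStringTree(&parser) << std::endl;
  return 0;
}
```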
|
||||
|
Binary file not shown.
|
@ -9,8 +9,8 @@ set -o errexit
|
|||
# There are 2 ways of running the ANTLR generator here.
|
||||
|
||||
# 1) Running from jar. Use the given jar (or replace it by another one you built or downloaded) for generation.
|
||||
#LOCATION=antlr-4.1.1-dev-complete.jar
|
||||
#java -jar $LOCATION -Dlanguage=Cpp -listener -visitor -o generated/ -package antlrcpptest TLexer.g4 TParser.g4
|
||||
LOCATION=antlr-4.1.1-dev-complete.jar
|
||||
java -jar $LOCATION -Dlanguage=Cpp -listener -visitor -o generated/ -package antlrcpptest TLexer.g4 TParser.g4
|
||||
#java -jar $LOCATION -Dlanguage=Cpp -listener -visitor -o generated/ -package antlrcpptest -XdbgST TLexer.g4 TParser.g4
|
||||
#java -jar $LOCATION -Dlanguage=Java -listener -visitor -o generated/ -package antlrcpptest TLexer.g4 TParser.g4
|
||||
|
||||
|
@ -21,9 +21,9 @@ set -o errexit
|
|||
# Furthermore it is assumed that the antlr3 folder is located side-by-side with the antlr4 folder. Adjust CLASSPATH if not.
|
||||
# This approach is especially useful if you are working on a target stg file, as it doesn't require regenerating the
|
||||
# antlr jar over and over again.
|
||||
CLASSPATH=../../../tool/resources/:ST-4.0.8.jar:../../../tool/target/classes:../../../runtime/Java/target/classes:../../../../antlr3/runtime/Java/target/classes
|
||||
#CLASSPATH=../../../tool/resources/:ST-4.0.8.jar:../../../tool/target/classes:../../../runtime/Java/target/classes:../../../../antlr3/runtime/Java/target/classes
|
||||
|
||||
java -cp $CLASSPATH org.antlr.v4.Tool -Dlanguage=Cpp -listener -visitor -o generated/ -package antlrcpptest TLexer.g4 TParser.g4
|
||||
#java -cp $CLASSPATH org.antlr.v4.Tool -Dlanguage=Cpp -listener -visitor -o generated/ -package antlrcpptest TLexer.g4 TParser.g4
|
||||
#java -cp $CLASSPATH org.antlr.v4.Tool -Dlanguage=Cpp -listener -visitor -o generated/ -package antlrcpptest -XdbgST TLexer.g4 TParser.g4
|
||||
#java -cp $CLASSPATH org.antlr.v4.Tool -Dlanguage=Java -listener -visitor -o generated/ TLexer.g4 TParser.g4
|
||||
|
||||
|
|
|
@ -80,86 +80,113 @@ namespace runtime {
|
|||
virtual void syntaxError(IRecognizer *recognizer, Ref<Token> offendingSymbol, size_t line, int charPositionInLine,
|
||||
const std::wstring &msg, std::exception_ptr e) = 0;
|
||||
|
||||
/// <summary>
|
||||
/// This method is called by the parser when a full-context prediction
|
||||
/// results in an ambiguity.
|
||||
/// <p/>
|
||||
/// When {@code exact} is {@code true}, <em>all</em> of the alternatives in
|
||||
/// {@code ambigAlts} are viable, i.e. this is reporting an exact ambiguity.
|
||||
/// When {@code exact} is {@code false}, <em>at least two</em> of the
|
||||
/// alternatives in {@code ambigAlts} are viable for the current input, but
|
||||
/// the prediction algorithm terminated as soon as it determined that at
|
||||
/// least the <em>minimum</em> alternative in {@code ambigAlts} is viable.
|
||||
/// <p/>
|
||||
/// When the <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/> prediction mode
|
||||
/// is used, the parser is required to identify exact ambiguities so
|
||||
/// {@code exact} will always be {@code true}.
|
||||
/// <p/>
|
||||
/// This method is not used by lexers.
|
||||
/// </summary>
|
||||
/// <param name="recognizer"> the parser instance </param>
|
||||
/// <param name="dfa"> the DFA for the current decision </param>
|
||||
/// <param name="startIndex"> the input index where the decision started </param>
|
||||
/// <param name="stopIndex"> the input input where the ambiguity is reported </param>
|
||||
/// <param name="exact"> {@code true} if the ambiguity is exactly known, otherwise
|
||||
/// {@code false}. This is always {@code true} when
|
||||
/// <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/> is used. </param>
|
||||
/// <param name="ambigAlts"> the potentially ambiguous alternatives </param>
|
||||
/// <param name="configs"> the ATN configuration set where the ambiguity was
|
||||
/// determined </param>
|
||||
/**
|
||||
* This method is called by the parser when a full-context prediction
|
||||
* results in an ambiguity.
|
||||
*
|
||||
* <p>Each full-context prediction which does not result in a syntax error
|
||||
* will call either {@link #reportContextSensitivity} or
|
||||
* {@link #reportAmbiguity}.</p>
|
||||
*
|
||||
* <p>When {@code ambigAlts} is not null, it contains the set of potentially
|
||||
* viable alternatives identified by the prediction algorithm. When
|
||||
* {@code ambigAlts} is null, use {@link ATNConfigSet#getAlts} to obtain the
|
||||
* represented alternatives from the {@code configs} argument.</p>
|
||||
*
|
||||
* <p>When {@code exact} is {@code true}, <em>all</em> of the potentially
|
||||
* viable alternatives are truly viable, i.e. this is reporting an exact
|
||||
* ambiguity. When {@code exact} is {@code false}, <em>at least two</em> of
|
||||
* the potentially viable alternatives are viable for the current input, but
|
||||
* the prediction algorithm terminated as soon as it determined that at
|
||||
* least the <em>minimum</em> potentially viable alternative is truly
|
||||
* viable.</p>
|
||||
*
|
||||
* <p>When the {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} prediction
|
||||
* mode is used, the parser is required to identify exact ambiguities so
|
||||
* {@code exact} will always be {@code true}.</p>
|
||||
*
|
||||
* <p>This method is not used by lexers.</p>
|
||||
*
|
||||
* @param recognizer the parser instance
|
||||
* @param dfa the DFA for the current decision
|
||||
* @param startIndex the input index where the decision started
|
||||
* @param stopIndex the input index where the ambiguity was identified
|
||||
* @param exact {@code true} if the ambiguity is exactly known, otherwise
|
||||
* {@code false}. This is always {@code true} when
|
||||
* {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} is used.
|
||||
* @param ambigAlts the potentially ambiguous alternatives, or {@code null}
|
||||
* to indicate that the potentially ambiguous alternatives are the complete
|
||||
* set of represented alternatives in {@code configs}
|
||||
* @param configs the ATN configuration set where the ambiguity was
|
||||
* identified
|
||||
*/
|
||||
virtual void reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact,
|
||||
const antlrcpp::BitSet &ambigAlts, Ref<atn::ATNConfigSet> configs) = 0;
|
||||
|
||||
/// <summary>
|
||||
/// This method is called when an SLL conflict occurs and the parser is about
|
||||
/// to use the full context information to make an LL decision.
|
||||
/// <p/>
|
||||
/// If one or more configurations in {@code configs} contains a semantic
|
||||
/// predicate, the predicates are evaluated before this method is called. The
|
||||
/// subset of alternatives which are still viable after predicates are
|
||||
/// evaluated is reported in {@code conflictingAlts}.
|
||||
/// <p/>
|
||||
/// This method is not used by lexers.
|
||||
/// </summary>
|
||||
/// <param name="recognizer"> the parser instance </param>
|
||||
/// <param name="dfa"> the DFA for the current decision </param>
|
||||
/// <param name="startIndex"> the input index where the decision started </param>
|
||||
/// <param name="stopIndex"> the input index where the SLL conflict occurred </param>
|
||||
/// <param name="conflictingAlts"> The specific conflicting alternatives. If this is
|
||||
/// {@code null}, the conflicting alternatives are all alternatives
|
||||
/// represented in {@code configs}. </param>
|
||||
/// <param name="configs"> the ATN configuration set where the SLL conflict was
|
||||
/// detected </param>
|
||||
/**
|
||||
* This method is called when an SLL conflict occurs and the parser is about
|
||||
* to use the full context information to make an LL decision.
|
||||
*
|
||||
* <p>If one or more configurations in {@code configs} contains a semantic
|
||||
* predicate, the predicates are evaluated before this method is called. The
|
||||
* subset of alternatives which are still viable after predicates are
|
||||
* evaluated is reported in {@code conflictingAlts}.</p>
|
||||
*
|
||||
* <p>This method is not used by lexers.</p>
|
||||
*
|
||||
* @param recognizer the parser instance
|
||||
* @param dfa the DFA for the current decision
|
||||
* @param startIndex the input index where the decision started
|
||||
* @param stopIndex the input index where the SLL conflict occurred
|
||||
* @param conflictingAlts The specific conflicting alternatives. If this is
|
||||
* {@code null}, the conflicting alternatives are all alternatives
|
||||
* represented in {@code configs}. At the moment, conflictingAlts is non-null
|
||||
* for the reference implementation (but Sam's optimized version can see this
|
||||
* as null).
|
||||
* @param configs the ATN configuration set where the SLL conflict was
|
||||
* detected
|
||||
*/
|
||||
virtual void reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex,
|
||||
const antlrcpp::BitSet &conflictingAlts, Ref<atn::ATNConfigSet> configs) = 0;
|
||||
|
||||
/// <summary>
|
||||
/// This method is called by the parser when a full-context prediction has a
|
||||
/// unique result.
|
||||
/// <p/>
|
||||
/// For prediction implementations that only evaluate full-context
|
||||
/// predictions when an SLL conflict is found (including the default
|
||||
/// <seealso cref="ParserATNSimulator"/> implementation), this method reports cases
|
||||
/// where SLL conflicts were resolved to unique full-context predictions,
|
||||
/// i.e. the decision was context-sensitive. This report does not necessarily
|
||||
/// indicate a problem, and it may appear even in completely unambiguous
|
||||
/// grammars.
|
||||
/// <p/>
|
||||
/// {@code configs} may have more than one represented alternative if the
|
||||
/// full-context prediction algorithm does not evaluate predicates before
|
||||
/// beginning the full-context prediction. In all cases, the final prediction
|
||||
/// is passed as the {@code prediction} argument.
|
||||
/// <p/>
|
||||
/// This method is not used by lexers.
|
||||
/// </summary>
|
||||
/// <param name="recognizer"> the parser instance </param>
|
||||
/// <param name="dfa"> the DFA for the current decision </param>
|
||||
/// <param name="startIndex"> the input index where the decision started </param>
|
||||
/// <param name="stopIndex"> the input index where the context sensitivity was
|
||||
/// finally determined </param>
|
||||
/// <param name="prediction"> the unambiguous result of the full-context prediction </param>
|
||||
/// <param name="configs"> the ATN configuration set where the unambiguous prediction
|
||||
/// was determined </param>
|
||||
/**
|
||||
* This method is called by the parser when a full-context prediction has a
|
||||
* unique result.
|
||||
*
|
||||
* <p>Each full-context prediction which does not result in a syntax error
|
||||
* will call either {@link #reportContextSensitivity} or
|
||||
* {@link #reportAmbiguity}.</p>
|
||||
*
|
||||
* <p>For prediction implementations that only evaluate full-context
|
||||
* predictions when an SLL conflict is found (including the default
|
||||
* {@link ParserATNSimulator} implementation), this method reports cases
|
||||
* where SLL conflicts were resolved to unique full-context predictions,
|
||||
* i.e. the decision was context-sensitive. This report does not necessarily
|
||||
* indicate a problem, and it may appear even in completely unambiguous
|
||||
* grammars.</p>
|
||||
*
|
||||
* <p>{@code configs} may have more than one represented alternative if the
|
||||
* full-context prediction algorithm does not evaluate predicates before
|
||||
* beginning the full-context prediction. In all cases, the final prediction
|
||||
* is passed as the {@code prediction} argument.</p>
|
||||
*
|
||||
* <p>Note that the definition of "context sensitivity" in this method
|
||||
* differs from the concept in {@link DecisionInfo#contextSensitivities}.
|
||||
* This method reports all instances where an SLL conflict occurred but LL
|
||||
* parsing produced a unique result, whether or not that unique result
|
||||
* matches the minimum alternative in the SLL conflicting set.</p>
|
||||
*
|
||||
* <p>This method is not used by lexers.</p>
|
||||
*
|
||||
* @param recognizer the parser instance
|
||||
* @param dfa the DFA for the current decision
|
||||
* @param startIndex the input index where the decision started
|
||||
* @param stopIndex the input index where the context sensitivity was
|
||||
* finally determined
|
||||
* @param prediction the unambiguous result of the full-context prediction
|
||||
* @param configs the ATN configuration set where the unambiguous prediction
|
||||
* was determined
|
||||
*/
|
||||
virtual void reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex,
|
||||
int prediction, Ref<atn::ATNConfigSet> configs) = 0;
|
||||
};
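
To make these callbacks concrete, here is a minimal sketch of a listener that logs ambiguities. It assumes that the BaseErrorListener class documented further below provides overridable empty defaults and that its header is named BaseErrorListener.h; both are assumptions, not verified against this commit.

```cpp
#include <iostream>

#include "BaseErrorListener.h"  // assumed header name for the empty default listener

using namespace org::antlr::v4::runtime;

// Hypothetical listener: only the ambiguity callback is overridden; all other
// callbacks keep the empty defaults inherited from BaseErrorListener.
class AmbiguityLogger : public BaseErrorListener {
public:
  virtual void reportAmbiguity(Parser * /*recognizer*/, const dfa::DFA & /*dfa*/, size_t startIndex,
                               size_t stopIndex, bool exact, const antlrcpp::BitSet & /*ambigAlts*/,
                               Ref<atn::ATNConfigSet> /*configs*/) override {
    std::wcerr << L"ambiguity between input indexes " << startIndex << L" and " << stopIndex
               << (exact ? L" (exact)" : L" (inexact)") << std::endl;
  }
};
```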
|
||||
|
|
|
@ -65,20 +65,24 @@ namespace runtime {
|
|||
|
||||
virtual void reset(Parser *recognizer) = 0;
|
||||
|
||||
/// <summary>
|
||||
/// This method is called when an unexpected symbol is encountered during an
|
||||
/// inline match operation, such as <seealso cref="Parser#match"/>. If the error
|
||||
/// strategy successfully recovers from the match failure, this method
|
||||
/// returns the <seealso cref="Token"/> instance which should be treated as the
|
||||
/// successful result of the match.
|
||||
/// <p/>
|
||||
/// Note that the calling code will not report an error if this method
|
||||
/// returns successfully. The error strategy implementation is responsible
|
||||
/// for calling <seealso cref="Parser#notifyErrorListeners"/> as appropriate.
|
||||
/// </summary>
|
||||
/// <param name="recognizer"> the parser instance </param>
|
||||
/// <exception cref="RecognitionException"> if the error strategy was not able to
|
||||
/// recover from the unexpected input symbol </exception>
|
||||
/**
|
||||
* This method is called when an unexpected symbol is encountered during an
|
||||
* inline match operation, such as {@link Parser#match}. If the error
|
||||
* strategy successfully recovers from the match failure, this method
|
||||
* returns the {@link Token} instance which should be treated as the
|
||||
* successful result of the match.
|
||||
*
|
||||
* <p>This method handles the consumption of any tokens - the caller should
|
||||
* <b>not</b> call {@link Parser#consume} after a successful recovery.</p>
|
||||
*
|
||||
* <p>Note that the calling code will not report an error if this method
|
||||
* returns successfully. The error strategy implementation is responsible
|
||||
* for calling {@link Parser#notifyErrorListeners} as appropriate.</p>
|
||||
*
|
||||
* @param recognizer the parser instance
|
||||
* @throws RecognitionException if the error strategy was not able to
|
||||
* recover from the unexpected input symbol
|
||||
*/
|
||||
virtual Ref<Token> recoverInline(Parser *recognizer) = 0;
|
||||
|
||||
/// <summary>
|
||||
|
|
|
@ -29,9 +29,10 @@
|
|||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include "Exceptions.h"
|
||||
#include "Interval.h"
|
||||
#include "IntStream.h"
|
||||
|
||||
#include "Arrays.h"
|
||||
#include "CPPUtils.h"
|
||||
|
||||
|
@ -139,8 +140,9 @@ void ANTLRInputStream::seek(size_t index) {
|
|||
p = index; // just jump; don't update stream state (line, ...)
|
||||
return;
|
||||
}
|
||||
// seek forward, consume until p hits index
|
||||
while (p < index && index < data.size()) {
|
||||
// seek forward, consume until p hits index or n (whichever comes first)
|
||||
index = std::min(index, data.size());
|
||||
while (p < index) {
|
||||
consume();
|
||||
}
|
||||
}
|
||||
|
@ -162,6 +164,9 @@ std::wstring ANTLRInputStream::getText(const Interval &interval) {
|
|||
}
|
||||
|
||||
std::string ANTLRInputStream::getSourceName() const {
|
||||
if (name.empty()) {
|
||||
return IntStream::UNKNOWN_SOURCE_NAME;
|
||||
}
|
||||
return name;
|
||||
}
|
||||
|
||||
|
|
|
@ -38,7 +38,34 @@ namespace antlr {
|
|||
namespace v4 {
|
||||
namespace runtime {
|
||||
|
||||
/// Bail out of parser at first syntax error. Use myparser.setErrorHandler(..) to set a different strategy.
|
||||
/**
|
||||
* This implementation of {@link ANTLRErrorStrategy} responds to syntax errors
|
||||
* by immediately canceling the parse operation with a
|
||||
* {@link ParseCancellationException}. The implementation ensures that the
|
||||
* {@link ParserRuleContext#exception} field is set for all parse tree nodes
|
||||
* that were not completed prior to encountering the error.
|
||||
*
|
||||
* <p>
|
||||
* This error strategy is useful in the following scenarios.</p>
|
||||
*
|
||||
* <ul>
|
||||
* <li><strong>Two-stage parsing:</strong> This error strategy allows the first
|
||||
* stage of two-stage parsing to immediately terminate if an error is
|
||||
* encountered, and immediately fall back to the second stage. In addition to
|
||||
* avoiding wasted work by attempting to recover from errors here, the empty
|
||||
* implementation of {@link BailErrorStrategy#sync} improves the performance of
|
||||
* the first stage.</li>
|
||||
* <li><strong>Silent validation:</strong> When syntax errors are not being
|
||||
* reported or logged, and the parse result is simply ignored if errors occur,
|
||||
* the {@link BailErrorStrategy} avoids wasting work on recovering from errors
|
||||
* when the result will be ignored either way.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>
|
||||
* {@code myparser.setErrorHandler(new BailErrorStrategy());}</p>
|
||||
*
|
||||
* @see Parser#setErrorHandler(ANTLRErrorStrategy)
|
||||
*/
|
||||
class BailErrorStrategy : public DefaultErrorStrategy {
|
||||
/// <summary>
|
||||
/// Instead of recovering from exception {@code e}, re-throw it wrapped
|
||||
|
|
|
@ -42,6 +42,11 @@ namespace antlr {
|
|||
namespace v4 {
|
||||
namespace runtime {
|
||||
|
||||
/**
|
||||
* Provides an empty default implementation of {@link ANTLRErrorListener}. The
|
||||
* default implementation of each method does nothing, but can be overridden as
|
||||
* necessary.
|
||||
*/
|
||||
class BaseErrorListener : public ANTLRErrorListener {
|
||||
|
||||
virtual void syntaxError(IRecognizer *recognizer, Ref<Token> offendingSymbol, size_t line, int charPositionInLine,
|
||||
|
|
|
@ -75,7 +75,22 @@ size_t BufferedTokenStream::size() {
|
|||
}
|
||||
|
||||
void BufferedTokenStream::consume() {
|
||||
if (LA(1) == EOF) {
|
||||
bool skipEofCheck = false;
|
||||
if (!_needSetup) {
|
||||
if (_fetchedEOF) {
|
||||
// the last token in tokens is EOF. skip check if p indexes any
|
||||
// fetched token except the last.
|
||||
skipEofCheck = _p < _tokens.size() - 1;
|
||||
} else {
|
||||
// no EOF token in tokens. skip check if p indexes a fetched token.
|
||||
skipEofCheck = _p < _tokens.size();
|
||||
}
|
||||
} else {
|
||||
// not yet initialized
|
||||
skipEofCheck = false;
|
||||
}
|
||||
|
||||
if (!skipEofCheck && LA(1) == EOF) {
|
||||
throw IllegalStateException("cannot consume EOF");
|
||||
}
|
||||
|
||||
|
@ -246,13 +261,13 @@ std::vector<Ref<Token>> BufferedTokenStream::getTokens(int start, int stop, int
|
|||
ssize_t BufferedTokenStream::nextTokenOnChannel(size_t i, int channel) {
|
||||
sync(i);
|
||||
if (i >= size()) {
|
||||
return -1;
|
||||
return size() - 1;
|
||||
}
|
||||
|
||||
Ref<Token> token = _tokens[i];
|
||||
while (token->getChannel() != channel) {
|
||||
if (token->getType() == EOF) {
|
||||
return -1;
|
||||
return i;
|
||||
}
|
||||
i++;
|
||||
sync(i);
|
||||
|
@ -261,15 +276,24 @@ ssize_t BufferedTokenStream::nextTokenOnChannel(size_t i, int channel) {
|
|||
return i;
|
||||
}
|
||||
|
||||
ssize_t BufferedTokenStream::previousTokenOnChannel(ssize_t i, int channel) const {
|
||||
do {
|
||||
if (_tokens[(size_t)i]->getChannel() == channel)
|
||||
ssize_t BufferedTokenStream::previousTokenOnChannel(size_t i, int channel) {
|
||||
sync(i);
|
||||
if (i >= size()) {
|
||||
// the EOF token is on every channel
|
||||
return size() - 1;
|
||||
}
|
||||
|
||||
while (true) {
|
||||
Ref<Token> token = _tokens[i];
|
||||
if (token->getType() == EOF || token->getChannel() == channel) {
|
||||
return i;
|
||||
}
|
||||
|
||||
if (i == 0)
|
||||
return -1;
|
||||
return i;
|
||||
i--;
|
||||
} while (true);
|
||||
return -1;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
std::vector<Ref<Token>> BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex, int channel) {
|
||||
|
@ -301,7 +325,12 @@ std::vector<Ref<Token>> BufferedTokenStream::getHiddenTokensToLeft(size_t tokenI
|
|||
throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1));
|
||||
}
|
||||
|
||||
ssize_t prevOnChannel = previousTokenOnChannel((ssize_t)tokenIndex - 1, Lexer::DEFAULT_TOKEN_CHANNEL);
|
||||
if (tokenIndex == 0) {
|
||||
// Obviously no tokens can appear before the first token.
|
||||
return { };
|
||||
}
|
||||
|
||||
ssize_t prevOnChannel = previousTokenOnChannel(tokenIndex - 1, Lexer::DEFAULT_TOKEN_CHANNEL);
|
||||
if (prevOnChannel == (ssize_t)tokenIndex - 1) {
|
||||
return { };
|
||||
}
|
||||
|
@ -334,6 +363,10 @@ std::vector<Ref<Token>> BufferedTokenStream::filterForChannel(size_t from, size_
|
|||
return hidden;
|
||||
}
|
||||
|
||||
bool BufferedTokenStream::isInitialized() const {
|
||||
return !_needSetup;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the text of all tokens in this buffer.
|
||||
*/
|
||||
|
|
|
@ -38,16 +38,18 @@ namespace antlr {
|
|||
namespace v4 {
|
||||
namespace runtime {
|
||||
|
||||
/// Buffer all input tokens but do on-demand fetching of new tokens from lexer.
|
||||
/// Useful when the parser or lexer has to set context/mode info before proper
|
||||
/// lexing of future tokens. The ST template parser needs this, for example,
|
||||
/// because it has to constantly flip back and forth between inside/output
|
||||
/// templates. E.g., <names:{hi, <it>}> has to parse names as part of an
|
||||
/// expression but "hi, <it>" as a nested template.
|
||||
///
|
||||
/// You can't use this stream if you pass whitespace or other off-channel tokens
|
||||
/// to the parser. The stream can't ignore off-channel tokens.
|
||||
/// (UnbufferedTokenStream is the same way.) Use CommonTokenStream.
|
||||
/**
|
||||
* This implementation of {@link TokenStream} loads tokens from a
|
||||
* {@link TokenSource} on-demand, and places the tokens in a buffer to provide
|
||||
* access to any previous token by index.
|
||||
*
|
||||
* <p>
|
||||
* This token stream ignores the value of {@link Token#getChannel}. If your
|
||||
* parser requires the token stream to filter tokens to only those on a particular
|
||||
* channel, such as {@link Token#DEFAULT_CHANNEL} or
|
||||
* {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as
|
||||
* {@link CommonTokenStream}.</p>
|
||||
*/
|
||||
class BufferedTokenStream : public TokenStream {
|
||||
public:
|
||||
BufferedTokenStream(TokenSource *tokenSource);
|
||||
|
@ -120,30 +122,46 @@ namespace runtime {
|
|||
virtual void fill();
|
||||
|
||||
protected:
|
||||
/**
|
||||
* The {@link TokenSource} from which tokens for this stream are fetched.
|
||||
*/
|
||||
TokenSource *_tokenSource;
|
||||
|
||||
/// Record every single token pulled from the source so we can reproduce
|
||||
/// chunks of it later. This list captures everything so we can access
|
||||
/// complete input text.
|
||||
// ml: we own the tokens produced by the token factory.
|
||||
/**
|
||||
* A collection of all tokens fetched from the token source. The list is
|
||||
* considered a complete view of the input once {@link #fetchedEOF} is set
|
||||
* to {@code true}.
|
||||
*/
|
||||
std::vector<Ref<Token>> _tokens;
|
||||
|
||||
/// <summary>
|
||||
/// The index into <seealso cref="#tokens"/> of the current token (next token to
|
||||
/// consume). <seealso cref="#tokens"/>{@code [}<seealso cref="#p"/>{@code ]} should be
|
||||
/// <seealso cref="#LT LT(1)"/>. <seealso cref="#p"/>{@code =-1} indicates need to initialize
|
||||
/// with first token. The constructor doesn't get a token. First call to
|
||||
/// <seealso cref="#LT LT(1)"/> or whatever gets the first token and sets
|
||||
/// <seealso cref="#p"/>{@code =0;}.
|
||||
/// </summary>
|
||||
/**
|
||||
* The index into {@link #tokens} of the current token (next token to
|
||||
* {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be
|
||||
* {@link #LT LT(1)}.
|
||||
*
|
||||
* <p>This field is set to -1 when the stream is first constructed or when
|
||||
* {@link #setTokenSource} is called, indicating that the first token has
|
||||
* not yet been fetched from the token source. For additional information,
|
||||
* see the documentation of {@link IntStream} for a description of
|
||||
* Initializing Methods.</p>
|
||||
*/
|
||||
// ml: since -1 would require making this member signed for just this single aspect, we use a member _needSetup instead.
|
||||
// Use bool isInitialized() to find out if this stream has started reading.
|
||||
size_t _p;
|
||||
|
||||
/// <summary>
|
||||
/// Set to {@code true} when the EOF token is fetched. Do not continue fetching
|
||||
/// tokens after that point, or multiple EOF tokens could end up in the
|
||||
/// <seealso cref="#tokens"/> array.
|
||||
/// </summary>
|
||||
/// <seealso cref= #fetch </seealso>
|
||||
/**
|
||||
* Indicates whether the {@link Token#EOF} token has been fetched from
|
||||
* {@link #tokenSource} and added to {@link #tokens}. This field improves
|
||||
* performance for the following cases:
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@link #consume}: The lookahead check in {@link #consume} to prevent
|
||||
* consuming the EOF symbol is optimized by checking the values of
|
||||
* {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.</li>
|
||||
* <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into
|
||||
* {@link #tokens} is trivial with this field.</li>
|
||||
* </ul>
|
||||
*/
|
||||
bool _fetchedEOF;
|
||||
|
||||
/// <summary>
|
||||
|
@ -177,17 +195,30 @@ namespace runtime {
|
|||
void lazyInit();
|
||||
virtual void setup();
|
||||
|
||||
/// Given a starting index, return the index of the next token on channel.
|
||||
/// Return i if tokens[i] is on channel. Return -1 if there are no tokens
|
||||
/// on channel between i and EOF.
|
||||
/**
|
||||
* Given a starting index, return the index of the next token on channel.
|
||||
* Return {@code i} if {@code tokens[i]} is on channel. Return the index of
|
||||
* the EOF token if there are no tokens on channel between {@code i} and
|
||||
* EOF.
|
||||
*/
|
||||
virtual ssize_t nextTokenOnChannel(size_t i, int channel);
|
||||
|
||||
/// Given a starting index, return the index of the previous token on channel.
|
||||
/// Return i if tokens[i] is on channel. Return -1 if there are no tokens
|
||||
/// on channel between i and 0.
|
||||
virtual ssize_t previousTokenOnChannel(ssize_t i, int channel) const;
|
||||
/**
|
||||
* Given a starting index, return the index of the previous token on
|
||||
* channel. Return {@code i} if {@code tokens[i]} is on channel. Return -1
|
||||
* if there are no tokens on channel between {@code i} and 0.
|
||||
*
|
||||
* <p>
|
||||
* If {@code i} specifies an index at or after the EOF token, the EOF token
|
||||
* index is returned. This is due to the fact that the EOF token is treated
|
||||
* as though it were on every channel.</p>
|
||||
*/
|
||||
virtual ssize_t previousTokenOnChannel(size_t i, int channel);
|
||||
|
||||
virtual std::vector<Ref<Token>> filterForChannel(size_t from, size_t to, int channel);
|
||||
|
||||
bool isInitialized() const;
|
||||
|
||||
private:
|
||||
bool _needSetup;
|
||||
void InitializeInstanceFields();
|
||||
|
|
|
@ -70,7 +70,6 @@ CommonToken::CommonToken(int type, const std::wstring &text) {
|
|||
|
||||
CommonToken::CommonToken(Token *oldToken) {
|
||||
InitializeInstanceFields();
|
||||
_text = oldToken->getText();
|
||||
_type = oldToken->getType();
|
||||
_line = oldToken->getLine();
|
||||
_index = oldToken->getTokenIndex();
|
||||
|
@ -79,9 +78,11 @@ CommonToken::CommonToken(Token *oldToken) {
|
|||
_start = oldToken->getStartIndex();
|
||||
_stop = oldToken->getStopIndex();
|
||||
|
||||
if (is<CommonToken*>(oldToken)) {
|
||||
_source = (static_cast<CommonToken*>(oldToken))->_source;
|
||||
if (is<CommonToken *>(oldToken)) {
|
||||
_text = (static_cast<CommonToken *>(oldToken))->_text;
|
||||
_source = (static_cast<CommonToken *>(oldToken))->_source;
|
||||
} else {
|
||||
_text = oldToken->getText();
|
||||
_source = { oldToken->getTokenSource(), oldToken->getInputStream() };
|
||||
}
|
||||
}
|
||||
|
@ -179,4 +180,5 @@ void CommonToken::InitializeInstanceFields() {
|
|||
_index = -1;
|
||||
_start = 0;
|
||||
_stop = 0;
|
||||
_source = EMPTY_SOURCE;
|
||||
}
|
||||
|
|
|
@ -40,50 +40,121 @@ namespace runtime {
|
|||
|
||||
class CommonToken : public WritableToken {
|
||||
protected:
|
||||
/**
|
||||
* An empty {@link Pair} which is used as the default value of
|
||||
* {@link #source} for tokens that do not have a source.
|
||||
*/
|
||||
static const std::pair<TokenSource*, CharStream*> EMPTY_SOURCE;
|
||||
|
||||
/**
|
||||
* This is the backing field for {@link #getType} and {@link #setType}.
|
||||
*/
|
||||
int _type;
|
||||
int _line;
|
||||
int _charPositionInLine; // set to invalid position
|
||||
int _channel;
|
||||
std::pair<TokenSource*, CharStream*> _source; // Pure references, usually from statically allocated classes.
|
||||
|
||||
/// We need to be able to change the text once in a while. If
|
||||
/// this is non-empty, then getText should return this. Note that
|
||||
/// start/stop are not affected by changing this.
|
||||
///
|
||||
// TO_DO: can store these in map in token stream rather than as field here
|
||||
/**
|
||||
* This is the backing field for {@link #getLine} and {@link #setLine}.
|
||||
*/
|
||||
int _line;
|
||||
|
||||
/**
|
||||
* This is the backing field for {@link #getCharPositionInLine} and
|
||||
* {@link #setCharPositionInLine}.
|
||||
*/
|
||||
int _charPositionInLine; // set to invalid position
|
||||
|
||||
/**
|
||||
* This is the backing field for {@link #getChannel} and
|
||||
* {@link #setChannel}.
|
||||
*/
|
||||
int _channel;
|
||||
|
||||
/**
|
||||
* This is the backing field for {@link #getTokenSource} and
|
||||
* {@link #getInputStream}.
|
||||
*
|
||||
* <p>
|
||||
* These properties share a field to reduce the memory footprint of
|
||||
* {@link CommonToken}. Tokens created by a {@link CommonTokenFactory} from
|
||||
* the same source and input stream share a reference to the same
|
||||
* {@link Pair} containing these values.</p>
|
||||
*/
|
||||
|
||||
std::pair<TokenSource*, CharStream*> _source; // ml: pure references, usually from statically allocated classes.
|
||||
|
||||
/**
|
||||
* This is the backing field for {@link #getText} when the token text is
|
||||
* explicitly set in the constructor or via {@link #setText}.
|
||||
*
|
||||
* @see #getText()
|
||||
*/
|
||||
std::wstring _text;
|
||||
|
||||
/// <summary>
|
||||
/// What token number is this from 0..n-1 tokens; < 0 implies invalid index </summary>
|
||||
/**
|
||||
* This is the backing field for {@link #getTokenIndex} and
|
||||
* {@link #setTokenIndex}.
|
||||
*/
|
||||
int _index;
|
||||
|
||||
/// <summary>
|
||||
/// The char position into the input buffer where this token starts </summary>
|
||||
/**
|
||||
* This is the backing field for {@link #getStartIndex} and
|
||||
* {@link #setStartIndex}.
|
||||
*/
|
||||
int _start;
|
||||
|
||||
/// <summary>
|
||||
/// The char position into the input buffer where this token stops </summary>
|
||||
/**
|
||||
* This is the backing field for {@link #getStopIndex} and
|
||||
* {@link #setStopIndex}.
|
||||
*/
|
||||
int _stop;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Constructs a new {@link CommonToken} with the specified token type.
|
||||
*
|
||||
* @param type The token type.
|
||||
*/
|
||||
CommonToken(int type);
|
||||
CommonToken(std::pair<TokenSource*, CharStream*> source, int type, int channel, int start, int stop);
|
||||
|
||||
/**
|
||||
* Constructs a new {@link CommonToken} with the specified token type and
|
||||
* text.
|
||||
*
|
||||
* @param type The token type.
|
||||
* @param text The text of the token.
|
||||
*/
|
||||
CommonToken(int type, const std::wstring &text);
|
||||
|
||||
/**
|
||||
* Constructs a new {@link CommonToken} as a copy of another {@link Token}.
|
||||
*
|
||||
* <p>
|
||||
* If {@code oldToken} is also a {@link CommonToken} instance, the newly
|
||||
* constructed token will share a reference to the {@link #text} field and
|
||||
* the {@link Pair} stored in {@link #source}. Otherwise, {@link #text} will
|
||||
* be assigned the result of calling {@link #getText}, and {@link #source}
|
||||
* will be constructed from the result of {@link Token#getTokenSource} and
|
||||
* {@link Token#getInputStream}.</p>
|
||||
*
|
||||
* @param oldToken The token to copy.
|
||||
*/
|
||||
CommonToken(Token *oldToken);
|
||||
|
||||
virtual int getType() const override;
|
||||
virtual void setLine(int line) override;
|
||||
|
||||
/**
|
||||
* Explicitly set the text for this token. If {@code text} is not
|
||||
* {@code null}, then {@link #getText} will return this value rather than
|
||||
* extracting the text from the input.
|
||||
*
|
||||
* @param text The explicit text of the token, or {@code null} if the text
|
||||
* should be obtained from the input along with the start and stop indexes
|
||||
* of the token.
|
||||
*/
|
||||
virtual void setText(const std::wstring &text) override;
|
||||
virtual std::wstring getText() override;
|
||||
|
||||
/// <summary>
|
||||
/// Override the text for this token. getText() will return this text
|
||||
/// rather than pulling from the buffer. Note that this does not mean
|
||||
/// that start/stop indexes are not valid. It means that that input
|
||||
/// was converted to a new string in the token object.
|
||||
/// </summary>
|
||||
virtual void setText(const std::wstring &text) override;
|
||||
virtual void setLine(int line) override;
|
||||
virtual int getLine() override;
|
||||
|
||||
virtual int getCharPositionInLine() override;
|
||||
|
|
|
@ -38,22 +38,60 @@ namespace antlr {
|
|||
namespace v4 {
|
||||
namespace runtime {
|
||||
|
||||
/**
|
||||
* This default implementation of {@link TokenFactory} creates
|
||||
* {@link CommonToken} objects.
|
||||
*/
|
||||
class CommonTokenFactory : public TokenFactory<CommonToken> {
|
||||
public:
|
||||
/**
|
||||
* The default {@link CommonTokenFactory} instance.
|
||||
*
|
||||
* <p>
|
||||
* This token factory does not explicitly copy token text when constructing
|
||||
* tokens.</p>
|
||||
*/
|
||||
static const Ref<TokenFactory<CommonToken>> DEFAULT;
|
||||
|
||||
/// <summary>
|
||||
/// Copy text for token out of input char stream. Useful when input
|
||||
/// stream is unbuffered. </summary>
|
||||
/// <seealso cref= UnbufferedCharStream </seealso>
|
||||
protected:
|
||||
/**
|
||||
* Indicates whether {@link CommonToken#setText} should be called after
|
||||
* constructing tokens to explicitly set the text. This is useful for cases
|
||||
* where the input stream might not be able to provide arbitrary substrings
|
||||
* of text from the input after the lexer creates a token (e.g. the
|
||||
* implementation of {@link CharStream#getText} in
|
||||
* {@link UnbufferedCharStream} throws an
|
||||
* {@link UnsupportedOperationException}). Explicitly setting the token text
|
||||
* allows {@link Token#getText} to be called at any time regardless of the
|
||||
* input stream implementation.
|
||||
*
|
||||
* <p>
|
||||
* The default value is {@code false} to avoid the performance and memory
|
||||
* overhead of copying text for every token unless explicitly requested.</p>
|
||||
*/
|
||||
const bool copyText;
|
||||
|
||||
public:
|
||||
/// Create factory and indicate whether or not the factory copy
|
||||
/// text out of the char stream.
|
||||
/**
|
||||
* Constructs a {@link CommonTokenFactory} with the specified value for
|
||||
* {@link #copyText}.
|
||||
*
|
||||
* <p>
|
||||
* When {@code copyText} is {@code false}, the {@link #DEFAULT} instance
|
||||
* should be used instead of constructing a new instance.</p>
|
||||
*
|
||||
* @param copyText The value for {@link #copyText}.
|
||||
*/
|
||||
CommonTokenFactory(bool copyText);
|
||||
|
||||
/**
|
||||
* Constructs a {@link CommonTokenFactory} with {@link #copyText} set to
|
||||
* {@code false}.
|
||||
*
|
||||
* <p>
|
||||
* The {@link #DEFAULT} instance should be used instead of calling this
|
||||
* directly.</p>
|
||||
*/
|
||||
CommonTokenFactory();
|
||||
|
||||
virtual Ref<CommonToken> create(std::pair<TokenSource*, CharStream*> source, int type,
|
||||
|
|
|
@ -38,31 +38,60 @@ namespace antlr {
|
|||
namespace v4 {
|
||||
namespace runtime {
|
||||
|
||||
/// <summary>
|
||||
/// The most common stream of tokens where every token is buffered up
|
||||
/// and tokens are filtered for a certain channel (the parser will only
|
||||
/// see these tokens).
|
||||
///
|
||||
/// Even though it buffers all of the tokens, this token stream pulls tokens
|
||||
/// from the tokens source on demand. In other words, until you ask for a
|
||||
/// token using consume(), LT(), etc. the stream does not pull from the lexer.
|
||||
///
|
||||
/// The only difference between this stream and <seealso cref="BufferedTokenStream"/> superclass
|
||||
/// is that this stream knows how to ignore off channel tokens. There may be
|
||||
/// a performance advantage to using the superclass if you don't pass
|
||||
/// whitespace and comments etc. to the parser on a hidden channel (i.e.,
|
||||
/// you set {@code $channel} instead of calling {@code skip()} in lexer rules.)
|
||||
/// </summary>
|
||||
/// <seealso cref= UnbufferedTokenStream </seealso>
|
||||
/// <seealso cref= BufferedTokenStream </seealso>
|
||||
/**
|
||||
* This class extends {@link BufferedTokenStream} with functionality to filter
|
||||
* token streams to tokens on a particular channel (tokens where
|
||||
* {@link Token#getChannel} returns a particular value).
|
||||
*
|
||||
* <p>
|
||||
* This token stream provides access to all tokens by index or when calling
|
||||
* methods like {@link #getText}. The channel filtering is only used for code
|
||||
* accessing tokens via the lookahead methods {@link #LA}, {@link #LT}, and
|
||||
* {@link #LB}.</p>
|
||||
*
|
||||
* <p>
|
||||
* By default, tokens are placed on the default channel
|
||||
* ({@link Token#DEFAULT_CHANNEL}), but may be reassigned by using the
|
||||
* {@code ->channel(HIDDEN)} lexer command, or by using an embedded action to
|
||||
* call {@link Lexer#setChannel}.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* Note: lexer rules which use the {@code ->skip} lexer command or call
|
||||
* {@link Lexer#skip} do not produce tokens at all, so input text matched by
|
||||
* such a rule will not be available as part of the token stream, regardless of
|
||||
* channel.</p>
|
||||
*/
|
||||
class CommonTokenStream : public BufferedTokenStream {
|
||||
/// <summary>
|
||||
/// Skip tokens on any channel but this one; this is how we skip whitespace... </summary>
|
||||
protected:
|
||||
/**
|
||||
* Specifies the channel to use for filtering tokens.
|
||||
*
|
||||
* <p>
|
||||
* The default value is {@link Token#DEFAULT_CHANNEL}, which matches the
|
||||
* default channel assigned to tokens created by the lexer.</p>
|
||||
*/
|
||||
int channel;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Constructs a new {@link CommonTokenStream} using the specified token
|
||||
* source and the default token channel ({@link Token#DEFAULT_CHANNEL}).
|
||||
*
|
||||
* @param tokenSource The token source.
|
||||
*/
|
||||
CommonTokenStream(TokenSource *tokenSource);
|
||||
|
||||
/**
|
||||
* Constructs a new {@link CommonTokenStream} using the specified token
|
||||
* source and filtering tokens to the specified channel. Only tokens whose
|
||||
* {@link Token#getChannel} matches {@code channel} or have the
|
||||
* {@link Token#getType} equal to {@link Token#EOF} will be returned by the
|
||||
* token stream lookahead methods.
|
||||
*
|
||||
* @param tokenSource The token source.
|
||||
* @param channel The channel to use for filtering tokens.
|
||||
*/
|
||||
CommonTokenStream(TokenSource *tokenSource, int channel);
|
||||
|
||||
protected:
|
||||
|
|
|
@ -40,8 +40,23 @@ namespace runtime {
|
|||
|
||||
class ConsoleErrorListener : public BaseErrorListener {
|
||||
public:
|
||||
/**
|
||||
* Provides a default instance of {@link ConsoleErrorListener}.
|
||||
*/
|
||||
static ConsoleErrorListener INSTANCE;
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* <p>
|
||||
* This implementation prints messages to {@link System#err} containing the
|
||||
* values of {@code line}, {@code charPositionInLine}, and {@code msg} using
|
||||
* the following format.</p>
|
||||
*
|
||||
* <pre>
|
||||
* line <em>line</em>:<em>charPositionInLine</em> <em>msg</em>
|
||||
* </pre>
|
||||
*/
|
||||
virtual void syntaxError(IRecognizer *recognizer, Ref<Token> offendingSymbol, size_t line, int charPositionInLine,
|
||||
const std::wstring &msg, std::exception_ptr e) override;
|
||||
};
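
For reference, a plausible body for syntaxError that matches the documented "line <em>line</em>:<em>charPositionInLine</em> <em>msg</em>" format. The real implementation lives in ConsoleErrorListener.cpp, which this diff does not show, so treat this purely as an illustration.

```cpp
#include <iostream>

#include "ConsoleErrorListener.h"

namespace org { namespace antlr { namespace v4 { namespace runtime {

// Illustrative only: prints "line <line>:<charPositionInLine> <msg>" to stderr.
void ConsoleErrorListener::syntaxError(IRecognizer * /*recognizer*/, Ref<Token> /*offendingSymbol*/,
                                       size_t line, int charPositionInLine,
                                       const std::wstring &msg, std::exception_ptr /*e*/) {
  std::wcerr << L"line " << line << L":" << charPositionInLine << L" " << msg << std::endl;
}

}}}} // namespace org::antlr::v4::runtime
```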
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
#include "Parser.h"
|
||||
#include "Strings.h"
|
||||
#include "CommonToken.h"
|
||||
#include "Vocabulary.h"
|
||||
|
||||
#include "DefaultErrorStrategy.h"
|
||||
|
||||
|
@ -174,7 +175,7 @@ void DefaultErrorStrategy::reportNoViableAlternative(Parser *recognizer, const N
|
|||
|
||||
void DefaultErrorStrategy::reportInputMismatch(Parser *recognizer, const InputMismatchException &e) {
|
||||
std::wstring msg = std::wstring(L"mismatched input ") + getTokenErrorDisplay(e.getOffendingToken()) +
|
||||
std::wstring(L" expecting ") + e.getExpectedTokens().toString(recognizer->getTokenNames());
|
||||
std::wstring(L" expecting ") + e.getExpectedTokens().toString(recognizer->getVocabulary());
|
||||
recognizer->notifyErrorListeners(e.getOffendingToken(), msg, std::make_exception_ptr(e));
|
||||
}
|
||||
|
||||
|
@ -195,7 +196,8 @@ void DefaultErrorStrategy::reportUnwantedToken(Parser *recognizer) {
|
|||
std::wstring tokenName = getTokenErrorDisplay(t);
|
||||
misc::IntervalSet expecting = getExpectedTokens(recognizer);
|
||||
|
||||
std::wstring msg = std::wstring(L"extraneous input ") + tokenName + std::wstring(L" expecting ") + expecting.toString(recognizer->getTokenNames());
|
||||
std::wstring msg = std::wstring(L"extraneous input ") + tokenName + std::wstring(L" expecting ") +
|
||||
expecting.toString(recognizer->getVocabulary());
|
||||
recognizer->notifyErrorListeners(t, msg, nullptr);
|
||||
}
|
||||
|
||||
|
@ -208,7 +210,7 @@ void DefaultErrorStrategy::reportMissingToken(Parser *recognizer) {
|
|||
|
||||
Ref<Token> t = recognizer->getCurrentToken();
|
||||
misc::IntervalSet expecting = getExpectedTokens(recognizer);
|
||||
std::wstring msg = std::wstring(L"missing ") + expecting.toString(recognizer->getTokenNames()) + std::wstring(L" at ") + getTokenErrorDisplay(t);
|
||||
std::wstring msg = L"missing " + expecting.toString(recognizer->getVocabulary()) + L" at " + getTokenErrorDisplay(t);
|
||||
|
||||
recognizer->notifyErrorListeners(t, msg, nullptr);
|
||||
}
|
||||
|
@ -271,7 +273,7 @@ Ref<Token> DefaultErrorStrategy::getMissingSymbol(Parser *recognizer) {
|
|||
if (expectedTokenType == EOF) {
|
||||
tokenText = L"<missing EOF>";
|
||||
} else {
|
||||
tokenText = std::wstring(L"<missing ") + recognizer->getTokenNames()[(size_t)expectedTokenType] + std::wstring(L">");
|
||||
tokenText = L"<missing " + recognizer->getVocabulary()->getDisplayName(expectedTokenType) + L">";
|
||||
}
|
||||
Ref<Token> current = currentSymbol;
|
||||
Ref<Token> lookback = recognizer->getTokenStream()->LT(-1);
|
||||
|
|
|
@ -39,32 +39,32 @@ namespace antlr {
|
|||
namespace v4 {
|
||||
namespace runtime {
|
||||
|
||||
/// <summary>
|
||||
/// This is the default error handling mechanism for ANTLR parsers
|
||||
/// and tree parsers.
|
||||
/// </summary>
|
||||
/**
|
||||
* This is the default implementation of {@link ANTLRErrorStrategy} used for
|
||||
* error reporting and recovery in ANTLR parsers.
|
||||
*/
|
||||
class DefaultErrorStrategy : public ANTLRErrorStrategy {
|
||||
public:
|
||||
DefaultErrorStrategy() {
|
||||
InitializeInstanceFields();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// This is true after we see an error and before having successfully
|
||||
/// matched a token. Prevents generation of more than one error message
|
||||
/// per error.
|
||||
/// </summary>
|
||||
/// <seealso cref= #inErrorRecoveryMode </seealso>
|
||||
protected:
|
||||
/**
|
||||
* Indicates whether the error strategy is currently "recovering from an
|
||||
* error". This is used to suppress reporting multiple error messages while
|
||||
* attempting to recover from a detected syntax error.
|
||||
*
|
||||
* @see #inErrorRecoveryMode
|
||||
*/
|
||||
bool errorRecoveryMode;
|
||||
|
||||
/// <summary>
|
||||
/// The index into the input stream where the last error occurred.
|
||||
/// This is used to prevent infinite loops where an error is found
|
||||
/// but no token is consumed during recovery...another error is found,
|
||||
/// ad nauseum. This is a failsafe mechanism to guarantee that at least
|
||||
/// one token/tree node is consumed for two errors.
|
||||
/// </summary>
|
||||
/** The index into the input stream where the last error occurred.
|
||||
* This is used to prevent infinite loops where an error is found
|
||||
* but no token is consumed during recovery...another error is found,
|
||||
* ad nauseam. This is a failsafe mechanism to guarantee that at least
|
||||
* one token/tree node is consumed for two errors.
|
||||
*/
|
||||
int lastErrorIndex;
|
||||
|
||||
misc::IntervalSet lastErrorStates;
|
||||
|
@ -138,52 +138,52 @@ namespace runtime {
|
|||
/// </summary>
|
||||
virtual void recover(Parser *recognizer, const RecognitionException &e) override;
|
||||
|
||||
/// <summary>
|
||||
/// The default implementation of <seealso cref="ANTLRErrorStrategy#sync"/> makes sure
|
||||
/// that the current lookahead symbol is consistent with what were expecting
|
||||
/// at this point in the ATN. You can call this anytime but ANTLR only
|
||||
/// generates code to check before subrules/loops and each iteration.
|
||||
/// <p/>
|
||||
/// Implements Jim Idle's magic sync mechanism in closures and optional
|
||||
/// subrules. E.g.,
|
||||
///
|
||||
/// <pre>
|
||||
/// a : sync ( stuff sync )* ;
|
||||
/// sync : {consume to what can follow sync} ;
|
||||
/// </pre>
|
||||
///
|
||||
/// At the start of a sub rule upon error, <seealso cref="#sync"/> performs single
|
||||
/// token deletion, if possible. If it can't do that, it bails on the current
|
||||
/// rule and uses the default error recovery, which consumes until the
|
||||
/// resynchronization set of the current rule.
|
||||
/// <p/>
|
||||
/// If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
|
||||
/// with an empty alternative), then the expected set includes what follows
|
||||
/// the subrule.
|
||||
/// <p/>
|
||||
/// During loop iteration, it consumes until it sees a token that can start a
|
||||
/// sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
|
||||
/// stay in the loop as long as possible.
|
||||
/// <p/>
|
||||
/// <strong>ORIGINS</strong>
|
||||
/// <p/>
|
||||
/// Previous versions of ANTLR did a poor job of their recovery within loops.
|
||||
/// A single mismatch token or missing token would force the parser to bail
|
||||
/// out of the entire rules surrounding the loop. So, for rule
|
||||
///
|
||||
/// <pre>
|
||||
/// classDef : 'class' ID '{' member* '}'
|
||||
/// </pre>
|
||||
///
|
||||
/// input with an extra token between members would force the parser to
|
||||
/// consume until it found the next class definition rather than the next
|
||||
/// member definition of the current class.
|
||||
/// <p/>
|
||||
/// This functionality costs a little bit of effort because the parser has to
|
||||
/// compare the token set at the start of the loop and at each iteration. If for
|
||||
/// some reason speed is suffering for you, you can turn off this
|
||||
/// functionality by simply overriding this method as a blank { }.
|
||||
/// </summary>
|
||||
/**
|
||||
* The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
|
||||
* that the current lookahead symbol is consistent with what we're expecting
|
||||
* at this point in the ATN. You can call this anytime but ANTLR only
|
||||
* generates code to check before subrules/loops and each iteration.
|
||||
*
|
||||
* <p>Implements Jim Idle's magic sync mechanism in closures and optional
|
||||
* subrules. E.g.,</p>
|
||||
*
|
||||
* <pre>
|
||||
* a : sync ( stuff sync )* ;
|
||||
* sync : {consume to what can follow sync} ;
|
||||
* </pre>
|
||||
*
|
||||
* At the start of a sub rule upon error, {@link #sync} performs single
|
||||
* token deletion, if possible. If it can't do that, it bails on the current
|
||||
* rule and uses the default error recovery, which consumes until the
|
||||
* resynchronization set of the current rule.
|
||||
*
|
||||
* <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
|
||||
* with an empty alternative), then the expected set includes what follows
|
||||
* the subrule.</p>
|
||||
*
|
||||
* <p>During loop iteration, it consumes until it sees a token that can start a
|
||||
* sub rule or what follows the loop. Yes, that is pretty aggressive. We opt to
|
||||
* stay in the loop as long as possible.</p>
|
||||
*
|
||||
* <p><strong>ORIGINS</strong></p>
|
||||
*
|
||||
* <p>Previous versions of ANTLR did a poor job of their recovery within loops.
|
||||
* A single mismatched token or missing token would force the parser to bail
|
||||
* out of the entire rules surrounding the loop. So, for rule</p>
|
||||
*
|
||||
* <pre>
|
||||
* classDef : 'class' ID '{' member* '}'
|
||||
* </pre>
|
||||
*
|
||||
* input with an extra token between members would force the parser to
|
||||
* consume until it found the next class definition rather than the next
|
||||
* member definition of the current class.
|
||||
*
|
||||
* <p>This functionality costs a little bit of effort because the parser has to
|
||||
* compare the token set at the start of the loop and at each iteration. If for
|
||||
* some reason speed is suffering for you, you can turn off this
|
||||
* functionality by simply overriding this method as a blank { }.</p>
|
||||
*/
|
||||
virtual void sync(Parser *recognizer) override;
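For illustration only (this sketch is not part of the change): the comment above notes that the sync check can be switched off by overriding the method with an empty body. A minimal subclass, assuming nothing beyond the declarations in this header:

class NoSyncErrorStrategy : public DefaultErrorStrategy {
public:
  virtual void sync(Parser * /*recognizer*/) override {
    // Intentionally empty: skips the consistency check before subrules/loops.
  }
};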
|
||||
|
||||
/// <summary>
|
||||
|
@ -217,94 +217,96 @@ namespace runtime {
|
|||
/// <param name="e"> the recognition exception </param>
|
||||
virtual void reportFailedPredicate(Parser *recognizer, const FailedPredicateException &e);
|
||||
|
||||
/// <summary>
|
||||
/// This method is called to report a syntax error which requires the removal
|
||||
/// of a token from the input stream. At the time this method is called, the
|
||||
/// erroneous symbol is the current {@code LT(1)} symbol and has not yet been
|
||||
/// removed from the input stream. When this method returns,
|
||||
/// {@code recognizer} is in error recovery mode.
|
||||
/// <p/>
|
||||
/// This method is called when <seealso cref="#singleTokenDeletion"/> identifies
|
||||
/// single-token deletion as a viable recovery strategy for a mismatched
|
||||
/// input error.
|
||||
/// <p/>
|
||||
/// The default implementation simply returns if the handler is already in
|
||||
/// error recovery mode. Otherwise, it calls <seealso cref="#beginErrorCondition"/> to
|
||||
/// enter error recovery mode, followed by calling
|
||||
/// <seealso cref="Parser#notifyErrorListeners"/>.
|
||||
/// </summary>
|
||||
/// <param name="recognizer"> the parser instance </param>
|
||||
/**
|
||||
* This method is called to report a syntax error which requires the removal
|
||||
* of a token from the input stream. At the time this method is called, the
|
||||
* erroneous symbol is the current {@code LT(1)} symbol and has not yet been
|
||||
* removed from the input stream. When this method returns,
|
||||
* {@code recognizer} is in error recovery mode.
|
||||
*
|
||||
* <p>This method is called when {@link #singleTokenDeletion} identifies
|
||||
* single-token deletion as a viable recovery strategy for a mismatched
|
||||
* input error.</p>
|
||||
*
|
||||
* <p>The default implementation simply returns if the handler is already in
|
||||
* error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
|
||||
* enter error recovery mode, followed by calling
|
||||
* {@link Parser#notifyErrorListeners}.</p>
|
||||
*
|
||||
* @param recognizer the parser instance
|
||||
*/
|
||||
virtual void reportUnwantedToken(Parser *recognizer);
|
||||
|
||||
/// <summary>
|
||||
/// This method is called to report a syntax error which requires the
|
||||
/// insertion of a missing token into the input stream. At the time this
|
||||
/// method is called, the missing token has not yet been inserted. When this
|
||||
/// method returns, {@code recognizer} is in error recovery mode.
|
||||
/// <p/>
|
||||
/// This method is called when <seealso cref="#singleTokenInsertion"/> identifies
|
||||
/// single-token insertion as a viable recovery strategy for a mismatched
|
||||
/// input error.
|
||||
/// <p/>
|
||||
/// The default implementation simply returns if the handler is already in
|
||||
/// error recovery mode. Otherwise, it calls <seealso cref="#beginErrorCondition"/> to
|
||||
/// enter error recovery mode, followed by calling
|
||||
/// <seealso cref="Parser#notifyErrorListeners"/>.
|
||||
/// </summary>
|
||||
/// <param name="recognizer"> the parser instance </param>
|
||||
/**
|
||||
* This method is called to report a syntax error which requires the
|
||||
* insertion of a missing token into the input stream. At the time this
|
||||
* method is called, the missing token has not yet been inserted. When this
|
||||
* method returns, {@code recognizer} is in error recovery mode.
|
||||
*
|
||||
* <p>This method is called when {@link #singleTokenInsertion} identifies
|
||||
* single-token insertion as a viable recovery strategy for a mismatched
|
||||
* input error.</p>
|
||||
*
|
||||
* <p>The default implementation simply returns if the handler is already in
|
||||
* error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
|
||||
* enter error recovery mode, followed by calling
|
||||
* {@link Parser#notifyErrorListeners}.</p>
|
||||
*
|
||||
* @param recognizer the parser instance
|
||||
*/
|
||||
virtual void reportMissingToken(Parser *recognizer);
|
||||
|
||||
/// <summary>
|
||||
/// {@inheritDoc}
|
||||
/// <p/>
|
||||
/// The default implementation attempts to recover from the mismatched input
|
||||
/// by using single token insertion and deletion as described below. If the
|
||||
/// recovery attempt fails, this method throws an
|
||||
/// <seealso cref="InputMismatchException"/>.
|
||||
/// <p/>
|
||||
/// <strong>EXTRA TOKEN</strong> (single token deletion)
|
||||
/// <p/>
|
||||
/// {@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
|
||||
/// right token, however, then assume {@code LA(1)} is some extra spurious
|
||||
/// token and delete it. Then consume and return the next token (which was
|
||||
/// the {@code LA(2)} token) as the successful result of the match operation.
|
||||
/// <p/>
|
||||
/// This recovery strategy is implemented by <seealso cref="#singleTokenDeletion"/>.
|
||||
/// <p/>
|
||||
/// <strong>MISSING TOKEN</strong> (single token insertion)
|
||||
/// <p/>
|
||||
/// If the current token (at {@code LA(1)}) is consistent with what could come
|
||||
/// after the expected {@code LA(1)} token, then assume the token is missing
|
||||
/// and use the parser's <seealso cref="TokenFactory"/> to create it on the fly. The
|
||||
/// "insertion" is performed by returning the created token as the successful
|
||||
/// result of the match operation.
|
||||
/// <p/>
|
||||
/// This recovery strategy is implemented by <seealso cref="#singleTokenInsertion"/>.
|
||||
/// <p/>
|
||||
/// <strong>EXAMPLE</strong>
|
||||
/// <p/>
|
||||
/// For example, input {@code i=(3;} is clearly missing the {@code ')'}. When
|
||||
/// the parser returns from the nested call to {@code expr}, it will have
|
||||
/// call chain:
|
||||
///
|
||||
/// <pre>
|
||||
/// stat -> expr -> atom
|
||||
/// </pre>
|
||||
///
|
||||
/// and it will be trying to match the {@code ')'} at this point in the
|
||||
/// derivation:
|
||||
///
|
||||
/// <pre>
|
||||
/// => ID '=' '(' INT ')' ('+' atom)* ';'
|
||||
/// ^
|
||||
/// </pre>
|
||||
///
|
||||
/// The attempt to match {@code ')'} will fail when it sees {@code ';'} and
|
||||
/// call <seealso cref="#recoverInline"/>. To recover, it sees that {@code LA(1)==';'}
|
||||
/// is in the set of tokens that can follow the {@code ')'} token reference
|
||||
/// in rule {@code atom}. It can assume that you forgot the {@code ')'}.
|
||||
/// </summary>
|
||||
public:
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* <p>The default implementation attempts to recover from the mismatched input
|
||||
* by using single token insertion and deletion as described below. If the
|
||||
* recovery attempt fails, this method throws an
|
||||
* {@link InputMismatchException}.</p>
|
||||
*
|
||||
* <p><strong>EXTRA TOKEN</strong> (single token deletion)</p>
|
||||
*
|
||||
* <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
|
||||
* right token, however, then assume {@code LA(1)} is some extra spurious
|
||||
* token and delete it. Then consume and return the next token (which was
|
||||
* the {@code LA(2)} token) as the successful result of the match operation.</p>
|
||||
*
|
||||
* <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p>
|
||||
*
|
||||
* <p><strong>MISSING TOKEN</strong> (single token insertion)</p>
|
||||
*
|
||||
* <p>If the current token (at {@code LA(1)}) is consistent with what could come
|
||||
* after the expected {@code LA(1)} token, then assume the token is missing
|
||||
* and use the parser's {@link TokenFactory} to create it on the fly. The
|
||||
* "insertion" is performed by returning the created token as the successful
|
||||
* result of the match operation.</p>
|
||||
*
|
||||
* <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p>
|
||||
*
|
||||
* <p><strong>EXAMPLE</strong></p>
|
||||
*
|
||||
* <p>For example, input {@code i=(3;} is clearly missing the {@code ')'}. When
|
||||
* the parser returns from the nested call to {@code expr}, it will have
|
||||
* call chain:</p>
|
||||
*
|
||||
* <pre>
|
||||
* stat → expr → atom
|
||||
* </pre>
|
||||
*
|
||||
* and it will be trying to match the {@code ')'} at this point in the
|
||||
* derivation:
|
||||
*
|
||||
* <pre>
|
||||
* => ID '=' '(' INT ')' ('+' atom)* ';'
|
||||
* ^
|
||||
* </pre>
|
||||
*
|
||||
* The attempt to match {@code ')'} will fail when it sees {@code ';'} and
|
||||
* call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'}
|
||||
* is in the set of tokens that can follow the {@code ')'} token reference
|
||||
* in rule {@code atom}. It can assume that you forgot the {@code ')'}.
|
||||
*/
|
||||
virtual Ref<Token> recoverInline(Parser *recognizer) override;
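For illustration only (not part of the change): an error strategy such as the one declared above is normally installed on a parser before parsing starts. The parser class, start rule, and the setErrorHandler name mirror the Java runtime and are assumptions here:

MyParser parser(&tokens);  // hypothetical generated parser over a CommonTokenStream
parser.setErrorHandler(std::make_shared<DefaultErrorStrategy>());
Ref<ParserRuleContext> tree = parser.startRule();  // hypothetical start rule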
|
||||
|
||||
/// <summary>
|
||||
|
|
|
@ -32,8 +32,7 @@
|
|||
|
||||
using namespace org::antlr::v4::runtime;
|
||||
|
||||
RuntimeException::RuntimeException(const std::string &msg)
|
||||
: std::exception(), _message(msg) {
|
||||
RuntimeException::RuntimeException(const std::string &msg) : std::exception(), _message(msg) {
|
||||
}
|
||||
|
||||
const char* RuntimeException::what() const NOEXCEPT {
|
||||
|
|
|
@ -33,4 +33,4 @@
|
|||
|
||||
using namespace org::antlr::v4::runtime;
|
||||
|
||||
const std::wstring IntStream::UNKNOWN_SOURCE_NAME = L"<unknown>";
|
||||
const std::string IntStream::UNKNOWN_SOURCE_NAME = "<unknown>";
|
||||
|
|
|
@ -63,7 +63,7 @@ namespace runtime {
|
|||
/// The value returned by <seealso cref="#getSourceName"/> when the actual name of the
|
||||
/// underlying source is not known.
|
||||
/// </summary>
|
||||
static const std::wstring UNKNOWN_SOURCE_NAME;
|
||||
static const std::string UNKNOWN_SOURCE_NAME;
|
||||
|
||||
/// <summary>
|
||||
/// Consumes the current symbol in the stream. This method has the following
|
||||
|
|
|
@ -33,10 +33,13 @@
|
|||
|
||||
using namespace org::antlr::v4::runtime;
|
||||
|
||||
InterpreterRuleContext::InterpreterRuleContext() {
|
||||
}
|
||||
|
||||
InterpreterRuleContext::InterpreterRuleContext(std::weak_ptr<ParserRuleContext> parent, int invokingStateNumber, ssize_t ruleIndex)
|
||||
: ParserRuleContext(parent, invokingStateNumber), ruleIndex(ruleIndex) {
|
||||
: ParserRuleContext(parent, invokingStateNumber), _ruleIndex(ruleIndex) {
|
||||
}
|
||||
|
||||
ssize_t InterpreterRuleContext::getRuleIndex() const {
|
||||
return ruleIndex;
|
||||
return _ruleIndex;
|
||||
}
|
||||
|
|
|
@ -38,20 +38,38 @@ namespace antlr {
|
|||
namespace v4 {
|
||||
namespace runtime {
|
||||
|
||||
/// <summary>
|
||||
/// This object is used by the ParserInterpreter and is the same as a regular
|
||||
/// ParserRuleContext except that we need to track the rule index of the
|
||||
/// current context so that we can build parse trees.
|
||||
/// </summary>
|
||||
/**
|
||||
* This class extends {@link ParserRuleContext} by allowing the value of
|
||||
* {@link #getRuleIndex} to be explicitly set for the context.
|
||||
*
|
||||
* <p>
|
||||
* {@link ParserRuleContext} does not include field storage for the rule index
|
||||
* since the context classes created by the code generator override the
|
||||
* {@link #getRuleIndex} method to return the correct value for that context.
|
||||
* Since the parser interpreter does not use the context classes generated for a
|
||||
* parser, this class (with slightly more memory overhead per node) is used to
|
||||
* provide equivalent functionality.</p>
|
||||
*/
|
||||
class InterpreterRuleContext : public ParserRuleContext {
|
||||
private:
|
||||
const ssize_t ruleIndex;
|
||||
|
||||
public:
|
||||
InterpreterRuleContext();
|
||||
|
||||
/**
|
||||
* Constructs a new {@link InterpreterRuleContext} with the specified
|
||||
* parent, invoking state, and rule index.
|
||||
*
|
||||
* @param parent The parent context.
|
||||
* @param invokingStateNumber The invoking state number.
|
||||
* @param ruleIndex The rule index for the current context.
|
||||
*/
|
||||
InterpreterRuleContext(std::weak_ptr<ParserRuleContext> parent, int invokingStateNumber, ssize_t ruleIndex);
|
||||
|
||||
virtual ssize_t getRuleIndex() const override;
|
||||
};
|
||||
|
||||
protected:
|
||||
/** This is the backing field for {@link #getRuleIndex}. */
|
||||
const ssize_t _ruleIndex = -1;
|
||||
};
|
||||
|
||||
} // namespace runtime
|
||||
} // namespace v4
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include "DFA.h"
|
||||
#include "EmptyPredictionContext.h"
|
||||
#include "Exceptions.h"
|
||||
#include "VocabularyImpl.h"
|
||||
|
||||
#include "LexerInterpreter.h"
|
||||
|
||||
|
@ -42,13 +43,23 @@ using namespace org::antlr::v4::runtime;
|
|||
LexerInterpreter::LexerInterpreter(const std::wstring &grammarFileName, const std::vector<std::wstring> &tokenNames,
|
||||
const std::vector<std::wstring> &ruleNames, const std::vector<std::wstring> &modeNames, const atn::ATN &atn,
|
||||
CharStream *input)
|
||||
: Lexer(input), grammarFileName(grammarFileName), _tokenNames(tokenNames), _ruleNames(ruleNames), _modeNames(modeNames),
|
||||
_atn(atn) {
|
||||
: LexerInterpreter(grammarFileName, dfa::VocabularyImpl::fromTokenNames(tokenNames), ruleNames, modeNames, atn, input) {
|
||||
}
|
||||
|
||||
LexerInterpreter::LexerInterpreter(const std::wstring &grammarFileName, Ref<dfa::Vocabulary> vocabulary,
|
||||
const std::vector<std::wstring> &ruleNames, const std::vector<std::wstring> &modeNames, const atn::ATN &atn,
|
||||
CharStream *input)
|
||||
: Lexer(input), _grammarFileName(grammarFileName), _atn(atn), _ruleNames(ruleNames), _modeNames(modeNames),
|
||||
_vocabulary(vocabulary) {
|
||||
|
||||
if (_atn.grammarType != atn::ATNType::LEXER) {
|
||||
throw IllegalArgumentException("The ATN must be a lexer ATN.");
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < atn.maxTokenType; i++) {
|
||||
_tokenNames.push_back(vocabulary->getDisplayName(i));
|
||||
}
|
||||
|
||||
_sharedContextCache = std::make_shared<atn::PredictionContextCache>();
|
||||
for (size_t i = 0; i < (size_t)atn.getNumberOfDecisions(); ++i) {
|
||||
_decisionToDFA.push_back(dfa::DFA(_atn.getDecisionState((int)i), (int)i));
|
||||
|
@ -66,7 +77,7 @@ const atn::ATN& LexerInterpreter::getATN() const {
|
|||
}
|
||||
|
||||
std::wstring LexerInterpreter::getGrammarFileName() const {
|
||||
return grammarFileName;
|
||||
return _grammarFileName;
|
||||
}
|
||||
|
||||
const std::vector<std::wstring>& LexerInterpreter::getTokenNames() const {
|
||||
|
@ -80,3 +91,11 @@ const std::vector<std::wstring>& LexerInterpreter::getRuleNames() const {
|
|||
const std::vector<std::wstring>& LexerInterpreter::getModeNames() const {
|
||||
return _modeNames;
|
||||
}
|
||||
|
||||
Ref<dfa::Vocabulary> LexerInterpreter::getVocabulary() const {
|
||||
if (_vocabulary != nullptr) {
|
||||
return _vocabulary;
|
||||
}
|
||||
|
||||
return Lexer::getVocabulary();
|
||||
}
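A small usage sketch (not part of the change); lexerInterp and tokenType are placeholders for an already constructed LexerInterpreter and a valid token type:

Ref<dfa::Vocabulary> vocab = lexerInterp.getVocabulary();  // falls back to Lexer::getVocabulary() when none was supplied
std::wstring display = vocab->getDisplayName(tokenType);   // same lookup used above to fill _tokenNames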
|
||||
|
|
|
@ -40,9 +40,14 @@ namespace runtime {
|
|||
|
||||
class LexerInterpreter : public Lexer {
|
||||
public:
|
||||
// @deprecated
|
||||
LexerInterpreter(const std::wstring &grammarFileName, const std::vector<std::wstring> &tokenNames,
|
||||
const std::vector<std::wstring> &ruleNames, const std::vector<std::wstring> &modeNames,
|
||||
const atn::ATN &atn, CharStream *input);
|
||||
LexerInterpreter(const std::wstring &grammarFileName, Ref<dfa::Vocabulary> vocabulary,
|
||||
const std::vector<std::wstring> &ruleNames, const std::vector<std::wstring> &modeNames,
|
||||
const atn::ATN &atn, CharStream *input);
|
||||
|
||||
~LexerInterpreter();
|
||||
|
||||
virtual const atn::ATN& getATN() const override;
|
||||
|
@ -51,16 +56,22 @@ namespace runtime {
|
|||
virtual const std::vector<std::wstring>& getRuleNames() const override;
|
||||
virtual const std::vector<std::wstring>& getModeNames() const override;
|
||||
|
||||
Ref<dfa::Vocabulary> getVocabulary() const;
|
||||
|
||||
protected:
|
||||
const std::wstring grammarFileName;
|
||||
const std::wstring _grammarFileName;
|
||||
const atn::ATN &_atn;
|
||||
|
||||
const std::vector<std::wstring> &_tokenNames;
|
||||
// @deprecated
|
||||
std::vector<std::wstring> _tokenNames;
|
||||
const std::vector<std::wstring> &_ruleNames;
|
||||
const std::vector<std::wstring> &_modeNames;
|
||||
std::vector<dfa::DFA> _decisionToDFA;
|
||||
|
||||
Ref<atn::PredictionContextCache> _sharedContextCache;
|
||||
|
||||
private:
|
||||
Ref<dfa::Vocabulary> _vocabulary;
|
||||
};
|
||||
|
||||
} // namespace runtime
|
||||
|
|
|
@ -45,40 +45,36 @@
|
|||
#include "ATN.h"
|
||||
#include "RuleStopState.h"
|
||||
#include "Token.h"
|
||||
#include "VocabularyImpl.h"
|
||||
#include "InputMismatchException.h"
|
||||
#include "CommonToken.h"
|
||||
|
||||
#include "CPPUtils.h"
|
||||
|
||||
#include "ParserInterpreter.h"
|
||||
|
||||
using namespace org::antlr::v4::runtime;
|
||||
using namespace org::antlr::v4::runtime::atn;
|
||||
using namespace antlrcpp;
|
||||
|
||||
ParserInterpreter::ParserInterpreter(const std::wstring &grammarFileName, const std::vector<std::wstring>& tokenNames,
|
||||
const std::vector<std::wstring>& ruleNames, const atn::ATN &atn, TokenStream *input)
|
||||
: Parser(input), _grammarFileName(grammarFileName), _tokenNames(tokenNames), _atn(atn), _ruleNames(ruleNames) {
|
||||
: ParserInterpreter(grammarFileName, dfa::VocabularyImpl::fromTokenNames(tokenNames), ruleNames, atn, input) {
|
||||
}
|
||||
|
||||
for (int i = 0; i < _atn.getNumberOfDecisions(); i++) {
|
||||
_decisionToDFA.push_back(dfa::DFA(_atn.getDecisionState(i), i));
|
||||
ParserInterpreter::ParserInterpreter(const std::wstring &grammarFileName, Ref<dfa::Vocabulary> vocabulary,
|
||||
const std::vector<std::wstring> &ruleNames, const atn::ATN &atn, TokenStream *input)
|
||||
: Parser(input), _grammarFileName(grammarFileName), _atn(atn), _ruleNames(ruleNames), _vocabulary(vocabulary) {
|
||||
|
||||
_sharedContextCache = std::make_shared<atn::PredictionContextCache>();
|
||||
for (size_t i = 0; i < atn.maxTokenType; ++i) {
|
||||
_tokenNames.push_back(vocabulary->getDisplayName(i));
|
||||
}
|
||||
|
||||
// identify the ATN states where pushNewRecursionContext must be called
|
||||
for (auto state : _atn.states) {
|
||||
if (!is<atn::StarLoopEntryState*>(state)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
atn::RuleStartState *ruleStartState = _atn.ruleToStartState[(size_t)state->ruleIndex];
|
||||
if (!ruleStartState->isLeftRecursiveRule) {
|
||||
continue;
|
||||
}
|
||||
|
||||
atn::ATNState *maybeLoopEndState = state->transition(state->getNumberOfTransitions() - 1)->target;
|
||||
if (!is<atn::LoopEndState*>(maybeLoopEndState)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (maybeLoopEndState->epsilonOnlyTransitions && is<atn::RuleStopState*>(maybeLoopEndState->transition(0)->target)) {
|
||||
_pushRecursionContextStates.set((size_t)state->stateNumber);
|
||||
}
|
||||
// init decision DFA
|
||||
for (int i = 0; i < atn.getNumberOfDecisions(); ++i) {
|
||||
atn::DecisionState *decisionState = atn.getDecisionState(i);
|
||||
_decisionToDFA.push_back(dfa::DFA(decisionState, i));
|
||||
}
|
||||
|
||||
// get atn simulator that knows how to do predictions
|
||||
|
@ -89,6 +85,12 @@ ParserInterpreter::~ParserInterpreter() {
|
|||
delete _interpreter;
|
||||
}
|
||||
|
||||
void ParserInterpreter::reset() {
|
||||
Parser::reset();
|
||||
_overrideDecisionReached = false;
|
||||
_overrideDecisionRoot = nullptr;
|
||||
}
|
||||
|
||||
const atn::ATN& ParserInterpreter::getATN() const {
|
||||
return _atn;
|
||||
}
|
||||
|
@ -97,6 +99,10 @@ const std::vector<std::wstring>& ParserInterpreter::getTokenNames() const {
|
|||
return _tokenNames;
|
||||
}
|
||||
|
||||
Ref<dfa::Vocabulary> ParserInterpreter::getVocabulary() const {
|
||||
return _vocabulary;
|
||||
}
|
||||
|
||||
const std::vector<std::wstring>& ParserInterpreter::getRuleNames() const {
|
||||
return _ruleNames;
|
||||
}
|
||||
|
@ -108,13 +114,12 @@ std::wstring ParserInterpreter::getGrammarFileName() const {
|
|||
Ref<ParserRuleContext> ParserInterpreter::parse(int startRuleIndex) {
|
||||
atn::RuleStartState *startRuleStartState = _atn.ruleToStartState[(size_t)startRuleIndex];
|
||||
|
||||
Ref<InterpreterRuleContext> rootContext =
|
||||
std::make_shared<InterpreterRuleContext>(std::weak_ptr<ParserRuleContext>(), atn::ATNState::INVALID_STATE_NUMBER, startRuleIndex);
|
||||
_rootContext = createInterpreterRuleContext(std::weak_ptr<ParserRuleContext>(), atn::ATNState::INVALID_STATE_NUMBER, startRuleIndex);
|
||||
|
||||
if (startRuleStartState->isLeftRecursiveRule) {
|
||||
enterRecursionRule(rootContext, startRuleStartState->stateNumber, startRuleIndex, 0);
|
||||
enterRecursionRule(_rootContext, startRuleStartState->stateNumber, startRuleIndex, 0);
|
||||
} else {
|
||||
enterRule(rootContext, startRuleStartState->stateNumber, startRuleIndex);
|
||||
enterRule(_rootContext, startRuleStartState->stateNumber, startRuleIndex);
|
||||
}
|
||||
|
||||
while (true) {
|
||||
|
@ -123,15 +128,32 @@ Ref<ParserRuleContext> ParserInterpreter::parse(int startRuleIndex) {
|
|||
case atn::ATNState::RULE_STOP :
|
||||
// pop; return from rule
|
||||
if (_ctx->isEmpty()) {
|
||||
exitRule();
|
||||
return rootContext;
|
||||
if (startRuleStartState->isLeftRecursiveRule) {
|
||||
Ref<ParserRuleContext> result = _ctx;
|
||||
auto parentContext = _parentContextStack.top();
|
||||
_parentContextStack.pop();
|
||||
unrollRecursionContexts(parentContext.first);
|
||||
return result;
|
||||
} else {
|
||||
exitRule();
|
||||
return _rootContext;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
visitRuleStopState(p);
|
||||
break;
|
||||
|
||||
default :
|
||||
visitState(p);
|
||||
try {
|
||||
visitState(p);
|
||||
}
|
||||
catch (RecognitionException &e) {
|
||||
setState(_atn.ruleToStopState[p->ruleIndex]->stateNumber);
|
||||
getContext()->exception = std::make_exception_ptr(e);
|
||||
getErrorHandler()->reportError(this, e);
|
||||
recover(e);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -142,26 +164,41 @@ void ParserInterpreter::enterRecursionRule(Ref<ParserRuleContext> localctx, int
|
|||
Parser::enterRecursionRule(localctx, state, ruleIndex, precedence);
|
||||
}
|
||||
|
||||
atn::ATNState *ParserInterpreter::getATNState() {
|
||||
void ParserInterpreter::addDecisionOverride(int decision, int tokenIndex, int forcedAlt) {
|
||||
_overrideDecision = decision;
|
||||
_overrideDecisionInputIndex = tokenIndex;
|
||||
_overrideDecisionAlt = forcedAlt;
|
||||
}
|
||||
|
||||
Ref<InterpreterRuleContext> ParserInterpreter::getOverrideDecisionRoot() const {
|
||||
return _overrideDecisionRoot;
|
||||
}
|
||||
|
||||
Ref<InterpreterRuleContext> ParserInterpreter::getRootContext() {
|
||||
return _rootContext;
|
||||
}
|
||||
|
||||
atn::ATNState* ParserInterpreter::getATNState() {
|
||||
return _atn.states[(size_t)getState()];
|
||||
}
|
||||
|
||||
void ParserInterpreter::visitState(atn::ATNState *p) {
|
||||
int edge;
|
||||
if (p->getNumberOfTransitions() > 1) {
|
||||
edge = getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(_input, ((atn::DecisionState*)p)->decision, _ctx);
|
||||
} else {
|
||||
edge = 1;
|
||||
int predictedAlt = 1;
|
||||
if (is<DecisionState *>(p)) {
|
||||
predictedAlt = visitDecisionState(dynamic_cast<DecisionState *>(p));
|
||||
}
|
||||
|
||||
atn::Transition *transition = p->transition((size_t)edge - 1);
|
||||
atn::Transition *transition = p->transition(predictedAlt - 1);
|
||||
switch (transition->getSerializationType()) {
|
||||
case atn::Transition::EPSILON:
|
||||
if (_pushRecursionContextStates[(size_t)p->stateNumber] == 1 && is<atn::LoopEndState*>(transition->target)) {
|
||||
Ref<InterpreterRuleContext> ruleContext = std::make_shared<InterpreterRuleContext>(_parentContextStack.top().first,
|
||||
_parentContextStack.top().second, _ctx->getRuleIndex());
|
||||
pushNewRecursionContext(ruleContext, _atn.ruleToStartState[(size_t)p->ruleIndex]->stateNumber,
|
||||
(int)ruleContext->getRuleIndex());
|
||||
if (p->getStateType() == ATNState::STAR_LOOP_ENTRY &&
|
||||
(dynamic_cast<StarLoopEntryState *>(p))->isPrecedenceDecision &&
|
||||
!is<LoopEndState *>(transition->target)) {
|
||||
// We are at the start of a left recursive rule's (...)* loop
|
||||
// and we're not taking the exit branch of loop.
|
||||
Ref<InterpreterRuleContext> localctx = createInterpreterRuleContext(_parentContextStack.top().first,
|
||||
_parentContextStack.top().second, (int)_ctx->getRuleIndex());
|
||||
pushNewRecursionContext(localctx, _atn.ruleToStartState[p->ruleIndex]->stateNumber, (int)_ctx->getRuleIndex());
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -173,7 +210,7 @@ void ParserInterpreter::visitState(atn::ATNState *p) {
|
|||
case atn::Transition::SET:
|
||||
case atn::Transition::NOT_SET:
|
||||
if (!transition->matches((int)_input->LA(1), Token::MIN_USER_TOKEN_TYPE, 65535)) {
|
||||
_errHandler->recoverInline(this);
|
||||
recoverInline();
|
||||
}
|
||||
matchWildcard();
|
||||
break;
|
||||
|
@ -186,11 +223,11 @@ void ParserInterpreter::visitState(atn::ATNState *p) {
|
|||
{
|
||||
atn::RuleStartState *ruleStartState = (atn::RuleStartState*)(transition->target);
|
||||
int ruleIndex = ruleStartState->ruleIndex;
|
||||
Ref<InterpreterRuleContext> ruleContext = std::make_shared<InterpreterRuleContext>(_ctx, p->stateNumber, ruleIndex);
|
||||
Ref<InterpreterRuleContext> newctx = createInterpreterRuleContext(_ctx, p->stateNumber, ruleIndex);
|
||||
if (ruleStartState->isLeftRecursiveRule) {
|
||||
enterRecursionRule(ruleContext, ruleStartState->stateNumber, ruleIndex, ((atn::RuleTransition*)(transition))->precedence);
|
||||
enterRecursionRule(newctx, ruleStartState->stateNumber, ruleIndex, ((atn::RuleTransition*)(transition))->precedence);
|
||||
} else {
|
||||
enterRule(_ctx, transition->target->stateNumber, ruleIndex);
|
||||
enterRule(newctx, transition->target->stateNumber, ruleIndex);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -226,6 +263,26 @@ void ParserInterpreter::visitState(atn::ATNState *p) {
|
|||
setState(transition->target->stateNumber);
|
||||
}
|
||||
|
||||
int ParserInterpreter::visitDecisionState(DecisionState *p) {
|
||||
int predictedAlt = 1;
|
||||
if (p->getNumberOfTransitions() > 1) {
|
||||
getErrorHandler()->sync(this);
|
||||
int decision = p->decision;
|
||||
if (decision == _overrideDecision && (int)_input->index() == _overrideDecisionInputIndex && !_overrideDecisionReached) {
|
||||
predictedAlt = _overrideDecisionAlt;
|
||||
_overrideDecisionReached = true;
|
||||
} else {
|
||||
predictedAlt = getInterpreter<ParserATNSimulator>()->adaptivePredict(_input, decision, _ctx);
|
||||
}
|
||||
}
|
||||
return predictedAlt;
|
||||
}
|
||||
|
||||
Ref<InterpreterRuleContext> ParserInterpreter::createInterpreterRuleContext(std::weak_ptr<ParserRuleContext> parent,
|
||||
int invokingStateNumber, int ruleIndex) {
|
||||
return std::make_shared<InterpreterRuleContext>(parent, invokingStateNumber, ruleIndex);
|
||||
}
|
||||
|
||||
void ParserInterpreter::visitRuleStopState(atn::ATNState *p) {
|
||||
atn::RuleStartState *ruleStartState = _atn.ruleToStartState[(size_t)p->ruleIndex];
|
||||
if (ruleStartState->isLeftRecursiveRule) {
|
||||
|
@ -241,3 +298,32 @@ void ParserInterpreter::visitRuleStopState(atn::ATNState *p) {
|
|||
atn::RuleTransition *ruleTransition = static_cast<atn::RuleTransition*>(_atn.states[(size_t)getState()]->transition(0));
|
||||
setState(ruleTransition->followState->stateNumber);
|
||||
}
|
||||
|
||||
void ParserInterpreter::recover(RecognitionException &e) {
|
||||
size_t i = _input->index();
|
||||
getErrorHandler()->recover(this, e);
|
||||
|
||||
if (_input->index() == i) {
|
||||
// no input consumed, better add an error node
|
||||
if (is<InputMismatchException>(e)) {
|
||||
InputMismatchException &ime = (InputMismatchException&)e;
|
||||
Ref<Token> tok = e.getOffendingToken();
|
||||
int expectedTokenType = ime.getExpectedTokens().getMinElement(); // get any element
|
||||
auto errToken = getTokenFactory()->create({ tok->getTokenSource(), tok->getTokenSource()->getInputStream() },
|
||||
expectedTokenType, tok->getText(), Token::DEFAULT_CHANNEL, -1, -1, // invalid start/stop
|
||||
tok->getLine(), tok->getCharPositionInLine());
|
||||
_ctx->addErrorNode(std::dynamic_pointer_cast<Token>(errToken));
|
||||
}
|
||||
else { // NoViableAlt
|
||||
Ref<Token> tok = e.getOffendingToken();
|
||||
auto errToken = getTokenFactory()->create({ tok->getTokenSource(), tok->getTokenSource()->getInputStream() },
|
||||
Token::INVALID_TYPE, tok->getText(), Token::DEFAULT_CHANNEL, -1, -1, // invalid start/stop
|
||||
tok->getLine(), tok->getCharPositionInLine());
|
||||
_ctx->addErrorNode(std::dynamic_pointer_cast<Token>(errToken));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ref<Token> ParserInterpreter::recoverInline() {
|
||||
return _errHandler->recoverInline(this);
|
||||
}
|
||||
|
|
|
@ -57,12 +57,22 @@ namespace runtime {
|
|||
/// </summary>
|
||||
class ParserInterpreter : public Parser {
|
||||
public:
|
||||
// @deprecated
|
||||
ParserInterpreter(const std::wstring &grammarFileName, const std::vector<std::wstring>& tokenNames,
|
||||
const std::vector<std::wstring>& ruleNames, const atn::ATN &atn, TokenStream *input);
|
||||
ParserInterpreter(const std::wstring &grammarFileName, Ref<dfa::Vocabulary> vocabulary,
|
||||
const std::vector<std::wstring> &ruleNames, const atn::ATN &atn, TokenStream *input);
|
||||
~ParserInterpreter();
|
||||
|
||||
virtual void reset() override;
|
||||
|
||||
virtual const atn::ATN& getATN() const override;
|
||||
|
||||
// @deprecated
|
||||
virtual const std::vector<std::wstring>& getTokenNames() const override;
|
||||
|
||||
Ref<dfa::Vocabulary> getVocabulary() const;
|
||||
|
||||
virtual const std::vector<std::wstring>& getRuleNames() const override;
|
||||
virtual std::wstring getGrammarFileName() const override;
|
||||
|
||||
|
@ -71,23 +81,127 @@ namespace runtime {
|
|||
|
||||
virtual void enterRecursionRule(Ref<ParserRuleContext> localctx, int state, int ruleIndex, int precedence) override;
|
||||
|
||||
|
||||
/** Override this parser interpreter's normal decision-making process
|
||||
* at a particular decision and input token index. Instead of
|
||||
* allowing the adaptive prediction mechanism to choose the
|
||||
* first alternative within a block that leads to a successful parse,
|
||||
* force it to take the alternative, 1..n for n alternatives.
|
||||
*
|
||||
* As an implementation limitation right now, you can only specify one
|
||||
* override. This is sufficient to allow construction of different
|
||||
* parse trees for ambiguous input. It means re-parsing the entire input
|
||||
* in general because you're never sure where an ambiguous sequence would
|
||||
* live in the various parse trees. For example, in one interpretation,
|
||||
* an ambiguous input sequence would be matched completely in expression
|
||||
* but in another it could match all the way back to the root.
|
||||
*
|
||||
* s : e '!'? ;
|
||||
* e : ID
|
||||
* | ID '!'
|
||||
* ;
|
||||
*
|
||||
* Here, x! can be matched as (s (e ID) !) or (s (e ID !)). In the first
|
||||
* case, the ambiguous sequence is fully contained only by the root.
|
||||
* In the second case, the ambiguous sequence is fully contained within just
|
||||
* e, as in: (e ID !).
|
||||
*
|
||||
* Rather than trying to optimize this and make
|
||||
* some intelligent decisions for optimization purposes, I settled on
|
||||
* just re-parsing the whole input and then using
|
||||
* {@link Trees#getRootOfSubtreeEnclosingRegion} to find the minimal
|
||||
* subtree that contains the ambiguous sequence. I originally tried to
|
||||
* record the call stack at the point the parser detected an ambiguity but
|
||||
* left recursive rules create a parse tree stack that does not reflect
|
||||
* the actual call stack. That impedance mismatch was enough to make
|
||||
* it challenging to restart the parser at a deeply nested rule
|
||||
* invocation.
|
||||
*
|
||||
* Only parser interpreters can override decisions so as to avoid inserting
|
||||
* override checking code in the critical ALL(*) prediction execution path.
|
||||
*
|
||||
* @since 4.5.1
|
||||
*/
|
||||
void addDecisionOverride(int decision, int tokenIndex, int forcedAlt);
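A hedged usage sketch (not part of the change) of the override described above; interp and startRuleIndex are placeholders for an existing ParserInterpreter and its start rule:

interp.reset();
interp.addDecisionOverride(0, 3, 2);  // at decision 0, when the input index is 3, force alternative 2
Ref<ParserRuleContext> tree = interp.parse(startRuleIndex);  // re-parse to obtain the alternative tree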
|
||||
|
||||
Ref<InterpreterRuleContext> getOverrideDecisionRoot() const;
|
||||
|
||||
/** Return the root of the parse, which can be useful if the parser
|
||||
* bails out. You can still access the top node. Note that,
|
||||
* because of the way left recursive rules add children, it's possible
|
||||
* that the root will not have any children if the start rule immediately
|
||||
* calls a left-recursive rule that fails.
|
||||
*
|
||||
* @since 4.5.1
|
||||
*/
|
||||
Ref<InterpreterRuleContext> getRootContext();
|
||||
|
||||
protected:
|
||||
const std::wstring _grammarFileName;
|
||||
std::vector<std::wstring> _tokenNames;
|
||||
const atn::ATN &_atn;
|
||||
|
||||
std::vector<std::wstring> _ruleNames;
|
||||
antlrcpp::BitSet _pushRecursionContextStates;
|
||||
|
||||
std::vector<dfa::DFA> _decisionToDFA; // not shared like it is for generated parsers
|
||||
Ref<atn::PredictionContextCache> _sharedContextCache;
|
||||
|
||||
/** This stack corresponds to the _parentctx, _parentState pair of locals
|
||||
* that would exist on call stack frames with a recursive descent parser;
|
||||
* in the generated function for a left-recursive rule you'd see:
|
||||
*
|
||||
* private EContext e(int _p) throws RecognitionException {
|
||||
* ParserRuleContext _parentctx = _ctx; // Pair.a
|
||||
* int _parentState = getState(); // Pair.b
|
||||
* ...
|
||||
* }
|
||||
*
|
||||
* Those values are used to create new recursive rule invocation contexts
|
||||
* associated with the left operand of an alt like "expr '*' expr".
|
||||
*/
|
||||
std::stack<std::pair<Ref<ParserRuleContext>, int>> _parentContextStack;
|
||||
|
||||
/** We need a map from (decision,inputIndex)->forced alt for computing ambiguous
|
||||
* parse trees. For now, we allow exactly one override.
|
||||
*/
|
||||
int _overrideDecision = -1;
|
||||
int _overrideDecisionInputIndex = -1;
|
||||
int _overrideDecisionAlt = -1;
|
||||
bool _overrideDecisionReached = false; // latch and only override once; error might trigger infinite loop
|
||||
|
||||
/** What is the current context when we override a decision? This tells
|
||||
* us what the root of the parse tree is when using override
|
||||
* for an ambiguity/lookahead check.
|
||||
*/
|
||||
Ref<InterpreterRuleContext> _overrideDecisionRoot;
|
||||
Ref<InterpreterRuleContext> _rootContext;
|
||||
|
||||
virtual atn::ATNState *getATNState();
|
||||
virtual void visitState(atn::ATNState *p);
|
||||
|
||||
/** Method visitDecisionState() is called when the interpreter reaches
|
||||
* a decision state (instance of DecisionState). It gives an opportunity
|
||||
* for subclasses to track interesting things.
|
||||
*/
|
||||
int visitDecisionState(atn::DecisionState *p);
|
||||
|
||||
/** Provide simple "factory" for InterpreterRuleContext's.
|
||||
* @since 4.5.1
|
||||
*/
|
||||
Ref<InterpreterRuleContext> createInterpreterRuleContext(std::weak_ptr<ParserRuleContext> parent, int invokingStateNumber,
|
||||
int ruleIndex);
|
||||
|
||||
virtual void visitRuleStopState(atn::ATNState *p);
|
||||
|
||||
/** Rely on the error handler for this parser but, if no tokens are consumed
|
||||
* to recover, add an error node. Otherwise, nothing is seen in the parse
|
||||
* tree.
|
||||
*/
|
||||
void recover(RecognitionException &e);
|
||||
Ref<Token> recoverInline();
|
||||
|
||||
private:
|
||||
Ref<dfa::Vocabulary> _vocabulary;
|
||||
};
|
||||
|
||||
} // namespace runtime
|
||||
|
|
|
@ -73,6 +73,19 @@ namespace runtime {
|
|||
/// </summary>
|
||||
static const size_t HIDDEN_CHANNEL = 1;
|
||||
|
||||
/**
|
||||
* This is the minimum constant value which can be assigned to a
|
||||
* user-defined token channel.
|
||||
*
|
||||
* <p>
|
||||
* The non-negative numbers less than {@link #MIN_USER_CHANNEL_VALUE} are
|
||||
* assigned to the predefined channels {@link #DEFAULT_CHANNEL} and
|
||||
* {@link #HIDDEN_CHANNEL}.</p>
|
||||
*
|
||||
* @see Token#getChannel()
|
||||
*/
|
||||
static const size_t MIN_USER_CHANNEL_VALUE = 2;
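For illustration only (not part of the change): a user-defined channel, for example in a generated lexer, would start at this value; the channel name is hypothetical:

static const size_t COMMENTS_CHANNEL = Token::MIN_USER_CHANNEL_VALUE;  // == 2, the first channel free for user code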
|
||||
|
||||
/// <summary>
|
||||
/// Get the text of the token.
|
||||
/// </summary>
|
||||
|
|
|
@ -269,12 +269,16 @@ std::wstring TokenStreamRewriter::getText() {
|
|||
return getText(DEFAULT_PROGRAM_NAME, Interval(0, (int)tokens->size() - 1));
|
||||
}
|
||||
|
||||
std::wstring TokenStreamRewriter::getText(std::wstring programName) {
|
||||
return getText(programName, Interval(0, (int)tokens->size() - 1));
|
||||
}
|
||||
|
||||
std::wstring TokenStreamRewriter::getText(const Interval &interval) {
|
||||
return getText(DEFAULT_PROGRAM_NAME, interval);
|
||||
}
|
||||
|
||||
std::wstring TokenStreamRewriter::getText(const std::wstring &programName, const Interval &interval) {
|
||||
std::vector<TokenStreamRewriter::RewriteOperation*> rewrites = _programs.at(programName);
|
||||
std::vector<TokenStreamRewriter::RewriteOperation*> rewrites = _programs[programName];
|
||||
int start = interval.a;
|
||||
int stop = interval.b;
|
||||
|
||||
|
|
|
@ -36,68 +36,82 @@ namespace antlr {
|
|||
namespace v4 {
|
||||
namespace runtime {
|
||||
|
||||
/// <summary>
|
||||
/// Useful for rewriting out a buffered input token stream after doing some
|
||||
/// augmentation or other manipulations on it.
|
||||
///
|
||||
/// You can insert stuff, replace, and delete chunks. Note that the
|
||||
/// operations are done lazily--only if you convert the buffer to a
|
||||
/// String with getText(). This is very efficient because you are not moving
|
||||
/// data around all the time. As the buffer of tokens is converted to strings,
|
||||
/// the getText() method(s) scan the input token stream and check
|
||||
/// to see if there is an operation at the current index.
|
||||
/// If so, the operation is done and then normal String
|
||||
/// rendering continues on the buffer. This is like having multiple Turing
|
||||
/// machine instruction streams (programs) operating on a single input tape. :)
|
||||
///
|
||||
/// This rewriter makes no modifications to the token stream. It does not
|
||||
/// ask the stream to fill itself up nor does it advance the input cursor.
|
||||
/// The token stream index() will return the same value before and after
|
||||
/// any getText() call.
|
||||
///
|
||||
/// The rewriter only works on tokens that you have in the buffer and
|
||||
/// ignores the current input cursor. If you are buffering tokens on-demand,
|
||||
/// calling getText() halfway through the input will only do rewrites
|
||||
/// for those tokens in the first half of the file.
|
||||
///
|
||||
/// Since the operations are done lazily at getText-time, operations do not
|
||||
/// screw up the token index values. That is, an insert operation at token
|
||||
/// index i does not change the index values for tokens i+1..n-1.
|
||||
///
|
||||
/// Because operations never actually alter the buffer, you may always get
|
||||
/// the original token stream back without undoing anything. Since
|
||||
/// the instructions are queued up, you can easily simulate transactions and
|
||||
/// roll back any changes if there is an error just by removing instructions.
|
||||
/// For example,
|
||||
///
|
||||
/// CharStream input = new ANTLRFileStream("input");
|
||||
/// TLexer lex = new TLexer(input);
|
||||
/// CommonTokenStream tokens = new CommonTokenStream(lex);
|
||||
/// T parser = new T(tokens);
|
||||
/// TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
|
||||
/// parser.startRule();
|
||||
///
|
||||
/// Then in the rules, you can execute (assuming rewriter is visible):
|
||||
/// Token t,u;
|
||||
/// ...
|
||||
/// rewriter.insertAfter(t, "text to put after t");}
|
||||
/// rewriter.insertAfter(u, "text after u");}
|
||||
/// System.out.println(tokens.toString());
|
||||
///
|
||||
/// You can also have multiple "instruction streams" and get multiple
|
||||
/// rewrites from a single pass over the input. Just name the instruction
|
||||
/// streams and use that name again when printing the buffer. This could be
|
||||
/// useful for generating a C file and also its header file--all from the
|
||||
/// same buffer:
|
||||
///
|
||||
/// tokens.insertAfter("pass1", t, "text to put after t");}
|
||||
/// tokens.insertAfter("pass2", u, "text after u");}
|
||||
/// System.out.println(tokens.toString("pass1"));
|
||||
/// System.out.println(tokens.toString("pass2"));
|
||||
///
|
||||
/// If you don't use named rewrite streams, a "default" stream is used as
|
||||
/// the first example shows.
|
||||
/// </summary>
|
||||
/**
|
||||
* Useful for rewriting out a buffered input token stream after doing some
|
||||
* augmentation or other manipulations on it.
|
||||
*
|
||||
* <p>
|
||||
* You can insert stuff, replace, and delete chunks. Note that the operations
|
||||
* are done lazily--only if you convert the buffer to a {@link String} with
|
||||
* {@link TokenStream#getText()}. This is very efficient because you are not
|
||||
* moving data around all the time. As the buffer of tokens is converted to
|
||||
* strings, the {@link #getText()} method(s) scan the input token stream and
|
||||
* check to see if there is an operation at the current index. If so, the
|
||||
* operation is done and then normal {@link String} rendering continues on the
|
||||
* buffer. This is like having multiple Turing machine instruction streams
|
||||
* (programs) operating on a single input tape. :)</p>
|
||||
*
|
||||
* <p>
|
||||
* This rewriter makes no modifications to the token stream. It does not ask the
|
||||
* stream to fill itself up nor does it advance the input cursor. The token
|
||||
* stream {@link TokenStream#index()} will return the same value before and
|
||||
* after any {@link #getText()} call.</p>
|
||||
*
|
||||
* <p>
|
||||
* The rewriter only works on tokens that you have in the buffer and ignores the
|
||||
* current input cursor. If you are buffering tokens on-demand, calling
|
||||
* {@link #getText()} halfway through the input will only do rewrites for those
|
||||
* tokens in the first half of the file.</p>
|
||||
*
|
||||
* <p>
|
||||
* Since the operations are done lazily at {@link #getText}-time, operations do
|
||||
* not screw up the token index values. That is, an insert operation at token
|
||||
* index {@code i} does not change the index values for tokens
|
||||
* {@code i}+1..n-1.</p>
|
||||
*
|
||||
* <p>
|
||||
* Because operations never actually alter the buffer, you may always get the
|
||||
* original token stream back without undoing anything. Since the instructions
|
||||
* are queued up, you can easily simulate transactions and roll back any changes
|
||||
* if there is an error just by removing instructions. For example,</p>
|
||||
*
|
||||
* <pre>
|
||||
* CharStream input = new ANTLRFileStream("input");
|
||||
* TLexer lex = new TLexer(input);
|
||||
* CommonTokenStream tokens = new CommonTokenStream(lex);
|
||||
* T parser = new T(tokens);
|
||||
* TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
|
||||
* parser.startRule();
|
||||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
* Then in the rules, you can execute (assuming rewriter is visible):</p>
|
||||
*
|
||||
* <pre>
|
||||
* Token t,u;
|
||||
* ...
|
||||
* rewriter.insertAfter(t, "text to put after t");}
|
||||
* rewriter.insertAfter(u, "text after u");}
|
||||
* System.out.println(rewriter.getText());
|
||||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
* You can also have multiple "instruction streams" and get multiple rewrites
|
||||
* from a single pass over the input. Just name the instruction streams and use
|
||||
* that name again when printing the buffer. This could be useful for generating
|
||||
* a C file and also its header file--all from the same buffer:</p>
|
||||
*
|
||||
* <pre>
|
||||
* rewriter.insertAfter("pass1", t, "text to put after t");}
|
||||
* rewriter.insertAfter("pass2", u, "text after u");}
|
||||
* System.out.println(rewriter.getText("pass1"));
|
||||
* System.out.println(rewriter.getText("pass2"));
|
||||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
* If you don't use named rewrite streams, a "default" stream is used as the
|
||||
* first example shows.</p>
|
||||
*/
|
||||
class TokenStreamRewriter {
|
||||
public:
|
||||
static const std::wstring DEFAULT_PROGRAM_NAME;
|
||||
|
@ -153,6 +167,11 @@ namespace runtime {
|
|||
/// instructions given to this rewriter.
|
||||
virtual std::wstring getText();
|
||||
|
||||
/** Return the text from the original tokens altered per the
|
||||
* instructions given to this rewriter in programName.
|
||||
*/
|
||||
std::wstring getText(std::wstring programName);
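A C++-flavoured sketch of the Java example from the class comment above (not part of the change; the exact insertAfter overloads are assumed to follow the Java API):

TokenStreamRewriter rewriter(&tokens);            // tokens: a filled CommonTokenStream
rewriter.insertAfter(t, L"text to put after t");  // t, u: tokens captured during the parse
rewriter.insertAfter(u, L"text after u");
std::wcout << rewriter.getText() << std::endl;    // queued operations are applied lazily here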
|
||||
|
||||
/// <summary>
|
||||
/// Return the text associated with the tokens in the interval from the
|
||||
/// original token stream but with the alterations given to this rewriter.
|
||||
|
|
|
@ -188,6 +188,10 @@ size_t UnbufferedCharStream::size() {
|
|||
}
|
||||
|
||||
std::string UnbufferedCharStream::getSourceName() const {
|
||||
if (name.empty()) {
|
||||
return UNKNOWN_SOURCE_NAME;
|
||||
}
|
||||
|
||||
return name;
|
||||
}
|
||||
|
||||
|
|
|
@ -141,7 +141,7 @@ int ATN::defineDecisionState(DecisionState *s) {
|
|||
|
||||
DecisionState *ATN::getDecisionState(int decision) const {
|
||||
if (!decisionToState.empty()) {
|
||||
return decisionToState.at((size_t)decision);
|
||||
return decisionToState[(size_t)decision];
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
/*
|
||||
* [The "BSD license"]
|
||||
* Copyright (c) 2016 Mike Lischke
|
||||
* Copyright (c) 2014 Terence Parr
|
||||
* Copyright (c) 2014 Dan McLaughlin
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "Predicate.h"
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* [The "BSD license"]
|
||||
* Copyright (c) 2016 Mike Lischke
|
||||
* Copyright (c) 2014 Terence Parr
|
||||
* Copyright (c) 2014 Dan McLaughlin
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace org {
|
||||
namespace antlr {
|
||||
namespace v4 {
|
||||
namespace runtime {
|
||||
namespace misc {
|
||||
|
||||
template<typename T>
|
||||
class Predicate {
|
||||
public:
|
||||
virtual bool test(Ref<T> t) = 0;
|
||||
};
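A minimal implementation sketch (not part of the change), assuming Ref<T> is the runtime's shared-pointer alias and that Token exposes getType():

class TokenTypePredicate : public Predicate<Token> {
public:
  explicit TokenTypePredicate(int type) : _type(type) {}
  virtual bool test(Ref<Token> t) override {
    return t != nullptr && (int)t->getType() == _type;
  }
private:
  const int _type;
};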
|
||||
|
||||
} // namespace misc
|
||||
} // namespace runtime
|
||||
} // namespace v4
|
||||
} // namespace antlr
|
||||
} // namespace org
|
|
@ -97,6 +97,7 @@ namespace org {
|
|||
class MurmurHash;
|
||||
class ParseCancellationException;
|
||||
class Utils;
|
||||
template <typename T> class Predicate;
|
||||
}
|
||||
namespace atn {
|
||||
class ATN;
|
||||
|
|
|
@ -53,17 +53,19 @@ namespace tree {
|
|||
return tree->accept(this);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// {@inheritDoc}
|
||||
/// <p/>
|
||||
/// The default implementation initializes the aggregate result to
|
||||
/// <seealso cref="#defaultResult defaultResult()"/>. Before visiting each child, it
|
||||
/// calls <seealso cref="#shouldVisitNextChild shouldVisitNextChild"/>; if the result
|
||||
/// is {@code false} no more children are visited and the current aggregate
|
||||
/// result is returned. After visiting a child, the aggregate result is
|
||||
/// updated by calling <seealso cref="#aggregateResult aggregateResult"/> with the
|
||||
/// previous aggregate result and the result of visiting the child.
|
||||
/// </summary>
|
||||
/**
|
||||
* <p>The default implementation initializes the aggregate result to
|
||||
* {@link #defaultResult defaultResult()}. Before visiting each child, it
|
||||
* calls {@link #shouldVisitNextChild shouldVisitNextChild}; if the result
|
||||
* is {@code false} no more children are visited and the current aggregate
|
||||
* result is returned. After visiting a child, the aggregate result is
|
||||
* updated by calling {@link #aggregateResult aggregateResult} with the
|
||||
* previous aggregate result and the result of visiting the child.</p>
|
||||
*
|
||||
* <p>The default implementation is not safe for use in visitors that modify
|
||||
* the tree structure. Visitors that modify the tree should override this
|
||||
* method to behave properly in respect to the specific algorithm in use.</p>
|
||||
*/
|
||||
virtual T* visitChildren(RuleNode *node) override {
|
||||
T* result = defaultResult();
|
||||
size_t n = node->getChildCount();
|
||||
|
|
|
@ -37,6 +37,17 @@ namespace v4 {
|
|||
namespace runtime {
|
||||
namespace tree {
|
||||
|
||||
/** This interface describes the minimal core of methods triggered
|
||||
* by {@link ParseTreeWalker}. E.g.,
|
||||
*
|
||||
* ParseTreeWalker walker = new ParseTreeWalker();
|
||||
* walker.walk(myParseTreeListener, myParseTree); <-- triggers events in your listener
|
||||
*
|
||||
* If you want to trigger events in multiple listeners during a single
|
||||
* tree walk, you can use the ParseTreeDispatcher object available at
|
||||
*
|
||||
* https://github.com/antlr/antlr4/issues/841
|
||||
*/
|
||||
class ParseTreeListener {
|
||||
public:
|
||||
virtual void visitTerminal(Ref<TerminalNode> node) = 0;
|
||||
|
|
|
@ -45,15 +45,23 @@ namespace tree {
|
|||
/// between parse trees and other kinds of syntax trees we might want to create.
|
||||
/// </summary>
|
||||
class SyntaxTree : public Tree {
|
||||
/// <summary>
|
||||
/// Return an <seealso cref="Interval"/> indicating the index in the
|
||||
/// <seealso cref="TokenStream"/> of the first and last token associated with this
|
||||
/// subtree. If this node is a leaf, then the interval represents a single
|
||||
/// token.
|
||||
/// <p/>
|
||||
/// If source interval is unknown, this returns <seealso cref="Interval#INVALID"/>.
|
||||
/// </summary>
|
||||
public:
|
||||
/**
|
||||
* Return an {@link Interval} indicating the index in the
|
||||
* {@link TokenStream} of the first and last token associated with this
|
||||
* subtree. If this node is a leaf, then the interval represents a single
|
||||
* token and has interval i..i for token index i.
|
||||
*
|
||||
* <p>An interval of i..i-1 indicates an empty interval at position
|
||||
* i in the input stream, where 0 <= i <= the size of the input
|
||||
* token stream. Currently, the code base can only have i=0..n-1 but
|
||||
* in concept one could have an empty interval after EOF. </p>
|
||||
*
|
||||
* <p>If source interval is unknown, this returns {@link Interval#INVALID}.</p>
|
||||
*
|
||||
* <p>As a weird special case, the source interval for rules matched after
|
||||
* EOF is unspecified.</p>
|
||||
*/
|
||||
virtual misc::Interval getSourceInterval() = 0;
|
||||
};
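A short usage sketch (not part of the change): slicing the matched text for a subtree via its source interval; tokens is assumed to be the TokenStream the parser read from, with a getText(const misc::Interval&) overload as in the Java runtime:

misc::Interval range = node->getSourceInterval();
if (range.a <= range.b) {  // a leaf yields i..i, an empty subtree i..i-1
  std::wstring matched = tokens->getText(range);
}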
|
||||
|
||||
|
|
|
@@ -32,3 +32,7 @@
#include "Tree.h"

using namespace org::antlr::v4::runtime::tree;

bool Tree::operator == (const Tree &other) const {
  return &other == this;
}

@@ -78,6 +78,8 @@ namespace tree {

  virtual std::wstring toString() = 0;

  virtual bool operator == (const Tree &other) const;

protected:
  virtual std::weak_ptr<Tree> getParentReference() = 0;
  virtual Ref<Tree> getChildReference(size_t i) = 0;

@@ -34,14 +34,22 @@
#include "ParserRuleContext.h"
#include "CPPUtils.h"
#include "TerminalNodeImpl.h"
#include "ATN.h"
#include "Interval.h"
#include "CommonToken.h"
#include "Predicate.h"

#include "Trees.h"

using namespace org::antlr::v4::runtime;
using namespace org::antlr::v4::runtime::misc;
using namespace org::antlr::v4::runtime::tree;

using namespace antlrcpp;

Trees::Trees() {
}

std::wstring Trees::toStringTree(Ref<Tree> t) {
  return toStringTree(t, nullptr);
}

@@ -76,11 +84,15 @@ std::wstring Trees::getNodeText(Ref<Tree> t, Parser *recog) {

std::wstring Trees::getNodeText(Ref<Tree> t, const std::vector<std::wstring> &ruleNames) {
  if (ruleNames.size() > 0) {
    if (is<RuleNode>(t)) {
      ssize_t ruleIndex = (std::static_pointer_cast<RuleNode>(t))->getRuleContext()->getRuleIndex();
    if (is<RuleContext>(t)) {
      ssize_t ruleIndex = std::static_pointer_cast<RuleContext>(t)->getRuleContext()->getRuleIndex();
      if (ruleIndex < 0)
        return L"Invalid Rule Index";
      std::wstring ruleName = ruleNames[(size_t)ruleIndex];
      int altNumber = std::static_pointer_cast<RuleContext>(t)->getAltNumber();
      if (altNumber != atn::ATN::INVALID_ALT_NUMBER) {
        return ruleName + L":" + std::to_wstring(altNumber);
      }
      return ruleName;
    } else if (is<ErrorNode>(t)) {
      return t->toString();

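The practical effect of the change above: when a rule context carries an explicit alternative number, its display name gains a ":alt" suffix. A hedged call-site sketch, where parser and tree are placeholders:

  std::wstring label = Trees::getNodeText(tree, parser.getRuleNames());
  // e.g. L"expr" for a plain rule node, or L"expr:2" when alternative 2 was recorded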
@@ -141,6 +153,21 @@ static void _findAllNodes(Ref<ParseTree> t, int index, bool findTokens, std::vec
  }
}

bool Trees::isAncestorOf(Ref<Tree> t, Ref<Tree> u) {
  if (t == nullptr || u == nullptr || t->getParent().expired()) {
    return false;
  }

  Ref<Tree> p = u->getParent().lock();
  while (p != nullptr) {
    if (t == p) {
      return true;
    }
    p = p->getParent().lock();
  }
  return false;
}

std::vector<Ref<ParseTree>> Trees::findAllTokenNodes(Ref<ParseTree> t, int ttype) {
  return findAllNodes(t, ttype, true);
}

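A brief usage sketch for the two helpers above; the contexts, the leaf node and MyParser::ID are placeholders, not part of this change:

  bool inside = Trees::isAncestorOf(ruleCtx, leaf);            // compares shared_ptr identity, not equals()
  auto ids = Trees::findAllTokenNodes(ruleCtx, MyParser::ID);  // every token node of that type under ruleCtx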
@@ -155,12 +182,12 @@ std::vector<Ref<ParseTree>> Trees::findAllNodes(Ref<ParseTree> t, int index, boo
  return nodes;
}

std::vector<Ref<ParseTree>> Trees::descendants(Ref<ParseTree> t) {
std::vector<Ref<ParseTree>> Trees::getDescendants(Ref<ParseTree> t) {
  std::vector<Ref<ParseTree>> nodes;
  nodes.push_back(t);
  std::size_t n = t->getChildCount();
  for (size_t i = 0 ; i < n ; i++) {
    auto descentants = descendants(t->getChild(i));
    auto descentants = getDescendants(t->getChild(i));
    for (auto entry: descentants) {
      nodes.push_back(entry);
    }

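In short, descendants() is renamed to getDescendants(). The expected call site after this change looks like the line below, where tree is a placeholder:

  std::vector<Ref<ParseTree>> all = Trees::getDescendants(tree);  // includes tree itself plus every node below it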
@@ -168,5 +195,62 @@ std::vector<Ref<ParseTree>> Trees::descendants(Ref<ParseTree> t) {
  return nodes;
}

Trees::Trees() {
std::vector<Ref<ParseTree>> Trees::descendants(Ref<ParseTree> t) {
  return getDescendants(t);
}

Ref<ParserRuleContext> Trees::getRootOfSubtreeEnclosingRegion(Ref<ParseTree> t, size_t startTokenIndex,
                                                              size_t stopTokenIndex) {
  size_t n = t->getChildCount();
  for (size_t i = 0; i<n; i++) {
    Ref<ParseTree> child = t->getChild(i);
    Ref<ParserRuleContext> r = getRootOfSubtreeEnclosingRegion(child, startTokenIndex, stopTokenIndex);
    if (r != nullptr) {
      return r;
    }
  }

  if (is<ParserRuleContext>(t)) {
    Ref<ParserRuleContext> r = std::static_pointer_cast<ParserRuleContext>(t);
    if ((int)startTokenIndex >= r->getStart()->getTokenIndex() && // is range fully contained in t?
        (r->getStop() == nullptr || (int)stopTokenIndex <= r->getStop()->getTokenIndex())) {
      // note: r.getStop()==null likely implies that we bailed out of parser and there's nothing to the right
      return r;
    }
  }
  return nullptr;
}

void Trees::stripChildrenOutOfRange(Ref<ParserRuleContext> t, Ref<ParserRuleContext> root, size_t startIndex, size_t stopIndex) {
  if (t == nullptr) {
    return;
  }

  for (size_t i = 0; i < t->getChildCount(); ++i) {
    Ref<ParseTree> child = t->getChild(i);
    Interval range = child->getSourceInterval();
    if (is<ParserRuleContext>(child) && (range.b < (int)startIndex || range.a > (int)stopIndex)) {
      if (isAncestorOf(child, root)) { // replace only if subtree doesn't have displayed root
        Ref<CommonToken> abbrev = std::make_shared<CommonToken>(Token::INVALID_TYPE, L"...");
        t->children[i] = std::make_shared<TerminalNodeImpl>(abbrev);
      }
    }
  }
}

Ref<Tree> Trees::findNodeSuchThat(Ref<Tree> t, Ref<Predicate<Tree>> pred) {
  if (pred->test(t)) {
    return t;
  }

  size_t n = t->getChildCount();
  for (size_t i = 0 ; i < n ; ++i) {
    Ref<Tree> u = findNodeSuchThat(t->getChild(i), pred);
    if (u != nullptr) {
      return u;
    }
  }

  return nullptr;
}

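These two helpers are typically combined when focusing a display on a token range. A hedged sketch, where the token indexes and tree are placeholders:

  Ref<ParserRuleContext> enclosing = Trees::getRootOfSubtreeEnclosingRegion(tree, 10, 20);
  if (enclosing != nullptr) {
    // Destructive: sibling subtrees entirely outside 10..20 become "..." terminal nodes.
    Trees::stripChildrenOutOfRange(enclosing, enclosing, 10, 20);
  }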
@@ -68,12 +68,52 @@ namespace tree {
  /// Return a list of all ancestors of this node. The first node of
  /// list is the root and the last is the parent of this node.
  static std::vector<std::weak_ptr<Tree>> getAncestors(Ref<Tree> t);

  /** Return true if t is u's parent or a node on path to root from u.
   * Use == not equals().
   *
   * @since 4.5.1
   */
  static bool isAncestorOf(Ref<Tree> t, Ref<Tree> u);
  static std::vector<Ref<ParseTree>> findAllTokenNodes(Ref<ParseTree> t, int ttype);
  static std::vector<Ref<ParseTree>> findAllRuleNodes(Ref<ParseTree> t, int ruleIndex);
  static std::vector<Ref<ParseTree>> findAllNodes(Ref<ParseTree> t, int index, bool findTokens);

  static std::vector<Ref<ParseTree>> descendants(Ref<ParseTree> t);
  /** Get all descendents; includes t itself.
   *
   * @since 4.5.1
   */
  static std::vector<Ref<ParseTree>> getDescendants(Ref<ParseTree> t);

  /** @deprecated */
  static std::vector<Ref<ParseTree>> descendants(Ref<ParseTree> t);

  /** Find smallest subtree of t enclosing range startTokenIndex..stopTokenIndex
   * inclusively using postorder traversal. Recursive depth-first-search.
   *
   * @since 4.5.1
   */
  static Ref<ParserRuleContext> getRootOfSubtreeEnclosingRegion(Ref<ParseTree> t,
                                                                size_t startTokenIndex, // inclusive
                                                                size_t stopTokenIndex); // inclusive

  /** Replace any subtree siblings of root that are completely to left
   * or right of lookahead range with a CommonToken(Token.INVALID_TYPE,"...")
   * node. The source interval for t is not altered to suit smaller range!
   *
   * WARNING: destructive to t.
   *
   * @since 4.5.1
   */
  static void stripChildrenOutOfRange(Ref<ParserRuleContext> t, Ref<ParserRuleContext> root, size_t startIndex,
                                      size_t stopIndex);

  /** Return first node satisfying the pred
   *
   * @since 4.5.1
   */
  static Ref<Tree> findNodeSuchThat(Ref<Tree> t, Ref<misc::Predicate<Tree>> pred);

private:
  Trees();
};

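findNodeSuchThat() takes a misc::Predicate<Tree>. The exact shape of that interface is not shown in this diff, so the following is only a sketch that assumes a virtual test() member taking a Ref<Tree>, matching the pred->test(t) call in Trees.cpp above:

  class IsErrorNode : public misc::Predicate<Tree> {
  public:
    virtual bool test(Ref<Tree> t) override {        // signature assumed, see note above
      return antlrcpp::is<ErrorNode>(t);
    }
  };

  Ref<Tree> firstError = Trees::findNodeSuchThat(tree, std::make_shared<IsErrorNode>());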
@@ -47,7 +47,7 @@ ParseTreeMatch::ParseTreeMatch(Ref<ParseTree> tree, const ParseTreePattern &patt

Ref<ParseTree> ParseTreeMatch::get(const std::wstring &label) {
  auto iterator = _labels.find(label);
  if (iterator == _labels.end()) {
  if (iterator == _labels.end() || iterator->second.empty()) {
    return nullptr;
  }

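The extra check means a label that exists in the map but never bound anything now also returns nullptr instead of indexing into an empty list. A hedged call-site sketch, where match is a ParseTreeMatch placeholder:

  Ref<ParseTree> id = match.get(L"ID");
  if (id == nullptr) {
    // the label "ID" was missing or never bound anything in this match
  }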
@@ -40,6 +40,7 @@
#include "TagChunk.h"
#include "ATN.h"
#include "Lexer.h"
#include "BailErrorStrategy.h"

#include "ListTokenSource.h"
#include "TextChunk.h"

@@ -56,7 +57,7 @@ using namespace org::antlr::v4::runtime::tree;
using namespace org::antlr::v4::runtime::tree::pattern;
using namespace antlrcpp;

ParseTreePatternMatcher::CannotInvokeStartRule::CannotInvokeStartRule(std::exception e) {
ParseTreePatternMatcher::CannotInvokeStartRule::CannotInvokeStartRule(const RuntimeException &e) : RuntimeException(e.what()) {
}

ParseTreePatternMatcher::ParseTreePatternMatcher(Lexer *lexer, Parser *parser) : _lexer(lexer), _parser(parser) {

@@ -109,12 +110,17 @@ ParseTreePattern ParseTreePatternMatcher::compile(const std::wstring &pattern, i
    delete tokens;
  });

  ParserInterpreter parserInterp(_parser->getGrammarFileName(), _parser->getTokenNames(),
  ParserInterpreter parserInterp(_parser->getGrammarFileName(), _parser->getVocabulary(),
    _parser->getRuleNames(), _parser->getATNWithBypassAlts(), tokens);

  Ref<ParserRuleContext> tree;
  try {
    Ref<ParserRuleContext> context = parserInterp.parse(patternRuleIndex);
    return ParseTreePattern(this, pattern, patternRuleIndex, context);
    parserInterp.setErrorHandler(std::make_shared<BailErrorStrategy>());
    tree = parserInterp.parse(patternRuleIndex);
  } catch (ParseCancellationException &e) {
    std::rethrow_if_nested(e);
  } catch (RecognitionException &re) {
    throw re;
  } catch (std::exception &e) {
#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026
    // throw_with_nested is not available before VS 2015.

@@ -124,6 +130,12 @@ ParseTreePattern ParseTreePatternMatcher::compile(const std::wstring &pattern, i
#endif
  }

  // Make sure tree pattern compilation checks for a complete parse
  if (tokens->LA(1) != EOF) {
    throw StartRuleDoesNotConsumeFullPattern();
  }

  return ParseTreePattern(this, pattern, patternRuleIndex, tree);
}

Lexer* ParseTreePatternMatcher::getLexer() {

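Taken together, compile() now parses the pattern with a BailErrorStrategy and rejects patterns whose start rule leaves tokens unconsumed. A hedged end-to-end sketch; the lexer, parser, rule index constant and pattern string are placeholders, not part of this diff:

  ParseTreePatternMatcher matcher(&lexer, &parser);                          // lexer/parser for grammar X
  ParseTreePattern p = matcher.compile(L"<ID> = <expr>;", XParser::RuleStat);
  // compile() throws StartRuleDoesNotConsumeFullPattern if the pattern has trailing input.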
@@ -31,7 +31,7 @@

#pragma once

#include "Token.h"
#include "Exceptions.h"

namespace org {
namespace antlr {

@@ -99,9 +99,14 @@ namespace pattern {
  /// </summary>
  class ParseTreePatternMatcher {
  public:
    class CannotInvokeStartRule : public std::exception {
    class CannotInvokeStartRule : public RuntimeException {
    public:
      CannotInvokeStartRule(std::exception e);
      CannotInvokeStartRule(const RuntimeException &e);
    };

    // Fixes https://github.com/antlr/antlr4/issues/413
    // "Tree pattern compilation doesn't check for a complete parse"
    class StartRuleDoesNotConsumeFullPattern : public RuntimeException {
    };

    /// Constructs a <seealso cref="ParseTreePatternMatcher"/> or from a <seealso cref="Lexer"/> and