Switching to current ANTLR revision, final part.

This commit is contained in:
Mike Lischke 2016-05-01 12:41:32 +02:00
parent d4ebdfa138
commit bfcb0a71cb
47 changed files with 1364 additions and 516 deletions

View File

@ -1,6 +1,6 @@
# C++ target for ANTLR 4
This fork provides C++ runtime support for C++. See [the canonical antlr4 repository](https://github.com/antlr/antlr4) for in depth detail about how to use Antlr4.
This folder contains the C++ runtime support for ANTLR. See [the canonical antlr4 repository](https://github.com/antlr/antlr4) for in-depth detail about how to use ANTLR 4.
## Authors and major contributors

View File

@ -17,14 +17,6 @@
using namespace antlrcpptest;
using namespace org::antlr::v4::runtime;
class A {
public:
static void doit(const A &a) {
size_t i = a.counter;
}
private:
size_t counter;
};
int main(int argc, const char * argv[]) {
ANTLRInputStream input(L"divideŴ and conquer");

View File

@ -57,6 +57,10 @@
278A66FC1C95838E002D667E /* ANTLRErrorListener.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 278A66FA1C95838E002D667E /* ANTLRErrorListener.cpp */; };
27A23EA31CC2A8D60036D8A3 /* TLexer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27A23EA11CC2A8D60036D8A3 /* TLexer.cpp */; };
27A23EA41CC2A8D60036D8A3 /* TLexer.h in Headers */ = {isa = PBXBuildFile; fileRef = 27A23EA21CC2A8D60036D8A3 /* TLexer.h */; };
27B4A79A1CD605BB00FCCD3E /* Predicate.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27B4A7981CD605BB00FCCD3E /* Predicate.cpp */; };
27B4A79B1CD605BB00FCCD3E /* Predicate.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27B4A7981CD605BB00FCCD3E /* Predicate.cpp */; };
27B4A79C1CD605BB00FCCD3E /* Predicate.h in Headers */ = {isa = PBXBuildFile; fileRef = 27B4A7991CD605BB00FCCD3E /* Predicate.h */; };
27B4A79D1CD605BB00FCCD3E /* Predicate.h in Headers */ = {isa = PBXBuildFile; fileRef = 27B4A7991CD605BB00FCCD3E /* Predicate.h */; };
27C62E261CD269C90088721B /* ParseInfo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27C62E241CD269C90088721B /* ParseInfo.cpp */; };
27C62E271CD269C90088721B /* ParseInfo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27C62E241CD269C90088721B /* ParseInfo.cpp */; };
27C62E281CD269C90088721B /* ParseInfo.h in Headers */ = {isa = PBXBuildFile; fileRef = 27C62E251CD269C90088721B /* ParseInfo.h */; };
@ -649,6 +653,8 @@
278A66FA1C95838E002D667E /* ANTLRErrorListener.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ANTLRErrorListener.cpp; path = ../../runtime/ANTLRErrorListener.cpp; sourceTree = SOURCE_ROOT; };
27A23EA11CC2A8D60036D8A3 /* TLexer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TLexer.cpp; path = ../generated/TLexer.cpp; sourceTree = "<group>"; wrapsLines = 0; };
27A23EA21CC2A8D60036D8A3 /* TLexer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TLexer.h; path = ../generated/TLexer.h; sourceTree = "<group>"; };
27B4A7981CD605BB00FCCD3E /* Predicate.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Predicate.cpp; sourceTree = "<group>"; };
27B4A7991CD605BB00FCCD3E /* Predicate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Predicate.h; sourceTree = "<group>"; };
27C62E241CD269C90088721B /* ParseInfo.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ParseInfo.cpp; sourceTree = "<group>"; };
27C62E251CD269C90088721B /* ParseInfo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ParseInfo.h; sourceTree = "<group>"; };
27C62E2A1CD26C780088721B /* ProfilingATNSimulator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ProfilingATNSimulator.cpp; sourceTree = "<group>"; wrapsLines = 0; };
@ -889,7 +895,7 @@
27C669861C9585B80021E494 /* TerminalNodeImpl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TerminalNodeImpl.h; sourceTree = "<group>"; };
27C669871C9585B80021E494 /* Tree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Tree.cpp; sourceTree = "<group>"; };
27C669881C9585B80021E494 /* Tree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Tree.h; sourceTree = "<group>"; };
27C669891C9585B80021E494 /* Trees.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Trees.cpp; sourceTree = "<group>"; };
27C669891C9585B80021E494 /* Trees.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Trees.cpp; sourceTree = "<group>"; wrapsLines = 0; };
27C6698A1C9585B80021E494 /* Trees.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Trees.h; sourceTree = "<group>"; wrapsLines = 0; };
27C669F01C958AB30021E494 /* Chunk.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Chunk.cpp; path = pattern/Chunk.cpp; sourceTree = "<group>"; };
27C669F11C958AB30021E494 /* ParseTreeMatch.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ParseTreeMatch.cpp; path = pattern/ParseTreeMatch.cpp; sourceTree = "<group>"; wrapsLines = 0; };
@ -1136,6 +1142,8 @@
27C6687D1C9584E90021E494 /* misc */ = {
isa = PBXGroup;
children = (
27B4A7981CD605BB00FCCD3E /* Predicate.cpp */,
27B4A7991CD605BB00FCCD3E /* Predicate.h */,
27C668881C9584FA0021E494 /* Interval.cpp */,
27C668891C9584FA0021E494 /* Interval.h */,
27C6688A1C9584FA0021E494 /* IntervalSet.cpp */,
@ -1512,6 +1520,7 @@
27C668631C95846E0021E494 /* WildcardTransition.h in Headers */,
27C667C11C95846E0021E494 /* ATNDeserializationOptions.h in Headers */,
27C667BD1C95846E0021E494 /* ATNConfigSet.h in Headers */,
27B4A79D1CD605BB00FCCD3E /* Predicate.h in Headers */,
27C667F51C95846E0021E494 /* EpsilonTransition.h in Headers */,
27C667F91C95846E0021E494 /* LexerATNConfig.h in Headers */,
27C669031C9585230021E494 /* BitSet.h in Headers */,
@ -1596,6 +1605,7 @@
27C6683E1C95846E0021E494 /* RuleTransition.h in Headers */,
27C667BC1C95846E0021E494 /* ATNConfigSet.h in Headers */,
275ECC4E1CCCD95B00E79E2A /* LexerAction.h in Headers */,
27B4A79C1CD605BB00FCCD3E /* Predicate.h in Headers */,
275DB3E91CCD23C000D8C543 /* LexerModeAction.h in Headers */,
27C666F21C9584050021E494 /* InterpreterRuleContext.h in Headers */,
27C667F41C95846E0021E494 /* EpsilonTransition.h in Headers */,
@ -1892,6 +1902,7 @@
27C62E3F1CD272480088721B /* DecisionEventInfo.cpp in Sources */,
27C667311C9584050021E494 /* TokenSource.cpp in Sources */,
27C668CB1C9584FA0021E494 /* MurmurHash.cpp in Sources */,
27B4A79B1CD605BB00FCCD3E /* Predicate.cpp in Sources */,
27C669A81C9585B80021E494 /* ParseTreeWalker.cpp in Sources */,
27C62E511CD275C50088721B /* ContextSensitivityInfo.cpp in Sources */,
27C666ED1C9584050021E494 /* InputMismatchException.cpp in Sources */,
@ -2123,6 +2134,7 @@
27C66A0C1C958AB30021E494 /* TextChunk.cpp in Sources */,
27C6682C1C95846E0021E494 /* PredictionMode.cpp in Sources */,
27C667C61C95846E0021E494 /* ATNSerializer.cpp in Sources */,
27B4A79A1CD605BB00FCCD3E /* Predicate.cpp in Sources */,
27C668341C95846E0021E494 /* RuleStartState.cpp in Sources */,
27C668481C95846E0021E494 /* SingletonPredictionContext.cpp in Sources */,
27C668751C9584B60021E494 /* DFAState.cpp in Sources */,

View File

@ -0,0 +1,12 @@
# Demo application for the ANTLR 4 C++ target
This demo app shows how to build the ANTLR runtime as both a dynamic and a static library, and how to use a parser generated from a simple demo grammar.
A few steps are necessary to get this to work:
- Download the current ANTLR jar and place it in this folder.
- Open the generation script for your platform (generate.cmd for Windows, generate.sh for *nix/OSX) and update the LOCATION var to the actual name of the jar you downloaded.
- Run the generation script. This will generate a test parser + lexer, along with listener + visitor classes in a subfolder named "generated". This is where the demo application looks for these files.
- Open the project in the folder that matches your system.
- Compile and run.

View File

@ -9,8 +9,8 @@ set -o errexit
# There are 2 ways of running the ANTLR generator here.
# 1) Running from jar. Use the given jar (or replace it by another one you built or downloaded) for generation.
#LOCATION=antlr-4.1.1-dev-complete.jar
#java -jar $LOCATION -Dlanguage=Cpp -listener -visitor -o generated/ -package antlrcpptest TLexer.g4 TParser.g4
LOCATION=antlr-4.1.1-dev-complete.jar
java -jar $LOCATION -Dlanguage=Cpp -listener -visitor -o generated/ -package antlrcpptest TLexer.g4 TParser.g4
#java -jar $LOCATION -Dlanguage=Cpp -listener -visitor -o generated/ -package antlrcpptest -XdbgST TLexer.g4 TParser.g4
#java -jar $LOCATION -Dlanguage=Java -listener -visitor -o generated/ -package antlrcpptest TLexer.g4 TParser.g4
@ -21,9 +21,9 @@ set -o errexit
# Furthermore is assumed that the antlr3 folder is located side-by-side with the antlr4 folder. Adjust CLASSPATH if not.
# This approach is especially useful if you are working on a target stg file, as it doesn't require to regenerate the
# antlr jar over and over again.
CLASSPATH=../../../tool/resources/:ST-4.0.8.jar:../../../tool/target/classes:../../../runtime/Java/target/classes:../../../../antlr3/runtime/Java/target/classes
#CLASSPATH=../../../tool/resources/:ST-4.0.8.jar:../../../tool/target/classes:../../../runtime/Java/target/classes:../../../../antlr3/runtime/Java/target/classes
java -cp $CLASSPATH org.antlr.v4.Tool -Dlanguage=Cpp -listener -visitor -o generated/ -package antlrcpptest TLexer.g4 TParser.g4
#java -cp $CLASSPATH org.antlr.v4.Tool -Dlanguage=Cpp -listener -visitor -o generated/ -package antlrcpptest TLexer.g4 TParser.g4
#java -cp $CLASSPATH org.antlr.v4.Tool -Dlanguage=Cpp -listener -visitor -o generated/ -package antlrcpptest -XdbgST TLexer.g4 TParser.g4
#java -cp $CLASSPATH org.antlr.v4.Tool -Dlanguage=Java -listener -visitor -o generated/ TLexer.g4 TParser.g4

View File

@ -80,86 +80,113 @@ namespace runtime {
virtual void syntaxError(IRecognizer *recognizer, Ref<Token> offendingSymbol, size_t line, int charPositionInLine,
const std::wstring &msg, std::exception_ptr e) = 0;
/// <summary>
/// This method is called by the parser when a full-context prediction
/// results in an ambiguity.
/// <p/>
/// When {@code exact} is {@code true}, <em>all</em> of the alternatives in
/// {@code ambigAlts} are viable, i.e. this is reporting an exact ambiguity.
/// When {@code exact} is {@code false}, <em>at least two</em> of the
/// alternatives in {@code ambigAlts} are viable for the current input, but
/// the prediction algorithm terminated as soon as it determined that at
/// least the <em>minimum</em> alternative in {@code ambigAlts} is viable.
/// <p/>
/// When the <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/> prediction mode
/// is used, the parser is required to identify exact ambiguities so
/// {@code exact} will always be {@code true}.
/// <p/>
/// This method is not used by lexers.
/// </summary>
/// <param name="recognizer"> the parser instance </param>
/// <param name="dfa"> the DFA for the current decision </param>
/// <param name="startIndex"> the input index where the decision started </param>
/// <param name="stopIndex"> the input input where the ambiguity is reported </param>
/// <param name="exact"> {@code true} if the ambiguity is exactly known, otherwise
/// {@code false}. This is always {@code true} when
/// <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/> is used. </param>
/// <param name="ambigAlts"> the potentially ambiguous alternatives </param>
/// <param name="configs"> the ATN configuration set where the ambiguity was
/// determined </param>
/**
* This method is called by the parser when a full-context prediction
* results in an ambiguity.
*
* <p>Each full-context prediction which does not result in a syntax error
* will call either {@link #reportContextSensitivity} or
* {@link #reportAmbiguity}.</p>
*
* <p>When {@code ambigAlts} is not null, it contains the set of potentially
* viable alternatives identified by the prediction algorithm. When
* {@code ambigAlts} is null, use {@link ATNConfigSet#getAlts} to obtain the
* represented alternatives from the {@code configs} argument.</p>
*
* <p>When {@code exact} is {@code true}, <em>all</em> of the potentially
* viable alternatives are truly viable, i.e. this is reporting an exact
* ambiguity. When {@code exact} is {@code false}, <em>at least two</em> of
* the potentially viable alternatives are viable for the current input, but
* the prediction algorithm terminated as soon as it determined that at
* least the <em>minimum</em> potentially viable alternative is truly
* viable.</p>
*
* <p>When the {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} prediction
* mode is used, the parser is required to identify exact ambiguities so
* {@code exact} will always be {@code true}.</p>
*
* <p>This method is not used by lexers.</p>
*
* @param recognizer the parser instance
* @param dfa the DFA for the current decision
* @param startIndex the input index where the decision started
* @param stopIndex the input index where the ambiguity was identified
* @param exact {@code true} if the ambiguity is exactly known, otherwise
* {@code false}. This is always {@code true} when
* {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} is used.
* @param ambigAlts the potentially ambiguous alternatives, or {@code null}
* to indicate that the potentially ambiguous alternatives are the complete
* set of represented alternatives in {@code configs}
* @param configs the ATN configuration set where the ambiguity was
* identified
*/
virtual void reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact,
const antlrcpp::BitSet &ambigAlts, Ref<atn::ATNConfigSet> configs) = 0;
/// <summary>
/// This method is called when an SLL conflict occurs and the parser is about
/// to use the full context information to make an LL decision.
/// <p/>
/// If one or more configurations in {@code configs} contains a semantic
/// predicate, the predicates are evaluated before this method is called. The
/// subset of alternatives which are still viable after predicates are
/// evaluated is reported in {@code conflictingAlts}.
/// <p/>
/// This method is not used by lexers.
/// </summary>
/// <param name="recognizer"> the parser instance </param>
/// <param name="dfa"> the DFA for the current decision </param>
/// <param name="startIndex"> the input index where the decision started </param>
/// <param name="stopIndex"> the input index where the SLL conflict occurred </param>
/// <param name="conflictingAlts"> The specific conflicting alternatives. If this is
/// {@code null}, the conflicting alternatives are all alternatives
/// represented in {@code configs}. </param>
/// <param name="configs"> the ATN configuration set where the SLL conflict was
/// detected </param>
/**
* This method is called when an SLL conflict occurs and the parser is about
* to use the full context information to make an LL decision.
*
* <p>If one or more configurations in {@code configs} contains a semantic
* predicate, the predicates are evaluated before this method is called. The
* subset of alternatives which are still viable after predicates are
* evaluated is reported in {@code conflictingAlts}.</p>
*
* <p>This method is not used by lexers.</p>
*
* @param recognizer the parser instance
* @param dfa the DFA for the current decision
* @param startIndex the input index where the decision started
* @param stopIndex the input index where the SLL conflict occurred
* @param conflictingAlts The specific conflicting alternatives. If this is
* {@code null}, the conflicting alternatives are all alternatives
* represented in {@code configs}. At the moment, conflictingAlts is non-null
* (for the reference implementation, but Sam's optimized version can see this
* as null).
* @param configs the ATN configuration set where the SLL conflict was
* detected
*/
virtual void reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex,
const antlrcpp::BitSet &conflictingAlts, Ref<atn::ATNConfigSet> configs) = 0;
/// <summary>
/// This method is called by the parser when a full-context prediction has a
/// unique result.
/// <p/>
/// For prediction implementations that only evaluate full-context
/// predictions when an SLL conflict is found (including the default
/// <seealso cref="ParserATNSimulator"/> implementation), this method reports cases
/// where SLL conflicts were resolved to unique full-context predictions,
/// i.e. the decision was context-sensitive. This report does not necessarily
/// indicate a problem, and it may appear even in completely unambiguous
/// grammars.
/// <p/>
/// {@code configs} may have more than one represented alternative if the
/// full-context prediction algorithm does not evaluate predicates before
/// beginning the full-context prediction. In all cases, the final prediction
/// is passed as the {@code prediction} argument.
/// <p/>
/// This method is not used by lexers.
/// </summary>
/// <param name="recognizer"> the parser instance </param>
/// <param name="dfa"> the DFA for the current decision </param>
/// <param name="startIndex"> the input index where the decision started </param>
/// <param name="stopIndex"> the input index where the context sensitivity was
/// finally determined </param>
/// <param name="prediction"> the unambiguous result of the full-context prediction </param>
/// <param name="configs"> the ATN configuration set where the unambiguous prediction
/// was determined </param>
/**
* This method is called by the parser when a full-context prediction has a
* unique result.
*
* <p>Each full-context prediction which does not result in a syntax error
* will call either {@link #reportContextSensitivity} or
* {@link #reportAmbiguity}.</p>
*
* <p>For prediction implementations that only evaluate full-context
* predictions when an SLL conflict is found (including the default
* {@link ParserATNSimulator} implementation), this method reports cases
* where SLL conflicts were resolved to unique full-context predictions,
* i.e. the decision was context-sensitive. This report does not necessarily
* indicate a problem, and it may appear even in completely unambiguous
* grammars.</p>
*
* <p>{@code configs} may have more than one represented alternative if the
* full-context prediction algorithm does not evaluate predicates before
* beginning the full-context prediction. In all cases, the final prediction
* is passed as the {@code prediction} argument.</p>
*
* <p>Note that the definition of "context sensitivity" in this method
* differs from the concept in {@link DecisionInfo#contextSensitivities}.
* This method reports all instances where an SLL conflict occurred but LL
* parsing produced a unique result, whether or not that unique result
* matches the minimum alternative in the SLL conflicting set.</p>
*
* <p>This method is not used by lexers.</p>
*
* @param recognizer the parser instance
* @param dfa the DFA for the current decision
* @param startIndex the input index where the decision started
* @param stopIndex the input index where the context sensitivity was
* finally determined
* @param prediction the unambiguous result of the full-context prediction
* @param configs the ATN configuration set where the unambiguous prediction
* was determined
*/
virtual void reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex,
int prediction, Ref<atn::ATNConfigSet> configs) = 0;
};

View File

@ -65,20 +65,24 @@ namespace runtime {
virtual void reset(Parser *recognizer) = 0;
/// <summary>
/// This method is called when an unexpected symbol is encountered during an
/// inline match operation, such as <seealso cref="Parser#match"/>. If the error
/// strategy successfully recovers from the match failure, this method
/// returns the <seealso cref="Token"/> instance which should be treated as the
/// successful result of the match.
/// <p/>
/// Note that the calling code will not report an error if this method
/// returns successfully. The error strategy implementation is responsible
/// for calling <seealso cref="Parser#notifyErrorListeners"/> as appropriate.
/// </summary>
/// <param name="recognizer"> the parser instance </param>
/// <exception cref="RecognitionException"> if the error strategy was not able to
/// recover from the unexpected input symbol </exception>
/**
* This method is called when an unexpected symbol is encountered during an
* inline match operation, such as {@link Parser#match}. If the error
* strategy successfully recovers from the match failure, this method
* returns the {@link Token} instance which should be treated as the
* successful result of the match.
*
* <p>This method handles the consumption of any tokens - the caller should
* <b>not</b> call {@link Parser#consume} after a successful recovery.</p>
*
* <p>Note that the calling code will not report an error if this method
* returns successfully. The error strategy implementation is responsible
* for calling {@link Parser#notifyErrorListeners} as appropriate.</p>
*
* @param recognizer the parser instance
* @throws RecognitionException if the error strategy was not able to
* recover from the unexpected input symbol
*/
virtual Ref<Token> recoverInline(Parser *recognizer) = 0;
/// <summary>

View File

@ -29,9 +29,10 @@
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <algorithm>
#include "Exceptions.h"
#include "Interval.h"
#include "IntStream.h"
#include "Arrays.h"
#include "CPPUtils.h"
@ -139,8 +140,9 @@ void ANTLRInputStream::seek(size_t index) {
p = index; // just jump; don't update stream state (line, ...)
return;
}
// seek forward, consume until p hits index
while (p < index && index < data.size()) {
// seek forward, consume until p hits index or n (whichever comes first)
index = std::min(index, data.size());
while (p < index) {
consume();
}
}
@ -162,6 +164,9 @@ std::wstring ANTLRInputStream::getText(const Interval &interval) {
}
std::string ANTLRInputStream::getSourceName() const {
if (name.empty()) {
return IntStream::UNKNOWN_SOURCE_NAME;
}
return name;
}

View File

@ -38,7 +38,34 @@ namespace antlr {
namespace v4 {
namespace runtime {
/// Bail out of parser at first syntax error. Use myparser.setErrorHandler(..) to set a different strategy.
/**
* This implementation of {@link ANTLRErrorStrategy} responds to syntax errors
* by immediately canceling the parse operation with a
* {@link ParseCancellationException}. The implementation ensures that the
* {@link ParserRuleContext#exception} field is set for all parse tree nodes
* that were not completed prior to encountering the error.
*
* <p>
* This error strategy is useful in the following scenarios.</p>
*
* <ul>
* <li><strong>Two-stage parsing:</strong> This error strategy allows the first
* stage of two-stage parsing to immediately terminate if an error is
* encountered, and immediately fall back to the second stage. In addition to
* avoiding wasted work by attempting to recover from errors here, the empty
* implementation of {@link BailErrorStrategy#sync} improves the performance of
* the first stage.</li>
* <li><strong>Silent validation:</strong> When syntax errors are not being
* reported or logged, and the parse result is simply ignored if errors occur,
* the {@link BailErrorStrategy} avoids wasting work on recovering from errors
* when the result will be ignored either way.</li>
* </ul>
*
* <p>
* {@code myparser.setErrorHandler(new BailErrorStrategy());}</p>
*
* @see Parser#setErrorHandler(ANTLRErrorStrategy)
*/
class BailErrorStrategy : public DefaultErrorStrategy {
/// <summary>
/// Instead of recovering from exception {@code e}, re-throw it wrapped

View File

@ -42,6 +42,11 @@ namespace antlr {
namespace v4 {
namespace runtime {
/**
* Provides an empty default implementation of {@link ANTLRErrorListener}. The
* default implementation of each method does nothing, but can be overridden as
* necessary.
*/
class BaseErrorListener : public ANTLRErrorListener {
virtual void syntaxError(IRecognizer *recognizer, Ref<Token> offendingSymbol, size_t line, int charPositionInLine,

View File

@ -75,7 +75,22 @@ size_t BufferedTokenStream::size() {
}
void BufferedTokenStream::consume() {
if (LA(1) == EOF) {
bool skipEofCheck = false;
if (!_needSetup) {
if (_fetchedEOF) {
// the last token in tokens is EOF. skip check if p indexes any
// fetched token except the last.
skipEofCheck = _p < _tokens.size() - 1;
} else {
// no EOF token in tokens. skip check if p indexes a fetched token.
skipEofCheck = _p < _tokens.size();
}
} else {
// not yet initialized
skipEofCheck = false;
}
if (!skipEofCheck && LA(1) == EOF) {
throw IllegalStateException("cannot consume EOF");
}
@ -246,13 +261,13 @@ std::vector<Ref<Token>> BufferedTokenStream::getTokens(int start, int stop, int
ssize_t BufferedTokenStream::nextTokenOnChannel(size_t i, int channel) {
sync(i);
if (i >= size()) {
return -1;
return size() - 1;
}
Ref<Token> token = _tokens[i];
while (token->getChannel() != channel) {
if (token->getType() == EOF) {
return -1;
return i;
}
i++;
sync(i);
@ -261,15 +276,24 @@ ssize_t BufferedTokenStream::nextTokenOnChannel(size_t i, int channel) {
return i;
}
ssize_t BufferedTokenStream::previousTokenOnChannel(ssize_t i, int channel) const {
do {
if (_tokens[(size_t)i]->getChannel() == channel)
ssize_t BufferedTokenStream::previousTokenOnChannel(size_t i, int channel) {
sync(i);
if (i >= size()) {
// the EOF token is on every channel
return size() - 1;
}
while (true) {
Ref<Token> token = _tokens[i];
if (token->getType() == EOF || token->getChannel() == channel) {
return i;
}
if (i == 0)
return -1;
return i;
i--;
} while (true);
return -1;
}
return i;
}
std::vector<Ref<Token>> BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex, int channel) {
@ -301,7 +325,12 @@ std::vector<Ref<Token>> BufferedTokenStream::getHiddenTokensToLeft(size_t tokenI
throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1));
}
ssize_t prevOnChannel = previousTokenOnChannel((ssize_t)tokenIndex - 1, Lexer::DEFAULT_TOKEN_CHANNEL);
if (tokenIndex == 0) {
// Obviously no tokens can appear before the first token.
return { };
}
ssize_t prevOnChannel = previousTokenOnChannel(tokenIndex - 1, Lexer::DEFAULT_TOKEN_CHANNEL);
if (prevOnChannel == (ssize_t)tokenIndex - 1) {
return { };
}
@ -334,6 +363,10 @@ std::vector<Ref<Token>> BufferedTokenStream::filterForChannel(size_t from, size_
return hidden;
}
bool BufferedTokenStream::isInitialized() const {
return !_needSetup;
}
/**
* Get the text of all tokens in this buffer.
*/

View File

@ -38,16 +38,18 @@ namespace antlr {
namespace v4 {
namespace runtime {
/// Buffer all input tokens but do on-demand fetching of new tokens from lexer.
/// Useful when the parser or lexer has to set context/mode info before proper
/// lexing of future tokens. The ST template parser needs this, for example,
/// because it has to constantly flip back and forth between inside/output
/// templates. E.g., <names:{hi, <it>}> has to parse names as part of an
/// expression but "hi, <it>" as a nested template.
///
/// You can't use this stream if you pass whitespace or other off-channel tokens
/// to the parser. The stream can't ignore off-channel tokens.
/// (UnbufferedTokenStream is the same way.) Use CommonTokenStream.
/**
* This implementation of {@link TokenStream} loads tokens from a
* {@link TokenSource} on-demand, and places the tokens in a buffer to provide
* access to any previous token by index.
*
* <p>
* This token stream ignores the value of {@link Token#getChannel}. If your
* parser requires the token stream to filter tokens to only those on a particular
* channel, such as {@link Token#DEFAULT_CHANNEL} or
* {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as
* {@link CommonTokenStream}.</p>
*/
class BufferedTokenStream : public TokenStream {
public:
BufferedTokenStream(TokenSource *tokenSource);
@ -120,30 +122,46 @@ namespace runtime {
virtual void fill();
protected:
/**
* The {@link TokenSource} from which tokens for this stream are fetched.
*/
TokenSource *_tokenSource;
/// Record every single token pulled from the source so we can reproduce
/// chunks of it later. This list captures everything so we can access
/// complete input text.
// ml: we own the tokens produced by the token factory.
/**
* A collection of all tokens fetched from the token source. The list is
* considered a complete view of the input once {@link #fetchedEOF} is set
* to {@code true}.
*/
std::vector<Ref<Token>> _tokens;
/// <summary>
/// The index into <seealso cref="#tokens"/> of the current token (next token to
/// consume). <seealso cref="#tokens"/>{@code [}<seealso cref="#p"/>{@code ]} should be
/// <seealso cref="#LT LT(1)"/>. <seealso cref="#p"/>{@code =-1} indicates need to initialize
/// with first token. The constructor doesn't get a token. First call to
/// <seealso cref="#LT LT(1)"/> or whatever gets the first token and sets
/// <seealso cref="#p"/>{@code =0;}.
/// </summary>
/**
* The index into {@link #tokens} of the current token (next token to
* {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be
* {@link #LT LT(1)}.
*
* <p>This field is set to -1 when the stream is first constructed or when
* {@link #setTokenSource} is called, indicating that the first token has
* not yet been fetched from the token source. For additional information,
* see the documentation of {@link IntStream} for a description of
* Initializing Methods.</p>
*/
// ml: since -1 requires to make this member signed for just this single aspect we use a member _needSetup instead.
// Use bool isInitialized() to find out if this stream has started reading.
size_t _p;
/// <summary>
/// Set to {@code true} when the EOF token is fetched. Do not continue fetching
/// tokens after that point, or multiple EOF tokens could end up in the
/// <seealso cref="#tokens"/> array.
/// </summary>
/// <seealso cref= #fetch </seealso>
/**
* Indicates whether the {@link Token#EOF} token has been fetched from
* {@link #tokenSource} and added to {@link #tokens}. This field improves
* performance for the following cases:
*
* <ul>
* <li>{@link #consume}: The lookahead check in {@link #consume} to prevent
* consuming the EOF symbol is optimized by checking the values of
* {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.</li>
* <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into
* {@link #tokens} is trivial with this field.</li>
* </ul>
*/
bool _fetchedEOF;
/// <summary>
@ -177,17 +195,30 @@ namespace runtime {
void lazyInit();
virtual void setup();
/// Given a starting index, return the index of the next token on channel.
/// Return i if tokens[i] is on channel. Return -1 if there are no tokens
/// on channel between i and EOF.
/**
* Given a starting index, return the index of the next token on channel.
* Return {@code i} if {@code tokens[i]} is on channel. Return the index of
* the EOF token if there are no tokens on channel between {@code i} and
* EOF.
*/
virtual ssize_t nextTokenOnChannel(size_t i, int channel);
/// Given a starting index, return the index of the previous token on channel.
/// Return i if tokens[i] is on channel. Return -1 if there are no tokens
/// on channel between i and 0.
virtual ssize_t previousTokenOnChannel(ssize_t i, int channel) const;
/**
* Given a starting index, return the index of the previous token on
* channel. Return {@code i} if {@code tokens[i]} is on channel. Return -1
* if there are no tokens on channel between {@code i} and 0.
*
* <p>
* If {@code i} specifies an index at or after the EOF token, the EOF token
* index is returned. This is due to the fact that the EOF token is treated
* as though it were on every channel.</p>
*/
virtual ssize_t previousTokenOnChannel(size_t i, int channel);
virtual std::vector<Ref<Token>> filterForChannel(size_t from, size_t to, int channel);
bool isInitialized() const;
private:
bool _needSetup;
void InitializeInstanceFields();

View File

@ -70,7 +70,6 @@ CommonToken::CommonToken(int type, const std::wstring &text) {
CommonToken::CommonToken(Token *oldToken) {
InitializeInstanceFields();
_text = oldToken->getText();
_type = oldToken->getType();
_line = oldToken->getLine();
_index = oldToken->getTokenIndex();
@ -79,9 +78,11 @@ CommonToken::CommonToken(Token *oldToken) {
_start = oldToken->getStartIndex();
_stop = oldToken->getStopIndex();
if (is<CommonToken*>(oldToken)) {
_source = (static_cast<CommonToken*>(oldToken))->_source;
if (is<CommonToken *>(oldToken)) {
_text = (static_cast<CommonToken *>(oldToken))->_text;
_source = (static_cast<CommonToken *>(oldToken))->_source;
} else {
_text = oldToken->getText();
_source = { oldToken->getTokenSource(), oldToken->getInputStream() };
}
}
@ -179,4 +180,5 @@ void CommonToken::InitializeInstanceFields() {
_index = -1;
_start = 0;
_stop = 0;
_source = EMPTY_SOURCE;
}

View File

@ -40,50 +40,121 @@ namespace runtime {
class CommonToken : public WritableToken {
protected:
/**
* An empty {@link Pair} which is used as the default value of
* {@link #source} for tokens that do not have a source.
*/
static const std::pair<TokenSource*, CharStream*> EMPTY_SOURCE;
/**
* This is the backing field for {@link #getType} and {@link #setType}.
*/
int _type;
int _line;
int _charPositionInLine; // set to invalid position
int _channel;
std::pair<TokenSource*, CharStream*> _source; // Pure references, usually from statically allocated classes.
/// We need to be able to change the text once in a while. If
/// this is non-empty, then getText should return this. Note that
/// start/stop are not affected by changing this.
///
// TO_DO: can store these in map in token stream rather than as field here
/**
* This is the backing field for {@link #getLine} and {@link #setLine}.
*/
int _line;
/**
* This is the backing field for {@link #getCharPositionInLine} and
* {@link #setCharPositionInLine}.
*/
int _charPositionInLine; // set to invalid position
/**
* This is the backing field for {@link #getChannel} and
* {@link #setChannel}.
*/
int _channel;
/**
* This is the backing field for {@link #getTokenSource} and
* {@link #getInputStream}.
*
* <p>
* These properties share a field to reduce the memory footprint of
* {@link CommonToken}. Tokens created by a {@link CommonTokenFactory} from
* the same source and input stream share a reference to the same
* {@link Pair} containing these values.</p>
*/
std::pair<TokenSource*, CharStream*> _source; // ml: pure references, usually from statically allocated classes.
/**
* This is the backing field for {@link #getText} when the token text is
* explicitly set in the constructor or via {@link #setText}.
*
* @see #getText()
*/
std::wstring _text;
/// <summary>
/// What token number is this from 0..n-1 tokens; < 0 implies invalid index </summary>
/**
* This is the backing field for {@link #getTokenIndex} and
* {@link #setTokenIndex}.
*/
int _index;
/// <summary>
/// The char position into the input buffer where this token starts </summary>
/**
* This is the backing field for {@link #getStartIndex} and
* {@link #setStartIndex}.
*/
int _start;
/// <summary>
/// The char position into the input buffer where this token stops </summary>
/**
* This is the backing field for {@link #getStopIndex} and
* {@link #setStopIndex}.
*/
int _stop;
public:
/**
* Constructs a new {@link CommonToken} with the specified token type.
*
* @param type The token type.
*/
CommonToken(int type);
CommonToken(std::pair<TokenSource*, CharStream*> source, int type, int channel, int start, int stop);
/**
* Constructs a new {@link CommonToken} with the specified token type and
* text.
*
* @param type The token type.
* @param text The text of the token.
*/
CommonToken(int type, const std::wstring &text);
/**
* Constructs a new {@link CommonToken} as a copy of another {@link Token}.
*
* <p>
* If {@code oldToken} is also a {@link CommonToken} instance, the newly
* constructed token will share a reference to the {@link #text} field and
* the {@link Pair} stored in {@link #source}. Otherwise, {@link #text} will
* be assigned the result of calling {@link #getText}, and {@link #source}
* will be constructed from the result of {@link Token#getTokenSource} and
* {@link Token#getInputStream}.</p>
*
* @param oldToken The token to copy.
*/
CommonToken(Token *oldToken);
virtual int getType() const override;
virtual void setLine(int line) override;
/**
 * Explicitly set the text for this token. If {@code text} is not
* {@code null}, then {@link #getText} will return this value rather than
* extracting the text from the input.
*
* @param text The explicit text of the token, or {@code null} if the text
* should be obtained from the input along with the start and stop indexes
* of the token.
*/
virtual void setText(const std::wstring &text) override;
virtual std::wstring getText() override;
/// <summary>
/// Override the text for this token. getText() will return this text
/// rather than pulling from the buffer. Note that this does not mean
/// that start/stop indexes are not valid. It means that the input
/// was converted to a new string in the token object.
/// </summary>
virtual void setText(const std::wstring &text) override;
virtual void setLine(int line) override;
virtual int getLine() override;
virtual int getCharPositionInLine() override;

View File

@ -38,22 +38,60 @@ namespace antlr {
namespace v4 {
namespace runtime {
/**
* This default implementation of {@link TokenFactory} creates
* {@link CommonToken} objects.
*/
class CommonTokenFactory : public TokenFactory<CommonToken> {
public:
/**
* The default {@link CommonTokenFactory} instance.
*
* <p>
* This token factory does not explicitly copy token text when constructing
* tokens.</p>
*/
static const Ref<TokenFactory<CommonToken>> DEFAULT;
/// <summary>
/// Copy text for token out of input char stream. Useful when input
/// stream is unbuffered. </summary>
/// <seealso cref= UnbufferedCharStream </seealso>
protected:
/**
* Indicates whether {@link CommonToken#setText} should be called after
* constructing tokens to explicitly set the text. This is useful for cases
* where the input stream might not be able to provide arbitrary substrings
* of text from the input after the lexer creates a token (e.g. the
* implementation of {@link CharStream#getText} in
* {@link UnbufferedCharStream} throws an
* {@link UnsupportedOperationException}). Explicitly setting the token text
* allows {@link Token#getText} to be called at any time regardless of the
* input stream implementation.
*
* <p>
* The default value is {@code false} to avoid the performance and memory
* overhead of copying text for every token unless explicitly requested.</p>
*/
const bool copyText;
public:
/// Create factory and indicate whether or not the factory copy
/// text out of the char stream.
/**
* Constructs a {@link CommonTokenFactory} with the specified value for
* {@link #copyText}.
*
* <p>
* When {@code copyText} is {@code false}, the {@link #DEFAULT} instance
* should be used instead of constructing a new instance.</p>
*
* @param copyText The value for {@link #copyText}.
*/
CommonTokenFactory(bool copyText);
/**
* Constructs a {@link CommonTokenFactory} with {@link #copyText} set to
* {@code false}.
*
* <p>
* The {@link #DEFAULT} instance should be used instead of calling this
* directly.</p>
*/
CommonTokenFactory();
virtual Ref<CommonToken> create(std::pair<TokenSource*, CharStream*> source, int type,

View File

@ -38,31 +38,60 @@ namespace antlr {
namespace v4 {
namespace runtime {
/// <summary>
/// The most common stream of tokens where every token is buffered up
/// and tokens are filtered for a certain channel (the parser will only
/// see these tokens).
///
/// Even though it buffers all of the tokens, this token stream pulls tokens
/// from the tokens source on demand. In other words, until you ask for a
/// token using consume(), LT(), etc. the stream does not pull from the lexer.
///
/// The only difference between this stream and <seealso cref="BufferedTokenStream"/> superclass
/// is that this stream knows how to ignore off channel tokens. There may be
/// a performance advantage to using the superclass if you don't pass
/// whitespace and comments etc. to the parser on a hidden channel (i.e.,
/// you set {@code $channel} instead of calling {@code skip()} in lexer rules.)
/// </summary>
/// <seealso cref= UnbufferedTokenStream </seealso>
/// <seealso cref= BufferedTokenStream </seealso>
/**
* This class extends {@link BufferedTokenStream} with functionality to filter
* token streams to tokens on a particular channel (tokens where
* {@link Token#getChannel} returns a particular value).
*
* <p>
* This token stream provides access to all tokens by index or when calling
* methods like {@link #getText}. The channel filtering is only used for code
* accessing tokens via the lookahead methods {@link #LA}, {@link #LT}, and
* {@link #LB}.</p>
*
* <p>
* By default, tokens are placed on the default channel
* ({@link Token#DEFAULT_CHANNEL}), but may be reassigned by using the
* {@code ->channel(HIDDEN)} lexer command, or by using an embedded action to
* call {@link Lexer#setChannel}.
* </p>
*
* <p>
* Note: lexer rules which use the {@code ->skip} lexer command or call
* {@link Lexer#skip} do not produce tokens at all, so input text matched by
* such a rule will not be available as part of the token stream, regardless of
* channel.</p>
*/
class CommonTokenStream : public BufferedTokenStream {
/// <summary>
/// Skip tokens on any channel but this one; this is how we skip whitespace... </summary>
protected:
/**
* Specifies the channel to use for filtering tokens.
*
* <p>
* The default value is {@link Token#DEFAULT_CHANNEL}, which matches the
* default channel assigned to tokens created by the lexer.</p>
*/
int channel;
public:
/**
* Constructs a new {@link CommonTokenStream} using the specified token
* source and the default token channel ({@link Token#DEFAULT_CHANNEL}).
*
* @param tokenSource The token source.
*/
CommonTokenStream(TokenSource *tokenSource);
/**
* Constructs a new {@link CommonTokenStream} using the specified token
* source and filtering tokens to the specified channel. Only tokens whose
* {@link Token#getChannel} matches {@code channel} or have the
* {@link Token#getType} equal to {@link Token#EOF} will be returned by the
* token stream lookahead methods.
*
* @param tokenSource The token source.
* @param channel The channel to use for filtering tokens.
*/
CommonTokenStream(TokenSource *tokenSource, int channel);
protected:

View File

@ -40,8 +40,23 @@ namespace runtime {
/// A default ANTLRErrorListener implementation that writes syntax-error
/// messages to the console. Attach it to a recognizer to get simple
/// line/column/message diagnostics without writing a custom listener.
class ConsoleErrorListener : public BaseErrorListener {
public:
/**
* Provides a default instance of {@link ConsoleErrorListener}.
*/
static ConsoleErrorListener INSTANCE;
/**
* {@inheritDoc}
*
* <p>
* This implementation prints messages to {@link System#err} containing the
* values of {@code line}, {@code charPositionInLine}, and {@code msg} using
* the following format.</p>
*
* <pre>
* line <em>line</em>:<em>charPositionInLine</em> <em>msg</em>
* </pre>
*/
virtual void syntaxError(IRecognizer *recognizer, Ref<Token> offendingSymbol, size_t line, int charPositionInLine,
const std::wstring &msg, std::exception_ptr e) override;
};

View File

@ -41,6 +41,7 @@
#include "Parser.h"
#include "Strings.h"
#include "CommonToken.h"
#include "Vocabulary.h"
#include "DefaultErrorStrategy.h"
@ -174,7 +175,7 @@ void DefaultErrorStrategy::reportNoViableAlternative(Parser *recognizer, const N
void DefaultErrorStrategy::reportInputMismatch(Parser *recognizer, const InputMismatchException &e) {
std::wstring msg = std::wstring(L"mismatched input ") + getTokenErrorDisplay(e.getOffendingToken()) +
std::wstring(L" expecting ") + e.getExpectedTokens().toString(recognizer->getTokenNames());
std::wstring(L" expecting ") + e.getExpectedTokens().toString(recognizer->getVocabulary());
recognizer->notifyErrorListeners(e.getOffendingToken(), msg, std::make_exception_ptr(e));
}
@ -195,7 +196,8 @@ void DefaultErrorStrategy::reportUnwantedToken(Parser *recognizer) {
std::wstring tokenName = getTokenErrorDisplay(t);
misc::IntervalSet expecting = getExpectedTokens(recognizer);
std::wstring msg = std::wstring(L"extraneous input ") + tokenName + std::wstring(L" expecting ") + expecting.toString(recognizer->getTokenNames());
std::wstring msg = std::wstring(L"extraneous input ") + tokenName + std::wstring(L" expecting ") +
expecting.toString(recognizer->getVocabulary());
recognizer->notifyErrorListeners(t, msg, nullptr);
}
@ -208,7 +210,7 @@ void DefaultErrorStrategy::reportMissingToken(Parser *recognizer) {
Ref<Token> t = recognizer->getCurrentToken();
misc::IntervalSet expecting = getExpectedTokens(recognizer);
std::wstring msg = std::wstring(L"missing ") + expecting.toString(recognizer->getTokenNames()) + std::wstring(L" at ") + getTokenErrorDisplay(t);
std::wstring msg = L"missing " + expecting.toString(recognizer->getVocabulary()) + L" at " + getTokenErrorDisplay(t);
recognizer->notifyErrorListeners(t, msg, nullptr);
}
@ -271,7 +273,7 @@ Ref<Token> DefaultErrorStrategy::getMissingSymbol(Parser *recognizer) {
if (expectedTokenType == EOF) {
tokenText = L"<missing EOF>";
} else {
tokenText = std::wstring(L"<missing ") + recognizer->getTokenNames()[(size_t)expectedTokenType] + std::wstring(L">");
tokenText = L"<missing " + recognizer->getVocabulary()->getDisplayName(expectedTokenType) + L">";
}
Ref<Token> current = currentSymbol;
Ref<Token> lookback = recognizer->getTokenStream()->LT(-1);

View File

@ -39,32 +39,32 @@ namespace antlr {
namespace v4 {
namespace runtime {
/// <summary>
/// This is the default error handling mechanism for ANTLR parsers
/// and tree parsers.
/// </summary>
/**
* This is the default implementation of {@link ANTLRErrorStrategy} used for
* error reporting and recovery in ANTLR parsers.
*/
class DefaultErrorStrategy : public ANTLRErrorStrategy {
public:
DefaultErrorStrategy() {
InitializeInstanceFields();
}
/// <summary>
/// This is true after we see an error and before having successfully
/// matched a token. Prevents generation of more than one error message
/// per error.
/// </summary>
/// <seealso cref= #inErrorRecoveryMode </seealso>
protected:
/**
* Indicates whether the error strategy is currently "recovering from an
* error". This is used to suppress reporting multiple error messages while
* attempting to recover from a detected syntax error.
*
* @see #inErrorRecoveryMode
*/
bool errorRecoveryMode;
/// <summary>
/// The index into the input stream where the last error occurred.
/// This is used to prevent infinite loops where an error is found
/// but no token is consumed during recovery...another error is found,
/// ad nauseam. This is a failsafe mechanism to guarantee that at least
/// one token/tree node is consumed for two errors.
/// </summary>
/** The index into the input stream where the last error occurred.
* This is used to prevent infinite loops where an error is found
* but no token is consumed during recovery...another error is found,
 * ad nauseam. This is a failsafe mechanism to guarantee that at least
* one token/tree node is consumed for two errors.
*/
int lastErrorIndex;
misc::IntervalSet lastErrorStates;
@ -138,52 +138,52 @@ namespace runtime {
/// </summary>
virtual void recover(Parser *recognizer, const RecognitionException &e) override;
/// <summary>
/// The default implementation of <seealso cref="ANTLRErrorStrategy#sync"/> makes sure
/// that the current lookahead symbol is consistent with what we were expecting
/// at this point in the ATN. You can call this anytime but ANTLR only
/// generates code to check before subrules/loops and each iteration.
/// <p/>
/// Implements Jim Idle's magic sync mechanism in closures and optional
/// subrules. E.g.,
///
/// <pre>
/// a : sync ( stuff sync )* ;
/// sync : {consume to what can follow sync} ;
/// </pre>
///
/// At the start of a sub rule upon error, <seealso cref="#sync"/> performs single
/// token deletion, if possible. If it can't do that, it bails on the current
/// rule and uses the default error recovery, which consumes until the
/// resynchronization set of the current rule.
/// <p/>
/// If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
/// with an empty alternative), then the expected set includes what follows
/// the subrule.
/// <p/>
/// During loop iteration, it consumes until it sees a token that can start a
/// sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
/// stay in the loop as long as possible.
/// <p/>
/// <strong>ORIGINS</strong>
/// <p/>
/// Previous versions of ANTLR did a poor job of their recovery within loops.
/// A single mismatch token or missing token would force the parser to bail
/// out of the entire rules surrounding the loop. So, for rule
///
/// <pre>
/// classDef : 'class' ID '{' member* '}'
/// </pre>
///
/// input with an extra token between members would force the parser to
/// consume until it found the next class definition rather than the next
/// member definition of the current class.
/// <p/>
/// This functionality cost a little bit of effort because the parser has to
/// compare token set at the start of the loop and at each iteration. If for
/// some reason speed is suffering for you, you can turn off this
/// functionality by simply overriding this method as a blank { }.
/// </summary>
/**
* The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
 * that the current lookahead symbol is consistent with what we were expecting
* at this point in the ATN. You can call this anytime but ANTLR only
* generates code to check before subrules/loops and each iteration.
*
* <p>Implements Jim Idle's magic sync mechanism in closures and optional
* subrules. E.g.,</p>
*
* <pre>
* a : sync ( stuff sync )* ;
* sync : {consume to what can follow sync} ;
* </pre>
*
* At the start of a sub rule upon error, {@link #sync} performs single
* token deletion, if possible. If it can't do that, it bails on the current
* rule and uses the default error recovery, which consumes until the
* resynchronization set of the current rule.
*
* <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
* with an empty alternative), then the expected set includes what follows
* the subrule.</p>
*
* <p>During loop iteration, it consumes until it sees a token that can start a
* sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
* stay in the loop as long as possible.</p>
*
* <p><strong>ORIGINS</strong></p>
*
* <p>Previous versions of ANTLR did a poor job of their recovery within loops.
* A single mismatch token or missing token would force the parser to bail
* out of the entire rules surrounding the loop. So, for rule</p>
*
* <pre>
* classDef : 'class' ID '{' member* '}'
* </pre>
*
* input with an extra token between members would force the parser to
* consume until it found the next class definition rather than the next
* member definition of the current class.
*
* <p>This functionality cost a little bit of effort because the parser has to
* compare token set at the start of the loop and at each iteration. If for
* some reason speed is suffering for you, you can turn off this
* functionality by simply overriding this method as a blank { }.</p>
*/
virtual void sync(Parser *recognizer) override;
/// <summary>
@ -217,94 +217,96 @@ namespace runtime {
/// <param name="e"> the recognition exception </param>
virtual void reportFailedPredicate(Parser *recognizer, const FailedPredicateException &e);
/// <summary>
/// This method is called to report a syntax error which requires the removal
/// of a token from the input stream. At the time this method is called, the
/// erroneous symbol is current {@code LT(1)} symbol and has not yet been
/// removed from the input stream. When this method returns,
/// {@code recognizer} is in error recovery mode.
/// <p/>
/// This method is called when <seealso cref="#singleTokenDeletion"/> identifies
/// single-token deletion as a viable recovery strategy for a mismatched
/// input error.
/// <p/>
/// The default implementation simply returns if the handler is already in
/// error recovery mode. Otherwise, it calls <seealso cref="#beginErrorCondition"/> to
/// enter error recovery mode, followed by calling
/// <seealso cref="Parser#notifyErrorListeners"/>.
/// </summary>
/// <param name="recognizer"> the parser instance </param>
/**
* This method is called to report a syntax error which requires the removal
* of a token from the input stream. At the time this method is called, the
* erroneous symbol is current {@code LT(1)} symbol and has not yet been
* removed from the input stream. When this method returns,
* {@code recognizer} is in error recovery mode.
*
* <p>This method is called when {@link #singleTokenDeletion} identifies
* single-token deletion as a viable recovery strategy for a mismatched
* input error.</p>
*
* <p>The default implementation simply returns if the handler is already in
* error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
* enter error recovery mode, followed by calling
* {@link Parser#notifyErrorListeners}.</p>
*
* @param recognizer the parser instance
*/
virtual void reportUnwantedToken(Parser *recognizer);
/// <summary>
/// This method is called to report a syntax error which requires the
/// insertion of a missing token into the input stream. At the time this
/// method is called, the missing token has not yet been inserted. When this
/// method returns, {@code recognizer} is in error recovery mode.
/// <p/>
/// This method is called when <seealso cref="#singleTokenInsertion"/> identifies
/// single-token insertion as a viable recovery strategy for a mismatched
/// input error.
/// <p/>
/// The default implementation simply returns if the handler is already in
/// error recovery mode. Otherwise, it calls <seealso cref="#beginErrorCondition"/> to
/// enter error recovery mode, followed by calling
/// <seealso cref="Parser#notifyErrorListeners"/>.
/// </summary>
/// <param name="recognizer"> the parser instance </param>
/**
* This method is called to report a syntax error which requires the
* insertion of a missing token into the input stream. At the time this
* method is called, the missing token has not yet been inserted. When this
* method returns, {@code recognizer} is in error recovery mode.
*
* <p>This method is called when {@link #singleTokenInsertion} identifies
* single-token insertion as a viable recovery strategy for a mismatched
* input error.</p>
*
* <p>The default implementation simply returns if the handler is already in
* error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
* enter error recovery mode, followed by calling
* {@link Parser#notifyErrorListeners}.</p>
*
* @param recognizer the parser instance
*/
virtual void reportMissingToken(Parser *recognizer);
/// <summary>
/// {@inheritDoc}
/// <p/>
/// The default implementation attempts to recover from the mismatched input
/// by using single token insertion and deletion as described below. If the
/// recovery attempt fails, this method throws an
/// <seealso cref="InputMismatchException"/>.
/// <p/>
/// <strong>EXTRA TOKEN</strong> (single token deletion)
/// <p/>
/// {@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
/// right token, however, then assume {@code LA(1)} is some extra spurious
/// token and delete it. Then consume and return the next token (which was
/// the {@code LA(2)} token) as the successful result of the match operation.
/// <p/>
/// This recovery strategy is implemented by <seealso cref="#singleTokenDeletion"/>.
/// <p/>
/// <strong>MISSING TOKEN</strong> (single token insertion)
/// <p/>
/// If current token (at {@code LA(1)}) is consistent with what could come
/// after the expected {@code LA(1)} token, then assume the token is missing
/// and use the parser's <seealso cref="TokenFactory"/> to create it on the fly. The
/// "insertion" is performed by returning the created token as the successful
/// result of the match operation.
/// <p/>
/// This recovery strategy is implemented by <seealso cref="#singleTokenInsertion"/>.
/// <p/>
/// <strong>EXAMPLE</strong>
/// <p/>
/// For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When
/// the parser returns from the nested call to {@code expr}, it will have
/// call chain:
///
/// <pre>
/// stat -> expr -> atom
/// </pre>
///
/// and it will be trying to match the {@code ')'} at this point in the
/// derivation:
///
/// <pre>
/// => ID '=' '(' INT ')' ('+' atom)* ';'
/// ^
/// </pre>
///
/// The attempt to match {@code ')'} will fail when it sees {@code ';'} and
/// call <seealso cref="#recoverInline"/>. To recover, it sees that {@code LA(1)==';'}
/// is in the set of tokens that can follow the {@code ')'} token reference
/// in rule {@code atom}. It can assume that you forgot the {@code ')'}.
/// </summary>
public:
/**
* {@inheritDoc}
*
* <p>The default implementation attempts to recover from the mismatched input
* by using single token insertion and deletion as described below. If the
* recovery attempt fails, this method throws an
* {@link InputMismatchException}.</p>
*
* <p><strong>EXTRA TOKEN</strong> (single token deletion)</p>
*
* <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
* right token, however, then assume {@code LA(1)} is some extra spurious
* token and delete it. Then consume and return the next token (which was
* the {@code LA(2)} token) as the successful result of the match operation.</p>
*
* <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p>
*
* <p><strong>MISSING TOKEN</strong> (single token insertion)</p>
*
* <p>If current token (at {@code LA(1)}) is consistent with what could come
* after the expected {@code LA(1)} token, then assume the token is missing
* and use the parser's {@link TokenFactory} to create it on the fly. The
* "insertion" is performed by returning the created token as the successful
* result of the match operation.</p>
*
* <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p>
*
* <p><strong>EXAMPLE</strong></p>
*
* <p>For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When
* the parser returns from the nested call to {@code expr}, it will have
* call chain:</p>
*
* <pre>
* stat &rarr; expr &rarr; atom
* </pre>
*
* and it will be trying to match the {@code ')'} at this point in the
* derivation:
*
* <pre>
* =&gt; ID '=' '(' INT ')' ('+' atom)* ';'
* ^
* </pre>
*
* The attempt to match {@code ')'} will fail when it sees {@code ';'} and
* call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'}
* is in the set of tokens that can follow the {@code ')'} token reference
* in rule {@code atom}. It can assume that you forgot the {@code ')'}.
*/
virtual Ref<Token> recoverInline(Parser *recognizer) override;
/// <summary>

View File

@ -32,8 +32,7 @@
using namespace org::antlr::v4::runtime;
RuntimeException::RuntimeException(const std::string &msg)
: std::exception(), _message(msg) {
RuntimeException::RuntimeException(const std::string &msg) : std::exception(), _message(msg) {
}
const char* RuntimeException::what() const NOEXCEPT {

View File

@ -33,4 +33,4 @@
using namespace org::antlr::v4::runtime;
const std::wstring IntStream::UNKNOWN_SOURCE_NAME = L"<unknown>";
const std::string IntStream::UNKNOWN_SOURCE_NAME = "<unknown>";

View File

@ -63,7 +63,7 @@ namespace runtime {
/// The value returned by <seealso cref="#getSourceName"/> when the actual name of the
/// underlying source is not known.
/// </summary>
static const std::wstring UNKNOWN_SOURCE_NAME;
static const std::string UNKNOWN_SOURCE_NAME;
/// <summary>
/// Consumes the current symbol in the stream. This method has the following

View File

@ -33,10 +33,13 @@
using namespace org::antlr::v4::runtime;
InterpreterRuleContext::InterpreterRuleContext() {
}
InterpreterRuleContext::InterpreterRuleContext(std::weak_ptr<ParserRuleContext> parent, int invokingStateNumber, ssize_t ruleIndex)
: ParserRuleContext(parent, invokingStateNumber), ruleIndex(ruleIndex) {
: ParserRuleContext(parent, invokingStateNumber), _ruleIndex(ruleIndex) {
}
ssize_t InterpreterRuleContext::getRuleIndex() const {
return ruleIndex;
return _ruleIndex;
}

View File

@ -38,20 +38,38 @@ namespace antlr {
namespace v4 {
namespace runtime {
/// <summary>
/// This object is used by the ParserInterpreter and is the same as a regular
/// ParserRuleContext except that we need to track the rule index of the
/// current context so that we can build parse trees.
/// </summary>
/**
* This class extends {@link ParserRuleContext} by allowing the value of
* {@link #getRuleIndex} to be explicitly set for the context.
*
* <p>
* {@link ParserRuleContext} does not include field storage for the rule index
* since the context classes created by the code generator override the
* {@link #getRuleIndex} method to return the correct value for that context.
* Since the parser interpreter does not use the context classes generated for a
* parser, this class (with slightly more memory overhead per node) is used to
* provide equivalent functionality.</p>
*/
class InterpreterRuleContext : public ParserRuleContext {
private:
const ssize_t ruleIndex;
public:
InterpreterRuleContext();
/**
* Constructs a new {@link InterpreterRuleContext} with the specified
* parent, invoking state, and rule index.
*
* @param parent The parent context.
* @param invokingStateNumber The invoking state number.
* @param ruleIndex The rule index for the current context.
*/
InterpreterRuleContext(std::weak_ptr<ParserRuleContext> parent, int invokingStateNumber, ssize_t ruleIndex);
virtual ssize_t getRuleIndex() const override;
};
protected:
/** This is the backing field for {@link #getRuleIndex}. */
const ssize_t _ruleIndex = -1;
};
} // namespace runtime
} // namespace v4

View File

@ -34,6 +34,7 @@
#include "DFA.h"
#include "EmptyPredictionContext.h"
#include "Exceptions.h"
#include "VocabularyImpl.h"
#include "LexerInterpreter.h"
@ -42,13 +43,23 @@ using namespace org::antlr::v4::runtime;
LexerInterpreter::LexerInterpreter(const std::wstring &grammarFileName, const std::vector<std::wstring> &tokenNames,
const std::vector<std::wstring> &ruleNames, const std::vector<std::wstring> &modeNames, const atn::ATN &atn,
CharStream *input)
: Lexer(input), grammarFileName(grammarFileName), _tokenNames(tokenNames), _ruleNames(ruleNames), _modeNames(modeNames),
_atn(atn) {
: LexerInterpreter(grammarFileName, dfa::VocabularyImpl::fromTokenNames(tokenNames), ruleNames, modeNames, atn, input) {
}
LexerInterpreter::LexerInterpreter(const std::wstring &grammarFileName, Ref<dfa::Vocabulary> vocabulary,
const std::vector<std::wstring> &ruleNames, const std::vector<std::wstring> &modeNames, const atn::ATN &atn,
CharStream *input)
: Lexer(input), _grammarFileName(grammarFileName), _atn(atn), _ruleNames(ruleNames), _modeNames(modeNames),
_vocabulary(vocabulary) {
if (_atn.grammarType != atn::ATNType::LEXER) {
throw IllegalArgumentException("The ATN must be a lexer ATN.");
}
for (size_t i = 0; i < atn.maxTokenType; i++) {
_tokenNames.push_back(vocabulary->getDisplayName(i));
}
_sharedContextCache = std::make_shared<atn::PredictionContextCache>();
for (size_t i = 0; i < (size_t)atn.getNumberOfDecisions(); ++i) {
_decisionToDFA.push_back(dfa::DFA(_atn.getDecisionState((int)i), (int)i));
@ -66,7 +77,7 @@ const atn::ATN& LexerInterpreter::getATN() const {
}
std::wstring LexerInterpreter::getGrammarFileName() const {
return grammarFileName;
return _grammarFileName;
}
const std::vector<std::wstring>& LexerInterpreter::getTokenNames() const {
@ -80,3 +91,11 @@ const std::vector<std::wstring>& LexerInterpreter::getRuleNames() const {
const std::vector<std::wstring>& LexerInterpreter::getModeNames() const {
return _modeNames;
}
Ref<dfa::Vocabulary> LexerInterpreter::getVocabulary() const {
  // Prefer the vocabulary supplied at construction time; otherwise fall back
  // to the base class' token-name based default.
  if (_vocabulary == nullptr) {
    return Lexer::getVocabulary();
  }
  return _vocabulary;
}

View File

@ -40,9 +40,14 @@ namespace runtime {
class LexerInterpreter : public Lexer {
public:
// @deprecated
LexerInterpreter(const std::wstring &grammarFileName, const std::vector<std::wstring> &tokenNames,
const std::vector<std::wstring> &ruleNames, const std::vector<std::wstring> &modeNames,
const atn::ATN &atn, CharStream *input);
LexerInterpreter(const std::wstring &grammarFileName, Ref<dfa::Vocabulary> vocabulary,
const std::vector<std::wstring> &ruleNames, const std::vector<std::wstring> &modeNames,
const atn::ATN &atn, CharStream *input);
~LexerInterpreter();
virtual const atn::ATN& getATN() const override;
@ -51,16 +56,22 @@ namespace runtime {
virtual const std::vector<std::wstring>& getRuleNames() const override;
virtual const std::vector<std::wstring>& getModeNames() const override;
Ref<dfa::Vocabulary> getVocabulary() const;
protected:
const std::wstring grammarFileName;
const std::wstring _grammarFileName;
const atn::ATN &_atn;
const std::vector<std::wstring> &_tokenNames;
// @deprecated
std::vector<std::wstring> _tokenNames;
const std::vector<std::wstring> &_ruleNames;
const std::vector<std::wstring> &_modeNames;
std::vector<dfa::DFA> _decisionToDFA;
Ref<atn::PredictionContextCache> _sharedContextCache;
private:
Ref<dfa::Vocabulary> _vocabulary;
};
} // namespace runtime

View File

@ -45,40 +45,36 @@
#include "ATN.h"
#include "RuleStopState.h"
#include "Token.h"
#include "VocabularyImpl.h"
#include "InputMismatchException.h"
#include "CommonToken.h"
#include "CPPUtils.h"
#include "ParserInterpreter.h"
using namespace org::antlr::v4::runtime;
using namespace org::antlr::v4::runtime::atn;
using namespace antlrcpp;
ParserInterpreter::ParserInterpreter(const std::wstring &grammarFileName, const std::vector<std::wstring>& tokenNames,
const std::vector<std::wstring>& ruleNames, const atn::ATN &atn, TokenStream *input)
: Parser(input), _grammarFileName(grammarFileName), _tokenNames(tokenNames), _atn(atn), _ruleNames(ruleNames) {
: ParserInterpreter(grammarFileName, dfa::VocabularyImpl::fromTokenNames(tokenNames), ruleNames, atn, input) {
}
for (int i = 0; i < _atn.getNumberOfDecisions(); i++) {
_decisionToDFA.push_back(dfa::DFA(_atn.getDecisionState(i), i));
ParserInterpreter::ParserInterpreter(const std::wstring &grammarFileName, Ref<dfa::Vocabulary> vocabulary,
const std::vector<std::wstring> &ruleNames, const atn::ATN &atn, TokenStream *input)
: Parser(input), _grammarFileName(grammarFileName), _atn(atn), _ruleNames(ruleNames), _vocabulary(vocabulary) {
_sharedContextCache = std::make_shared<atn::PredictionContextCache>();
for (size_t i = 0; i < atn.maxTokenType; ++i) {
_tokenNames.push_back(vocabulary->getDisplayName(i));
}
// identify the ATN states where pushNewRecursionContext must be called
for (auto state : _atn.states) {
if (!is<atn::StarLoopEntryState*>(state)) {
continue;
}
atn::RuleStartState *ruleStartState = _atn.ruleToStartState[(size_t)state->ruleIndex];
if (!ruleStartState->isLeftRecursiveRule) {
continue;
}
atn::ATNState *maybeLoopEndState = state->transition(state->getNumberOfTransitions() - 1)->target;
if (!is<atn::LoopEndState*>(maybeLoopEndState)) {
continue;
}
if (maybeLoopEndState->epsilonOnlyTransitions && is<atn::RuleStopState*>(maybeLoopEndState->transition(0)->target)) {
_pushRecursionContextStates.set((size_t)state->stateNumber);
}
// init decision DFA
for (int i = 0; i < atn.getNumberOfDecisions(); ++i) {
atn::DecisionState *decisionState = atn.getDecisionState(i);
_decisionToDFA.push_back(dfa::DFA(decisionState, i));
}
// get atn simulator that knows how to do predictions
@ -89,6 +85,12 @@ ParserInterpreter::~ParserInterpreter() {
delete _interpreter;
}
void ParserInterpreter::reset() {
  Parser::reset();

  // Clear the one-shot decision-override latch so a fresh parse starts clean.
  _overrideDecisionRoot = nullptr;
  _overrideDecisionReached = false;
}
const atn::ATN& ParserInterpreter::getATN() const {
return _atn;
}
@ -97,6 +99,10 @@ const std::vector<std::wstring>& ParserInterpreter::getTokenNames() const {
return _tokenNames;
}
Ref<dfa::Vocabulary> ParserInterpreter::getVocabulary() const {
return _vocabulary;
}
const std::vector<std::wstring>& ParserInterpreter::getRuleNames() const {
return _ruleNames;
}
@ -108,13 +114,12 @@ std::wstring ParserInterpreter::getGrammarFileName() const {
Ref<ParserRuleContext> ParserInterpreter::parse(int startRuleIndex) {
atn::RuleStartState *startRuleStartState = _atn.ruleToStartState[(size_t)startRuleIndex];
Ref<InterpreterRuleContext> rootContext =
std::make_shared<InterpreterRuleContext>(std::weak_ptr<ParserRuleContext>(), atn::ATNState::INVALID_STATE_NUMBER, startRuleIndex);
_rootContext = createInterpreterRuleContext(std::weak_ptr<ParserRuleContext>(), atn::ATNState::INVALID_STATE_NUMBER, startRuleIndex);
if (startRuleStartState->isLeftRecursiveRule) {
enterRecursionRule(rootContext, startRuleStartState->stateNumber, startRuleIndex, 0);
enterRecursionRule(_rootContext, startRuleStartState->stateNumber, startRuleIndex, 0);
} else {
enterRule(rootContext, startRuleStartState->stateNumber, startRuleIndex);
enterRule(_rootContext, startRuleStartState->stateNumber, startRuleIndex);
}
while (true) {
@ -123,15 +128,32 @@ Ref<ParserRuleContext> ParserInterpreter::parse(int startRuleIndex) {
case atn::ATNState::RULE_STOP :
// pop; return from rule
if (_ctx->isEmpty()) {
exitRule();
return rootContext;
if (startRuleStartState->isLeftRecursiveRule) {
Ref<ParserRuleContext> result = _ctx;
auto parentContext = _parentContextStack.top();
_parentContextStack.pop();
unrollRecursionContexts(parentContext.first);
return result;
} else {
exitRule();
return _rootContext;
}
}
visitRuleStopState(p);
break;
default :
visitState(p);
try {
visitState(p);
}
catch (RecognitionException &e) {
setState(_atn.ruleToStopState[p->ruleIndex]->stateNumber);
getContext()->exception = std::make_exception_ptr(e);
getErrorHandler()->reportError(this, e);
recover(e);
}
break;
}
}
@ -142,26 +164,41 @@ void ParserInterpreter::enterRecursionRule(Ref<ParserRuleContext> localctx, int
Parser::enterRecursionRule(localctx, state, ruleIndex, precedence);
}
atn::ATNState *ParserInterpreter::getATNState() {
void ParserInterpreter::addDecisionOverride(int decision, int tokenIndex, int forcedAlt) {
  // Record the single supported (decision, input index) -> forced-alternative
  // override; it takes effect when visitDecisionState() reaches that decision
  // at that token index.
  _overrideDecisionAlt = forcedAlt;
  _overrideDecisionInputIndex = tokenIndex;
  _overrideDecision = decision;
}
Ref<InterpreterRuleContext> ParserInterpreter::getOverrideDecisionRoot() const {
  // Context that was current when the forced decision fired (empty if the
  // override never triggered).
  Ref<InterpreterRuleContext> root = _overrideDecisionRoot;
  return root;
}
Ref<InterpreterRuleContext> ParserInterpreter::getRootContext() {
  // Root of the current parse; remains accessible even if the parser bails out.
  Ref<InterpreterRuleContext> root = _rootContext;
  return root;
}
atn::ATNState* ParserInterpreter::getATNState() {
  // Map the parser's current state number to its ATN state object.
  size_t stateNumber = (size_t)getState();
  return _atn.states[stateNumber];
}
void ParserInterpreter::visitState(atn::ATNState *p) {
int edge;
if (p->getNumberOfTransitions() > 1) {
edge = getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(_input, ((atn::DecisionState*)p)->decision, _ctx);
} else {
edge = 1;
int predictedAlt = 1;
if (is<DecisionState *>(p)) {
predictedAlt = visitDecisionState(dynamic_cast<DecisionState *>(p));
}
atn::Transition *transition = p->transition((size_t)edge - 1);
atn::Transition *transition = p->transition(predictedAlt - 1);
switch (transition->getSerializationType()) {
case atn::Transition::EPSILON:
if (_pushRecursionContextStates[(size_t)p->stateNumber] == 1 && is<atn::LoopEndState*>(transition->target)) {
Ref<InterpreterRuleContext> ruleContext = std::make_shared<InterpreterRuleContext>(_parentContextStack.top().first,
_parentContextStack.top().second, _ctx->getRuleIndex());
pushNewRecursionContext(ruleContext, _atn.ruleToStartState[(size_t)p->ruleIndex]->stateNumber,
(int)ruleContext->getRuleIndex());
if (p->getStateType() == ATNState::STAR_LOOP_ENTRY &&
(dynamic_cast<StarLoopEntryState *>(p))->isPrecedenceDecision &&
!is<LoopEndState *>(transition->target)) {
// We are at the start of a left recursive rule's (...)* loop
// and we're not taking the exit branch of loop.
Ref<InterpreterRuleContext> localctx = createInterpreterRuleContext(_parentContextStack.top().first,
_parentContextStack.top().second, (int)_ctx->getRuleIndex());
pushNewRecursionContext(localctx, _atn.ruleToStartState[p->ruleIndex]->stateNumber, (int)_ctx->getRuleIndex());
}
break;
@ -173,7 +210,7 @@ void ParserInterpreter::visitState(atn::ATNState *p) {
case atn::Transition::SET:
case atn::Transition::NOT_SET:
if (!transition->matches((int)_input->LA(1), Token::MIN_USER_TOKEN_TYPE, 65535)) {
_errHandler->recoverInline(this);
recoverInline();
}
matchWildcard();
break;
@ -186,11 +223,11 @@ void ParserInterpreter::visitState(atn::ATNState *p) {
{
atn::RuleStartState *ruleStartState = (atn::RuleStartState*)(transition->target);
int ruleIndex = ruleStartState->ruleIndex;
Ref<InterpreterRuleContext> ruleContext = std::make_shared<InterpreterRuleContext>(_ctx, p->stateNumber, ruleIndex);
Ref<InterpreterRuleContext> newctx = createInterpreterRuleContext(_ctx, p->stateNumber, ruleIndex);
if (ruleStartState->isLeftRecursiveRule) {
enterRecursionRule(ruleContext, ruleStartState->stateNumber, ruleIndex, ((atn::RuleTransition*)(transition))->precedence);
enterRecursionRule(newctx, ruleStartState->stateNumber, ruleIndex, ((atn::RuleTransition*)(transition))->precedence);
} else {
enterRule(_ctx, transition->target->stateNumber, ruleIndex);
enterRule(newctx, transition->target->stateNumber, ruleIndex);
}
}
break;
@ -226,6 +263,26 @@ void ParserInterpreter::visitState(atn::ATNState *p) {
setState(transition->target->stateNumber);
}
int ParserInterpreter::visitDecisionState(DecisionState *p) {
  // A state with at most one outgoing transition requires no prediction:
  // alternative 1 is the only choice.
  if (p->getNumberOfTransitions() <= 1) {
    return 1;
  }

  getErrorHandler()->sync(this);
  int decision = p->decision;

  // If a one-shot override is registered for exactly this decision at the
  // current input position, take the forced alternative instead of predicting.
  bool takeOverride = decision == _overrideDecision &&
    (int)_input->index() == _overrideDecisionInputIndex && !_overrideDecisionReached;
  if (takeOverride) {
    _overrideDecisionReached = true;  // latch: only override once per parse
    return _overrideDecisionAlt;
  }

  return getInterpreter<ParserATNSimulator>()->adaptivePredict(_input, decision, _ctx);
}
Ref<InterpreterRuleContext> ParserInterpreter::createInterpreterRuleContext(std::weak_ptr<ParserRuleContext> parent,
  int invokingStateNumber, int ruleIndex) {
  // Central creation point for the rule contexts used by this interpreter.
  Ref<InterpreterRuleContext> ctx = std::make_shared<InterpreterRuleContext>(parent, invokingStateNumber, ruleIndex);
  return ctx;
}
void ParserInterpreter::visitRuleStopState(atn::ATNState *p) {
atn::RuleStartState *ruleStartState = _atn.ruleToStartState[(size_t)p->ruleIndex];
if (ruleStartState->isLeftRecursiveRule) {
@ -241,3 +298,32 @@ void ParserInterpreter::visitRuleStopState(atn::ATNState *p) {
atn::RuleTransition *ruleTransition = static_cast<atn::RuleTransition*>(_atn.states[(size_t)getState()]->transition(0));
setState(ruleTransition->followState->stateNumber);
}
// Rely on the installed error strategy to recover; if recovery consumed no
// input, synthesize an error token and add it as an error node so the
// mismatch is still visible in the parse tree.
void ParserInterpreter::recover(RecognitionException &e) {
// Remember where we were so we can detect whether recovery consumed anything.
size_t i = _input->index();
getErrorHandler()->recover(this, e);
if (_input->index() == i) {
// no input consumed, better add an error node
if (is<InputMismatchException>(e)) {
InputMismatchException &ime = (InputMismatchException&)e;
Ref<Token> tok = e.getOffendingToken();
// Use one of the token types the parser actually expected for the fake token.
int expectedTokenType = ime.getExpectedTokens().getMinElement(); // get any element
auto errToken = getTokenFactory()->create({ tok->getTokenSource(), tok->getTokenSource()->getInputStream() },
expectedTokenType, tok->getText(), Token::DEFAULT_CHANNEL, -1, -1, // invalid start/stop
tok->getLine(), tok->getCharPositionInLine());
_ctx->addErrorNode(std::dynamic_pointer_cast<Token>(errToken));
}
else { // NoViableAlt
// No expected-token info available; mark the synthesized token as invalid.
Ref<Token> tok = e.getOffendingToken();
auto errToken = getTokenFactory()->create({ tok->getTokenSource(), tok->getTokenSource()->getInputStream() },
Token::INVALID_TYPE, tok->getText(), Token::DEFAULT_CHANNEL, -1, -1, // invalid start/stop
tok->getLine(), tok->getCharPositionInLine());
_ctx->addErrorNode(std::dynamic_pointer_cast<Token>(errToken));
}
}
}
Ref<Token> ParserInterpreter::recoverInline() {
  // Delegate single-token recovery to the installed error strategy.
  Ref<Token> matched = _errHandler->recoverInline(this);
  return matched;
}

View File

@ -57,12 +57,22 @@ namespace runtime {
/// </summary>
class ParserInterpreter : public Parser {
public:
// @deprecated
ParserInterpreter(const std::wstring &grammarFileName, const std::vector<std::wstring>& tokenNames,
const std::vector<std::wstring>& ruleNames, const atn::ATN &atn, TokenStream *input);
ParserInterpreter(const std::wstring &grammarFileName, Ref<dfa::Vocabulary> vocabulary,
const std::vector<std::wstring> &ruleNames, const atn::ATN &atn, TokenStream *input);
~ParserInterpreter();
virtual void reset() override;
virtual const atn::ATN& getATN() const override;
// @deprecated
virtual const std::vector<std::wstring>& getTokenNames() const override;
Ref<dfa::Vocabulary> getVocabulary() const;
virtual const std::vector<std::wstring>& getRuleNames() const override;
virtual std::wstring getGrammarFileName() const override;
@ -71,23 +81,127 @@ namespace runtime {
virtual void enterRecursionRule(Ref<ParserRuleContext> localctx, int state, int ruleIndex, int precedence) override;
/** Override this parser interpreter's normal decision-making process
* at a particular decision and input token index. Instead of
* allowing the adaptive prediction mechanism to choose the
* first alternative within a block that leads to a successful parse,
* force it to take the alternative, 1..n for n alternatives.
*
* As an implementation limitation right now, you can only specify one
* override. This is sufficient to allow construction of different
* parse trees for ambiguous input. It means re-parsing the entire input
* in general because you're never sure where an ambiguous sequence would
* live in the various parse trees. For example, in one interpretation,
* an ambiguous input sequence would be matched completely in expression
* but in another it could match all the way back to the root.
*
* s : e '!'? ;
* e : ID
* | ID '!'
* ;
*
* Here, x! can be matched as (s (e ID) !) or (s (e ID !)). In the first
* case, the ambiguous sequence is fully contained only by the root.
* In the second case, the ambiguous sequence is fully contained within just
* e, as in: (e ID !).
*
* Rather than trying to optimize this and make
* some intelligent decisions for optimization purposes, I settled on
* just re-parsing the whole input and then using
* {@link Trees#getRootOfSubtreeEnclosingRegion} to find the minimal
* subtree that contains the ambiguous sequence. I originally tried to
* record the call stack at the point the parser detected an ambiguity but
* left recursive rules create a parse tree stack that does not reflect
* the actual call stack. That impedance mismatch was enough to make
* it challenging to restart the parser at a deeply nested rule
* invocation.
*
* Only parser interpreters can override decisions so as to avoid inserting
* override checking code in the critical ALL(*) prediction execution path.
*
* @since 4.5.1
*/
void addDecisionOverride(int decision, int tokenIndex, int forcedAlt);
Ref<InterpreterRuleContext> getOverrideDecisionRoot() const;
/** Return the root of the parse, which can be useful if the parser
* bails out. You still can access the top node. Note that,
* because of the way left recursive rules add children, it's possible
* that the root will not have any children if the start rule immediately
* called a left recursive rule that fails.
*
* @since 4.5.1
*/
Ref<InterpreterRuleContext> getRootContext();
protected:
const std::wstring _grammarFileName;
std::vector<std::wstring> _tokenNames;
const atn::ATN &_atn;
std::vector<std::wstring> _ruleNames;
antlrcpp::BitSet _pushRecursionContextStates;
std::vector<dfa::DFA> _decisionToDFA; // not shared like it is for generated parsers
Ref<atn::PredictionContextCache> _sharedContextCache;
/** This stack corresponds to the _parentctx, _parentState pair of locals
* that would exist on call stack frames with a recursive descent parser;
* in the generated function for a left-recursive rule you'd see:
*
* private EContext e(int _p) throws RecognitionException {
* ParserRuleContext _parentctx = _ctx; // Pair.a
* int _parentState = getState(); // Pair.b
* ...
* }
*
* Those values are used to create new recursive rule invocation contexts
* associated with left operand of an alt like "expr '*' expr".
*/
std::stack<std::pair<Ref<ParserRuleContext>, int>> _parentContextStack;
/** We need a map from (decision,inputIndex)->forced alt for computing ambiguous
* parse trees. For now, we allow exactly one override.
*/
int _overrideDecision = -1;
int _overrideDecisionInputIndex = -1;
int _overrideDecisionAlt = -1;
bool _overrideDecisionReached = false; // latch and only override once; error might trigger infinite loop
/** What is the current context when we override a decision? This tells
* us what the root of the parse tree is when using override
* for an ambiguity/lookahead check.
*/
Ref<InterpreterRuleContext> _overrideDecisionRoot;
Ref<InterpreterRuleContext> _rootContext;
virtual atn::ATNState *getATNState();
virtual void visitState(atn::ATNState *p);
/** Method visitDecisionState() is called when the interpreter reaches
* a decision state (instance of DecisionState). It gives an opportunity
* for subclasses to track interesting things.
*/
int visitDecisionState(atn::DecisionState *p);
/** Provide simple "factory" for InterpreterRuleContext's.
* @since 4.5.1
*/
Ref<InterpreterRuleContext> createInterpreterRuleContext(std::weak_ptr<ParserRuleContext> parent, int invokingStateNumber,
int ruleIndex);
virtual void visitRuleStopState(atn::ATNState *p);
/** Rely on the error handler for this parser but, if no tokens are consumed
* to recover, add an error node. Otherwise, nothing is seen in the parse
* tree.
*/
void recover(RecognitionException &e);
Ref<Token> recoverInline();
private:
Ref<dfa::Vocabulary> _vocabulary;
};
} // namespace runtime

View File

@ -73,6 +73,19 @@ namespace runtime {
/// </summary>
static const size_t HIDDEN_CHANNEL = 1;
/**
* This is the minimum constant value which can be assigned to a
* user-defined token channel.
*
* <p>
* The non-negative numbers less than {@link #MIN_USER_CHANNEL_VALUE} are
* assigned to the predefined channels {@link #DEFAULT_CHANNEL} and
* {@link #HIDDEN_CHANNEL}.</p>
*
* @see Token#getChannel()
*/
static const size_t MIN_USER_CHANNEL_VALUE = 2;
/// <summary>
/// Get the text of the token.
/// </summary>

View File

@ -269,12 +269,16 @@ std::wstring TokenStreamRewriter::getText() {
return getText(DEFAULT_PROGRAM_NAME, Interval(0, (int)tokens->size() - 1));
}
std::wstring TokenStreamRewriter::getText(std::wstring programName) {
return getText(programName, Interval(0, (int)tokens->size() - 1));
}
std::wstring TokenStreamRewriter::getText(const Interval &interval) {
return getText(DEFAULT_PROGRAM_NAME, interval);
}
std::wstring TokenStreamRewriter::getText(const std::wstring &programName, const Interval &interval) {
std::vector<TokenStreamRewriter::RewriteOperation*> rewrites = _programs.at(programName);
std::vector<TokenStreamRewriter::RewriteOperation*> rewrites = _programs[programName];
int start = interval.a;
int stop = interval.b;

View File

@ -36,68 +36,82 @@ namespace antlr {
namespace v4 {
namespace runtime {
/// <summary>
/// Useful for rewriting out a buffered input token stream after doing some
/// augmentation or other manipulations on it.
///
/// You can insert stuff, replace, and delete chunks. Note that the
/// operations are done lazily--only if you convert the buffer to a
/// String with getText(). This is very efficient because you are not moving
/// data around all the time. As the buffer of tokens is converted to strings,
/// the getText() method(s) scan the input token stream and check
/// to see if there is an operation at the current index.
/// If so, the operation is done and then normal String
/// rendering continues on the buffer. This is like having multiple Turing
/// machine instruction streams (programs) operating on a single input tape. :)
///
/// This rewriter makes no modifications to the token stream. It does not
/// ask the stream to fill itself up nor does it advance the input cursor.
/// The token stream index() will return the same value before and after
/// any getText() call.
///
/// The rewriter only works on tokens that you have in the buffer and
/// ignores the current input cursor. If you are buffering tokens on-demand,
/// calling getText() halfway through the input will only do rewrites
/// for those tokens in the first half of the file.
///
/// Since the operations are done lazily at getText-time, operations do not
/// screw up the token index values. That is, an insert operation at token
/// index i does not change the index values for tokens i+1..n-1.
///
/// Because operations never actually alter the buffer, you may always get
/// the original token stream back without undoing anything. Since
/// the instructions are queued up, you can easily simulate transactions and
/// roll back any changes if there is an error just by removing instructions.
/// For example,
///
/// CharStream input = new ANTLRFileStream("input");
/// TLexer lex = new TLexer(input);
/// CommonTokenStream tokens = new CommonTokenStream(lex);
/// T parser = new T(tokens);
/// TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
/// parser.startRule();
///
/// Then in the rules, you can execute (assuming rewriter is visible):
/// Token t,u;
/// ...
/// rewriter.insertAfter(t, "text to put after t");}
/// rewriter.insertAfter(u, "text after u");}
/// System.out.println(tokens.toString());
///
/// You can also have multiple "instruction streams" and get multiple
/// rewrites from a single pass over the input. Just name the instruction
/// streams and use that name again when printing the buffer. This could be
/// useful for generating a C file and also its header file--all from the
/// same buffer:
///
/// tokens.insertAfter("pass1", t, "text to put after t");}
/// tokens.insertAfter("pass2", u, "text after u");}
/// System.out.println(tokens.toString("pass1"));
/// System.out.println(tokens.toString("pass2"));
///
/// If you don't use named rewrite streams, a "default" stream is used as
/// the first example shows.
/// </summary>
/**
* Useful for rewriting out a buffered input token stream after doing some
* augmentation or other manipulations on it.
*
* <p>
* You can insert stuff, replace, and delete chunks. Note that the operations
* are done lazily--only if you convert the buffer to a {@link String} with
* {@link TokenStream#getText()}. This is very efficient because you are not
* moving data around all the time. As the buffer of tokens is converted to
* strings, the {@link #getText()} method(s) scan the input token stream and
* check to see if there is an operation at the current index. If so, the
* operation is done and then normal {@link String} rendering continues on the
* buffer. This is like having multiple Turing machine instruction streams
* (programs) operating on a single input tape. :)</p>
*
* <p>
* This rewriter makes no modifications to the token stream. It does not ask the
* stream to fill itself up nor does it advance the input cursor. The token
* stream {@link TokenStream#index()} will return the same value before and
* after any {@link #getText()} call.</p>
*
* <p>
* The rewriter only works on tokens that you have in the buffer and ignores the
* current input cursor. If you are buffering tokens on-demand, calling
* {@link #getText()} halfway through the input will only do rewrites for those
* tokens in the first half of the file.</p>
*
* <p>
* Since the operations are done lazily at {@link #getText}-time, operations do
* not screw up the token index values. That is, an insert operation at token
* index {@code i} does not change the index values for tokens
* {@code i}+1..n-1.</p>
*
* <p>
* Because operations never actually alter the buffer, you may always get the
* original token stream back without undoing anything. Since the instructions
* are queued up, you can easily simulate transactions and roll back any changes
* if there is an error just by removing instructions. For example,</p>
*
* <pre>
* CharStream input = new ANTLRFileStream("input");
* TLexer lex = new TLexer(input);
* CommonTokenStream tokens = new CommonTokenStream(lex);
* T parser = new T(tokens);
* TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
* parser.startRule();
* </pre>
*
* <p>
* Then in the rules, you can execute (assuming rewriter is visible):</p>
*
* <pre>
* Token t,u;
* ...
* rewriter.insertAfter(t, "text to put after t");}
* rewriter.insertAfter(u, "text after u");}
* System.out.println(rewriter.getText());
* </pre>
*
* <p>
* You can also have multiple "instruction streams" and get multiple rewrites
* from a single pass over the input. Just name the instruction streams and use
* that name again when printing the buffer. This could be useful for generating
* a C file and also its header file--all from the same buffer:</p>
*
* <pre>
* rewriter.insertAfter("pass1", t, "text to put after t");}
* rewriter.insertAfter("pass2", u, "text after u");}
* System.out.println(rewriter.getText("pass1"));
* System.out.println(rewriter.getText("pass2"));
* </pre>
*
* <p>
* If you don't use named rewrite streams, a "default" stream is used as the
* first example shows.</p>
*/
class TokenStreamRewriter {
public:
static const std::wstring DEFAULT_PROGRAM_NAME;
@ -153,6 +167,11 @@ namespace runtime {
/// instructions given to this rewriter.
virtual std::wstring getText();
/** Return the text from the original tokens altered per the
* instructions given to this rewriter in programName.
*/
std::wstring getText(std::wstring programName);
/// <summary>
/// Return the text associated with the tokens in the interval from the
/// original token stream but with the alterations given to this rewriter.

View File

@ -188,6 +188,10 @@ size_t UnbufferedCharStream::size() {
}
std::string UnbufferedCharStream::getSourceName() const {
  // Fall back to the shared placeholder when no explicit name was set.
  if (!name.empty()) {
    return name;
  }
  return UNKNOWN_SOURCE_NAME;
}

View File

@ -141,7 +141,7 @@ int ATN::defineDecisionState(DecisionState *s) {
DecisionState *ATN::getDecisionState(int decision) const {
if (!decisionToState.empty()) {
return decisionToState.at((size_t)decision);
return decisionToState[(size_t)decision];
}
return nullptr;
}

View File

@ -0,0 +1,32 @@
/*
* [The "BSD license"]
* Copyright (c) 2016 Mike Lischke
* Copyright (c) 2014 Terence Parr
* Copyright (c) 2014 Dan McLaughlin
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "Predicate.h"

View File

@ -0,0 +1,50 @@
/*
* [The "BSD license"]
* Copyright (c) 2016 Mike Lischke
* Copyright (c) 2014 Terence Parr
* Copyright (c) 2014 Dan McLaughlin
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
namespace org {
namespace antlr {
namespace v4 {
namespace runtime {
namespace misc {

  /// Generic single-argument predicate interface, mirroring Java's
  /// java.util.function.Predicate. Implementations decide whether a given
  /// element satisfies some condition.
  template<typename T>
  class Predicate {
  public:
    /// Virtual destructor: this is an abstract base meant to be used
    /// polymorphically, so deleting through a Predicate<T>* must be safe.
    virtual ~Predicate() = default;

    /// Evaluates this predicate on the given argument.
    /// @param t the element to test.
    /// @returns true if the argument matches the predicate.
    virtual bool test(Ref<T> t) = 0;
  };

} // namespace misc
} // namespace runtime
} // namespace v4
} // namespace antlr
} // namespace org

View File

@ -97,6 +97,7 @@ namespace org {
class MurmurHash;
class ParseCancellationException;
class Utils;
template <typename T> class Predicate;
}
namespace atn {
class ATN;

View File

@ -53,17 +53,19 @@ namespace tree {
return tree->accept(this);
}
/// <summary>
/// {@inheritDoc}
/// <p/>
/// The default implementation initializes the aggregate result to
/// <seealso cref="#defaultResult defaultResult()"/>. Before visiting each child, it
/// calls <seealso cref="#shouldVisitNextChild shouldVisitNextChild"/>; if the result
/// is {@code false} no more children are visited and the current aggregate
/// result is returned. After visiting a child, the aggregate result is
/// updated by calling <seealso cref="#aggregateResult aggregateResult"/> with the
/// previous aggregate result and the result of visiting the child.
/// </summary>
/**
* <p>The default implementation initializes the aggregate result to
* {@link #defaultResult defaultResult()}. Before visiting each child, it
* calls {@link #shouldVisitNextChild shouldVisitNextChild}; if the result
* is {@code false} no more children are visited and the current aggregate
* result is returned. After visiting a child, the aggregate result is
* updated by calling {@link #aggregateResult aggregateResult} with the
* previous aggregate result and the result of visiting the child.</p>
*
* <p>The default implementation is not safe for use in visitors that modify
* the tree structure. Visitors that modify the tree should override this
* method to behave properly in respect to the specific algorithm in use.</p>
*/
virtual T* visitChildren(RuleNode *node) override {
T* result = defaultResult();
size_t n = node->getChildCount();

View File

@ -37,6 +37,17 @@ namespace v4 {
namespace runtime {
namespace tree {
/** This interface describes the minimal core of methods triggered
* by {@link ParseTreeWalker}. E.g.,
*
* ParseTreeWalker walker = new ParseTreeWalker();
* walker.walk(myParseTreeListener, myParseTree); <-- triggers events in your listener
*
* If you want to trigger events in multiple listeners during a single
* tree walk, you can use the ParseTreeDispatcher object available at
*
* https://github.com/antlr/antlr4/issues/841
*/
class ParseTreeListener {
public:
virtual void visitTerminal(Ref<TerminalNode> node) = 0;

View File

@ -45,15 +45,23 @@ namespace tree {
/// between parse trees and other kinds of syntax trees we might want to create.
/// </summary>
class SyntaxTree : public Tree {
/// <summary>
/// Return an <seealso cref="Interval"/> indicating the index in the
/// <seealso cref="TokenStream"/> of the first and last token associated with this
/// subtree. If this node is a leaf, then the interval represents a single
/// token.
/// <p/>
/// If source interval is unknown, this returns <seealso cref="Interval#INVALID"/>.
/// </summary>
public:
/**
* Return an {@link Interval} indicating the index in the
* {@link TokenStream} of the first and last token associated with this
* subtree. If this node is a leaf, then the interval represents a single
* token and has interval i..i for token index i.
*
* <p>An interval of i..i-1 indicates an empty interval at position
* i in the input stream, where 0 &lt;= i &lt;= the size of the input
* token stream. Currently, the code base can only have i=0..n-1 but
* in concept one could have an empty interval after EOF. </p>
*
* <p>If source interval is unknown, this returns {@link Interval#INVALID}.</p>
*
* <p>As a weird special case, the source interval for rules matched after
* EOF is unspecified.</p>
*/
virtual misc::Interval getSourceInterval() = 0;
};

View File

@ -32,3 +32,7 @@
#include "Tree.h"
using namespace org::antlr::v4::runtime::tree;
// Default equality for trees is identity: two Tree nodes compare equal only
// if they are the very same object. Declared virtual, so subclasses may
// override with structural equality if they need it.
bool Tree::operator == (const Tree &other) const {
return &other == this;
}

View File

@ -78,6 +78,8 @@ namespace tree {
virtual std::wstring toString() = 0;
virtual bool operator == (const Tree &other) const;
protected:
virtual std::weak_ptr<Tree> getParentReference() = 0;
virtual Ref<Tree> getChildReference(size_t i) = 0;

View File

@ -34,14 +34,22 @@
#include "ParserRuleContext.h"
#include "CPPUtils.h"
#include "TerminalNodeImpl.h"
#include "ATN.h"
#include "Interval.h"
#include "CommonToken.h"
#include "Predicate.h"
#include "Trees.h"
using namespace org::antlr::v4::runtime;
using namespace org::antlr::v4::runtime::misc;
using namespace org::antlr::v4::runtime::tree;
using namespace antlrcpp;
// Trees is a collection of static helpers only; the constructor is private
// (see Trees.h) to prevent instantiation.
Trees::Trees() {
}
// Convenience overload: render the tree without rule-name resolution by
// delegating to the two-argument overload with a null recognizer.
std::wstring Trees::toStringTree(Ref<Tree> t) {
return toStringTree(t, nullptr);
}
@ -76,11 +84,15 @@ std::wstring Trees::getNodeText(Ref<Tree> t, Parser *recog) {
std::wstring Trees::getNodeText(Ref<Tree> t, const std::vector<std::wstring> &ruleNames) {
if (ruleNames.size() > 0) {
if (is<RuleNode>(t)) {
ssize_t ruleIndex = (std::static_pointer_cast<RuleNode>(t))->getRuleContext()->getRuleIndex();
if (is<RuleContext>(t)) {
ssize_t ruleIndex = std::static_pointer_cast<RuleContext>(t)->getRuleContext()->getRuleIndex();
if (ruleIndex < 0)
return L"Invalid Rule Index";
std::wstring ruleName = ruleNames[(size_t)ruleIndex];
int altNumber = std::static_pointer_cast<RuleContext>(t)->getAltNumber();
if (altNumber != atn::ATN::INVALID_ALT_NUMBER) {
return ruleName + L":" + std::to_wstring(altNumber);
}
return ruleName;
} else if (is<ErrorNode>(t)) {
return t->toString();
@ -141,6 +153,21 @@ static void _findAllNodes(Ref<ParseTree> t, int index, bool findTokens, std::vec
}
}
/// Return true if t is u's parent or lies anywhere on the path from u up to
/// the root. Comparison is by identity (shared pointer equality), not by
/// structural equality. Returns false when either argument is null or when
/// t has no (live) parent.
bool Trees::isAncestorOf(Ref<Tree> t, Ref<Tree> u) {
  if (t == nullptr || u == nullptr || t->getParent().expired()) {
    return false;
  }

  // Walk the parent chain starting at u's parent; stop at the root.
  for (Ref<Tree> ancestor = u->getParent().lock(); ancestor != nullptr; ancestor = ancestor->getParent().lock()) {
    if (ancestor == t) {
      return true;
    }
  }
  return false;
}
// Collect every token (leaf) node in t whose token type is ttype; delegates
// to findAllNodes with findTokens = true.
std::vector<Ref<ParseTree>> Trees::findAllTokenNodes(Ref<ParseTree> t, int ttype) {
return findAllNodes(t, ttype, true);
}
@ -155,12 +182,12 @@ std::vector<Ref<ParseTree>> Trees::findAllNodes(Ref<ParseTree> t, int index, boo
return nodes;
}
std::vector<Ref<ParseTree>> Trees::descendants(Ref<ParseTree> t) {
std::vector<Ref<ParseTree>> Trees::getDescendants(Ref<ParseTree> t) {
std::vector<Ref<ParseTree>> nodes;
nodes.push_back(t);
std::size_t n = t->getChildCount();
for (size_t i = 0 ; i < n ; i++) {
auto descentants = descendants(t->getChild(i));
auto descentants = getDescendants(t->getChild(i));
for (auto entry: descentants) {
nodes.push_back(entry);
}
@ -168,5 +195,62 @@ std::vector<Ref<ParseTree>> Trees::descendants(Ref<ParseTree> t) {
return nodes;
}
Trees::Trees() {
// Deprecated alias kept for backward compatibility; new code should call
// getDescendants() directly (see the @deprecated note in Trees.h).
std::vector<Ref<ParseTree>> Trees::descendants(Ref<ParseTree> t) {
return getDescendants(t);
}
// Find the smallest ParserRuleContext subtree of t that fully encloses the
// token range startTokenIndex..stopTokenIndex (both inclusive). Recursive
// depth-first search in which children are examined before t itself
// (postorder), so the deepest enclosing context is returned. Returns
// nullptr if no context within t encloses the range.
Ref<ParserRuleContext> Trees::getRootOfSubtreeEnclosingRegion(Ref<ParseTree> t, size_t startTokenIndex,
size_t stopTokenIndex) {
size_t n = t->getChildCount();
for (size_t i = 0; i<n; i++) {
Ref<ParseTree> child = t->getChild(i);
Ref<ParserRuleContext> r = getRootOfSubtreeEnclosingRegion(child, startTokenIndex, stopTokenIndex);
if (r != nullptr) {
return r;
}
}
// No child encloses the whole range; check whether t itself does.
// The casts to int match getTokenIndex()'s signed return type.
if (is<ParserRuleContext>(t)) {
Ref<ParserRuleContext> r = std::static_pointer_cast<ParserRuleContext>(t);
if ((int)startTokenIndex >= r->getStart()->getTokenIndex() && // is range fully contained in t?
(r->getStop() == nullptr || (int)stopTokenIndex <= r->getStop()->getTokenIndex())) {
// note: r.getStop()==null likely implies that we bailed out of parser and there's nothing to the right
return r;
}
}
return nullptr;
}
// Replace each ParserRuleContext child of t whose source interval lies
// entirely outside startIndex..stopIndex with a single "..." terminal node
// (CommonToken of INVALID_TYPE). WARNING: destructive — t's children vector
// is mutated in place, and t's own source interval is not adjusted.
void Trees::stripChildrenOutOfRange(Ref<ParserRuleContext> t, Ref<ParserRuleContext> root, size_t startIndex, size_t stopIndex) {
if (t == nullptr) {
return;
}
for (size_t i = 0; i < t->getChildCount(); ++i) {
Ref<ParseTree> child = t->getChild(i);
// range.a/range.b are the first/last token indices covered by child.
Interval range = child->getSourceInterval();
if (is<ParserRuleContext>(child) && (range.b < (int)startIndex || range.a > (int)stopIndex)) {
if (isAncestorOf(child, root)) { // replace only if subtree doesn't have displayed root
Ref<CommonToken> abbrev = std::make_shared<CommonToken>(Token::INVALID_TYPE, L"...");
t->children[i] = std::make_shared<TerminalNodeImpl>(abbrev);
}
}
}
}
/// Preorder depth-first search: return the first node (t itself included)
/// for which pred->test() yields true, or nullptr when no node in the
/// subtree satisfies the predicate.
Ref<Tree> Trees::findNodeSuchThat(Ref<Tree> t, Ref<Predicate<Tree>> pred) {
  // The node itself is tested before any of its children.
  if (pred->test(t)) {
    return t;
  }

  size_t childCount = t->getChildCount();
  for (size_t childIndex = 0; childIndex < childCount; ++childIndex) {
    Ref<Tree> match = findNodeSuchThat(t->getChild(childIndex), pred);
    if (match != nullptr) {
      return match;
    }
  }
  return nullptr;
}

View File

@ -68,12 +68,52 @@ namespace tree {
/// Return a list of all ancestors of this node. The first node of
/// list is the root and the last is the parent of this node.
static std::vector<std::weak_ptr<Tree>> getAncestors(Ref<Tree> t);
/** Return true if t is u's parent or a node on path to root from u.
* Use == not equals().
*
* @since 4.5.1
*/
static bool isAncestorOf(Ref<Tree> t, Ref<Tree> u);
static std::vector<Ref<ParseTree>> findAllTokenNodes(Ref<ParseTree> t, int ttype);
static std::vector<Ref<ParseTree>> findAllRuleNodes(Ref<ParseTree> t, int ruleIndex);
static std::vector<Ref<ParseTree>> findAllNodes(Ref<ParseTree> t, int index, bool findTokens);
static std::vector<Ref<ParseTree>> descendants(Ref<ParseTree> t);
/** Get all descendants; includes t itself.
*
* @since 4.5.1
*/
static std::vector<Ref<ParseTree>> getDescendants(Ref<ParseTree> t);
/** @deprecated */
static std::vector<Ref<ParseTree>> descendants(Ref<ParseTree> t);
/** Find smallest subtree of t enclosing range startTokenIndex..stopTokenIndex
* inclusively using postorder traversal. Recursive depth-first-search.
*
* @since 4.5.1
*/
static Ref<ParserRuleContext> getRootOfSubtreeEnclosingRegion(Ref<ParseTree> t,
size_t startTokenIndex, // inclusive
size_t stopTokenIndex); // inclusive
/** Replace any subtree siblings of root that are completely to left
* or right of lookahead range with a CommonToken(Token.INVALID_TYPE,"...")
* node. The source interval for t is not altered to suit smaller range!
*
* WARNING: destructive to t.
*
* @since 4.5.1
*/
static void stripChildrenOutOfRange(Ref<ParserRuleContext> t, Ref<ParserRuleContext> root, size_t startIndex,
size_t stopIndex);
/** Return first node satisfying the pred
*
* @since 4.5.1
*/
static Ref<Tree> findNodeSuchThat(Ref<Tree> t, Ref<misc::Predicate<Tree>> pred);
private:
Trees();
};

View File

@ -47,7 +47,7 @@ ParseTreeMatch::ParseTreeMatch(Ref<ParseTree> tree, const ParseTreePattern &patt
Ref<ParseTree> ParseTreeMatch::get(const std::wstring &label) {
auto iterator = _labels.find(label);
if (iterator == _labels.end()) {
if (iterator == _labels.end() || iterator->second.empty()) {
return nullptr;
}

View File

@ -40,6 +40,7 @@
#include "TagChunk.h"
#include "ATN.h"
#include "Lexer.h"
#include "BailErrorStrategy.h"
#include "ListTokenSource.h"
#include "TextChunk.h"
@ -56,7 +57,7 @@ using namespace org::antlr::v4::runtime::tree;
using namespace org::antlr::v4::runtime::tree::pattern;
using namespace antlrcpp;
ParseTreePatternMatcher::CannotInvokeStartRule::CannotInvokeStartRule(std::exception e) {
ParseTreePatternMatcher::CannotInvokeStartRule::CannotInvokeStartRule(const RuntimeException &e) : RuntimeException(e.what()) {
}
ParseTreePatternMatcher::ParseTreePatternMatcher(Lexer *lexer, Parser *parser) : _lexer(lexer), _parser(parser) {
@ -109,12 +110,17 @@ ParseTreePattern ParseTreePatternMatcher::compile(const std::wstring &pattern, i
delete tokens;
});
ParserInterpreter parserInterp(_parser->getGrammarFileName(), _parser->getTokenNames(),
ParserInterpreter parserInterp(_parser->getGrammarFileName(), _parser->getVocabulary(),
_parser->getRuleNames(), _parser->getATNWithBypassAlts(), tokens);
Ref<ParserRuleContext> tree;
try {
Ref<ParserRuleContext> context = parserInterp.parse(patternRuleIndex);
return ParseTreePattern(this, pattern, patternRuleIndex, context);
parserInterp.setErrorHandler(std::make_shared<BailErrorStrategy>());
tree = parserInterp.parse(patternRuleIndex);
} catch (ParseCancellationException &e) {
std::rethrow_if_nested(e);
} catch (RecognitionException &re) {
throw re;
} catch (std::exception &e) {
#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026
// throw_with_nested is not available before VS 2015.
@ -124,6 +130,12 @@ ParseTreePattern ParseTreePatternMatcher::compile(const std::wstring &pattern, i
#endif
}
// Make sure tree pattern compilation checks for a complete parse
if (tokens->LA(1) != EOF) {
throw StartRuleDoesNotConsumeFullPattern();
}
return ParseTreePattern(this, pattern, patternRuleIndex, tree);
}
Lexer* ParseTreePatternMatcher::getLexer() {

View File

@ -31,7 +31,7 @@
#pragma once
#include "Token.h"
#include "Exceptions.h"
namespace org {
namespace antlr {
@ -99,9 +99,14 @@ namespace pattern {
/// </summary>
class ParseTreePatternMatcher {
public:
class CannotInvokeStartRule : public std::exception {
class CannotInvokeStartRule : public RuntimeException {
public:
CannotInvokeStartRule(std::exception e);
CannotInvokeStartRule(const RuntimeException &e);
};
// Fixes https://github.com/antlr/antlr4/issues/413
// "Tree pattern compilation doesn't check for a complete parse"
class StartRuleDoesNotConsumeFullPattern : public RuntimeException {
};
/// Constructs a <seealso cref="ParseTreePatternMatcher"/> or from a <seealso cref="Lexer"/> and