Switching to current ANTLR revision, part 3.

- Everything from the atn + dfa subfolders is updated now.
- IntervalSet changed its behavior a bit, which required updating the unit tests.
2016-04-30 15:08:28 +02:00 · 2016-04-30 15:08:28 +02:00 · d4ebdfa138
parent 99ada0550f
commit d4ebdfa138
8 changed files with 531 additions and 499 deletions
--- a/runtime/Cpp/demo/Mac/antlr4-cpp-demo/main.cpp
+++ b/runtime/Cpp/demo/Mac/antlr4-cpp-demo/main.cpp
@ -17,7 +17,16 @@
 using namespace antlrcpptest;
 using namespace org::antlr::v4::runtime;

+class A {
+public:
+  static void doit(const A &a) {
+    size_t i = a.counter;
+  }
+private:
+  size_t counter;
+};
 int main(int argc, const char * argv[]) {
+
  ANTLRInputStream input(L"divideŴ and conquer");
  TLexer lexer(&input);
  CommonTokenStream tokens(&lexer);
--- a/runtime/Cpp/demo/Mac/antlrcpp
+++ b/runtime/Cpp/demo/Mac/antlrcpp
@ -326,7 +326,7 @@ using namespace antlrcpp;
  XCTAssert(set2.contains(1111));
  XCTAssertFalse(set2.contains(10000));
  XCTAssertEqual(set2.getSingleElement(), Token::INVALID_TYPE);
-  XCTAssertEqual(set2.getMinElement(), 10);
+  XCTAssertEqual(set2.getMinElement(), -1);
  XCTAssertEqual(set2.getMaxElement(), 2000);

  IntervalSet set3(set2);
@ -368,7 +368,13 @@ using namespace antlrcpp;
  catch (IllegalStateException &e) {
  }

-  set4.setReadOnly(false);
+  try {
+    set4.setReadOnly(false);
+    XCTFail(@"Expected exception");
+  }
+  catch (IllegalStateException &e) {
+  }
+  
  set4 = IntervalSet::of(12345);
  XCTAssertEqual(set4.getSingleElement(), 12345);
  XCTAssertEqual(set4.getMinElement(), 12345);
--- a/runtime/Cpp/runtime/atn/PredictionContext.h
+++ b/runtime/Cpp/runtime/atn/PredictionContext.h
@ -156,87 +156,66 @@ namespace atn {
    static Ref<PredictionContext> mergeSingletons(Ref<SingletonPredictionContext> a,
      Ref<SingletonPredictionContext> b, bool rootIsWildcard, PredictionContextMergeCache *mergeCache);

-    /// <summary>
-    /// Handle case where at least one of {@code a} or {@code b} is
-    /// <seealso cref="#EMPTY"/>. In the following diagrams, the symbol {@code $} is used
-    /// to represent <seealso cref="#EMPTY"/>.
-    ///
-    /// <h2>Local-Context Merges</h2>
-    ///
-    /// These local-context merge operations are used when {@code rootIsWildcard}
-    /// is true.
-    ///
-    /// <p/>
-    ///
-    /// <seealso cref="#EMPTY"/> is superset of any graph; return <seealso cref="#EMPTY"/>.<br/>
-    /// <embed src="images/LocalMerge_EmptyRoot.svg" type="image/svg+xml"/>
-    ///
-    /// <p/>
-    ///
-    /// <seealso cref="#EMPTY"/> and anything is {@code #EMPTY}, so merged parent is
-    /// {@code #EMPTY}; return left graph.<br/>
-    /// <embed src="images/LocalMerge_EmptyParent.svg" type="image/svg+xml"/>
-    ///
-    /// <p/>
-    ///
-    /// Special case of last merge if local context.<br/>
-    /// <embed src="images/LocalMerge_DiffRoots.svg" type="image/svg+xml"/>
-    ///
-    /// <h2>Full-Context Merges</h2>
-    ///
-    /// These full-context merge operations are used when {@code rootIsWildcard}
-    /// is false.
-    ///
-    /// <p/>
-    ///
-    /// <embed src="images/FullMerge_EmptyRoots.svg" type="image/svg+xml"/>
-    ///
-    /// <p/>
-    ///
-    /// Must keep all contexts; <seealso cref="#EMPTY"/> in array is a special value (and
-    /// null parent).<br/>
-    /// <embed src="images/FullMerge_EmptyRoot.svg" type="image/svg+xml"/>
-    ///
-    /// <p/>
-    ///
-    /// <embed src="images/FullMerge_SameRoot.svg" type="image/svg+xml"/>
-    /// </summary>
-    /// <param name="a"> the first <seealso cref="SingletonPredictionContext"/> </param>
-    /// <param name="b"> the second <seealso cref="SingletonPredictionContext"/> </param>
-    /// <param name="rootIsWildcard"> {@code true} if this is a local-context merge,
-    /// otherwise false to indicate a full-context merge </param>
+    /**
+     * Handle case where at least one of {@code a} or {@code b} is
+     * {@link #EMPTY}. In the following diagrams, the symbol {@code $} is used
+     * to represent {@link #EMPTY}.
+     *
+     * <h2>Local-Context Merges</h2>
+     *
+     * <p>These local-context merge operations are used when {@code rootIsWildcard}
+     * is true.</p>
+     *
+     * <p>{@link #EMPTY} is superset of any graph; return {@link #EMPTY}.<br>
+     * <embed src="images/LocalMerge_EmptyRoot.svg" type="image/svg+xml"/></p>
+     *
+     * <p>{@link #EMPTY} and anything is {@code #EMPTY}, so merged parent is
+     * {@code #EMPTY}; return left graph.<br>
+     * <embed src="images/LocalMerge_EmptyParent.svg" type="image/svg+xml"/></p>
+     *
+     * <p>Special case of last merge if local context.<br>
+     * <embed src="images/LocalMerge_DiffRoots.svg" type="image/svg+xml"/></p>
+     *
+     * <h2>Full-Context Merges</h2>
+     *
+     * <p>These full-context merge operations are used when {@code rootIsWildcard}
+     * is false.</p>
+     *
+     * <p><embed src="images/FullMerge_EmptyRoots.svg" type="image/svg+xml"/></p>
+     *
+     * <p>Must keep all contexts; {@link #EMPTY} in array is a special value (and
+     * null parent).<br>
+     * <embed src="images/FullMerge_EmptyRoot.svg" type="image/svg+xml"/></p>
+     *
+     * <p><embed src="images/FullMerge_SameRoot.svg" type="image/svg+xml"/></p>
+     *
+     * @param a the first {@link SingletonPredictionContext}
+     * @param b the second {@link SingletonPredictionContext}
+     * @param rootIsWildcard {@code true} if this is a local-context merge,
+     * otherwise false to indicate a full-context merge
+     */
    static Ref<PredictionContext> mergeRoot(Ref<SingletonPredictionContext> a,
      Ref<SingletonPredictionContext> b, bool rootIsWildcard);

-    /// <summary>
-    /// Merge two <seealso cref="ArrayPredictionContext"/> instances.
-    ///
-    /// <p/>
-    ///
-    /// Different tops, different parents.<br/>
-    /// <embed src="images/ArrayMerge_DiffTopDiffPar.svg" type="image/svg+xml"/>
-    ///
-    /// <p/>
-    ///
-    /// Shared top, same parents.<br/>
-    /// <embed src="images/ArrayMerge_ShareTopSamePar.svg" type="image/svg+xml"/>
-    ///
-    /// <p/>
-    ///
-    /// Shared top, different parents.<br/>
-    /// <embed src="images/ArrayMerge_ShareTopDiffPar.svg" type="image/svg+xml"/>
-    ///
-    /// <p/>
-    ///
-    /// Shared top, all shared parents.<br/>
-    /// <embed src="images/ArrayMerge_ShareTopSharePar.svg" type="image/svg+xml"/>
-    ///
-    /// <p/>
-    ///
-    /// Equal tops, merge parents and reduce top to
-    /// <seealso cref="SingletonPredictionContext"/>.<br/>
-    /// <embed src="images/ArrayMerge_EqualTop.svg" type="image/svg+xml"/>
-    /// </summary>
+    /**
+     * Merge two {@link ArrayPredictionContext} instances.
+     *
+     * <p>Different tops, different parents.<br>
+     * <embed src="images/ArrayMerge_DiffTopDiffPar.svg" type="image/svg+xml"/></p>
+     *
+     * <p>Shared top, same parents.<br>
+     * <embed src="images/ArrayMerge_ShareTopSamePar.svg" type="image/svg+xml"/></p>
+     *
+     * <p>Shared top, different parents.<br>
+     * <embed src="images/ArrayMerge_ShareTopDiffPar.svg" type="image/svg+xml"/></p>
+     *
+     * <p>Shared top, all shared parents.<br>
+     * <embed src="images/ArrayMerge_ShareTopSharePar.svg" type="image/svg+xml"/></p>
+     *
+     * <p>Equal tops, merge parents and reduce top to
+     * {@link SingletonPredictionContext}.<br>
+     * <embed src="images/ArrayMerge_EqualTop.svg" type="image/svg+xml"/></p>
+     */
    static Ref<PredictionContext> mergeArrays(Ref<ArrayPredictionContext> a,
      Ref<ArrayPredictionContext> b, bool rootIsWildcard, PredictionContextMergeCache *mergeCache);

--- a/runtime/Cpp/runtime/atn/PredictionMode.cpp
+++ b/runtime/Cpp/runtime/atn/PredictionMode.cpp
@ -43,6 +43,10 @@ using namespace antlrcpp;

 struct AltAndContextConfigHasher
 {
+  /**
+   * The hash code is only a function of the {@link ATNState#stateNumber}
+   * and {@link ATNConfig#context}.
+   */
  size_t operator () (const ATNConfig &o) const {
    size_t hashCode = misc::MurmurHash::initialize(7);
    hashCode = misc::MurmurHash::update(hashCode, (size_t)o.state->stateNumber);
@ -172,6 +176,14 @@ antlrcpp::BitSet PredictionModeClass::getAlts(const std::vector<antlrcpp::BitSet
  return all;
 }

+antlrcpp::BitSet PredictionModeClass::getAlts(Ref<ATNConfigSet> configs) {
+  antlrcpp::BitSet alts;
+  for (auto config : configs->configs) {
+    alts.set(config->alt);
+  }
+  return alts;
+}
+
 std::vector<antlrcpp::BitSet> PredictionModeClass::getConflictingAltSubsets(Ref<ATNConfigSet> configs) {
  std::unordered_map<Ref<ATNConfig>, antlrcpp::BitSet, AltAndContextConfigHasher, AltAndContextConfigComparer> configToAlts;
  for (auto config : configs->configs) {
--- a/runtime/Cpp/runtime/atn/PredictionMode.h
+++ b/runtime/Cpp/runtime/atn/PredictionMode.h
@ -39,171 +39,169 @@ namespace v4 {
 namespace runtime {
 namespace atn {

+  /**
+   * This enumeration defines the prediction modes available in ANTLR 4 along with
+   * utility methods for analyzing configuration sets for conflicts and/or
+   * ambiguities.
+   */
  enum class PredictionMode {
-    /// <summary>
-    /// Do only local context prediction (SLL style) and using
-    ///  heuristic which almost always works but is much faster
-    ///  than precise answer.
-    /// </summary>
+    /**
+     * The SLL(*) prediction mode. This prediction mode ignores the current
+     * parser context when making predictions. This is the fastest prediction
+     * mode, and provides correct results for many grammars. This prediction
+     * mode is more powerful than the prediction mode provided by ANTLR 3, but
+     * may result in syntax errors for grammar and input combinations which are
+     * not SLL.
+     *
+     * <p>
+     * When using this prediction mode, the parser will either return a correct
+     * parse tree (i.e. the same parse tree that would be returned with the
+     * {@link #LL} prediction mode), or it will report a syntax error. If a
+     * syntax error is encountered when using the {@link #SLL} prediction mode,
+     * it may be due to either an actual syntax error in the input or indicate
+     * that the particular combination of grammar and input requires the more
+     * powerful {@link #LL} prediction abilities to complete successfully.</p>
+     *
+     * <p>
+     * This prediction mode does not provide any guarantees for prediction
+     * behavior for syntactically-incorrect inputs.</p>
+     */
    SLL,

-    /// <summary>
-    /// Full LL(*) that always gets right answer. For speed
-    ///  reasons, we terminate the prediction process when we know for
-    ///  sure which alt to predict. We don't always know what
-    ///  the ambiguity is in this mode.
-    /// </summary>
+    /**
+     * The LL(*) prediction mode. This prediction mode allows the current parser
+     * context to be used for resolving SLL conflicts that occur during
+     * prediction. This is the fastest prediction mode that guarantees correct
+     * parse results for all combinations of grammars with syntactically correct
+     * inputs.
+     *
+     * <p>
+     * When using this prediction mode, the parser will make correct decisions
+     * for all syntactically-correct grammar and input combinations. However, in
+     * cases where the grammar is truly ambiguous this prediction mode might not
+     * report a precise answer for <em>exactly which</em> alternatives are
+     * ambiguous.</p>
+     *
+     * <p>
+     * This prediction mode does not provide any guarantees for prediction
+     * behavior for syntactically-incorrect inputs.</p>
+     */
    LL,

-    /// <summary>
-    /// Tell the full LL prediction algorithm to pursue lookahead until
-    ///  it has uniquely predicted an alternative without conflict or it's
-    ///  certain that it's found an ambiguous input sequence.  when this
-    ///  variable is false. When true, the prediction process will
-    ///  continue looking for the exact ambiguous sequence even if
-    ///  it has already figured out which alternative to predict.
-    /// </summary>
+    /**
+     * The LL(*) prediction mode with exact ambiguity detection. In addition to
+     * the correctness guarantees provided by the {@link #LL} prediction mode,
+     * this prediction mode instructs the prediction algorithm to determine the
+     * complete and exact set of ambiguous alternatives for every ambiguous
+     * decision encountered while parsing.
+     *
+     * <p>
+     * This prediction mode may be used for diagnosing ambiguities during
+     * grammar development. Due to the performance overhead of calculating sets
+     * of ambiguous alternatives, this prediction mode should be avoided when
+     * the exact results are not necessary.</p>
+     *
+     * <p>
+     * This prediction mode does not provide any guarantees for prediction
+     * behavior for syntactically-incorrect inputs.</p>
+     */
    LL_EXACT_AMBIG_DETECTION
  };

  class PredictionModeClass {
-
  public:
-    /// <summary>
-    /// Computes the SLL prediction termination condition.
-    ///
-    /// <p/>
-    ///
-    /// This method computes the SLL prediction termination condition for both of
-    /// the following cases.
-    ///
-    /// <ul>
-    /// <li>The usual SLL+LL fallback upon SLL conflict</li>
-    /// <li>Pure SLL without LL fallback</li>
-    /// </ul>
-    ///
-    /// <p/>
-    ///
-    /// <strong>COMBINED SLL+LL PARSING</strong>
-    ///
-    /// <p/>
-    ///
-    /// When LL-fallback is enabled upon SLL conflict, correct predictions are
-    /// ensured regardless of how the termination condition is computed by this
-    /// method. Due to the substantially higher cost of LL prediction, the
-    /// prediction should only fall back to LL when the additional lookahead
-    /// cannot lead to a unique SLL prediction.
-    ///
-    /// <p/>
-    ///
-    /// Assuming combined SLL+LL parsing, an SLL configuration set with only
-    /// conflicting subsets should fall back to full LL, even if the
-    /// configuration sets don't resolve to the same alternative (e.g.
-    /// {@code {1,2}} and {@code {3,4}}. If there is at least one non-conflicting
-    /// configuration, SLL could continue with the hopes that more lookahead will
-    /// resolve via one of those non-conflicting configurations.
-    ///
-    /// <p/>
-    ///
-    /// Here's the prediction termination rule them: SLL (for SLL+LL parsing)
-    /// stops when it sees only conflicting configuration subsets. In contrast,
-    /// full LL keeps going when there is uncertainty.
-    ///
-    /// <p/>
-    ///
-    /// <strong>HEURISTIC</strong>
-    ///
-    /// <p/>
-    ///
-    /// As a heuristic, we stop prediction when we see any conflicting subset
-    /// unless we see a state that only has one alternative associated with it.
-    /// The single-alt-state thing lets prediction continue upon rules like
-    /// (otherwise, it would admit defeat too soon):
-    ///
-    /// <p/>
-    ///
-    /// {@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}
-    ///
-    /// <p/>
-    ///
-    /// When the ATN simulation reaches the state before {@code ';'}, it has a
-    /// DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally
-    /// {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop
-    /// processing this node because alternative to has another way to continue,
-    /// via {@code [6|2|[]]}.
-    ///
-    /// <p/>
-    ///
-    /// It also let's us continue for this rule:
-    ///
-    /// <p/>
-    ///
-    /// {@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}
-    ///
-    /// <p/>
-    ///
-    /// After matching input A, we reach the stop state for rule A, state 1.
-    /// State 8 is the state right before B. Clearly alternatives 1 and 2
-    /// conflict and no amount of further lookahead will separate the two.
-    /// However, alternative 3 will be able to continue and so we do not stop
-    /// working on this state. In the previous example, we're concerned with
-    /// states associated with the conflicting alternatives. Here alt 3 is not
-    /// associated with the conflicting configs, but since we can continue
-    /// looking for input reasonably, don't declare the state done.
-    ///
-    /// <p/>
-    ///
-    /// <strong>PURE SLL PARSING</strong>
-    ///
-    /// <p/>
-    ///
-    /// To handle pure SLL parsing, all we have to do is make sure that we
-    /// combine stack contexts for configurations that differ only by semantic
-    /// predicate. From there, we can do the usual SLL termination heuristic.
-    ///
-    /// <p/>
-    ///
-    /// <strong>PREDICATES IN SLL+LL PARSING</strong>
-    ///
-    /// <p/>
-    ///
-    /// SLL decisions don't evaluate predicates until after they reach DFA stop
-    /// states because they need to create the DFA cache that works in all
-    /// semantic situations. In contrast, full LL evaluates predicates collected
-    /// during start state computation so it can ignore predicates thereafter.
-    /// This means that SLL termination detection can totally ignore semantic
-    /// predicates.
-    ///
-    /// <p/>
-    ///
-    /// Implementation-wise, <seealso cref="ATNConfigSet"/> combines stack contexts
-    /// but not
-    /// semantic predicate contexts so we might see two configurations like the
-    /// following.
-    ///
-    /// <p/>
-    ///
-    /// {@code (s, 1, x, {}), (s, 1, x', {p})}
-    ///
-    /// <p/>
-    ///
-    /// Before testing these configurations against others, we have to merge
-    /// {@code x} and {@code x'} (without modifying the existing configurations).
-    /// For example, we test {@code (x+x')==x''} when looking for conflicts in
-    /// the following configurations.
-    ///
-    /// <p/>
-    ///
-    /// {@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}
-    ///
-    /// <p/>
-    ///
-    /// If the configuration set has predicates (as indicated by
-    /// <seealso cref="ATNConfigSet#hasSemanticContext"/>), this algorithm makes a
-    /// copy of
-    /// the configurations to strip out all of the predicates so that a standard
-    /// <seealso cref="ATNConfigSet"/> will merge everything ignoring predicates.
-    /// </summary>
+    /**
+     * Computes the SLL prediction termination condition.
+     *
+     * <p>
+     * This method computes the SLL prediction termination condition for both of
+     * the following cases.</p>
+     *
+     * <ul>
+     * <li>The usual SLL+LL fallback upon SLL conflict</li>
+     * <li>Pure SLL without LL fallback</li>
+     * </ul>
+     *
+     * <p><strong>COMBINED SLL+LL PARSING</strong></p>
+     *
+     * <p>When LL-fallback is enabled upon SLL conflict, correct predictions are
+     * ensured regardless of how the termination condition is computed by this
+     * method. Due to the substantially higher cost of LL prediction, the
+     * prediction should only fall back to LL when the additional lookahead
+     * cannot lead to a unique SLL prediction.</p>
+     *
+     * <p>Assuming combined SLL+LL parsing, an SLL configuration set with only
+     * conflicting subsets should fall back to full LL, even if the
+     * configuration sets don't resolve to the same alternative (e.g.
+     * {@code {1,2}} and {@code {3,4}}). If there is at least one non-conflicting
+     * configuration, SLL could continue with the hopes that more lookahead will
+     * resolve via one of those non-conflicting configurations.</p>
+     *
+     * <p>Here's the prediction termination rule then: SLL (for SLL+LL parsing)
+     * stops when it sees only conflicting configuration subsets. In contrast,
+     * full LL keeps going when there is uncertainty.</p>
+     *
+     * <p><strong>HEURISTIC</strong></p>
+     *
+     * <p>As a heuristic, we stop prediction when we see any conflicting subset
+     * unless we see a state that only has one alternative associated with it.
+     * The single-alt-state thing lets prediction continue upon rules like
+     * (otherwise, it would admit defeat too soon):</p>
+     *
+     * <p>{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}</p>
+     *
+     * <p>When the ATN simulation reaches the state before {@code ';'}, it has a
+     * DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally
+     * {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop
+     * processing this node because alternative two has another way to continue,
+     * via {@code [6|2|[]]}.</p>
+     *
+     * <p>It also lets us continue for this rule:</p>
+     *
+     * <p>{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}</p>
+     *
+     * <p>After matching input A, we reach the stop state for rule A, state 1.
+     * State 8 is the state right before B. Clearly alternatives 1 and 2
+     * conflict and no amount of further lookahead will separate the two.
+     * However, alternative 3 will be able to continue and so we do not stop
+     * working on this state. In the previous example, we're concerned with
+     * states associated with the conflicting alternatives. Here alt 3 is not
+     * associated with the conflicting configs, but since we can continue
+     * looking for input reasonably, don't declare the state done.</p>
+     *
+     * <p><strong>PURE SLL PARSING</strong></p>
+     *
+     * <p>To handle pure SLL parsing, all we have to do is make sure that we
+     * combine stack contexts for configurations that differ only by semantic
+     * predicate. From there, we can do the usual SLL termination heuristic.</p>
+     *
+     * <p><strong>PREDICATES IN SLL+LL PARSING</strong></p>
+     *
+     * <p>SLL decisions don't evaluate predicates until after they reach DFA stop
+     * states because they need to create the DFA cache that works in all
+     * semantic situations. In contrast, full LL evaluates predicates collected
+     * during start state computation so it can ignore predicates thereafter.
+     * This means that SLL termination detection can totally ignore semantic
+     * predicates.</p>
+     *
+     * <p>Implementation-wise, {@link ATNConfigSet} combines stack contexts but not
+     * semantic predicate contexts so we might see two configurations like the
+     * following.</p>
+     *
+     * <p>{@code (s, 1, x, {}), (s, 1, x', {p})}</p>
+     *
+     * <p>Before testing these configurations against others, we have to merge
+     * {@code x} and {@code x'} (without modifying the existing configurations).
+     * For example, we test {@code (x+x')==x''} when looking for conflicts in
+     * the following configurations.</p>
+     *
+     * <p>{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}</p>
+     *
+     * <p>If the configuration set has predicates (as indicated by
+     * {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of
+     * the configurations to strip out all of the predicates so that a standard
+     * {@link ATNConfigSet} will merge everything ignoring predicates.</p>
+     */
    static bool hasSLLConflictTerminatingPrediction(PredictionMode *mode, Ref<ATNConfigSet> configs);

    /// <summary>
@ -230,190 +228,147 @@ namespace atn {
    /// <seealso cref="RuleStopState"/>, otherwise {@code false} </returns>
    static bool allConfigsInRuleStopStates(Ref<ATNConfigSet> configs);

-    /// <summary>
-    /// Full LL prediction termination.
-    ///
-    /// <p/>
-    ///
-    /// Can we stop looking ahead during ATN simulation or is there some
-    /// uncertainty as to which alternative we will ultimately pick, after
-    /// consuming more input? Even if there are partial conflicts, we might know
-    /// that everything is going to resolve to the same minimum alternative. That
-    /// means we can stop since no more lookahead will change that fact. On the
-    /// other hand, there might be multiple conflicts that resolve to different
-    /// minimums. That means we need more look ahead to decide which of those
-    /// alternatives we should predict.
-    ///
-    /// <p/>
-    ///
-    /// The basic idea is to split the set of configurations {@code C}, into
-    /// conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with
-    /// non-conflicting configurations. Two configurations conflict if they have
-    /// identical <seealso cref="ATNConfig#state"/> and <seealso
-    /// cref="ATNConfig#context"/> values
-    /// but different <seealso cref="ATNConfig#alt"/> value, e.g. {@code (s, i, ctx,
-    /// _)}
-    /// and {@code (s, j, ctx, _)} for {@code i!=j}.
-    ///
-    /// <p/>
-    ///
-    /// Reduce these configuration subsets to the set of possible alternatives.
-    /// You can compute the alternative subsets in one pass as follows:
-    ///
-    /// <p/>
-    ///
-    /// {@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in
-    /// {@code C} holding {@code s} and {@code ctx} fixed.
-    ///
-    /// <p/>
-    ///
-    /// Or in pseudo-code, for each configuration {@code c} in {@code C}:
-    ///
-    /// <pre>
-    /// map[c] U= c.<seealso cref="ATNConfig#alt alt"/> # map hash/equals uses s and
-    /// x, not
-    /// alt and not pred
-    /// </pre>
-    ///
-    /// <p/>
-    ///
-    /// The values in {@code map} are the set of {@code A_s,ctx} sets.
-    ///
-    /// <p/>
-    ///
-    /// If {@code |A_s,ctx|=1} then there is no conflict associated with
-    /// {@code s} and {@code ctx}.
-    ///
-    /// <p/>
-    ///
-    /// Reduce the subsets to singletons by choosing a minimum of each subset. If
-    /// the union of these alternative subsets is a singleton, then no amount of
-    /// more lookahead will help us. We will always pick that alternative. If,
-    /// however, there is more than one alternative, then we are uncertain which
-    /// alternative to predict and must continue looking for resolution. We may
-    /// or may not discover an ambiguity in the future, even if there are no
-    /// conflicting subsets this round.
-    ///
-    /// <p/>
-    ///
-    /// The biggest sin is to terminate early because it means we've made a
-    /// decision but were uncertain as to the eventual outcome. We haven't used
-    /// enough lookahead. On the other hand, announcing a conflict too late is no
-    /// big deal; you will still have the conflict. It's just inefficient. It
-    /// might even look until the end of file.
-    ///
-    /// <p/>
-    ///
-    /// No special consideration for semantic predicates is required because
-    /// predicates are evaluated on-the-fly for full LL prediction, ensuring that
-    /// no configuration contains a semantic context during the termination
-    /// check.
-    ///
-    /// <p/>
-    ///
-    /// <strong>CONFLICTING CONFIGS</strong>
-    ///
-    /// <p/>
-    ///
-    /// Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict
-    /// when {@code i!=j} but {@code x=x'}. Because we merge all
-    /// {@code (s, i, _)} configurations together, that means that there are at
-    /// most {@code n} configurations associated with state {@code s} for
-    /// {@code n} possible alternatives in the decision. The merged stacks
-    /// complicate the comparison of configuration contexts {@code x} and
-    /// {@code x'}. Sam checks to see if one is a subset of the other by calling
-    /// merge and checking to see if the merged result is either {@code x} or
-    /// {@code x'}. If the {@code x} associated with lowest alternative {@code i}
-    /// is the superset, then {@code i} is the only possible prediction since the
-    /// others resolve to {@code min(i)} as well. However, if {@code x} is
-    /// associated with {@code j>i} then at least one stack configuration for
-    /// {@code j} is not in conflict with alternative {@code i}. The algorithm
-    /// should keep going, looking for more lookahead due to the uncertainty.
-    ///
-    /// <p/>
-    ///
-    /// For simplicity, I'm doing a equality check between {@code x} and
-    /// {@code x'} that lets the algorithm continue to consume lookahead longer
-    /// than necessary. The reason I like the equality is of course the
-    /// simplicity but also because that is the test you need to detect the
-    /// alternatives that are actually in conflict.
-    ///
-    /// <p/>
-    ///
-    /// <strong>CONTINUE/STOP RULE</strong>
-    ///
-    /// <p/>
-    ///
-    /// Continue if union of resolved alternative sets from non-conflicting and
-    /// conflicting alternative subsets has more than one alternative. We are
-    /// uncertain about which alternative to predict.
-    ///
-    /// <p/>
-    ///
-    /// The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which
-    /// alternatives are still in the running for the amount of input we've
-    /// consumed at this point. The conflicting sets let us to strip away
-    /// configurations that won't lead to more states because we resolve
-    /// conflicts to the configuration with a minimum alternate for the
-    /// conflicting set.
-    ///
-    /// <p/>
-    ///
-    /// <strong>CASES</strong>
-    ///
-    /// <ul>
-    ///
-    /// <li>no conflicts and more than 1 alternative in set =&gt; continue</li>
-    ///
-    /// <li> {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)},
-    /// {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set
-    /// {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
-    /// {@code {1,3}} =&gt; continue
-    /// </li>
-    ///
-    /// <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
-    /// {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set
-    /// {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
-    /// {@code {1}} =&gt; stop and predict 1</li>
-    ///
-    /// <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
-    /// {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U
-    /// {@code {1}} = {@code {1}} =&gt; stop and predict 1, can announce
-    /// ambiguity {@code {1,2}}</li>
-    ///
-    /// <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)},
-    /// {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U
-    /// {@code {2}} = {@code {1,2}} =&gt; continue</li>
-    ///
-    /// <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)},
-    /// {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U
-    /// {@code {3}} = {@code {1,3}} =&gt; continue</li>
-    ///
-    /// </ul>
-    ///
-    /// <strong>EXACT AMBIGUITY DETECTION</strong>
-    ///
-    /// <p/>
-    ///
-    /// If all states report the same conflicting set of alternatives, then we
-    /// know we have the exact ambiguity set.
-    ///
-    /// <p/>
-    ///
-    /// <code>|A_<em>i</em>|&gt;1</code> and
-    /// <code>A_<em>i</em> = A_<em>j</em></code> for all <em>i</em>, <em>j</em>.
-    ///
-    /// <p/>
-    ///
-    /// In other words, we continue examining lookahead until all {@code A_i}
-    /// have more than one alternative and all {@code A_i} are the same. If
-    /// {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate
-    /// because the resolved set is {@code {1}}. To determine what the real
-    /// ambiguity is, we have to know whether the ambiguity is between one and
-    /// two or one and three so we keep going. We can only stop prediction when
-    /// we need exact ambiguity detection when the sets look like
-    /// {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...
-    /// </summary>
+    /**
+     * Full LL prediction termination.
+     *
+     * <p>Can we stop looking ahead during ATN simulation or is there some
+     * uncertainty as to which alternative we will ultimately pick, after
+     * consuming more input? Even if there are partial conflicts, we might know
+     * that everything is going to resolve to the same minimum alternative. That
+     * means we can stop since no more lookahead will change that fact. On the
+     * other hand, there might be multiple conflicts that resolve to different
+     * minimums. That means we need more look ahead to decide which of those
+     * alternatives we should predict.</p>
+     *
+     * <p>The basic idea is to split the set of configurations {@code C}, into
+     * conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with
+     * non-conflicting configurations. Two configurations conflict if they have
+     * identical {@link ATNConfig#state} and {@link ATNConfig#context} values
+     * but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)}
+     * and {@code (s, j, ctx, _)} for {@code i!=j}.</p>
+     *
+     * <p>Reduce these configuration subsets to the set of possible alternatives.
+     * You can compute the alternative subsets in one pass as follows:</p>
+     *
+     * <p>{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in
+     * {@code C} holding {@code s} and {@code ctx} fixed.</p>
+     *
+     * <p>Or in pseudo-code, for each configuration {@code c} in {@code C}:</p>
+     *
+     * <pre>
+     * map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
+     * alt and not pred
+     * </pre>
+     *
+     * <p>The values in {@code map} are the set of {@code A_s,ctx} sets.</p>
+     *
+     * <p>If {@code |A_s,ctx|=1} then there is no conflict associated with
+     * {@code s} and {@code ctx}.</p>
+     *
+     * <p>Reduce the subsets to singletons by choosing a minimum of each subset. If
+     * the union of these alternative subsets is a singleton, then no amount of
+     * more lookahead will help us. We will always pick that alternative. If,
+     * however, there is more than one alternative, then we are uncertain which
+     * alternative to predict and must continue looking for resolution. We may
+     * or may not discover an ambiguity in the future, even if there are no
+     * conflicting subsets this round.</p>
+     *
+     * <p>The biggest sin is to terminate early because it means we've made a
+     * decision but were uncertain as to the eventual outcome. We haven't used
+     * enough lookahead. On the other hand, announcing a conflict too late is no
+     * big deal; you will still have the conflict. It's just inefficient. It
+     * might even look until the end of file.</p>
+     *
+     * <p>No special consideration for semantic predicates is required because
+     * predicates are evaluated on-the-fly for full LL prediction, ensuring that
+     * no configuration contains a semantic context during the termination
+     * check.</p>
+     *
+     * <p><strong>CONFLICTING CONFIGS</strong></p>
+     *
+     * <p>Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict
+     * when {@code i!=j} but {@code x=x'}. Because we merge all
+     * {@code (s, i, _)} configurations together, that means that there are at
+     * most {@code n} configurations associated with state {@code s} for
+     * {@code n} possible alternatives in the decision. The merged stacks
+     * complicate the comparison of configuration contexts {@code x} and
+     * {@code x'}. Sam checks to see if one is a subset of the other by calling
+     * merge and checking to see if the merged result is either {@code x} or
+     * {@code x'}. If the {@code x} associated with lowest alternative {@code i}
+     * is the superset, then {@code i} is the only possible prediction since the
+     * others resolve to {@code min(i)} as well. However, if {@code x} is
+     * associated with {@code j>i} then at least one stack configuration for
+     * {@code j} is not in conflict with alternative {@code i}. The algorithm
+     * should keep going, looking for more lookahead due to the uncertainty.</p>
+     *
+     * <p>For simplicity, I'm doing an equality check between {@code x} and
+     * {@code x'} that lets the algorithm continue to consume lookahead longer
+     * than necessary. The reason I like the equality is of course the
+     * simplicity but also because that is the test you need to detect the
+     * alternatives that are actually in conflict.</p>
+     *
+     * <p><strong>CONTINUE/STOP RULE</strong></p>
+     *
+     * <p>Continue if union of resolved alternative sets from non-conflicting and
+     * conflicting alternative subsets has more than one alternative. We are
+     * uncertain about which alternative to predict.</p>
+     *
+     * <p>The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which
+     * alternatives are still in the running for the amount of input we've
+     * consumed at this point. The conflicting sets let us strip away
+     * configurations that won't lead to more states because we resolve
+     * conflicts to the configuration with a minimum alternate for the
+     * conflicting set.</p>
+     *
+     * <p><strong>CASES</strong></p>
+     *
+     * <ul>
+     *
+     * <li>no conflicts and more than 1 alternative in set =&gt; continue</li>
+     *
+     * <li> {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)},
+     * {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set
+     * {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
+     * {@code {1,3}} =&gt; continue
+     * </li>
+     *
+     * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
+     * {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set
+     * {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
+     * {@code {1}} =&gt; stop and predict 1</li>
+     *
+     * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
+     * {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U
+     * {@code {1}} = {@code {1}} =&gt; stop and predict 1, can announce
+     * ambiguity {@code {1,2}}</li>
+     *
+     * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)},
+     * {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U
+     * {@code {2}} = {@code {1,2}} =&gt; continue</li>
+     *
+     * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)},
+     * {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U
+     * {@code {3}} = {@code {1,3}} =&gt; continue</li>
+     *
+     * </ul>
+     *
+     * <p><strong>EXACT AMBIGUITY DETECTION</strong></p>
+     *
+     * <p>If all states report the same conflicting set of alternatives, then we
+     * know we have the exact ambiguity set.</p>
+     *
+     * <p><code>|A_<em>i</em>|&gt;1</code> and
+     * <code>A_<em>i</em> = A_<em>j</em></code> for all <em>i</em>, <em>j</em>.</p>
+     *
+     * <p>In other words, we continue examining lookahead until all {@code A_i}
+     * have more than one alternative and all {@code A_i} are the same. If
+     * {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate
+     * because the resolved set is {@code {1}}. To determine what the real
+     * ambiguity is, we have to know whether the ambiguity is between one and
+     * two or one and three so we keep going. We can only stop prediction when
+     * we need exact ambiguity detection when the sets look like
+     * {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...</p>
+     */
    static int resolvesToJustOneViableAlt(const std::vector<antlrcpp::BitSet> &altsets);

    /// <summary>
@ -475,6 +430,9 @@ namespace atn {
    /// <returns> the set of represented alternatives in {@code altsets} </returns>
    static antlrcpp::BitSet getAlts(const std::vector<antlrcpp::BitSet> &altsets);

+    /** Get union of all alts from configs. @since 4.5.1 */
+    static antlrcpp::BitSet getAlts(Ref<ATNConfigSet> configs);
+    
    /// <summary>
    /// This function gets the conflicting alt subsets from a configuration set.
    /// For each configuration {@code c} in {@code configs}:
--- a/runtime/Cpp/runtime/atn/Transition.h
+++ b/runtime/Cpp/runtime/atn/Transition.h
@ -81,8 +81,15 @@ namespace atn {
  public:
    virtual int getSerializationType() const = 0;

-    /// <summary>
-    /// Are we epsilon, action, sempred? </summary>
+    /**
+     * Determines if the transition is an "epsilon" transition.
+     *
+     * <p>The default implementation returns {@code false}.</p>
+     *
+     * @return {@code true} if traversing this transition in the ATN does not
+     * consume an input symbol; otherwise, {@code false} if traversing this
+     * transition consumes (matches) an input symbol.
+     */
    virtual bool isEpsilon() const;
    virtual misc::IntervalSet label() const;
    virtual bool matches(int symbol, int minVocabSymbol, int maxVocabSymbol) const = 0;
--- a/runtime/Cpp/runtime/misc/IntervalSet.cpp
+++ b/runtime/Cpp/runtime/misc/IntervalSet.cpp
@ -32,14 +32,24 @@
 #include "MurmurHash.h"
 #include "Lexer.h"
 #include "Exceptions.h"
+#include "VocabularyImpl.h"

 #include "IntervalSet.h"

 using namespace org::antlr::v4::runtime;
 using namespace org::antlr::v4::runtime::misc;

-IntervalSet const IntervalSet::COMPLETE_CHAR_SET = IntervalSet::of(0, Lexer::MAX_CHAR_VALUE);
-IntervalSet const IntervalSet::EMPTY_SET;
+IntervalSet const IntervalSet::COMPLETE_CHAR_SET = []() {
+  IntervalSet complete = IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE);
+  complete.setReadOnly(true);
+  return complete;
+}();
+
+IntervalSet const IntervalSet::EMPTY_SET = []() {
+  IntervalSet empty;
+  empty.setReadOnly(true);
+  return empty;
+}();

 IntervalSet::IntervalSet() {
  InitializeInstanceFields();
@ -50,6 +60,7 @@ IntervalSet::IntervalSet(const std::vector<Interval> &intervals) : IntervalSet()
 }

 IntervalSet::IntervalSet(const IntervalSet &set) : IntervalSet() {
+  _intervals.clear();
  addAll(set);
 }

@ -162,53 +173,80 @@ IntervalSet IntervalSet::complement(int minElement, int maxElement) const {
 }

 IntervalSet IntervalSet::complement(const IntervalSet &vocabulary) const {
-  if (vocabulary == IntervalSet::EMPTY_SET) {
-    return IntervalSet::EMPTY_SET; // nothing in common with null set
-  }
-
-  int maxElement = vocabulary.getMaxElement();
-
-  IntervalSet compliment;
-  if (_intervals.empty()) {
-    return compliment;
-  }
-  Interval first = _intervals[0];
-
-  // Add a range from 0 to first.a constrained to vocab.
-  if (first.a > 0) {
-    IntervalSet s = IntervalSet::of(0, first.a - 1);
-    IntervalSet a = s.And(vocabulary);
-    compliment.addAll(a);
-  }
-
-  for (size_t i = 1; i < _intervals.size(); i++) { // from 2nd interval .. nth
-    const Interval &previous = _intervals[i - 1];
-    const Interval &current = _intervals[i];
-    IntervalSet s = IntervalSet::of(previous.b + 1, current.a - 1);
-    IntervalSet a = s.And(vocabulary);
-    compliment.addAll(a);
-  }
-
-  const Interval &last = _intervals.back();
-
-  // Add a range from last.b to maxElement constrained to vocab
-  if (last.b < maxElement) {
-    IntervalSet s = IntervalSet::of(last.b + 1, maxElement);
-    IntervalSet a = s.And(vocabulary);
-    compliment.addAll(a);
-  }
-
-  return compliment;
+  return vocabulary.subtract(*this);
 }

 IntervalSet IntervalSet::subtract(const IntervalSet &other) const {
-  // assume the whole unicode range here for the complement
-  // because it doesn't matter.  Anything beyond the max of this' set
-  // will be ignored since we are doing this & ~other.  The intersection
-  // will be empty.  The only problem would be when this' set max value
-  // goes beyond MAX_CHAR_VALUE, but hopefully the constant MAX_CHAR_VALUE
-  // will prevent this.
-  return And(other.complement(COMPLETE_CHAR_SET));
+  return subtract(*this, other);
+}
+
+// Computes the set difference {@code left - right}. Elements may be negative
+// (e.g. EOF), so no interval bound is treated as a "not set" marker here.
+IntervalSet IntervalSet::subtract(const IntervalSet &left, const IntervalSet &right) {
+  if (left.isEmpty()) {
+    return IntervalSet();
+  }
+
+  if (right.isEmpty()) {
+    // right set has no elements; just return a copy of the left set
+    return left;
+  }
+
+  IntervalSet result(left);
+  size_t resultI = 0;
+  size_t rightI = 0;
+  while (resultI < result._intervals.size() && rightI < right._intervals.size()) {
+    Interval &resultInterval = result._intervals[resultI];
+    const Interval &rightInterval = right._intervals[rightI];
+
+    // operation: (resultInterval - rightInterval) and update indexes
+
+    if (rightInterval.b < resultInterval.a) {
+      rightI++; // right interval lies entirely before the current one
+      continue;
+    }
+
+    if (rightInterval.a > resultInterval.b) {
+      resultI++; // current interval lies entirely before the right one
+      continue;
+    }
+
+    // The intervals overlap. Record with explicit flags which parts of the
+    // current interval survive, instead of probing for a default-constructed
+    // Interval: a surviving part can start at -1 (EOF) or lower.
+    const bool hasBefore = rightInterval.a > resultInterval.a;
+    const bool hasAfter = rightInterval.b < resultInterval.b;
+    const Interval beforeCurrent = hasBefore ? Interval(resultInterval.a, rightInterval.a - 1) : Interval();
+    const Interval afterCurrent = hasAfter ? Interval(rightInterval.b + 1, resultInterval.b) : Interval();
+
+    if (hasBefore) {
+      if (hasAfter) {
+        // split the current interval into two
+        result._intervals[resultI] = beforeCurrent;
+        result._intervals.insert(result._intervals.begin() + resultI + 1, afterCurrent);
+        resultI++;
+        rightI++;
+      } else {
+        // replace the current interval
+        result._intervals[resultI] = beforeCurrent;
+        resultI++;
+      }
+    } else {
+      if (hasAfter) {
+        // replace the current interval
+        result._intervals[resultI] = afterCurrent;
+        rightI++;
+      } else {
+        // remove the current interval (thus no need to increment resultI)
+        result._intervals.erase(result._intervals.begin() + resultI);
+      }
+    }
+  }
+
+  // If rightI reached right._intervals.size(), no more intervals to subtract from result.
+  // If resultI reached result._intervals.size(), we would be subtracting from an empty set.
+  // Either way, we are done.
+  return result;
 }

 IntervalSet IntervalSet::Or(const IntervalSet &a) const {
@ -305,17 +343,7 @@ int IntervalSet::getMinElement() const {
    return Token::INVALID_TYPE;
  }

-  for (auto &interval : _intervals) {
-    int a = interval.a;
-    int b = interval.b;
-    for (int v = a; v <= b; v++) {
-      if (v >= 0) {
-        return v;
-      }
-    }
-  }
-
-  return Token::INVALID_TYPE;
+  return _intervals[0].a;
 }

 std::vector<Interval> IntervalSet::getIntervals() const {
@ -366,7 +394,7 @@ std::wstring IntervalSet::toString(bool elemAreChar) const {
    int a = interval.a;
    int b = interval.b;
    if (a == b) {
-      if (a == -1) {
+      if (a == EOF) {
        ss << L"<EOF>";
      } else if (elemAreChar) {
        ss << L"'" << static_cast<wchar_t>(a) << L"'";
@ -389,6 +417,10 @@ std::wstring IntervalSet::toString(bool elemAreChar) const {
 }

 std::wstring IntervalSet::toString(const std::vector<std::wstring> &tokenNames) const {
+  return toString(dfa::VocabularyImpl::fromTokenNames(tokenNames));
+}
+
+std::wstring IntervalSet::toString(Ref<dfa::Vocabulary> vocabulary) const {
  if (_intervals.empty()) {
    return L"{}";
  }
@ -408,13 +440,13 @@ std::wstring IntervalSet::toString(const std::vector<std::wstring> &tokenNames)
    ssize_t a = (ssize_t)interval.a;
    ssize_t b = (ssize_t)interval.b;
    if (a == b) {
-      ss << elementName(tokenNames, a);
+      ss << elementName(vocabulary, a);
    } else {
      for (ssize_t i = a; i <= b; i++) {
        if (i > a) {
          ss << L", ";
        }
-        ss << elementName(tokenNames, i);
+        ss << elementName(vocabulary, i);
      }
    }
  }
@ -426,12 +458,16 @@ std::wstring IntervalSet::toString(const std::vector<std::wstring> &tokenNames)
 }

 std::wstring IntervalSet::elementName(const std::vector<std::wstring> &tokenNames, ssize_t a) const {
+  return elementName(dfa::VocabularyImpl::fromTokenNames(tokenNames), a);
+}
+
+std::wstring IntervalSet::elementName(Ref<dfa::Vocabulary> vocabulary, ssize_t a) const {
  if (a == EOF) {
    return L"<EOF>";
  } else if (a == Token::EPSILON) {
    return L"<EPSILON>";
  } else {
-    return tokenNames[(size_t)a];
+    return vocabulary->getDisplayName(a);
  }
 }

@ -526,6 +562,8 @@ bool IntervalSet::isReadOnly() const {
 }

 void IntervalSet::setReadOnly(bool readonly) {
+  if (_readonly && !readonly)
+    throw IllegalStateException("Can't alter readonly IntervalSet");
  _readonly = readonly;
 }

--- a/runtime/Cpp/runtime/misc/IntervalSet.h
+++ b/runtime/Cpp/runtime/misc/IntervalSet.h
@ -39,21 +39,18 @@ namespace v4 {
 namespace runtime {
 namespace misc {

-  /// <summary>
-  /// A set of integers that relies on ranges being common to do
-  ///  "run-length-encoded" like compression (if you view an IntSet like
-  ///  a BitSet with runs of 0s and 1s).  Only ranges are recorded so that
-  ///  a few ints up near value 1000 don't cause massive bitsets, just two
-  ///  integer intervals.
-  ///
-  ///  element values may be negative.  Useful for sets of EPSILON and EOF.
-  ///
-  ///  0..9 char range is index pair ['\u0030','\u0039'].
-  ///  Multiple ranges are encoded with multiple index pairs.  Isolated
-  ///  elements are encoded with an index pair where both intervals are the same.
-  ///
-  ///  The ranges are ordered and disjoint so that 2..6 appears before 101..103.
-  /// </summary>
+  /**
+   * This class implements the {@link IntSet} backed by a sorted array of
+   * non-overlapping intervals. It is particularly efficient for representing
+   * large collections of numbers, where the majority of elements appear as part
+   * of a sequential range of numbers that are all part of the set. For example,
+   * the set { 1, 2, 3, 4, 7, 8 } may be represented as { [1, 4], [7, 8] }.
+   *
+   * <p>
+   * This class is able to represent sets containing any combination of values in
+   * the range {@link Integer#MIN_VALUE} to {@link Integer#MAX_VALUE}
+   * (inclusive).</p>
+   */
  class IntervalSet {
  public:
    static IntervalSet const COMPLETE_CHAR_SET;
@ -121,6 +118,13 @@ namespace misc {
    /// anything that is in other but not in this will be ignored.
    virtual IntervalSet subtract(const IntervalSet &other) const;

+    /**
+     * Compute the set difference between two interval sets. The specific
+     * operation is {@code left - right}. Both sets are passed by reference, so
+     * neither can be {@code null}; pass an empty set to represent "no elements".
+     */
+    static IntervalSet subtract(const IntervalSet &left, const IntervalSet &right);
+
    virtual IntervalSet Or(const IntervalSet &a) const;

    /// <summary>
@ -142,10 +146,20 @@ namespace misc {
    /// If this set is a single integer, return it otherwise Token.INVALID_TYPE </summary>
    virtual int getSingleElement() const;

+    /**
+     * Returns the maximum value contained in the set.
+     *
+     * @return the maximum value contained in the set. If the set is empty, this
+     * method returns {@link Token#INVALID_TYPE}.
+     */
    virtual int getMaxElement() const;

-    /// <summary>
-    /// Return minimum element >= 0 </summary>
+    /**
+     * Returns the minimum value contained in the set.
+     *
+     * @return the minimum value contained in the set. If the set is empty, this
+     * method returns {@link Token#INVALID_TYPE}.
+     */
    virtual int getMinElement() const;

    /// <summary>
@ -160,10 +174,19 @@ namespace misc {
    bool operator == (const IntervalSet &other) const;
    virtual std::wstring toString() const;
    virtual std::wstring toString(bool elemAreChar) const;
+
+    /**
+     * @deprecated Use {@link #toString(Vocabulary)} instead.
+     */
    virtual std::wstring toString(const std::vector<std::wstring> &tokenNames) const;
+    virtual std::wstring toString(Ref<dfa::Vocabulary> vocabulary) const;

  protected:
+    /**
+     * @deprecated Use {@link #elementName(Vocabulary, int)} instead.
+     */
    virtual std::wstring elementName(const std::vector<std::wstring> &tokenNames, ssize_t a) const;
+    virtual std::wstring elementName(Ref<dfa::Vocabulary> vocabulary, ssize_t a) const;

  public:
    virtual size_t size() const;