diff --git a/runtime/Java/src/org/antlr/v4/runtime/misc/IntSet.java b/runtime/Java/src/org/antlr/v4/runtime/misc/IntSet.java
index 7bc07c5a2..86a9a09c0 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/misc/IntSet.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/misc/IntSet.java
@@ -32,49 +32,166 @@ package org.antlr.v4.runtime.misc;
import java.util.List;
-/** A generic set of ints.
+/**
+ * A generic set of integers.
*
- * @see IntervalSet
+ * @see IntervalSet
*/
public interface IntSet {
- /** Add an element to the set */
- void add(int el);
+ /**
+ * Adds the specified value to the current set.
+ *
+ * @param el the value to add
+ *
+ * @exception IllegalStateException if the current set is read-only
+ */
+ void add(int el);
- /** Add all elements from incoming set to this set. Can limit
- * to set of its own type. Return "this" so we can chain calls.
- */
- IntSet addAll(IntSet set);
+ /**
+ * Modify the current {@link IntSet} object to contain all elements that are
+ * present in itself, the specified {@code set}, or both.
+ *
+ * @param set The set to add to the current set. A {@code null} argument is
+ * treated as though it were an empty set.
+ * @return {@code this} (to support chained calls)
+ *
+ * @exception IllegalStateException if the current set is read-only
+ */
+ @NotNull
+ IntSet addAll(@Nullable IntSet set);
- /** Return the intersection of this set with the argument, creating
- * a new set.
- */
- IntSet and(IntSet a);
+ /**
+ * Return a new {@link IntSet} object containing all elements that are
+ * present in both the current set and the specified set {@code a}.
+ *
+ * @param a The set to intersect with the current set. A {@code null}
+ * argument is treated as though it were an empty set.
+ * @return A new {@link IntSet} instance containing the intersection of the
+ * current set and {@code a}. The value {@code null} may be returned in
+ * place of an empty result set.
+ */
+ @Nullable
+ IntSet and(@Nullable IntSet a);
- IntSet complement(IntSet elements);
+ /**
+ * Return a new {@link IntSet} object containing all elements that are
+ * present in {@code elements} but not present in the current set. The
+ * following expressions are equivalent for input non-null {@link IntSet}
+ * instances {@code x} and {@code y}.
+ *
+ * <ul>
+ * <li>{@code x.complement(y)}</li>
+ * <li>{@code y.subtract(x)}</li>
+ * </ul>
+ *
+ * @param elements The set to compare with the current set. A {@code null}
+ * argument is treated as though it were an empty set.
+ * @return A new {@link IntSet} instance containing the elements present in
+ * {@code elements} but not present in the current set. The value
+ * {@code null} may be returned in place of an empty result set.
+ */
+ @Nullable
+ IntSet complement(@Nullable IntSet elements);
- IntSet or(IntSet a);
+ /**
+ * Return a new {@link IntSet} object containing all elements that are
+ * present in the current set, the specified set {@code a}, or both.
+ *
+ * <p>
+ * This method is similar to {@link #addAll(IntSet)}, but returns a new
+ * {@link IntSet} instance instead of modifying the current set.
+ *
+ * @param a The set to union with the current set. A {@code null} argument
+ * is treated as though it were an empty set.
+ * @return A new {@link IntSet} instance containing the union of the current
+ * set and {@code a}. The value {@code null} may be returned in place of an
+ * empty result set.
+ */
+ @Nullable
+ IntSet or(@Nullable IntSet a);
- IntSet subtract(IntSet a);
+ /**
+ * Return a new {@link IntSet} object containing all elements that are
+ * present in the current set but not present in the input set {@code a}.
+ * The following expressions are equivalent for input non-null
+ * {@link IntSet} instances {@code x} and {@code y}.
+ *
+ * <ul>
+ * <li>{@code y.subtract(x)}</li>
+ * <li>{@code x.complement(y)}</li>
+ * </ul>
+ *
+ * @param a The set to subtract from the current set. A {@code null}
+ * argument is treated as though it were an empty set.
+ * @return A new {@link IntSet} instance containing the elements present in
+ * the current set but not present in {@code a}. The value
+ * {@code null} may be returned in place of an empty result set.
+ */
+ @Nullable
+ IntSet subtract(@Nullable IntSet a);
- /** Return the size of this set (not the underlying implementation's
- * allocated memory size, for example).
- */
- int size();
+ /**
+ * Return the total number of elements represented by the current set.
+ *
+ * @return the total number of elements represented by the current set,
+ * regardless of the manner in which the elements are stored.
+ */
+ int size();
- boolean isNil();
+ /**
+ * Returns {@code true} if this set contains no elements.
+ *
+ * @return {@code true} if the current set contains no elements; otherwise,
+ * {@code false}.
+ */
+ boolean isNil();
- @Override
- boolean equals(Object obj);
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ boolean equals(Object obj);
- int getSingleElement();
+ /**
+ * Returns the single value contained in the set, if {@link #size} is 1;
+ * otherwise, returns {@link Token#INVALID_TYPE}.
+ *
+ * @return the single value contained in the set, if {@link #size} is 1;
+ * otherwise, returns {@link Token#INVALID_TYPE}.
+ */
+ int getSingleElement();
- boolean contains(int el);
+ /**
+ * Returns {@code true} if the set contains the specified element.
+ *
+ * @param el The element to check for.
+ * @return {@code true} if the set contains {@code el}; otherwise {@code false}.
+ */
+ boolean contains(int el);
- /** remove this element from this set */
- void remove(int el);
+ /**
+ * Removes the specified value from the current set. If the current set does
+ * not contain the element, no changes are made.
+ *
+ * @param el the value to remove
+ *
+ * @exception IllegalStateException if the current set is read-only
+ */
+ void remove(int el);
- List toList();
+ /**
+ * Return a list containing the elements represented by the current set. The
+ * list is returned in ascending numerical order.
+ *
+ * @return A list containing all elements present in the current set, sorted
+ * in ascending numerical order.
+ */
+ @NotNull
+ List toList();
- @Override
- String toString();
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ String toString();
}
diff --git a/runtime/Java/src/org/antlr/v4/runtime/misc/IntervalSet.java b/runtime/Java/src/org/antlr/v4/runtime/misc/IntervalSet.java
index e23c34776..17842e23e 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/misc/IntervalSet.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/misc/IntervalSet.java
@@ -39,28 +39,28 @@ import java.util.List;
import java.util.ListIterator;
import java.util.Set;
-/** A set of integers that relies on ranges being common to do
- * "run-length-encoded" like compression (if you view an IntSet like
- * a BitSet with runs of 0s and 1s). Only ranges are recorded so that
- * a few ints up near value 1000 don't cause massive bitsets, just two
- * integer intervals.
+/**
+ * This class implements the {@link IntSet} backed by a sorted array of
+ * non-overlapping intervals. It is particularly efficient for representing
+ * large collections of numbers, where the majority of elements appear as part
+ * of a sequential range of numbers that are all part of the set. For example,
+ * the set { 1, 2, 3, 4, 7, 8 } may be represented as { [1, 4], [7, 8] }.
*
- * element values may be negative. Useful for sets of EPSILON and EOF.
- *
- * 0..9 char range is index pair ['\u0030','\u0039'].
- * Multiple ranges are encoded with multiple index pairs. Isolated
- * elements are encoded with an index pair where both intervals are the same.
- *
- * The ranges are ordered and disjoint so that 2..6 appears before 101..103.
+ * <p>
+ * This class is able to represent sets containing any combination of values in
+ * the range {@link Integer#MIN_VALUE} to {@link Integer#MAX_VALUE}
+ * (inclusive).
*/
public class IntervalSet implements IntSet {
- public static final IntervalSet COMPLETE_CHAR_SET = IntervalSet.of(0, Lexer.MAX_CHAR_VALUE);
- public static final IntervalSet KNOWN_NEGATIVE_ELEMENTS =
- new IntervalSet().or(IntervalSet.of(Token.EOF)).or(IntervalSet.of(Token.EPSILON));
- public static final IntervalSet COMPLETE_SET =
- IntervalSet.of(0, Lexer.MAX_CHAR_VALUE).or(KNOWN_NEGATIVE_ELEMENTS);
+ public static final IntervalSet COMPLETE_CHAR_SET = IntervalSet.of(Lexer.MIN_CHAR_VALUE, Lexer.MAX_CHAR_VALUE);
+ static {
+ COMPLETE_CHAR_SET.setReadonly(true);
+ }
public static final IntervalSet EMPTY_SET = new IntervalSet();
+ static {
+ EMPTY_SET.setReadonly(true);
+ }
/** The list of sorted, disjoint intervals. */
protected List intervals;
@@ -185,18 +185,22 @@ public class IntervalSet implements IntSet {
if ( set==null ) {
return this;
}
- if ( !(set instanceof IntervalSet) ) {
- throw new IllegalArgumentException("can't add non IntSet ("+
- set.getClass().getName()+
- ") to IntervalSet");
- }
- IntervalSet other = (IntervalSet)set;
- // walk set and add each interval
- int n = other.intervals.size();
- for (int i = 0; i < n; i++) {
- Interval I = other.intervals.get(i);
- this.add(I.a,I.b);
+
+ if (set instanceof IntervalSet) {
+ IntervalSet other = (IntervalSet)set;
+ // walk set and add each interval
+ int n = other.intervals.size();
+ for (int i = 0; i < n; i++) {
+ Interval I = other.intervals.get(i);
+ this.add(I.a,I.b);
+ }
}
+ else {
+ for (int value : set.toList()) {
+ add(value);
+ }
+ }
+
return this;
}
@@ -204,71 +208,120 @@ public class IntervalSet implements IntSet {
return this.complement(IntervalSet.of(minElement,maxElement));
}
- /** Given the set of possible values (rather than, say UNICODE or MAXINT),
- * return a new set containing all elements in vocabulary, but not in
- * this. The computation is (vocabulary - this).
- *
- * 'this' is assumed to be either a subset or equal to vocabulary.
- *
- * complement() does not add EOF or EPSILON or any other negative number
- * to the complement.
- */
+ /** {@inheritDoc} */
@Override
public IntervalSet complement(IntSet vocabulary) {
- if ( vocabulary==null ) {
- return null; // nothing in common with null set
- }
- if ( !(vocabulary instanceof IntervalSet ) ) {
- throw new IllegalArgumentException("can't complement with non IntervalSet ("+
- vocabulary.getClass().getName()+")");
+ if ( vocabulary==null || vocabulary.isNil() ) {
+ return null; // nothing in common with null set
}
- IntervalSet vocabularyIS = ((IntervalSet)vocabulary);
- int maxElement = vocabularyIS.getMaxElement();
- IntervalSet compl = new IntervalSet();
- int n = intervals.size();
- if ( n ==0 ) {
- return compl;
+ IntervalSet vocabularyIS;
+ if (vocabulary instanceof IntervalSet) {
+ vocabularyIS = (IntervalSet)vocabulary;
}
- Interval first = intervals.get(0);
- // add a range from 0 to first.a constrained to vocab
- if ( first.a > 0 ) {
- int minKnownElement = KNOWN_NEGATIVE_ELEMENTS.getMinElement();
- IntervalSet s = IntervalSet.of(minKnownElement, first.a-1);
- IntervalSet a = s.and(vocabularyIS);
- compl.addAll(a);
+ else {
+ vocabularyIS = new IntervalSet();
+ vocabularyIS.addAll(vocabulary);
}
- for (int i=1; i resultInterval.b) {
+ resultI++;
+ continue;
+ }
+
+ Interval beforeCurrent = null;
+ Interval afterCurrent = null;
+ if (rightInterval.a > resultInterval.a) {
+ beforeCurrent = new Interval(resultInterval.a, rightInterval.a - 1);
+ }
+
+ if (rightInterval.b < resultInterval.b) {
+ afterCurrent = new Interval(rightInterval.b + 1, resultInterval.b);
+ }
+
+ if (beforeCurrent != null) {
+ if (afterCurrent != null) {
+ // split the current interval into two
+ result.intervals.set(resultI, beforeCurrent);
+ result.intervals.add(resultI + 1, afterCurrent);
+ resultI++;
+ rightI++;
+ continue;
+ }
+ else {
+ // replace the current interval
+ result.intervals.set(resultI, beforeCurrent);
+ resultI++;
+ continue;
+ }
+ }
+ else {
+ if (afterCurrent != null) {
+ // replace the current interval
+ result.intervals.set(resultI, afterCurrent);
+ rightI++;
+ continue;
+ }
+ else {
+ // remove the current interval (thus no need to increment resultI)
+ result.intervals.remove(resultI);
+ continue;
+ }
+ }
+ }
+
+ // If rightI reached right.intervals.size(), no more intervals to subtract from result.
+ // If resultI reached result.intervals.size(), we would be subtracting from an empty set.
+ // Either way, we are done.
+ return result;
}
@Override
@@ -279,11 +332,7 @@ public class IntervalSet implements IntSet {
return o;
}
- /** Return a new set with the intersection of this set with other. Because
- * the intervals are sorted, we can use an iterator for each list and
- * just walk them together. This is roughly O(min(n,m)) for interval
- * list lengths n and m.
- */
+ /** {@inheritDoc} */
@Override
public IntervalSet and(IntSet other) {
if ( other==null ) { //|| !(other instanceof IntervalSet) ) {
@@ -353,7 +402,7 @@ public class IntervalSet implements IntSet {
return intersection;
}
- /** Is el in any range of this set? */
+ /** {@inheritDoc} */
@Override
public boolean contains(int el) {
int n = intervals.size();
@@ -383,13 +432,13 @@ public class IntervalSet implements IntSet {
*/
}
- /** return true if this set has no members */
+ /** {@inheritDoc} */
@Override
public boolean isNil() {
return intervals==null || intervals.isEmpty();
}
- /** If this set is a single integer, return it otherwise Token.INVALID_TYPE */
+ /** {@inheritDoc} */
@Override
public int getSingleElement() {
if ( intervals!=null && intervals.size()==1 ) {
@@ -401,6 +450,12 @@ public class IntervalSet implements IntSet {
return Token.INVALID_TYPE;
}
+ /**
+ * Returns the maximum value contained in the set.
+ *
+ * @return the maximum value contained in the set. If the set is empty, this
+ * method returns {@link Token#INVALID_TYPE}.
+ */
public int getMaxElement() {
if ( isNil() ) {
return Token.INVALID_TYPE;
@@ -409,7 +464,12 @@ public class IntervalSet implements IntSet {
return last.b;
}
- /** Return minimum element */
+ /**
+ * Returns the minimum value contained in the set.
+ *
+ * @return the minimum value contained in the set. If the set is empty, this
+ * method returns {@link Token#INVALID_TYPE}.
+ */
public int getMinElement() {
if ( isNil() ) {
return Token.INVALID_TYPE;
@@ -644,6 +704,7 @@ public class IntervalSet implements IntSet {
}
public void setReadonly(boolean readonly) {
+ if ( this.readonly && !readonly ) throw new IllegalStateException("can't alter readonly IntervalSet");
this.readonly = readonly;
}
}
diff --git a/tool/test/org/antlr/v4/test/TestIntervalSet.java b/tool/test/org/antlr/v4/test/TestIntervalSet.java
index e5938a6f4..256d5633c 100644
--- a/tool/test/org/antlr/v4/test/TestIntervalSet.java
+++ b/tool/test/org/antlr/v4/test/TestIntervalSet.java
@@ -35,7 +35,9 @@ import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.junit.Test;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
public class TestIntervalSet extends BaseTest {