diff --git a/runtime/Java/src/org/antlr/v4/runtime/misc/IntSet.java b/runtime/Java/src/org/antlr/v4/runtime/misc/IntSet.java index 7bc07c5a2..86a9a09c0 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/misc/IntSet.java +++ b/runtime/Java/src/org/antlr/v4/runtime/misc/IntSet.java @@ -32,49 +32,166 @@ package org.antlr.v4.runtime.misc; import java.util.List; -/** A generic set of ints. +/** + * A generic set of integers. * - * @see IntervalSet + * @see IntervalSet */ public interface IntSet { - /** Add an element to the set */ - void add(int el); + /** + * Adds the specified value to the current set. + * + * @param el the value to add + * + * @exception IllegalStateException if the current set is read-only + */ + void add(int el); - /** Add all elements from incoming set to this set. Can limit - * to set of its own type. Return "this" so we can chain calls. - */ - IntSet addAll(IntSet set); + /** + * Modify the current {@link IntSet} object to contain all elements that are + * present in itself, the specified {@code set}, or both. + * + * @param set The set to add to the current set. A {@code null} argument is + * treated as though it were an empty set. + * @return {@code this} (to support chained calls) + * + * @exception IllegalStateException if the current set is read-only + */ + @NotNull + IntSet addAll(@Nullable IntSet set); - /** Return the intersection of this set with the argument, creating - * a new set. - */ - IntSet and(IntSet a); + /** + * Return a new {@link IntSet} object containing all elements that are + * present in both the current set and the specified set {@code a}. + * + * @param a The set to intersect with the current set. A {@code null} + * argument is treated as though it were an empty set. + * @return A new {@link IntSet} instance containing the intersection of the + * current set and {@code a}. The value {@code null} may be returned in + * place of an empty result set. 
+ */ + @Nullable + IntSet and(@Nullable IntSet a); - IntSet complement(IntSet elements); + /** + * Return a new {@link IntSet} object containing all elements that are + * present in {@code elements} but not present in the current set. The + * following expressions are equivalent for input non-null {@link IntSet} + * instances {@code x} and {@code y}. + * + * <ul> + * <li>{@code x.complement(y)}</li> + * <li>{@code y.subtract(x)}</li> + * </ul> + * + * @param elements The set to compare with the current set. A {@code null} + * argument is treated as though it were an empty set. + * @return A new {@link IntSet} instance containing the elements present in + * {@code elements} but not present in the current set. The value + * {@code null} may be returned in place of an empty result set. + */ + @Nullable + IntSet complement(@Nullable IntSet elements); - IntSet or(IntSet a); + /** + * Return a new {@link IntSet} object containing all elements that are + * present in the current set, the specified set {@code a}, or both. + * + *

+ * <p>This method is similar to {@link #addAll(IntSet)}, but returns a new + * {@link IntSet} instance instead of modifying the current set.</p>

+ * + * @param a The set to union with the current set. A {@code null} argument + * is treated as though it were an empty set. + * @return A new {@link IntSet} instance containing the union of the current + * set and {@code a}. The value {@code null} may be returned in place of an + * empty result set. + */ + @Nullable + IntSet or(@Nullable IntSet a); - IntSet subtract(IntSet a); + /** + * Return a new {@link IntSet} object containing all elements that are + * present in the current set but not present in the input set {@code a}. + * The following expressions are equivalent for input non-null + * {@link IntSet} instances {@code x} and {@code y}. + * + * <ul> + * <li>{@code y.subtract(x)}</li> + * <li>{@code x.complement(y)}</li> + * </ul> + * + * @param a The set to subtract from the current set. A {@code null} + * argument is treated as though it were an empty set. + * @return A new {@link IntSet} instance containing the elements present in + * the current set but not present in {@code a}. The value + * {@code null} may be returned in place of an empty result set. + */ + @Nullable + IntSet subtract(@Nullable IntSet a); - /** Return the size of this set (not the underlying implementation's - * allocated memory size, for example). - */ - int size(); + /** + * Return the total number of elements represented by the current set. + * + * @return the total number of elements represented by the current set, + * regardless of the manner in which the elements are stored. + */ + int size(); - boolean isNil(); + /** + * Returns {@code true} if this set contains no elements. + * + * @return {@code true} if the current set contains no elements; otherwise, + * {@code false}. + */ + boolean isNil(); - @Override - boolean equals(Object obj); + /** + * {@inheritDoc} + */ + @Override + boolean equals(Object obj); - int getSingleElement(); + /** + * Returns the single value contained in the set, if {@link #size} is 1; + * otherwise, returns {@link Token#INVALID_TYPE}.
+ * + * @return the single value contained in the set, if {@link #size} is 1; + * otherwise, returns {@link Token#INVALID_TYPE}. + */ + int getSingleElement(); - boolean contains(int el); + /** + * Returns {@code true} if the set contains the specified element. + * + * @param el The element to check for. + * @return {@code true} if the set contains {@code el}; otherwise {@code false}. + */ + boolean contains(int el); - /** remove this element from this set */ - void remove(int el); + /** + * Removes the specified value from the current set. If the current set does + * not contain the element, no changes are made. + * + * @param el the value to remove + * + * @exception IllegalStateException if the current set is read-only + */ + void remove(int el); - List toList(); + /** + * Return a list containing the elements represented by the current set. The + * list is returned in ascending numerical order. + * + * @return A list containing all elements present in the current set, sorted + * in ascending numerical order. + */ + @NotNull + List toList(); - @Override - String toString(); + /** + * {@inheritDoc} + */ + @Override + String toString(); } diff --git a/runtime/Java/src/org/antlr/v4/runtime/misc/IntervalSet.java b/runtime/Java/src/org/antlr/v4/runtime/misc/IntervalSet.java index e23c34776..17842e23e 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/misc/IntervalSet.java +++ b/runtime/Java/src/org/antlr/v4/runtime/misc/IntervalSet.java @@ -39,28 +39,28 @@ import java.util.List; import java.util.ListIterator; import java.util.Set; -/** A set of integers that relies on ranges being common to do - * "run-length-encoded" like compression (if you view an IntSet like - * a BitSet with runs of 0s and 1s). Only ranges are recorded so that - * a few ints up near value 1000 don't cause massive bitsets, just two - * integer intervals. +/** + * This class implements the {@link IntSet} interface backed by a sorted array of + * non-overlapping intervals.
It is particularly efficient for representing + * large collections of numbers, where the majority of elements appear as part + * of a sequential range of numbers that are all part of the set. For example, + * the set { 1, 2, 3, 4, 7, 8 } may be represented as { [1, 4], [7, 8] }. * - * element values may be negative. Useful for sets of EPSILON and EOF. - * - * 0..9 char range is index pair ['\u0030','\u0039']. - * Multiple ranges are encoded with multiple index pairs. Isolated - * elements are encoded with an index pair where both intervals are the same. - * - * The ranges are ordered and disjoint so that 2..6 appears before 101..103. + *

+ * <p>This class is able to represent sets containing any combination of values in + * the range {@link Integer#MIN_VALUE} to {@link Integer#MAX_VALUE} + * (inclusive).</p>

*/ public class IntervalSet implements IntSet { - public static final IntervalSet COMPLETE_CHAR_SET = IntervalSet.of(0, Lexer.MAX_CHAR_VALUE); - public static final IntervalSet KNOWN_NEGATIVE_ELEMENTS = - new IntervalSet().or(IntervalSet.of(Token.EOF)).or(IntervalSet.of(Token.EPSILON)); - public static final IntervalSet COMPLETE_SET = - IntervalSet.of(0, Lexer.MAX_CHAR_VALUE).or(KNOWN_NEGATIVE_ELEMENTS); + public static final IntervalSet COMPLETE_CHAR_SET = IntervalSet.of(Lexer.MIN_CHAR_VALUE, Lexer.MAX_CHAR_VALUE); + static { + COMPLETE_CHAR_SET.setReadonly(true); + } public static final IntervalSet EMPTY_SET = new IntervalSet(); + static { + EMPTY_SET.setReadonly(true); + } /** The list of sorted, disjoint intervals. */ protected List intervals; @@ -185,18 +185,22 @@ public class IntervalSet implements IntSet { if ( set==null ) { return this; } - if ( !(set instanceof IntervalSet) ) { - throw new IllegalArgumentException("can't add non IntSet ("+ - set.getClass().getName()+ - ") to IntervalSet"); - } - IntervalSet other = (IntervalSet)set; - // walk set and add each interval - int n = other.intervals.size(); - for (int i = 0; i < n; i++) { - Interval I = other.intervals.get(i); - this.add(I.a,I.b); + + if (set instanceof IntervalSet) { + IntervalSet other = (IntervalSet)set; + // walk set and add each interval + int n = other.intervals.size(); + for (int i = 0; i < n; i++) { + Interval I = other.intervals.get(i); + this.add(I.a,I.b); + } } + else { + for (int value : set.toList()) { + add(value); + } + } + return this; } @@ -204,71 +208,120 @@ public class IntervalSet implements IntSet { return this.complement(IntervalSet.of(minElement,maxElement)); } - /** Given the set of possible values (rather than, say UNICODE or MAXINT), - * return a new set containing all elements in vocabulary, but not in - * this. The computation is (vocabulary - this). - * - * 'this' is assumed to be either a subset or equal to vocabulary. 
- * - * complement() does not add EOF or EPSILON or any other negative number - * to the complement. - */ + /** {@inheritDoc} */ @Override public IntervalSet complement(IntSet vocabulary) { - if ( vocabulary==null ) { - return null; // nothing in common with null set - } - if ( !(vocabulary instanceof IntervalSet ) ) { - throw new IllegalArgumentException("can't complement with non IntervalSet ("+ - vocabulary.getClass().getName()+")"); + if ( vocabulary==null || vocabulary.isNil() ) { + return null; // nothing in common with null set } - IntervalSet vocabularyIS = ((IntervalSet)vocabulary); - int maxElement = vocabularyIS.getMaxElement(); - IntervalSet compl = new IntervalSet(); - int n = intervals.size(); - if ( n ==0 ) { - return compl; + IntervalSet vocabularyIS; + if (vocabulary instanceof IntervalSet) { + vocabularyIS = (IntervalSet)vocabulary; } - Interval first = intervals.get(0); - // add a range from 0 to first.a constrained to vocab - if ( first.a > 0 ) { - int minKnownElement = KNOWN_NEGATIVE_ELEMENTS.getMinElement(); - IntervalSet s = IntervalSet.of(minKnownElement, first.a-1); - IntervalSet a = s.and(vocabularyIS); - compl.addAll(a); + else { + vocabularyIS = new IntervalSet(); + vocabularyIS.addAll(vocabulary); } - for (int i=1; i resultInterval.b) { + resultI++; + continue; + } + + Interval beforeCurrent = null; + Interval afterCurrent = null; + if (rightInterval.a > resultInterval.a) { + beforeCurrent = new Interval(resultInterval.a, rightInterval.a - 1); + } + + if (rightInterval.b < resultInterval.b) { + afterCurrent = new Interval(rightInterval.b + 1, resultInterval.b); + } + + if (beforeCurrent != null) { + if (afterCurrent != null) { + // split the current interval into two + result.intervals.set(resultI, beforeCurrent); + result.intervals.add(resultI + 1, afterCurrent); + resultI++; + rightI++; + continue; + } + else { + // replace the current interval + result.intervals.set(resultI, beforeCurrent); + resultI++; + continue; + } + } + else { +
if (afterCurrent != null) { + // replace the current interval + result.intervals.set(resultI, afterCurrent); + rightI++; + continue; + } + else { + // remove the current interval (thus no need to increment resultI) + result.intervals.remove(resultI); + continue; + } + } + } + + // If rightI reached right.intervals.size(), no more intervals to subtract from result. + // If resultI reached result.intervals.size(), we would be subtracting from an empty set. + // Either way, we are done. + return result; } @Override @@ -279,11 +332,7 @@ public class IntervalSet implements IntSet { return o; } - /** Return a new set with the intersection of this set with other. Because - * the intervals are sorted, we can use an iterator for each list and - * just walk them together. This is roughly O(min(n,m)) for interval - * list lengths n and m. - */ + /** {@inheritDoc} */ @Override public IntervalSet and(IntSet other) { if ( other==null ) { //|| !(other instanceof IntervalSet) ) { @@ -353,7 +402,7 @@ public class IntervalSet implements IntSet { return intersection; } - /** Is el in any range of this set? */ + /** {@inheritDoc} */ @Override public boolean contains(int el) { int n = intervals.size(); @@ -383,13 +432,13 @@ public class IntervalSet implements IntSet { */ } - /** return true if this set has no members */ + /** {@inheritDoc} */ @Override public boolean isNil() { return intervals==null || intervals.isEmpty(); } - /** If this set is a single integer, return it otherwise Token.INVALID_TYPE */ + /** {@inheritDoc} */ @Override public int getSingleElement() { if ( intervals!=null && intervals.size()==1 ) { @@ -401,6 +450,12 @@ public class IntervalSet implements IntSet { return Token.INVALID_TYPE; } + /** + * Returns the maximum value contained in the set. + * + * @return the maximum value contained in the set. If the set is empty, this + * method returns {@link Token#INVALID_TYPE}. 
+ */ public int getMaxElement() { if ( isNil() ) { return Token.INVALID_TYPE; @@ -409,7 +464,12 @@ public class IntervalSet implements IntSet { return last.b; } - /** Return minimum element */ + /** + * Returns the minimum value contained in the set. + * + * @return the minimum value contained in the set. If the set is empty, this + * method returns {@link Token#INVALID_TYPE}. + */ public int getMinElement() { if ( isNil() ) { return Token.INVALID_TYPE; @@ -644,6 +704,7 @@ public class IntervalSet implements IntSet { } public void setReadonly(boolean readonly) { + if ( this.readonly && !readonly ) throw new IllegalStateException("can't alter readonly IntervalSet"); this.readonly = readonly; } } diff --git a/tool/test/org/antlr/v4/test/TestIntervalSet.java b/tool/test/org/antlr/v4/test/TestIntervalSet.java index e5938a6f4..256d5633c 100644 --- a/tool/test/org/antlr/v4/test/TestIntervalSet.java +++ b/tool/test/org/antlr/v4/test/TestIntervalSet.java @@ -35,7 +35,9 @@ import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.misc.IntervalSet; import org.junit.Test; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; public class TestIntervalSet extends BaseTest {