* Updated documentation for IntSet

* Reimplemented IntervalSet.subtract and IntervalSet.complement to operate over the complete range of supported values
* Expanded several methods in IntervalSet to operate on any IntSet
* Mark COMPLETE_CHAR_SET and EMPTY_SET as read-only
This commit is contained in:
Sam Harwell 2014-06-16 14:23:52 -05:00
parent 51430018d0
commit 2920ad0d1a
3 changed files with 303 additions and 123 deletions

View File

@ -32,49 +32,166 @@ package org.antlr.v4.runtime.misc;
import java.util.List;
/** A generic set of ints.
/**
* A generic set of integers.
*
* @see IntervalSet
* @see IntervalSet
*/
public interface IntSet {
/** Add an element to the set */
void add(int el);
/**
* Adds the specified value to the current set.
*
* @param el the value to add
*
* @exception IllegalStateException if the current set is read-only
*/
void add(int el);
/** Add all elements from incoming set to this set. Can limit
* to set of its own type. Return "this" so we can chain calls.
*/
IntSet addAll(IntSet set);
/**
* Modify the current {@link IntSet} object to contain all elements that are
* present in itself, the specified {@code set}, or both.
*
* @param set The set to add to the current set. A {@code null} argument is
* treated as though it were an empty set.
* @return {@code this} (to support chained calls)
*
* @exception IllegalStateException if the current set is read-only
*/
@NotNull
IntSet addAll(@Nullable IntSet set);
/** Return the intersection of this set with the argument, creating
* a new set.
*/
IntSet and(IntSet a);
/**
* Return a new {@link IntSet} object containing all elements that are
* present in both the current set and the specified set {@code a}.
*
* @param a The set to intersect with the current set. A {@code null}
* argument is treated as though it were an empty set.
* @return A new {@link IntSet} instance containing the intersection of the
* current set and {@code a}. The value {@code null} may be returned in
* place of an empty result set.
*/
@Nullable
IntSet and(@Nullable IntSet a);
IntSet complement(IntSet elements);
/**
* Return a new {@link IntSet} object containing all elements that are
* present in {@code elements} but not present in the current set. The
* following expressions are equivalent for input non-null {@link IntSet}
* instances {@code x} and {@code y}.
*
* <ul>
* <li>{@code x.complement(y)}</li>
* <li>{@code y.subtract(x)}</li>
* </ul>
*
* @param elements The set to compare with the current set. A {@code null}
* argument is treated as though it were an empty set.
* @return A new {@link IntSet} instance containing the elements present in
* {@code elements} but not present in the current set. The value
* {@code null} may be returned in place of an empty result set.
*/
@Nullable
IntSet complement(@Nullable IntSet elements);
IntSet or(IntSet a);
/**
* Return a new {@link IntSet} object containing all elements that are
* present in the current set, the specified set {@code a}, or both.
*
* <p>
* This method is similar to {@link #addAll(IntSet)}, but returns a new
* {@link IntSet} instance instead of modifying the current set.</p>
*
* @param a The set to union with the current set. A {@code null} argument
* is treated as though it were an empty set.
* @return A new {@link IntSet} instance containing the union of the current
* set and {@code a}. The value {@code null} may be returned in place of an
* empty result set.
*/
@Nullable
IntSet or(@Nullable IntSet a);
IntSet subtract(IntSet a);
/**
* Return a new {@link IntSet} object containing all elements that are
* present in the current set but not present in the input set {@code a}.
* The following expressions are equivalent for input non-null
* {@link IntSet} instances {@code x} and {@code y}.
*
* <ul>
* <li>{@code y.subtract(x)}</li>
* <li>{@code x.complement(y)}</li>
* </ul>
*
* @param a The set to subtract from the current set. A {@code null}
* argument is treated as though it were an empty set.
* @return A new {@link IntSet} instance containing the elements present in
* the current set but not present in {@code a}. The value
* {@code null} may be returned in place of an empty result set.
*/
@Nullable
IntSet subtract(@Nullable IntSet a);
/** Return the size of this set (not the underlying implementation's
* allocated memory size, for example).
*/
int size();
/**
* Return the total number of elements represented by the current set.
*
* @return the total number of elements represented by the current set,
* regardless of the manner in which the elements are stored.
*/
int size();
boolean isNil();
/**
* Returns {@code true} if this set contains no elements.
*
* @return {@code true} if the current set contains no elements; otherwise,
* {@code false}.
*/
boolean isNil();
@Override
boolean equals(Object obj);
/**
* {@inheritDoc}
*/
@Override
boolean equals(Object obj);
int getSingleElement();
/**
* Returns the single value contained in the set, if {@link #size} is 1;
* otherwise, returns {@link Token#INVALID_TYPE}.
*
* @return the single value contained in the set, if {@link #size} is 1;
* otherwise, returns {@link Token#INVALID_TYPE}.
*/
int getSingleElement();
boolean contains(int el);
/**
* Returns {@code true} if the set contains the specified element.
*
* @param el The element to check for.
* @return {@code true} if the set contains {@code el}; otherwise {@code false}.
*/
boolean contains(int el);
/** remove this element from this set */
void remove(int el);
/**
* Removes the specified value from the current set. If the current set does
* not contain the element, no changes are made.
*
* @param el the value to remove
*
* @exception IllegalStateException if the current set is read-only
*/
void remove(int el);
List<Integer> toList();
/**
* Return a list containing the elements represented by the current set. The
* list is returned in ascending numerical order.
*
* @return A list containing all elements present in the current set, sorted
* in ascending numerical order.
*/
@NotNull
List<Integer> toList();
@Override
String toString();
/**
* {@inheritDoc}
*/
@Override
String toString();
}

View File

@ -39,28 +39,28 @@ import java.util.List;
import java.util.ListIterator;
import java.util.Set;
/** A set of integers that relies on ranges being common to do
* "run-length-encoded" like compression (if you view an IntSet like
* a BitSet with runs of 0s and 1s). Only ranges are recorded so that
* a few ints up near value 1000 don't cause massive bitsets, just two
* integer intervals.
/**
* This class implements the {@link IntSet} backed by a sorted array of
* non-overlapping intervals. It is particularly efficient for representing
* large collections of numbers, where the majority of elements appear as part
* of a sequential range of numbers that are all part of the set. For example,
* the set { 1, 2, 3, 4, 7, 8 } may be represented as { [1, 4], [7, 8] }.
*
* element values may be negative. Useful for sets of EPSILON and EOF.
*
* 0..9 char range is index pair ['\u0030','\u0039'].
* Multiple ranges are encoded with multiple index pairs. Isolated
* elements are encoded with an index pair where both intervals are the same.
*
* The ranges are ordered and disjoint so that 2..6 appears before 101..103.
* <p>
* This class is able to represent sets containing any combination of values in
* the range {@link Integer#MIN_VALUE} to {@link Integer#MAX_VALUE}
* (inclusive).</p>
*/
public class IntervalSet implements IntSet {
public static final IntervalSet COMPLETE_CHAR_SET = IntervalSet.of(0, Lexer.MAX_CHAR_VALUE);
public static final IntervalSet KNOWN_NEGATIVE_ELEMENTS =
new IntervalSet().or(IntervalSet.of(Token.EOF)).or(IntervalSet.of(Token.EPSILON));
public static final IntervalSet COMPLETE_SET =
IntervalSet.of(0, Lexer.MAX_CHAR_VALUE).or(KNOWN_NEGATIVE_ELEMENTS);
public static final IntervalSet COMPLETE_CHAR_SET = IntervalSet.of(Lexer.MIN_CHAR_VALUE, Lexer.MAX_CHAR_VALUE);
static {
COMPLETE_CHAR_SET.setReadonly(true);
}
public static final IntervalSet EMPTY_SET = new IntervalSet();
static {
EMPTY_SET.setReadonly(true);
}
/** The list of sorted, disjoint intervals. */
protected List<Interval> intervals;
@ -185,18 +185,22 @@ public class IntervalSet implements IntSet {
if ( set==null ) {
return this;
}
if ( !(set instanceof IntervalSet) ) {
throw new IllegalArgumentException("can't add non IntSet ("+
set.getClass().getName()+
") to IntervalSet");
}
IntervalSet other = (IntervalSet)set;
// walk set and add each interval
int n = other.intervals.size();
for (int i = 0; i < n; i++) {
Interval I = other.intervals.get(i);
this.add(I.a,I.b);
if (set instanceof IntervalSet) {
IntervalSet other = (IntervalSet)set;
// walk set and add each interval
int n = other.intervals.size();
for (int i = 0; i < n; i++) {
Interval I = other.intervals.get(i);
this.add(I.a,I.b);
}
}
else {
for (int value : set.toList()) {
add(value);
}
}
return this;
}
@ -204,71 +208,120 @@ public class IntervalSet implements IntSet {
return this.complement(IntervalSet.of(minElement,maxElement));
}
/** Given the set of possible values (rather than, say UNICODE or MAXINT),
* return a new set containing all elements in vocabulary, but not in
* this. The computation is (vocabulary - this).
*
* 'this' is assumed to be either a subset or equal to vocabulary.
*
* complement() does not add EOF or EPSILON or any other negative number
* to the complement.
*/
/** {@inheritDoc} */
@Override
public IntervalSet complement(IntSet vocabulary) {
if ( vocabulary==null ) {
return null; // nothing in common with null set
}
if ( !(vocabulary instanceof IntervalSet ) ) {
throw new IllegalArgumentException("can't complement with non IntervalSet ("+
vocabulary.getClass().getName()+")");
if ( vocabulary==null || vocabulary.isNil() ) {
return null; // nothing in common with null set
}
IntervalSet vocabularyIS = ((IntervalSet)vocabulary);
int maxElement = vocabularyIS.getMaxElement();
IntervalSet compl = new IntervalSet();
int n = intervals.size();
if ( n ==0 ) {
return compl;
IntervalSet vocabularyIS;
if (vocabulary instanceof IntervalSet) {
vocabularyIS = (IntervalSet)vocabulary;
}
Interval first = intervals.get(0);
// add a range from 0 to first.a constrained to vocab
if ( first.a > 0 ) {
int minKnownElement = KNOWN_NEGATIVE_ELEMENTS.getMinElement();
IntervalSet s = IntervalSet.of(minKnownElement, first.a-1);
IntervalSet a = s.and(vocabularyIS);
compl.addAll(a);
else {
vocabularyIS = new IntervalSet();
vocabularyIS.addAll(vocabulary);
}
for (int i=1; i<n; i++) { // from 2nd interval .. nth
Interval previous = intervals.get(i-1);
Interval current = intervals.get(i);
IntervalSet s = IntervalSet.of(previous.b+1, current.a-1);
IntervalSet a = s.and(vocabularyIS);
compl.addAll(a);
}
Interval last = intervals.get(n -1);
// add a range from last.b to maxElement constrained to vocab
if ( last.b < maxElement ) {
IntervalSet s = IntervalSet.of(last.b+1, maxElement);
IntervalSet a = s.and(vocabularyIS);
compl.addAll(a);
}
return compl;
return vocabularyIS.subtract(this);
}
/** Compute this-other via this&amp;~other.
* Return a new set containing all elements in this but not in other.
* other is assumed to be a subset of this;
* anything that is in other but not in this will be ignored.
*/
@Override
public IntervalSet subtract(IntSet other) {
// assume the whole unicode range here for the complement
// because it doesn't matter. Anything beyond the max of this' set
// will be ignored since we are doing this & ~other. The intersection
// will be empty. The only problem would be when this' set max value
// goes beyond MAX_CHAR_VALUE, but hopefully the constant MAX_CHAR_VALUE
// will prevent this.
return this.and(((IntervalSet)other).complement(COMPLETE_SET));
public IntervalSet subtract(IntSet a) {
    // Subtracting nothing (null or an empty set) yields a plain copy of this set.
    boolean nothingToRemove = a == null || a.isNil();
    if (nothingToRemove) {
        return new IntervalSet(this);
    }

    // The static helper works on IntervalSet operands only, so any other
    // IntSet implementation is first copied into a temporary IntervalSet.
    IntervalSet subtrahend;
    if (a instanceof IntervalSet) {
        subtrahend = (IntervalSet)a;
    }
    else {
        subtrahend = new IntervalSet();
        subtrahend.addAll(a);
    }

    return subtract(this, subtrahend);
}
/**
 * Computes the set difference {@code left - right} of two interval sets.
 * A {@code null} argument is handled exactly like an empty set.
 *
 * @param left the set whose elements are kept unless they also occur in
 * {@code right}
 * @param right the set whose elements are removed from {@code left}
 * @return a new set holding every element of {@code left} that is not
 * present in {@code right}; neither argument is modified by the loop below,
 * which only edits the copy made from {@code left}
 */
@NotNull
public static IntervalSet subtract(@Nullable IntervalSet left, @Nullable IntervalSet right) {
    if (left == null || left.isNil()) {
        return new IntervalSet();
    }

    IntervalSet difference = new IntervalSet(left);
    if (right == null || right.isNil()) {
        // nothing to take away; the copy of left is already the answer
        return difference;
    }

    List<Interval> remaining = difference.intervals;
    List<Interval> removals = right.intervals;
    int keepIndex = 0;
    int cutIndex = 0;

    // Walk both interval lists in step. The loop ends as soon as either list
    // is exhausted: no more removals to apply, or nothing left to cut from.
    while (keepIndex < remaining.size() && cutIndex < removals.size()) {
        Interval keep = remaining.get(keepIndex);
        Interval cut = removals.get(cutIndex);

        if (cut.b < keep.a) {
            // removal lies entirely before the current interval
            cutIndex++;
            continue;
        }

        if (cut.a > keep.b) {
            // removal lies entirely after the current interval
            keepIndex++;
            continue;
        }

        // The two intervals overlap; determine which ends of 'keep' survive.
        boolean headSurvives = cut.a > keep.a;
        boolean tailSurvives = cut.b < keep.b;

        if (headSurvives && tailSurvives) {
            // the removal punches a hole: split the current interval in two
            remaining.set(keepIndex, new Interval(keep.a, cut.a - 1));
            remaining.add(keepIndex + 1, new Interval(cut.b + 1, keep.b));
            // continue with the tail piece, which the next removal may still hit
            keepIndex++;
            cutIndex++;
        }
        else if (headSurvives) {
            // only the leading part remains; this removal may reach further intervals
            remaining.set(keepIndex, new Interval(keep.a, cut.a - 1));
            keepIndex++;
        }
        else if (tailSurvives) {
            // only the trailing part remains; later removals may still overlap it
            remaining.set(keepIndex, new Interval(cut.b + 1, keep.b));
            cutIndex++;
        }
        else {
            // fully covered: drop it (the next interval slides into keepIndex)
            remaining.remove(keepIndex);
        }
    }

    return difference;
}
@Override
@ -279,11 +332,7 @@ public class IntervalSet implements IntSet {
return o;
}
/** Return a new set with the intersection of this set with other. Because
* the intervals are sorted, we can use an iterator for each list and
* just walk them together. This is roughly O(min(n,m)) for interval
* list lengths n and m.
*/
/** {@inheritDoc} */
@Override
public IntervalSet and(IntSet other) {
if ( other==null ) { //|| !(other instanceof IntervalSet) ) {
@ -353,7 +402,7 @@ public class IntervalSet implements IntSet {
return intersection;
}
/** Is el in any range of this set? */
/** {@inheritDoc} */
@Override
public boolean contains(int el) {
int n = intervals.size();
@ -383,13 +432,13 @@ public class IntervalSet implements IntSet {
*/
}
/** return true if this set has no members */
/** {@inheritDoc} */
@Override
public boolean isNil() {
return intervals==null || intervals.isEmpty();
}
/** If this set is a single integer, return it otherwise Token.INVALID_TYPE */
/** {@inheritDoc} */
@Override
public int getSingleElement() {
if ( intervals!=null && intervals.size()==1 ) {
@ -401,6 +450,12 @@ public class IntervalSet implements IntSet {
return Token.INVALID_TYPE;
}
/**
* Returns the maximum value contained in the set.
*
* @return the maximum value contained in the set. If the set is empty, this
* method returns {@link Token#INVALID_TYPE}.
*/
public int getMaxElement() {
if ( isNil() ) {
return Token.INVALID_TYPE;
@ -409,7 +464,12 @@ public class IntervalSet implements IntSet {
return last.b;
}
/** Return minimum element */
/**
* Returns the minimum value contained in the set.
*
* @return the minimum value contained in the set. If the set is empty, this
* method returns {@link Token#INVALID_TYPE}.
*/
public int getMinElement() {
if ( isNil() ) {
return Token.INVALID_TYPE;
@ -644,6 +704,7 @@ public class IntervalSet implements IntSet {
}
public void setReadonly(boolean readonly) {
    // Once a set has been marked read-only the flag cannot be cleared again.
    boolean unlocking = this.readonly && !readonly;
    if (unlocking) {
        throw new IllegalStateException("can't alter readonly IntervalSet");
    }
    this.readonly = readonly;
}
}

View File

@ -35,7 +35,9 @@ import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.junit.Test;
import static org.junit.Assert.*;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
public class TestIntervalSet extends BaseTest {