forked from jasder/antlr
* Updated documentation for IntSet
* Reimplemented IntervalSet.subtract and IntervalSet.complement to operate over the complete range of supported values
* Expanded several methods in IntervalSet to operate on any IntSet
* Marked COMPLETE_CHAR_SET and EMPTY_SET as read-only
This commit is contained in:
parent
51430018d0
commit
2920ad0d1a
|
@ -32,49 +32,166 @@ package org.antlr.v4.runtime.misc;
|
|||
|
||||
import java.util.List;
|
||||
|
||||
/** A generic set of ints.
|
||||
/**
|
||||
* A generic set of integers.
|
||||
*
|
||||
* @see IntervalSet
|
||||
* @see IntervalSet
|
||||
*/
|
||||
public interface IntSet {
|
||||
/** Add an element to the set */
|
||||
void add(int el);
|
||||
/**
|
||||
* Adds the specified value to the current set.
|
||||
*
|
||||
* @param el the value to add
|
||||
*
|
||||
* @exception IllegalStateException if the current set is read-only
|
||||
*/
|
||||
void add(int el);
|
||||
|
||||
/** Add all elements from incoming set to this set. Can limit
|
||||
* to set of its own type. Return "this" so we can chain calls.
|
||||
*/
|
||||
IntSet addAll(IntSet set);
|
||||
/**
|
||||
* Modify the current {@link IntSet} object to contain all elements that are
|
||||
* present in itself, the specified {@code set}, or both.
|
||||
*
|
||||
* @param set The set to add to the current set. A {@code null} argument is
|
||||
* treated as though it were an empty set.
|
||||
* @return {@code this} (to support chained calls)
|
||||
*
|
||||
* @exception IllegalStateException if the current set is read-only
|
||||
*/
|
||||
@NotNull
|
||||
IntSet addAll(@Nullable IntSet set);
|
||||
|
||||
/** Return the intersection of this set with the argument, creating
|
||||
* a new set.
|
||||
*/
|
||||
IntSet and(IntSet a);
|
||||
/**
|
||||
* Return a new {@link IntSet} object containing all elements that are
|
||||
* present in both the current set and the specified set {@code a}.
|
||||
*
|
||||
* @param a The set to intersect with the current set. A {@code null}
|
||||
* argument is treated as though it were an empty set.
|
||||
* @return A new {@link IntSet} instance containing the intersection of the
|
||||
* current set and {@code a}. The value {@code null} may be returned in
|
||||
* place of an empty result set.
|
||||
*/
|
||||
@Nullable
|
||||
IntSet and(@Nullable IntSet a);
|
||||
|
||||
IntSet complement(IntSet elements);
|
||||
/**
|
||||
* Return a new {@link IntSet} object containing all elements that are
|
||||
* present in {@code elements} but not present in the current set. The
|
||||
* following expressions are equivalent for input non-null {@link IntSet}
|
||||
* instances {@code x} and {@code y}.
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@code x.complement(y)}</li>
|
||||
* <li>{@code y.subtract(x)}</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param elements The set to compare with the current set. A {@code null}
|
||||
* argument is treated as though it were an empty set.
|
||||
* @return A new {@link IntSet} instance containing the elements present in
|
||||
* {@code elements} but not present in the current set. The value
|
||||
* {@code null} may be returned in place of an empty result set.
|
||||
*/
|
||||
@Nullable
|
||||
IntSet complement(@Nullable IntSet elements);
|
||||
|
||||
IntSet or(IntSet a);
|
||||
/**
|
||||
* Return a new {@link IntSet} object containing all elements that are
|
||||
* present in the current set, the specified set {@code a}, or both.
|
||||
*
|
||||
* <p>
|
||||
* This method is similar to {@link #addAll(IntSet)}, but returns a new
|
||||
* {@link IntSet} instance instead of modifying the current set.</p>
|
||||
*
|
||||
* @param a The set to union with the current set. A {@code null} argument
|
||||
* is treated as though it were an empty set.
|
||||
* @return A new {@link IntSet} instance containing the union of the current
|
||||
* set and {@code a}. The value {@code null} may be returned in place of an
|
||||
* empty result set.
|
||||
*/
|
||||
@Nullable
|
||||
IntSet or(@Nullable IntSet a);
|
||||
|
||||
IntSet subtract(IntSet a);
|
||||
/**
|
||||
* Return a new {@link IntSet} object containing all elements that are
|
||||
* present in the current set but not present in the input set {@code a}.
|
||||
* The following expressions are equivalent for input non-null
|
||||
* {@link IntSet} instances {@code x} and {@code y}.
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@code y.subtract(x)}</li>
|
||||
* <li>{@code x.complement(y)}</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param a The set to compare with the current set. A {@code null}
|
||||
* argument is treated as though it were an empty set.
|
||||
* @return A new {@link IntSet} instance containing the elements present in
|
||||
* the current set but not present in {@code a}. The value
|
||||
* {@code null} may be returned in place of an empty result set.
|
||||
*/
|
||||
@Nullable
|
||||
IntSet subtract(@Nullable IntSet a);
|
||||
|
||||
/** Return the size of this set (not the underlying implementation's
|
||||
* allocated memory size, for example).
|
||||
*/
|
||||
int size();
|
||||
/**
|
||||
* Return the total number of elements represented by the current set.
|
||||
*
|
||||
* @return the total number of elements represented by the current set,
|
||||
* regardless of the manner in which the elements are stored.
|
||||
*/
|
||||
int size();
|
||||
|
||||
boolean isNil();
|
||||
/**
|
||||
* Returns {@code true} if this set contains no elements.
|
||||
*
|
||||
* @return {@code true} if the current set contains no elements; otherwise,
|
||||
* {@code false}.
|
||||
*/
|
||||
boolean isNil();
|
||||
|
||||
@Override
|
||||
boolean equals(Object obj);
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
boolean equals(Object obj);
|
||||
|
||||
int getSingleElement();
|
||||
/**
|
||||
* Returns the single value contained in the set, if {@link #size} is 1;
|
||||
* otherwise, returns {@link Token#INVALID_TYPE}.
|
||||
*
|
||||
* @return the single value contained in the set, if {@link #size} is 1;
|
||||
* otherwise, returns {@link Token#INVALID_TYPE}.
|
||||
*/
|
||||
int getSingleElement();
|
||||
|
||||
boolean contains(int el);
|
||||
/**
|
||||
* Returns {@code true} if the set contains the specified element.
|
||||
*
|
||||
* @param el The element to check for.
|
||||
* @return {@code true} if the set contains {@code el}; otherwise {@code false}.
|
||||
*/
|
||||
boolean contains(int el);
|
||||
|
||||
/** remove this element from this set */
|
||||
void remove(int el);
|
||||
/**
|
||||
* Removes the specified value from the current set. If the current set does
|
||||
* not contain the element, no changes are made.
|
||||
*
|
||||
* @param el the value to remove
|
||||
*
|
||||
* @exception IllegalStateException if the current set is read-only
|
||||
*/
|
||||
void remove(int el);
|
||||
|
||||
List<Integer> toList();
|
||||
/**
|
||||
* Return a list containing the elements represented by the current set. The
|
||||
* list is returned in ascending numerical order.
|
||||
*
|
||||
* @return A list containing all elements present in the current set, sorted
|
||||
* in ascending numerical order.
|
||||
*/
|
||||
@NotNull
|
||||
List<Integer> toList();
|
||||
|
||||
@Override
|
||||
String toString();
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
String toString();
|
||||
}
|
||||
|
|
|
@ -39,28 +39,28 @@ import java.util.List;
|
|||
import java.util.ListIterator;
|
||||
import java.util.Set;
|
||||
|
||||
/** A set of integers that relies on ranges being common to do
|
||||
* "run-length-encoded" like compression (if you view an IntSet like
|
||||
* a BitSet with runs of 0s and 1s). Only ranges are recorded so that
|
||||
* a few ints up near value 1000 don't cause massive bitsets, just two
|
||||
* integer intervals.
|
||||
/**
|
||||
* This class implements the {@link IntSet} backed by a sorted array of
|
||||
* non-overlapping intervals. It is particularly efficient for representing
|
||||
* large collections of numbers, where the majority of elements appear as part
|
||||
* of a sequential range of numbers that are all part of the set. For example,
|
||||
* the set { 1, 2, 3, 4, 7, 8 } may be represented as { [1, 4], [7, 8] }.
|
||||
*
|
||||
* element values may be negative. Useful for sets of EPSILON and EOF.
|
||||
*
|
||||
* 0..9 char range is index pair ['\u0030','\u0039'].
|
||||
* Multiple ranges are encoded with multiple index pairs. Isolated
|
||||
* elements are encoded with an index pair where both intervals are the same.
|
||||
*
|
||||
* The ranges are ordered and disjoint so that 2..6 appears before 101..103.
|
||||
* <p>
|
||||
* This class is able to represent sets containing any combination of values in
|
||||
* the range {@link Integer#MIN_VALUE} to {@link Integer#MAX_VALUE}
|
||||
* (inclusive).</p>
|
||||
*/
|
||||
public class IntervalSet implements IntSet {
|
||||
public static final IntervalSet COMPLETE_CHAR_SET = IntervalSet.of(0, Lexer.MAX_CHAR_VALUE);
|
||||
public static final IntervalSet KNOWN_NEGATIVE_ELEMENTS =
|
||||
new IntervalSet().or(IntervalSet.of(Token.EOF)).or(IntervalSet.of(Token.EPSILON));
|
||||
public static final IntervalSet COMPLETE_SET =
|
||||
IntervalSet.of(0, Lexer.MAX_CHAR_VALUE).or(KNOWN_NEGATIVE_ELEMENTS);
|
||||
public static final IntervalSet COMPLETE_CHAR_SET = IntervalSet.of(Lexer.MIN_CHAR_VALUE, Lexer.MAX_CHAR_VALUE);
|
||||
static {
|
||||
COMPLETE_CHAR_SET.setReadonly(true);
|
||||
}
|
||||
|
||||
public static final IntervalSet EMPTY_SET = new IntervalSet();
|
||||
static {
|
||||
EMPTY_SET.setReadonly(true);
|
||||
}
|
||||
|
||||
/** The list of sorted, disjoint intervals. */
|
||||
protected List<Interval> intervals;
|
||||
|
@ -185,18 +185,22 @@ public class IntervalSet implements IntSet {
|
|||
if ( set==null ) {
|
||||
return this;
|
||||
}
|
||||
if ( !(set instanceof IntervalSet) ) {
|
||||
throw new IllegalArgumentException("can't add non IntSet ("+
|
||||
set.getClass().getName()+
|
||||
") to IntervalSet");
|
||||
}
|
||||
IntervalSet other = (IntervalSet)set;
|
||||
// walk set and add each interval
|
||||
int n = other.intervals.size();
|
||||
for (int i = 0; i < n; i++) {
|
||||
Interval I = other.intervals.get(i);
|
||||
this.add(I.a,I.b);
|
||||
|
||||
if (set instanceof IntervalSet) {
|
||||
IntervalSet other = (IntervalSet)set;
|
||||
// walk set and add each interval
|
||||
int n = other.intervals.size();
|
||||
for (int i = 0; i < n; i++) {
|
||||
Interval I = other.intervals.get(i);
|
||||
this.add(I.a,I.b);
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int value : set.toList()) {
|
||||
add(value);
|
||||
}
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -204,71 +208,120 @@ public class IntervalSet implements IntSet {
|
|||
return this.complement(IntervalSet.of(minElement,maxElement));
|
||||
}
|
||||
|
||||
/** Given the set of possible values (rather than, say UNICODE or MAXINT),
|
||||
* return a new set containing all elements in vocabulary, but not in
|
||||
* this. The computation is (vocabulary - this).
|
||||
*
|
||||
* 'this' is assumed to be either a subset or equal to vocabulary.
|
||||
*
|
||||
* complement() does not add EOF or EPSILON or any other negative number
|
||||
* to the complement.
|
||||
*/
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public IntervalSet complement(IntSet vocabulary) {
|
||||
if ( vocabulary==null ) {
|
||||
return null; // nothing in common with null set
|
||||
}
|
||||
if ( !(vocabulary instanceof IntervalSet ) ) {
|
||||
throw new IllegalArgumentException("can't complement with non IntervalSet ("+
|
||||
vocabulary.getClass().getName()+")");
|
||||
if ( vocabulary==null || vocabulary.isNil() ) {
|
||||
return null; // nothing in common with null set
|
||||
}
|
||||
IntervalSet vocabularyIS = ((IntervalSet)vocabulary);
|
||||
int maxElement = vocabularyIS.getMaxElement();
|
||||
|
||||
IntervalSet compl = new IntervalSet();
|
||||
int n = intervals.size();
|
||||
if ( n ==0 ) {
|
||||
return compl;
|
||||
IntervalSet vocabularyIS;
|
||||
if (vocabulary instanceof IntervalSet) {
|
||||
vocabularyIS = (IntervalSet)vocabulary;
|
||||
}
|
||||
Interval first = intervals.get(0);
|
||||
// add a range from 0 to first.a constrained to vocab
|
||||
if ( first.a > 0 ) {
|
||||
int minKnownElement = KNOWN_NEGATIVE_ELEMENTS.getMinElement();
|
||||
IntervalSet s = IntervalSet.of(minKnownElement, first.a-1);
|
||||
IntervalSet a = s.and(vocabularyIS);
|
||||
compl.addAll(a);
|
||||
else {
|
||||
vocabularyIS = new IntervalSet();
|
||||
vocabularyIS.addAll(vocabulary);
|
||||
}
|
||||
for (int i=1; i<n; i++) { // from 2nd interval .. nth
|
||||
Interval previous = intervals.get(i-1);
|
||||
Interval current = intervals.get(i);
|
||||
IntervalSet s = IntervalSet.of(previous.b+1, current.a-1);
|
||||
IntervalSet a = s.and(vocabularyIS);
|
||||
compl.addAll(a);
|
||||
}
|
||||
Interval last = intervals.get(n -1);
|
||||
// add a range from last.b to maxElement constrained to vocab
|
||||
if ( last.b < maxElement ) {
|
||||
IntervalSet s = IntervalSet.of(last.b+1, maxElement);
|
||||
IntervalSet a = s.and(vocabularyIS);
|
||||
compl.addAll(a);
|
||||
}
|
||||
return compl;
|
||||
|
||||
return vocabularyIS.subtract(this);
|
||||
}
|
||||
|
||||
/** Compute this-other via this&~other.
|
||||
* Return a new set containing all elements in this but not in other.
|
||||
* other is assumed to be a subset of this;
|
||||
* anything that is in other but not in this will be ignored.
|
||||
*/
|
||||
@Override
|
||||
public IntervalSet subtract(IntSet other) {
|
||||
// assume the whole unicode range here for the complement
|
||||
// because it doesn't matter. Anything beyond the max of this' set
|
||||
// will be ignored since we are doing this & ~other. The intersection
|
||||
// will be empty. The only problem would be when this' set max value
|
||||
// goes beyond MAX_CHAR_VALUE, but hopefully the constant MAX_CHAR_VALUE
|
||||
// will prevent this.
|
||||
return this.and(((IntervalSet)other).complement(COMPLETE_SET));
|
||||
public IntervalSet subtract(IntSet a) {
|
||||
if (a == null || a.isNil()) {
|
||||
return new IntervalSet(this);
|
||||
}
|
||||
|
||||
if (a instanceof IntervalSet) {
|
||||
return subtract(this, (IntervalSet)a);
|
||||
}
|
||||
|
||||
IntervalSet other = new IntervalSet();
|
||||
other.addAll(a);
|
||||
return subtract(this, other);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the set difference between two interval sets. The specific
|
||||
* operation is {@code left - right}. If either of the input sets is
|
||||
* {@code null}, it is treated as though it were an empty set.
|
||||
*/
|
||||
@NotNull
|
||||
public static IntervalSet subtract(@Nullable IntervalSet left, @Nullable IntervalSet right) {
|
||||
if (left == null || left.isNil()) {
|
||||
return new IntervalSet();
|
||||
}
|
||||
|
||||
IntervalSet result = new IntervalSet(left);
|
||||
if (right == null || right.isNil()) {
|
||||
// right set has no elements; just return the copy of the left set
|
||||
return result;
|
||||
}
|
||||
|
||||
int resultI = 0;
|
||||
int rightI = 0;
|
||||
while (resultI < result.intervals.size() && rightI < right.intervals.size()) {
|
||||
Interval resultInterval = result.intervals.get(resultI);
|
||||
Interval rightInterval = right.intervals.get(rightI);
|
||||
|
||||
// operation: (resultInterval - rightInterval) and update indexes
|
||||
|
||||
if (rightInterval.b < resultInterval.a) {
|
||||
rightI++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (rightInterval.a > resultInterval.b) {
|
||||
resultI++;
|
||||
continue;
|
||||
}
|
||||
|
||||
Interval beforeCurrent = null;
|
||||
Interval afterCurrent = null;
|
||||
if (rightInterval.a > resultInterval.a) {
|
||||
beforeCurrent = new Interval(resultInterval.a, rightInterval.a - 1);
|
||||
}
|
||||
|
||||
if (rightInterval.b < resultInterval.b) {
|
||||
afterCurrent = new Interval(rightInterval.b + 1, resultInterval.b);
|
||||
}
|
||||
|
||||
if (beforeCurrent != null) {
|
||||
if (afterCurrent != null) {
|
||||
// split the current interval into two
|
||||
result.intervals.set(resultI, beforeCurrent);
|
||||
result.intervals.add(resultI + 1, afterCurrent);
|
||||
resultI++;
|
||||
rightI++;
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
// replace the current interval
|
||||
result.intervals.set(resultI, beforeCurrent);
|
||||
resultI++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (afterCurrent != null) {
|
||||
// replace the current interval
|
||||
result.intervals.set(resultI, afterCurrent);
|
||||
rightI++;
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
// remove the current interval (thus no need to increment resultI)
|
||||
result.intervals.remove(resultI);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If rightI reached right.intervals.size(), no more intervals to subtract from result.
|
||||
// If resultI reached result.intervals.size(), we would be subtracting from an empty set.
|
||||
// Either way, we are done.
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -279,11 +332,7 @@ public class IntervalSet implements IntSet {
|
|||
return o;
|
||||
}
|
||||
|
||||
/** Return a new set with the intersection of this set with other. Because
|
||||
* the intervals are sorted, we can use an iterator for each list and
|
||||
* just walk them together. This is roughly O(min(n,m)) for interval
|
||||
* list lengths n and m.
|
||||
*/
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public IntervalSet and(IntSet other) {
|
||||
if ( other==null ) { //|| !(other instanceof IntervalSet) ) {
|
||||
|
@ -353,7 +402,7 @@ public class IntervalSet implements IntSet {
|
|||
return intersection;
|
||||
}
|
||||
|
||||
/** Is el in any range of this set? */
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public boolean contains(int el) {
|
||||
int n = intervals.size();
|
||||
|
@ -383,13 +432,13 @@ public class IntervalSet implements IntSet {
|
|||
*/
|
||||
}
|
||||
|
||||
/** return true if this set has no members */
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public boolean isNil() {
|
||||
return intervals==null || intervals.isEmpty();
|
||||
}
|
||||
|
||||
/** If this set is a single integer, return it otherwise Token.INVALID_TYPE */
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public int getSingleElement() {
|
||||
if ( intervals!=null && intervals.size()==1 ) {
|
||||
|
@ -401,6 +450,12 @@ public class IntervalSet implements IntSet {
|
|||
return Token.INVALID_TYPE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the maximum value contained in the set.
|
||||
*
|
||||
* @return the maximum value contained in the set. If the set is empty, this
|
||||
* method returns {@link Token#INVALID_TYPE}.
|
||||
*/
|
||||
public int getMaxElement() {
|
||||
if ( isNil() ) {
|
||||
return Token.INVALID_TYPE;
|
||||
|
@ -409,7 +464,12 @@ public class IntervalSet implements IntSet {
|
|||
return last.b;
|
||||
}
|
||||
|
||||
/** Return minimum element */
|
||||
/**
|
||||
* Returns the minimum value contained in the set.
|
||||
*
|
||||
* @return the minimum value contained in the set. If the set is empty, this
|
||||
* method returns {@link Token#INVALID_TYPE}.
|
||||
*/
|
||||
public int getMinElement() {
|
||||
if ( isNil() ) {
|
||||
return Token.INVALID_TYPE;
|
||||
|
@ -644,6 +704,7 @@ public class IntervalSet implements IntSet {
|
|||
}
|
||||
|
||||
public void setReadonly(boolean readonly) {
|
||||
if ( this.readonly && !readonly ) throw new IllegalStateException("can't alter readonly IntervalSet");
|
||||
this.readonly = readonly;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -35,7 +35,9 @@ import org.antlr.v4.runtime.Token;
|
|||
import org.antlr.v4.runtime.misc.IntervalSet;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
public class TestIntervalSet extends BaseTest {
|
||||
|
||||
|
|
Loading…
Reference in New Issue