Rework C++ Interval, IntervalSet, ATN and ATNState

- Remove the readonly status from IntervalSet.
- Remove virtual functions from IntervalSet and Interval. These are
  passed by value throughout the C++ runtime; meaningful inheritance is
  not possible anyway.
- Move the atomic flag into ATNState as a "now cached" flag.
- Return a const reference from ATN::nextTokens(ATNState*) so the readonly
  status is enforced by the compiler rather than at runtime in the code.
- Use value semantics with std::move to reduce the number of copies performed,
  consistent with how these classes are used in the C++ runtime source.
- Remove the type-unsafe varargs constructor in IntervalSet and replace it with
  a type-safe variadic template implementation.
This commit is contained in:
Jan Martin Mikkelsen 2017-06-27 11:04:50 +10:00
parent 0c4473ea1a
commit d7f5e1834b
7 changed files with 104 additions and 136 deletions

View File

@ -87,12 +87,12 @@ misc::IntervalSet ATN::nextTokens(ATNState *s, RuleContext *ctx) const {
} }
misc::IntervalSet& ATN::nextTokens(ATNState *s) const { misc::IntervalSet const& ATN::nextTokens(ATNState *s) const {
if (!s->nextTokenWithinRule.isReadOnly()) { if (!s->nextTokenUpdated) {
std::unique_lock<std::mutex> lock { _mutex }; std::unique_lock<std::mutex> lock { _mutex };
if (!s->nextTokenWithinRule.isReadOnly()) { if (!s->nextTokenUpdated) {
s->nextTokenWithinRule = nextTokens(s, nullptr); s->nextTokenWithinRule = nextTokens(s, nullptr);
s->nextTokenWithinRule.setReadOnly(true); s->nextTokenUpdated = true;
} }
} }
return s->nextTokenWithinRule; return s->nextTokenWithinRule;
@ -101,7 +101,7 @@ misc::IntervalSet& ATN::nextTokens(ATNState *s) const {
void ATN::addState(ATNState *state) { void ATN::addState(ATNState *state) {
if (state != nullptr) { if (state != nullptr) {
//state->atn = this; //state->atn = this;
state->stateNumber = (int)states.size(); state->stateNumber = static_cast<int>(states.size());
} }
states.push_back(state); states.push_back(state);
@ -114,7 +114,7 @@ void ATN::removeState(ATNState *state) {
int ATN::defineDecisionState(DecisionState *s) { int ATN::defineDecisionState(DecisionState *s) {
decisionToState.push_back(s); decisionToState.push_back(s);
s->decision = (int)decisionToState.size() - 1; s->decision = static_cast<int>(decisionToState.size() - 1);
return s->decision; return s->decision;
} }
@ -154,7 +154,7 @@ misc::IntervalSet ATN::getExpectedTokens(size_t stateNumber, RuleContext *contex
if (ctx->parent == nullptr) { if (ctx->parent == nullptr) {
break; break;
} }
ctx = (RuleContext *)ctx->parent; ctx = static_cast<RuleContext *>(ctx->parent);
} }
if (following.contains(Token::EPSILON)) { if (following.contains(Token::EPSILON)) {

View File

@ -70,7 +70,7 @@ namespace atn {
/// staying in same rule. <seealso cref="Token#EPSILON"/> is in set if we reach end of /// staying in same rule. <seealso cref="Token#EPSILON"/> is in set if we reach end of
/// rule. /// rule.
/// </summary> /// </summary>
virtual misc::IntervalSet& nextTokens(ATNState *s) const; virtual misc::IntervalSet const& nextTokens(ATNState *s) const;
virtual void addState(ATNState *state); virtual void addState(ATNState *state);

View File

@ -6,6 +6,7 @@
#pragma once #pragma once
#include "misc/IntervalSet.h" #include "misc/IntervalSet.h"
#include <atomic>
namespace antlr4 { namespace antlr4 {
namespace atn { namespace atn {
@ -70,12 +71,17 @@ namespace atn {
/// ///
/// <embed src="images/OptionalNonGreedy.svg" type="image/svg+xml"/> /// <embed src="images/OptionalNonGreedy.svg" type="image/svg+xml"/>
/// </summary> /// </summary>
class ANTLR4CPP_PUBLIC ATN;
class ANTLR4CPP_PUBLIC ATNState { class ANTLR4CPP_PUBLIC ATNState {
public: public:
ATNState(); ATNState();
ATNState(ATNState const&) = delete;
virtual ~ATNState(); virtual ~ATNState();
ATNState& operator=(ATNState const&) = delete;
static const size_t INITIAL_NUM_TRANSITIONS = 4; static const size_t INITIAL_NUM_TRANSITIONS = 4;
static const size_t INVALID_STATE_NUMBER = std::numeric_limits<size_t>::max(); static const size_t INVALID_STATE_NUMBER = std::numeric_limits<size_t>::max();
@ -102,9 +108,6 @@ namespace atn {
bool epsilonOnlyTransitions = false; bool epsilonOnlyTransitions = false;
public: public:
/// Used to cache lookahead during parsing, not used during construction.
misc::IntervalSet nextTokenWithinRule;
virtual size_t hashCode(); virtual size_t hashCode();
bool operator == (const ATNState &other); bool operator == (const ATNState &other);
@ -117,6 +120,14 @@ namespace atn {
virtual void addTransition(size_t index, Transition *e); virtual void addTransition(size_t index, Transition *e);
virtual Transition* removeTransition(size_t index); virtual Transition* removeTransition(size_t index);
virtual size_t getStateType() = 0; virtual size_t getStateType() = 0;
private:
/// Used to cache lookahead during parsing, not used during construction.
misc::IntervalSet nextTokenWithinRule;
std::atomic<bool> nextTokenUpdated { false };
friend class ATN;
}; };
} // namespace atn } // namespace atn

View File

@ -7,8 +7,6 @@
using namespace antlr4::misc; using namespace antlr4::misc;
Interval::~Interval() = default;
size_t antlr4::misc::numericToSymbol(ssize_t v) { size_t antlr4::misc::numericToSymbol(ssize_t v) {
return (size_t)v; return (size_t)v;
} }

View File

@ -26,59 +26,56 @@ namespace misc {
ssize_t b; ssize_t b;
Interval(); Interval();
explicit Interval(size_t a_, size_t b_); // For unsigned -> signed mappings. Interval(size_t a_, size_t b_); // For unsigned -> signed mappings.
Interval(ssize_t a_, ssize_t b_); Interval(ssize_t a_, ssize_t b_);
Interval(Interval const&) = default;
virtual ~Interval();
Interval& operator=(Interval const&) = default;
/// return number of elements between a and b inclusively. x..x is length 1. /// return number of elements between a and b inclusively. x..x is length 1.
/// if b < a, then length is 0. 9..10 has length 2. /// if b < a, then length is 0. 9..10 has length 2.
virtual size_t length() const; size_t length() const;
bool operator == (const Interval &other) const; bool operator == (const Interval &other) const;
virtual size_t hashCode() const; size_t hashCode() const;
/// <summary> /// <summary>
/// Does this start completely before other? Disjoint </summary> /// Does this start completely before other? Disjoint </summary>
virtual bool startsBeforeDisjoint(const Interval &other) const; bool startsBeforeDisjoint(const Interval &other) const;
/// <summary> /// <summary>
/// Does this start at or before other? Nondisjoint </summary> /// Does this start at or before other? Nondisjoint </summary>
virtual bool startsBeforeNonDisjoint(const Interval &other) const; bool startsBeforeNonDisjoint(const Interval &other) const;
/// <summary> /// <summary>
/// Does this.a start after other.b? May or may not be disjoint </summary> /// Does this.a start after other.b? May or may not be disjoint </summary>
virtual bool startsAfter(const Interval &other) const; bool startsAfter(const Interval &other) const;
/// <summary> /// <summary>
/// Does this start completely after other? Disjoint </summary> /// Does this start completely after other? Disjoint </summary>
virtual bool startsAfterDisjoint(const Interval &other) const; bool startsAfterDisjoint(const Interval &other) const;
/// <summary> /// <summary>
/// Does this start after other? NonDisjoint </summary> /// Does this start after other? NonDisjoint </summary>
virtual bool startsAfterNonDisjoint(const Interval &other) const; bool startsAfterNonDisjoint(const Interval &other) const;
/// <summary> /// <summary>
/// Are both ranges disjoint? I.e., no overlap? </summary> /// Are both ranges disjoint? I.e., no overlap? </summary>
virtual bool disjoint(const Interval &other) const; bool disjoint(const Interval &other) const;
/// <summary> /// <summary>
/// Are two intervals adjacent such as 0..41 and 42..42? </summary> /// Are two intervals adjacent such as 0..41 and 42..42? </summary>
virtual bool adjacent(const Interval &other) const; bool adjacent(const Interval &other) const;
virtual bool properlyContains(const Interval &other) const; bool properlyContains(const Interval &other) const;
/// <summary> /// <summary>
/// Return the interval computed from combining this and other </summary> /// Return the interval computed from combining this and other </summary>
virtual Interval Union(const Interval &other) const; Interval Union(const Interval &other) const;
/// <summary> /// <summary>
/// Return the interval in common between this and o </summary> /// Return the interval in common between this and o </summary>
virtual Interval intersection(const Interval &other) const; Interval intersection(const Interval &other) const;
virtual std::string toString() const; std::string toString() const;
private: private:
}; };

View File

@ -13,52 +13,31 @@
using namespace antlr4; using namespace antlr4;
using namespace antlr4::misc; using namespace antlr4::misc;
IntervalSet const IntervalSet::COMPLETE_CHAR_SET = []() { IntervalSet const IntervalSet::COMPLETE_CHAR_SET =
IntervalSet complete = IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE); IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE);
complete.setReadOnly(true);
return complete;
}();
IntervalSet const IntervalSet::EMPTY_SET = []() { IntervalSet const IntervalSet::EMPTY_SET;
IntervalSet empty;
empty.setReadOnly(true);
return empty;
}();
IntervalSet::IntervalSet() { IntervalSet::IntervalSet() : _intervals() {
InitializeInstanceFields();
} }
IntervalSet::IntervalSet(const std::vector<Interval> &intervals) : IntervalSet() { IntervalSet::IntervalSet(const IntervalSet &set) : _intervals(set._intervals) {
_intervals = intervals;
} }
IntervalSet::IntervalSet(const IntervalSet &set) : IntervalSet() { IntervalSet::IntervalSet(IntervalSet&& set) : _intervals(std::move(set._intervals)) {
addAll(set);
} }
IntervalSet::IntervalSet(int n, ...) : IntervalSet() { IntervalSet::IntervalSet(std::vector<Interval>&& intervals) : _intervals(std::move(intervals)) {
va_list vlist;
va_start(vlist, n);
for (int i = 0; i < n; i++) {
add(va_arg(vlist, int));
}
} }
IntervalSet::~IntervalSet() IntervalSet& IntervalSet::operator=(const IntervalSet& other) {
{ _intervals = other._intervals;
return *this;
} }
IntervalSet& IntervalSet::operator=(const IntervalSet& other) IntervalSet& IntervalSet::operator=(IntervalSet&& other) {
{ _intervals = move(other._intervals);
if (_readonly) { return *this;
throw IllegalStateException("can't alter read only IntervalSet");
}
_intervals.clear();
return addAll(other);
} }
IntervalSet IntervalSet::of(ssize_t a) { IntervalSet IntervalSet::of(ssize_t a) {
@ -70,16 +49,10 @@ IntervalSet IntervalSet::of(ssize_t a, ssize_t b) {
} }
void IntervalSet::clear() { void IntervalSet::clear() {
if (_readonly) {
throw IllegalStateException("can't alter read only IntervalSet");
}
_intervals.clear(); _intervals.clear();
} }
void IntervalSet::add(ssize_t el) { void IntervalSet::add(ssize_t el) {
if (_readonly) {
throw IllegalStateException("can't alter read only IntervalSet");
}
add(el, el); add(el, el);
} }
@ -88,10 +61,6 @@ void IntervalSet::add(ssize_t a, ssize_t b) {
} }
void IntervalSet::add(const Interval &addition) { void IntervalSet::add(const Interval &addition) {
if (_readonly) {
throw IllegalStateException("can't alter read only IntervalSet");
}
if (addition.b < addition.a) { if (addition.b < addition.a) {
return; return;
} }
@ -150,7 +119,7 @@ IntervalSet IntervalSet::Or(const std::vector<IntervalSet> &sets) {
IntervalSet& IntervalSet::addAll(const IntervalSet &set) { IntervalSet& IntervalSet::addAll(const IntervalSet &set) {
// walk set and add each interval // walk set and add each interval
for (auto &interval : set._intervals) { for (auto const& interval : set._intervals) {
add(interval); add(interval);
} }
return *this; return *this;
@ -339,7 +308,7 @@ ssize_t IntervalSet::getMinElement() const {
return _intervals[0].a; return _intervals[0].a;
} }
std::vector<Interval> IntervalSet::getIntervals() const { std::vector<Interval> const& IntervalSet::getIntervals() const {
return _intervals; return _intervals;
} }
@ -516,10 +485,6 @@ void IntervalSet::remove(size_t el) {
} }
void IntervalSet::remove(ssize_t el) { void IntervalSet::remove(ssize_t el) {
if (_readonly) {
throw IllegalStateException("can't alter read only IntervalSet");
}
for (size_t i = 0; i < _intervals.size(); ++i) { for (size_t i = 0; i < _intervals.size(); ++i) {
Interval &interval = _intervals[i]; Interval &interval = _intervals[i];
ssize_t a = interval.a; ssize_t a = interval.a;
@ -553,17 +518,3 @@ void IntervalSet::remove(ssize_t el) {
} }
} }
} }
bool IntervalSet::isReadOnly() const {
return _readonly;
}
void IntervalSet::setReadOnly(bool readonly) {
if (_readonly && !readonly)
throw IllegalStateException("Can't alter readonly IntervalSet");
_readonly = readonly;
}
void IntervalSet::InitializeInstanceFields() {
_readonly = false;
}

View File

@ -6,7 +6,7 @@
#pragma once #pragma once
#include "misc/Interval.h" #include "misc/Interval.h"
#include <atomic> #include "Exceptions.h"
namespace antlr4 { namespace antlr4 {
namespace misc { namespace misc {
@ -28,20 +28,27 @@ namespace misc {
static IntervalSet const COMPLETE_CHAR_SET; static IntervalSet const COMPLETE_CHAR_SET;
static IntervalSet const EMPTY_SET; static IntervalSet const EMPTY_SET;
protected: private:
/// The list of sorted, disjoint intervals. /// The list of sorted, disjoint intervals.
std::vector<Interval> _intervals; std::vector<Interval> _intervals;
std::atomic<bool> _readonly;
explicit IntervalSet(std::vector<Interval>&& intervals);
public: public:
IntervalSet(); IntervalSet();
IntervalSet(const std::vector<Interval> &intervals); IntervalSet(IntervalSet const& set);
IntervalSet(const IntervalSet &set); IntervalSet(IntervalSet&& set);
IntervalSet(int numArgs, ...);
virtual ~IntervalSet(); template<typename T1, typename... T_NEXT>
IntervalSet(int, T1 t1, T_NEXT&&... next) : IntervalSet()
{
// The first int argument is an ignored count for compatibility
// with the previous varargs based interface.
addItems(t1, std::forward<T_NEXT>(next)...);
}
IntervalSet& operator=(const IntervalSet &set); IntervalSet& operator=(IntervalSet const& set);
IntervalSet& operator=(IntervalSet&& set);
/// Create a set with a single element, el. /// Create a set with a single element, el.
static IntervalSet of(ssize_t a); static IntervalSet of(ssize_t a);
@ -49,11 +56,11 @@ namespace misc {
/// Create a set with all ints within range [a..b] (inclusive) /// Create a set with all ints within range [a..b] (inclusive)
static IntervalSet of(ssize_t a, ssize_t b); static IntervalSet of(ssize_t a, ssize_t b);
virtual void clear(); void clear();
/// Add a single element to the set. An isolated element is stored /// Add a single element to the set. An isolated element is stored
/// as a range el..el. /// as a range el..el.
virtual void add(ssize_t el); void add(ssize_t el);
/// Add interval; i.e., add all integers from a to b to set. /// Add interval; i.e., add all integers from a to b to set.
/// If b<a, do nothing. /// If b<a, do nothing.
@ -61,30 +68,36 @@ namespace misc {
/// If overlap, combine ranges. For example, /// If overlap, combine ranges. For example,
/// If this is {1..5, 10..20}, adding 6..7 yields /// If this is {1..5, 10..20}, adding 6..7 yields
/// {1..5, 6..7, 10..20}. Adding 4..8 yields {1..8, 10..20}. /// {1..5, 6..7, 10..20}. Adding 4..8 yields {1..8, 10..20}.
virtual void add(ssize_t a, ssize_t b); void add(ssize_t a, ssize_t b);
public:
/// combine all sets in the array returned the or'd value /// combine all sets in the array returned the or'd value
static IntervalSet Or(const std::vector<IntervalSet> &sets); static IntervalSet Or(const std::vector<IntervalSet> &sets);
// Copy on write so we can cache a..a intervals and sets of that. // Copy on write so we can cache a..a intervals and sets of that.
virtual void add(const Interval &addition); void add(const Interval &addition);
virtual IntervalSet& addAll(const IntervalSet &set); IntervalSet& addAll(const IntervalSet &set);
virtual IntervalSet complement(ssize_t minElement, ssize_t maxElement) const; template<typename T1, typename... T_NEXT>
void addItems(T1 t1, T_NEXT&&... next)
{
add(t1);
addItems(std::forward<T_NEXT>(next)...);
}
IntervalSet complement(ssize_t minElement, ssize_t maxElement) const;
/// Given the set of possible values (rather than, say UNICODE or MAXINT), /// Given the set of possible values (rather than, say UNICODE or MAXINT),
/// return a new set containing all elements in vocabulary, but not in /// return a new set containing all elements in vocabulary, but not in
/// this. The computation is (vocabulary - this). /// this. The computation is (vocabulary - this).
/// ///
/// 'this' is assumed to be either a subset or equal to vocabulary. /// 'this' is assumed to be either a subset or equal to vocabulary.
virtual IntervalSet complement(const IntervalSet &vocabulary) const; IntervalSet complement(const IntervalSet &vocabulary) const;
/// Compute this-other via this&~other. /// Compute this-other via this&~other.
/// Return a new set containing all elements in this but not in other. /// Return a new set containing all elements in this but not in other.
/// other is assumed to be a subset of this; /// other is assumed to be a subset of this;
/// anything that is in other but not in this will be ignored. /// anything that is in other but not in this will be ignored.
virtual IntervalSet subtract(const IntervalSet &other) const; IntervalSet subtract(const IntervalSet &other) const;
/** /**
* Compute the set difference between two interval sets. The specific * Compute the set difference between two interval sets. The specific
@ -93,23 +106,23 @@ namespace misc {
*/ */
static IntervalSet subtract(const IntervalSet &left, const IntervalSet &right); static IntervalSet subtract(const IntervalSet &left, const IntervalSet &right);
virtual IntervalSet Or(const IntervalSet &a) const; IntervalSet Or(const IntervalSet &a) const;
/// Return a new set with the intersection of this set with other. Because /// Return a new set with the intersection of this set with other. Because
/// the intervals are sorted, we can use an iterator for each list and /// the intervals are sorted, we can use an iterator for each list and
/// just walk them together. This is roughly O(min(n,m)) for interval /// just walk them together. This is roughly O(min(n,m)) for interval
/// list lengths n and m. /// list lengths n and m.
virtual IntervalSet And(const IntervalSet &other) const; IntervalSet And(const IntervalSet &other) const;
/// Is el in any range of this set? /// Is el in any range of this set?
virtual bool contains(size_t el) const; // For mapping of e.g. Token::EOF to -1 etc. bool contains(size_t el) const; // For mapping of e.g. Token::EOF to -1 etc.
virtual bool contains(ssize_t el) const; bool contains(ssize_t el) const;
/// return true if this set has no members /// return true if this set has no members
virtual bool isEmpty() const; bool isEmpty() const;
/// If this set is a single integer, return it otherwise Token.INVALID_TYPE. /// If this set is a single integer, return it otherwise Token.INVALID_TYPE.
virtual ssize_t getSingleElement() const; ssize_t getSingleElement() const;
/** /**
* Returns the maximum value contained in the set. * Returns the maximum value contained in the set.
@ -117,7 +130,7 @@ namespace misc {
* @return the maximum value contained in the set. If the set is empty, this * @return the maximum value contained in the set. If the set is empty, this
* method returns {@link Token#INVALID_TYPE}. * method returns {@link Token#INVALID_TYPE}.
*/ */
virtual ssize_t getMaxElement() const; ssize_t getMaxElement() const;
/** /**
* Returns the minimum value contained in the set. * Returns the minimum value contained in the set.
@ -125,50 +138,48 @@ namespace misc {
* @return the minimum value contained in the set. If the set is empty, this * @return the minimum value contained in the set. If the set is empty, this
* method returns {@link Token#INVALID_TYPE}. * method returns {@link Token#INVALID_TYPE}.
*/ */
virtual ssize_t getMinElement() const; ssize_t getMinElement() const;
/// <summary> /// <summary>
/// Return a list of Interval objects. </summary> /// Return a list of Interval objects. </summary>
virtual std::vector<Interval> getIntervals() const; std::vector<Interval> const& getIntervals() const;
virtual size_t hashCode() const; size_t hashCode() const;
/// Are two IntervalSets equal? Because all intervals are sorted /// Are two IntervalSets equal? Because all intervals are sorted
/// and disjoint, equals is a simple linear walk over both lists /// and disjoint, equals is a simple linear walk over both lists
/// to make sure they are the same. /// to make sure they are the same.
bool operator == (const IntervalSet &other) const; bool operator == (const IntervalSet &other) const;
virtual std::string toString() const; std::string toString() const;
virtual std::string toString(bool elemAreChar) const; std::string toString(bool elemAreChar) const;
/** /**
* @deprecated Use {@link #toString(Vocabulary)} instead. * @deprecated Use {@link #toString(Vocabulary)} instead.
*/ */
virtual std::string toString(const std::vector<std::string> &tokenNames) const; std::string toString(const std::vector<std::string> &tokenNames) const;
virtual std::string toString(const dfa::Vocabulary &vocabulary) const; std::string toString(const dfa::Vocabulary &vocabulary) const;
protected: protected:
/** /**
* @deprecated Use {@link #elementName(Vocabulary, int)} instead. * @deprecated Use {@link #elementName(Vocabulary, int)} instead.
*/ */
virtual std::string elementName(const std::vector<std::string> &tokenNames, ssize_t a) const; std::string elementName(const std::vector<std::string> &tokenNames, ssize_t a) const;
virtual std::string elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const; std::string elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const;
public: public:
virtual size_t size() const; size_t size() const;
virtual std::vector<ssize_t> toList() const; std::vector<ssize_t> toList() const;
virtual std::set<ssize_t> toSet() const; std::set<ssize_t> toSet() const;
/// Get the ith element of ordered set. Used only by RandomPhrase so /// Get the ith element of ordered set. Used only by RandomPhrase so
/// don't bother to implement if you're not doing that for a new /// don't bother to implement if you're not doing that for a new
/// ANTLR code gen target. /// ANTLR code gen target.
virtual ssize_t get(size_t i) const; ssize_t get(size_t i) const;
virtual void remove(size_t el); // For mapping of e.g. Token::EOF to -1 etc. void remove(size_t el); // For mapping of e.g. Token::EOF to -1 etc.
virtual void remove(ssize_t el); void remove(ssize_t el);
virtual bool isReadOnly() const;
virtual void setReadOnly(bool readonly);
private: private:
void InitializeInstanceFields(); void addItems() { /* No-op */ }
}; };
} // namespace atn } // namespace atn