Rework C++ Interval, IntervalSet, ATN and ATNState

- Remove the readonly status from IntervalSet.
- Remove virtual functions from IntervalSet and Interval. These are
  passed by value throughout the C++ runtime; meaningful inheritance is
  not possible anyway.
- Moving the atomic flag into ATNState as a "now cached" flag.
- Return a const reference from ATN::nextStates(ATNState*) so the readonly
  status is enforced by the compiler not at runtime in the code.
- Use value semantics using std::move to reduce the number of copies performed,
  constent with how these classes are used in the C++ runtime source.
- Remove type-unsafe varargs constructor in IntervalSet, replace with
  type-safe varadic templates implementation.
This commit is contained in:
Jan Martin Mikkelsen 2017-06-27 11:04:50 +10:00
parent 0c4473ea1a
commit d7f5e1834b
7 changed files with 104 additions and 136 deletions

View File

@ -87,12 +87,12 @@ misc::IntervalSet ATN::nextTokens(ATNState *s, RuleContext *ctx) const {
}
misc::IntervalSet& ATN::nextTokens(ATNState *s) const {
if (!s->nextTokenWithinRule.isReadOnly()) {
misc::IntervalSet const& ATN::nextTokens(ATNState *s) const {
if (!s->nextTokenUpdated) {
std::unique_lock<std::mutex> lock { _mutex };
if (!s->nextTokenWithinRule.isReadOnly()) {
if (!s->nextTokenUpdated) {
s->nextTokenWithinRule = nextTokens(s, nullptr);
s->nextTokenWithinRule.setReadOnly(true);
s->nextTokenUpdated = true;
}
}
return s->nextTokenWithinRule;
@ -101,7 +101,7 @@ misc::IntervalSet& ATN::nextTokens(ATNState *s) const {
void ATN::addState(ATNState *state) {
if (state != nullptr) {
//state->atn = this;
state->stateNumber = (int)states.size();
state->stateNumber = static_cast<int>(states.size());
}
states.push_back(state);
@ -114,7 +114,7 @@ void ATN::removeState(ATNState *state) {
int ATN::defineDecisionState(DecisionState *s) {
decisionToState.push_back(s);
s->decision = (int)decisionToState.size() - 1;
s->decision = static_cast<int>(decisionToState.size() - 1);
return s->decision;
}
@ -154,7 +154,7 @@ misc::IntervalSet ATN::getExpectedTokens(size_t stateNumber, RuleContext *contex
if (ctx->parent == nullptr) {
break;
}
ctx = (RuleContext *)ctx->parent;
ctx = static_cast<RuleContext *>(ctx->parent);
}
if (following.contains(Token::EPSILON)) {

View File

@ -70,7 +70,7 @@ namespace atn {
/// staying in same rule. <seealso cref="Token#EPSILON"/> is in set if we reach end of
/// rule.
/// </summary>
virtual misc::IntervalSet& nextTokens(ATNState *s) const;
virtual misc::IntervalSet const& nextTokens(ATNState *s) const;
virtual void addState(ATNState *state);

View File

@ -6,6 +6,7 @@
#pragma once
#include "misc/IntervalSet.h"
#include <atomic>
namespace antlr4 {
namespace atn {
@ -70,12 +71,17 @@ namespace atn {
///
/// <embed src="images/OptionalNonGreedy.svg" type="image/svg+xml"/>
/// </summary>
class ANTLR4CPP_PUBLIC ATN;
class ANTLR4CPP_PUBLIC ATNState {
public:
ATNState();
ATNState(ATNState const&) = delete;
virtual ~ATNState();
ATNState& operator=(ATNState const&) = delete;
static const size_t INITIAL_NUM_TRANSITIONS = 4;
static const size_t INVALID_STATE_NUMBER = std::numeric_limits<size_t>::max();
@ -102,9 +108,6 @@ namespace atn {
bool epsilonOnlyTransitions = false;
public:
/// Used to cache lookahead during parsing, not used during construction.
misc::IntervalSet nextTokenWithinRule;
virtual size_t hashCode();
bool operator == (const ATNState &other);
@ -117,6 +120,14 @@ namespace atn {
virtual void addTransition(size_t index, Transition *e);
virtual Transition* removeTransition(size_t index);
virtual size_t getStateType() = 0;
private:
/// Used to cache lookahead during parsing, not used during construction.
misc::IntervalSet nextTokenWithinRule;
std::atomic<bool> nextTokenUpdated { false };
friend class ATN;
};
} // namespace atn

View File

@ -7,8 +7,6 @@
using namespace antlr4::misc;
Interval::~Interval() = default;
size_t antlr4::misc::numericToSymbol(ssize_t v) {
return (size_t)v;
}

View File

@ -26,59 +26,56 @@ namespace misc {
ssize_t b;
Interval();
explicit Interval(size_t a_, size_t b_); // For unsigned -> signed mappings.
Interval(size_t a_, size_t b_); // For unsigned -> signed mappings.
Interval(ssize_t a_, ssize_t b_);
Interval(Interval const&) = default;
virtual ~Interval();
Interval& operator=(Interval const&) = default;
/// return number of elements between a and b inclusively. x..x is length 1.
/// if b < a, then length is 0. 9..10 has length 2.
virtual size_t length() const;
size_t length() const;
bool operator == (const Interval &other) const;
virtual size_t hashCode() const;
size_t hashCode() const;
/// <summary>
/// Does this start completely before other? Disjoint </summary>
virtual bool startsBeforeDisjoint(const Interval &other) const;
bool startsBeforeDisjoint(const Interval &other) const;
/// <summary>
/// Does this start at or before other? Nondisjoint </summary>
virtual bool startsBeforeNonDisjoint(const Interval &other) const;
bool startsBeforeNonDisjoint(const Interval &other) const;
/// <summary>
/// Does this.a start after other.b? May or may not be disjoint </summary>
virtual bool startsAfter(const Interval &other) const;
bool startsAfter(const Interval &other) const;
/// <summary>
/// Does this start completely after other? Disjoint </summary>
virtual bool startsAfterDisjoint(const Interval &other) const;
bool startsAfterDisjoint(const Interval &other) const;
/// <summary>
/// Does this start after other? NonDisjoint </summary>
virtual bool startsAfterNonDisjoint(const Interval &other) const;
bool startsAfterNonDisjoint(const Interval &other) const;
/// <summary>
/// Are both ranges disjoint? I.e., no overlap? </summary>
virtual bool disjoint(const Interval &other) const;
bool disjoint(const Interval &other) const;
/// <summary>
/// Are two intervals adjacent such as 0..41 and 42..42? </summary>
virtual bool adjacent(const Interval &other) const;
bool adjacent(const Interval &other) const;
virtual bool properlyContains(const Interval &other) const;
bool properlyContains(const Interval &other) const;
/// <summary>
/// Return the interval computed from combining this and other </summary>
virtual Interval Union(const Interval &other) const;
Interval Union(const Interval &other) const;
/// <summary>
/// Return the interval in common between this and o </summary>
virtual Interval intersection(const Interval &other) const;
Interval intersection(const Interval &other) const;
virtual std::string toString() const;
std::string toString() const;
private:
};

View File

@ -13,52 +13,31 @@
using namespace antlr4;
using namespace antlr4::misc;
IntervalSet const IntervalSet::COMPLETE_CHAR_SET = []() {
IntervalSet complete = IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE);
complete.setReadOnly(true);
return complete;
}();
IntervalSet const IntervalSet::COMPLETE_CHAR_SET =
IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE);
IntervalSet const IntervalSet::EMPTY_SET = []() {
IntervalSet empty;
empty.setReadOnly(true);
return empty;
}();
IntervalSet const IntervalSet::EMPTY_SET;
IntervalSet::IntervalSet() {
InitializeInstanceFields();
IntervalSet::IntervalSet() : _intervals() {
}
IntervalSet::IntervalSet(const std::vector<Interval> &intervals) : IntervalSet() {
_intervals = intervals;
IntervalSet::IntervalSet(const IntervalSet &set) : _intervals(set._intervals) {
}
IntervalSet::IntervalSet(const IntervalSet &set) : IntervalSet() {
addAll(set);
IntervalSet::IntervalSet(IntervalSet&& set) : _intervals(std::move(set._intervals)) {
}
IntervalSet::IntervalSet(int n, ...) : IntervalSet() {
va_list vlist;
va_start(vlist, n);
for (int i = 0; i < n; i++) {
add(va_arg(vlist, int));
}
IntervalSet::IntervalSet(std::vector<Interval>&& intervals) : _intervals(std::move(intervals)) {
}
IntervalSet::~IntervalSet()
{
IntervalSet& IntervalSet::operator=(const IntervalSet& other) {
_intervals = other._intervals;
return *this;
}
IntervalSet& IntervalSet::operator=(const IntervalSet& other)
{
if (_readonly) {
throw IllegalStateException("can't alter read only IntervalSet");
}
_intervals.clear();
return addAll(other);
IntervalSet& IntervalSet::operator=(IntervalSet&& other) {
_intervals = move(other._intervals);
return *this;
}
IntervalSet IntervalSet::of(ssize_t a) {
@ -70,16 +49,10 @@ IntervalSet IntervalSet::of(ssize_t a, ssize_t b) {
}
void IntervalSet::clear() {
if (_readonly) {
throw IllegalStateException("can't alter read only IntervalSet");
}
_intervals.clear();
}
void IntervalSet::add(ssize_t el) {
if (_readonly) {
throw IllegalStateException("can't alter read only IntervalSet");
}
add(el, el);
}
@ -88,10 +61,6 @@ void IntervalSet::add(ssize_t a, ssize_t b) {
}
void IntervalSet::add(const Interval &addition) {
if (_readonly) {
throw IllegalStateException("can't alter read only IntervalSet");
}
if (addition.b < addition.a) {
return;
}
@ -150,7 +119,7 @@ IntervalSet IntervalSet::Or(const std::vector<IntervalSet> &sets) {
IntervalSet& IntervalSet::addAll(const IntervalSet &set) {
// walk set and add each interval
for (auto &interval : set._intervals) {
for (auto const& interval : set._intervals) {
add(interval);
}
return *this;
@ -339,7 +308,7 @@ ssize_t IntervalSet::getMinElement() const {
return _intervals[0].a;
}
std::vector<Interval> IntervalSet::getIntervals() const {
std::vector<Interval> const& IntervalSet::getIntervals() const {
return _intervals;
}
@ -516,10 +485,6 @@ void IntervalSet::remove(size_t el) {
}
void IntervalSet::remove(ssize_t el) {
if (_readonly) {
throw IllegalStateException("can't alter read only IntervalSet");
}
for (size_t i = 0; i < _intervals.size(); ++i) {
Interval &interval = _intervals[i];
ssize_t a = interval.a;
@ -553,17 +518,3 @@ void IntervalSet::remove(ssize_t el) {
}
}
}
bool IntervalSet::isReadOnly() const {
return _readonly;
}
void IntervalSet::setReadOnly(bool readonly) {
if (_readonly && !readonly)
throw IllegalStateException("Can't alter readonly IntervalSet");
_readonly = readonly;
}
void IntervalSet::InitializeInstanceFields() {
_readonly = false;
}

View File

@ -6,7 +6,7 @@
#pragma once
#include "misc/Interval.h"
#include <atomic>
#include "Exceptions.h"
namespace antlr4 {
namespace misc {
@ -28,20 +28,27 @@ namespace misc {
static IntervalSet const COMPLETE_CHAR_SET;
static IntervalSet const EMPTY_SET;
protected:
private:
/// The list of sorted, disjoint intervals.
std::vector<Interval> _intervals;
std::atomic<bool> _readonly;
explicit IntervalSet(std::vector<Interval>&& intervals);
public:
IntervalSet();
IntervalSet(const std::vector<Interval> &intervals);
IntervalSet(const IntervalSet &set);
IntervalSet(int numArgs, ...);
IntervalSet(IntervalSet const& set);
IntervalSet(IntervalSet&& set);
virtual ~IntervalSet();
template<typename T1, typename... T_NEXT>
IntervalSet(int, T1 t1, T_NEXT&&... next) : IntervalSet()
{
// The first int argument is an ignored count for compatibility
// with the previous varargs based interface.
addItems(t1, std::forward<T_NEXT>(next)...);
}
IntervalSet& operator=(const IntervalSet &set);
IntervalSet& operator=(IntervalSet const& set);
IntervalSet& operator=(IntervalSet&& set);
/// Create a set with a single element, el.
static IntervalSet of(ssize_t a);
@ -49,11 +56,11 @@ namespace misc {
/// Create a set with all ints within range [a..b] (inclusive)
static IntervalSet of(ssize_t a, ssize_t b);
virtual void clear();
void clear();
/// Add a single element to the set. An isolated element is stored
/// as a range el..el.
virtual void add(ssize_t el);
void add(ssize_t el);
/// Add interval; i.e., add all integers from a to b to set.
/// If b<a, do nothing.
@ -61,30 +68,36 @@ namespace misc {
/// If overlap, combine ranges. For example,
/// If this is {1..5, 10..20}, adding 6..7 yields
/// {1..5, 6..7, 10..20}. Adding 4..8 yields {1..8, 10..20}.
virtual void add(ssize_t a, ssize_t b);
void add(ssize_t a, ssize_t b);
public:
/// combine all sets in the array returned the or'd value
static IntervalSet Or(const std::vector<IntervalSet> &sets);
// Copy on write so we can cache a..a intervals and sets of that.
virtual void add(const Interval &addition);
virtual IntervalSet& addAll(const IntervalSet &set);
void add(const Interval &addition);
IntervalSet& addAll(const IntervalSet &set);
virtual IntervalSet complement(ssize_t minElement, ssize_t maxElement) const;
template<typename T1, typename... T_NEXT>
void addItems(T1 t1, T_NEXT&&... next)
{
add(t1);
addItems(std::forward<T_NEXT>(next)...);
}
IntervalSet complement(ssize_t minElement, ssize_t maxElement) const;
/// Given the set of possible values (rather than, say UNICODE or MAXINT),
/// return a new set containing all elements in vocabulary, but not in
/// this. The computation is (vocabulary - this).
///
/// 'this' is assumed to be either a subset or equal to vocabulary.
virtual IntervalSet complement(const IntervalSet &vocabulary) const;
IntervalSet complement(const IntervalSet &vocabulary) const;
/// Compute this-other via this&~other.
/// Return a new set containing all elements in this but not in other.
/// other is assumed to be a subset of this;
/// anything that is in other but not in this will be ignored.
virtual IntervalSet subtract(const IntervalSet &other) const;
IntervalSet subtract(const IntervalSet &other) const;
/**
* Compute the set difference between two interval sets. The specific
@ -93,23 +106,23 @@ namespace misc {
*/
static IntervalSet subtract(const IntervalSet &left, const IntervalSet &right);
virtual IntervalSet Or(const IntervalSet &a) const;
IntervalSet Or(const IntervalSet &a) const;
/// Return a new set with the intersection of this set with other. Because
/// the intervals are sorted, we can use an iterator for each list and
/// just walk them together. This is roughly O(min(n,m)) for interval
/// list lengths n and m.
virtual IntervalSet And(const IntervalSet &other) const;
IntervalSet And(const IntervalSet &other) const;
/// Is el in any range of this set?
virtual bool contains(size_t el) const; // For mapping of e.g. Token::EOF to -1 etc.
virtual bool contains(ssize_t el) const;
bool contains(size_t el) const; // For mapping of e.g. Token::EOF to -1 etc.
bool contains(ssize_t el) const;
/// return true if this set has no members
virtual bool isEmpty() const;
bool isEmpty() const;
/// If this set is a single integer, return it otherwise Token.INVALID_TYPE.
virtual ssize_t getSingleElement() const;
ssize_t getSingleElement() const;
/**
* Returns the maximum value contained in the set.
@ -117,7 +130,7 @@ namespace misc {
* @return the maximum value contained in the set. If the set is empty, this
* method returns {@link Token#INVALID_TYPE}.
*/
virtual ssize_t getMaxElement() const;
ssize_t getMaxElement() const;
/**
* Returns the minimum value contained in the set.
@ -125,50 +138,48 @@ namespace misc {
* @return the minimum value contained in the set. If the set is empty, this
* method returns {@link Token#INVALID_TYPE}.
*/
virtual ssize_t getMinElement() const;
ssize_t getMinElement() const;
/// <summary>
/// Return a list of Interval objects. </summary>
virtual std::vector<Interval> getIntervals() const;
std::vector<Interval> const& getIntervals() const;
virtual size_t hashCode() const;
size_t hashCode() const;
/// Are two IntervalSets equal? Because all intervals are sorted
/// and disjoint, equals is a simple linear walk over both lists
/// to make sure they are the same.
bool operator == (const IntervalSet &other) const;
virtual std::string toString() const;
virtual std::string toString(bool elemAreChar) const;
std::string toString() const;
std::string toString(bool elemAreChar) const;
/**
* @deprecated Use {@link #toString(Vocabulary)} instead.
*/
virtual std::string toString(const std::vector<std::string> &tokenNames) const;
virtual std::string toString(const dfa::Vocabulary &vocabulary) const;
std::string toString(const std::vector<std::string> &tokenNames) const;
std::string toString(const dfa::Vocabulary &vocabulary) const;
protected:
/**
* @deprecated Use {@link #elementName(Vocabulary, int)} instead.
*/
virtual std::string elementName(const std::vector<std::string> &tokenNames, ssize_t a) const;
virtual std::string elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const;
std::string elementName(const std::vector<std::string> &tokenNames, ssize_t a) const;
std::string elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const;
public:
virtual size_t size() const;
virtual std::vector<ssize_t> toList() const;
virtual std::set<ssize_t> toSet() const;
size_t size() const;
std::vector<ssize_t> toList() const;
std::set<ssize_t> toSet() const;
/// Get the ith element of ordered set. Used only by RandomPhrase so
/// don't bother to implement if you're not doing that for a new
/// ANTLR code gen target.
virtual ssize_t get(size_t i) const;
virtual void remove(size_t el); // For mapping of e.g. Token::EOF to -1 etc.
virtual void remove(ssize_t el);
virtual bool isReadOnly() const;
virtual void setReadOnly(bool readonly);
ssize_t get(size_t i) const;
void remove(size_t el); // For mapping of e.g. Token::EOF to -1 etc.
void remove(ssize_t el);
private:
void InitializeInstanceFields();
void addItems() { /* No-op */ }
};
} // namespace atn