From d7f5e1834bfcee98661889cf70a5eb6824e09c1e Mon Sep 17 00:00:00 2001 From: Jan Martin Mikkelsen Date: Tue, 27 Jun 2017 11:04:50 +1000 Subject: [PATCH] Rework C++ Interval, IntervalSet, ATN and ATNState - Remove the readonly status from IntervalSet. - Remove virtual functions from IntervalSet and Interval. These are passed by value throughout the C++ runtime; meaningful inheritance is not possible anyway. - Moving the atomic flag into ATNState as a "now cached" flag. - Return a const reference from ATN::nextStates(ATNState*) so the readonly status is enforced by the compiler not at runtime in the code. - Use value semantics using std::move to reduce the number of copies performed, constent with how these classes are used in the C++ runtime source. - Remove type-unsafe varargs constructor in IntervalSet, replace with type-safe varadic templates implementation. --- runtime/Cpp/runtime/src/atn/ATN.cpp | 14 +-- runtime/Cpp/runtime/src/atn/ATN.h | 2 +- runtime/Cpp/runtime/src/atn/ATNState.h | 17 +++- runtime/Cpp/runtime/src/misc/Interval.cpp | 2 - runtime/Cpp/runtime/src/misc/Interval.h | 31 +++---- runtime/Cpp/runtime/src/misc/IntervalSet.cpp | 79 ++++------------ runtime/Cpp/runtime/src/misc/IntervalSet.h | 95 +++++++++++--------- 7 files changed, 104 insertions(+), 136 deletions(-) diff --git a/runtime/Cpp/runtime/src/atn/ATN.cpp b/runtime/Cpp/runtime/src/atn/ATN.cpp index 98c0675a7..d9811bc44 100755 --- a/runtime/Cpp/runtime/src/atn/ATN.cpp +++ b/runtime/Cpp/runtime/src/atn/ATN.cpp @@ -87,12 +87,12 @@ misc::IntervalSet ATN::nextTokens(ATNState *s, RuleContext *ctx) const { } -misc::IntervalSet& ATN::nextTokens(ATNState *s) const { - if (!s->nextTokenWithinRule.isReadOnly()) { +misc::IntervalSet const& ATN::nextTokens(ATNState *s) const { + if (!s->nextTokenUpdated) { std::unique_lock lock { _mutex }; - if (!s->nextTokenWithinRule.isReadOnly()) { + if (!s->nextTokenUpdated) { s->nextTokenWithinRule = nextTokens(s, nullptr); - s->nextTokenWithinRule.setReadOnly(true); + s->nextTokenUpdated = true; } } return s->nextTokenWithinRule; @@ -101,7 +101,7 @@ misc::IntervalSet& ATN::nextTokens(ATNState *s) const { void ATN::addState(ATNState *state) { if (state != nullptr) { //state->atn = this; - state->stateNumber = (int)states.size(); + state->stateNumber = static_cast(states.size()); } states.push_back(state); @@ -114,7 +114,7 @@ void ATN::removeState(ATNState *state) { int ATN::defineDecisionState(DecisionState *s) { decisionToState.push_back(s); - s->decision = (int)decisionToState.size() - 1; + s->decision = static_cast(decisionToState.size() - 1); return s->decision; } @@ -154,7 +154,7 @@ misc::IntervalSet ATN::getExpectedTokens(size_t stateNumber, RuleContext *contex if (ctx->parent == nullptr) { break; } - ctx = (RuleContext *)ctx->parent; + ctx = static_cast(ctx->parent); } if (following.contains(Token::EPSILON)) { diff --git a/runtime/Cpp/runtime/src/atn/ATN.h b/runtime/Cpp/runtime/src/atn/ATN.h index 053dfdc1c..9c40cee30 100755 --- a/runtime/Cpp/runtime/src/atn/ATN.h +++ b/runtime/Cpp/runtime/src/atn/ATN.h @@ -70,7 +70,7 @@ namespace atn { /// staying in same rule. is in set if we reach end of /// rule. /// - virtual misc::IntervalSet& nextTokens(ATNState *s) const; + virtual misc::IntervalSet const& nextTokens(ATNState *s) const; virtual void addState(ATNState *state); diff --git a/runtime/Cpp/runtime/src/atn/ATNState.h b/runtime/Cpp/runtime/src/atn/ATNState.h index a6035b4c6..fc4d5a89e 100755 --- a/runtime/Cpp/runtime/src/atn/ATNState.h +++ b/runtime/Cpp/runtime/src/atn/ATNState.h @@ -6,6 +6,7 @@ #pragma once #include "misc/IntervalSet.h" +#include namespace antlr4 { namespace atn { @@ -70,12 +71,17 @@ namespace atn { /// /// /// + class ANTLR4CPP_PUBLIC ATN; + class ANTLR4CPP_PUBLIC ATNState { public: ATNState(); + ATNState(ATNState const&) = delete; virtual ~ATNState(); + ATNState& operator=(ATNState const&) = delete; + static const size_t INITIAL_NUM_TRANSITIONS = 4; static const size_t INVALID_STATE_NUMBER = std::numeric_limits::max(); @@ -102,9 +108,6 @@ namespace atn { bool epsilonOnlyTransitions = false; public: - /// Used to cache lookahead during parsing, not used during construction. - misc::IntervalSet nextTokenWithinRule; - virtual size_t hashCode(); bool operator == (const ATNState &other); @@ -117,6 +120,14 @@ namespace atn { virtual void addTransition(size_t index, Transition *e); virtual Transition* removeTransition(size_t index); virtual size_t getStateType() = 0; + + private: + /// Used to cache lookahead during parsing, not used during construction. + + misc::IntervalSet nextTokenWithinRule; + std::atomic nextTokenUpdated { false }; + + friend class ATN; }; } // namespace atn diff --git a/runtime/Cpp/runtime/src/misc/Interval.cpp b/runtime/Cpp/runtime/src/misc/Interval.cpp index 325b8621f..a29c31a18 100755 --- a/runtime/Cpp/runtime/src/misc/Interval.cpp +++ b/runtime/Cpp/runtime/src/misc/Interval.cpp @@ -7,8 +7,6 @@ using namespace antlr4::misc; -Interval::~Interval() = default; - size_t antlr4::misc::numericToSymbol(ssize_t v) { return (size_t)v; } diff --git a/runtime/Cpp/runtime/src/misc/Interval.h b/runtime/Cpp/runtime/src/misc/Interval.h index 19a17cac9..47500ed0f 100755 --- a/runtime/Cpp/runtime/src/misc/Interval.h +++ b/runtime/Cpp/runtime/src/misc/Interval.h @@ -26,59 +26,56 @@ namespace misc { ssize_t b; Interval(); - explicit Interval(size_t a_, size_t b_); // For unsigned -> signed mappings. + Interval(size_t a_, size_t b_); // For unsigned -> signed mappings. Interval(ssize_t a_, ssize_t b_); - Interval(Interval const&) = default; - virtual ~Interval(); - Interval& operator=(Interval const&) = default; /// return number of elements between a and b inclusively. x..x is length 1. /// if b < a, then length is 0. 9..10 has length 2. - virtual size_t length() const; + size_t length() const; bool operator == (const Interval &other) const; - virtual size_t hashCode() const; + size_t hashCode() const; /// /// Does this start completely before other? Disjoint - virtual bool startsBeforeDisjoint(const Interval &other) const; + bool startsBeforeDisjoint(const Interval &other) const; /// /// Does this start at or before other? Nondisjoint - virtual bool startsBeforeNonDisjoint(const Interval &other) const; + bool startsBeforeNonDisjoint(const Interval &other) const; /// /// Does this.a start after other.b? May or may not be disjoint - virtual bool startsAfter(const Interval &other) const; + bool startsAfter(const Interval &other) const; /// /// Does this start completely after other? Disjoint - virtual bool startsAfterDisjoint(const Interval &other) const; + bool startsAfterDisjoint(const Interval &other) const; /// /// Does this start after other? NonDisjoint - virtual bool startsAfterNonDisjoint(const Interval &other) const; + bool startsAfterNonDisjoint(const Interval &other) const; /// /// Are both ranges disjoint? I.e., no overlap? - virtual bool disjoint(const Interval &other) const; + bool disjoint(const Interval &other) const; /// /// Are two intervals adjacent such as 0..41 and 42..42? - virtual bool adjacent(const Interval &other) const; + bool adjacent(const Interval &other) const; - virtual bool properlyContains(const Interval &other) const; + bool properlyContains(const Interval &other) const; /// /// Return the interval computed from combining this and other - virtual Interval Union(const Interval &other) const; + Interval Union(const Interval &other) const; /// /// Return the interval in common between this and o - virtual Interval intersection(const Interval &other) const; + Interval intersection(const Interval &other) const; - virtual std::string toString() const; + std::string toString() const; private: }; diff --git a/runtime/Cpp/runtime/src/misc/IntervalSet.cpp b/runtime/Cpp/runtime/src/misc/IntervalSet.cpp index ab4d240cc..804767945 100755 --- a/runtime/Cpp/runtime/src/misc/IntervalSet.cpp +++ b/runtime/Cpp/runtime/src/misc/IntervalSet.cpp @@ -13,52 +13,31 @@ using namespace antlr4; using namespace antlr4::misc; -IntervalSet const IntervalSet::COMPLETE_CHAR_SET = []() { - IntervalSet complete = IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE); - complete.setReadOnly(true); - return complete; -}(); +IntervalSet const IntervalSet::COMPLETE_CHAR_SET = + IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE); -IntervalSet const IntervalSet::EMPTY_SET = []() { - IntervalSet empty; - empty.setReadOnly(true); - return empty; -}(); +IntervalSet const IntervalSet::EMPTY_SET; -IntervalSet::IntervalSet() { - InitializeInstanceFields(); +IntervalSet::IntervalSet() : _intervals() { } -IntervalSet::IntervalSet(const std::vector &intervals) : IntervalSet() { - _intervals = intervals; +IntervalSet::IntervalSet(const IntervalSet &set) : _intervals(set._intervals) { } -IntervalSet::IntervalSet(const IntervalSet &set) : IntervalSet() { - addAll(set); +IntervalSet::IntervalSet(IntervalSet&& set) : _intervals(std::move(set._intervals)) { } -IntervalSet::IntervalSet(int n, ...) : IntervalSet() { - va_list vlist; - va_start(vlist, n); - - for (int i = 0; i < n; i++) { - add(va_arg(vlist, int)); - } +IntervalSet::IntervalSet(std::vector&& intervals) : _intervals(std::move(intervals)) { } -IntervalSet::~IntervalSet() -{ +IntervalSet& IntervalSet::operator=(const IntervalSet& other) { + _intervals = other._intervals; + return *this; } -IntervalSet& IntervalSet::operator=(const IntervalSet& other) -{ - if (_readonly) { - throw IllegalStateException("can't alter read only IntervalSet"); - } - - _intervals.clear(); - - return addAll(other); +IntervalSet& IntervalSet::operator=(IntervalSet&& other) { + _intervals = move(other._intervals); + return *this; } IntervalSet IntervalSet::of(ssize_t a) { @@ -70,16 +49,10 @@ IntervalSet IntervalSet::of(ssize_t a, ssize_t b) { } void IntervalSet::clear() { - if (_readonly) { - throw IllegalStateException("can't alter read only IntervalSet"); - } _intervals.clear(); } void IntervalSet::add(ssize_t el) { - if (_readonly) { - throw IllegalStateException("can't alter read only IntervalSet"); - } add(el, el); } @@ -88,10 +61,6 @@ void IntervalSet::add(ssize_t a, ssize_t b) { } void IntervalSet::add(const Interval &addition) { - if (_readonly) { - throw IllegalStateException("can't alter read only IntervalSet"); - } - if (addition.b < addition.a) { return; } @@ -150,7 +119,7 @@ IntervalSet IntervalSet::Or(const std::vector &sets) { IntervalSet& IntervalSet::addAll(const IntervalSet &set) { // walk set and add each interval - for (auto &interval : set._intervals) { + for (auto const& interval : set._intervals) { add(interval); } return *this; @@ -339,7 +308,7 @@ ssize_t IntervalSet::getMinElement() const { return _intervals[0].a; } -std::vector IntervalSet::getIntervals() const { +std::vector const& IntervalSet::getIntervals() const { return _intervals; } @@ -516,10 +485,6 @@ void IntervalSet::remove(size_t el) { } void IntervalSet::remove(ssize_t el) { - if (_readonly) { - throw IllegalStateException("can't alter read only IntervalSet"); - } - for (size_t i = 0; i < _intervals.size(); ++i) { Interval &interval = _intervals[i]; ssize_t a = interval.a; @@ -553,17 +518,3 @@ void IntervalSet::remove(ssize_t el) { } } } - -bool IntervalSet::isReadOnly() const { - return _readonly; -} - -void IntervalSet::setReadOnly(bool readonly) { - if (_readonly && !readonly) - throw IllegalStateException("Can't alter readonly IntervalSet"); - _readonly = readonly; -} - -void IntervalSet::InitializeInstanceFields() { - _readonly = false; -} diff --git a/runtime/Cpp/runtime/src/misc/IntervalSet.h b/runtime/Cpp/runtime/src/misc/IntervalSet.h index c9d38756d..af09e3465 100755 --- a/runtime/Cpp/runtime/src/misc/IntervalSet.h +++ b/runtime/Cpp/runtime/src/misc/IntervalSet.h @@ -6,7 +6,7 @@ #pragma once #include "misc/Interval.h" -#include +#include "Exceptions.h" namespace antlr4 { namespace misc { @@ -28,20 +28,27 @@ namespace misc { static IntervalSet const COMPLETE_CHAR_SET; static IntervalSet const EMPTY_SET; - protected: + private: /// The list of sorted, disjoint intervals. std::vector _intervals; - std::atomic _readonly; + + explicit IntervalSet(std::vector&& intervals); public: IntervalSet(); - IntervalSet(const std::vector &intervals); - IntervalSet(const IntervalSet &set); - IntervalSet(int numArgs, ...); + IntervalSet(IntervalSet const& set); + IntervalSet(IntervalSet&& set); - virtual ~IntervalSet(); + template + IntervalSet(int, T1 t1, T_NEXT&&... next) : IntervalSet() + { + // The first int argument is an ignored count for compatibility + // with the previous varargs based interface. + addItems(t1, std::forward(next)...); + } - IntervalSet& operator=(const IntervalSet &set); + IntervalSet& operator=(IntervalSet const& set); + IntervalSet& operator=(IntervalSet&& set); /// Create a set with a single element, el. static IntervalSet of(ssize_t a); @@ -49,11 +56,11 @@ namespace misc { /// Create a set with all ints within range [a..b] (inclusive) static IntervalSet of(ssize_t a, ssize_t b); - virtual void clear(); + void clear(); /// Add a single element to the set. An isolated element is stored /// as a range el..el. - virtual void add(ssize_t el); + void add(ssize_t el); /// Add interval; i.e., add all integers from a to b to set. /// If b &sets); // Copy on write so we can cache a..a intervals and sets of that. - virtual void add(const Interval &addition); - virtual IntervalSet& addAll(const IntervalSet &set); + void add(const Interval &addition); + IntervalSet& addAll(const IntervalSet &set); - virtual IntervalSet complement(ssize_t minElement, ssize_t maxElement) const; + template + void addItems(T1 t1, T_NEXT&&... next) + { + add(t1); + addItems(std::forward(next)...); + } + + IntervalSet complement(ssize_t minElement, ssize_t maxElement) const; /// Given the set of possible values (rather than, say UNICODE or MAXINT), /// return a new set containing all elements in vocabulary, but not in /// this. The computation is (vocabulary - this). /// /// 'this' is assumed to be either a subset or equal to vocabulary. - virtual IntervalSet complement(const IntervalSet &vocabulary) const; + IntervalSet complement(const IntervalSet &vocabulary) const; /// Compute this-other via this&~other. /// Return a new set containing all elements in this but not in other. /// other is assumed to be a subset of this; /// anything that is in other but not in this will be ignored. - virtual IntervalSet subtract(const IntervalSet &other) const; + IntervalSet subtract(const IntervalSet &other) const; /** * Compute the set difference between two interval sets. The specific @@ -93,23 +106,23 @@ namespace misc { */ static IntervalSet subtract(const IntervalSet &left, const IntervalSet &right); - virtual IntervalSet Or(const IntervalSet &a) const; + IntervalSet Or(const IntervalSet &a) const; /// Return a new set with the intersection of this set with other. Because /// the intervals are sorted, we can use an iterator for each list and /// just walk them together. This is roughly O(min(n,m)) for interval /// list lengths n and m. - virtual IntervalSet And(const IntervalSet &other) const; + IntervalSet And(const IntervalSet &other) const; /// Is el in any range of this set? - virtual bool contains(size_t el) const; // For mapping of e.g. Token::EOF to -1 etc. - virtual bool contains(ssize_t el) const; + bool contains(size_t el) const; // For mapping of e.g. Token::EOF to -1 etc. + bool contains(ssize_t el) const; /// return true if this set has no members - virtual bool isEmpty() const; + bool isEmpty() const; /// If this set is a single integer, return it otherwise Token.INVALID_TYPE. - virtual ssize_t getSingleElement() const; + ssize_t getSingleElement() const; /** * Returns the maximum value contained in the set. @@ -117,7 +130,7 @@ namespace misc { * @return the maximum value contained in the set. If the set is empty, this * method returns {@link Token#INVALID_TYPE}. */ - virtual ssize_t getMaxElement() const; + ssize_t getMaxElement() const; /** * Returns the minimum value contained in the set. @@ -125,50 +138,48 @@ namespace misc { * @return the minimum value contained in the set. If the set is empty, this * method returns {@link Token#INVALID_TYPE}. */ - virtual ssize_t getMinElement() const; + ssize_t getMinElement() const; /// /// Return a list of Interval objects. - virtual std::vector getIntervals() const; + std::vector const& getIntervals() const; - virtual size_t hashCode() const; + size_t hashCode() const; /// Are two IntervalSets equal? Because all intervals are sorted /// and disjoint, equals is a simple linear walk over both lists /// to make sure they are the same. bool operator == (const IntervalSet &other) const; - virtual std::string toString() const; - virtual std::string toString(bool elemAreChar) const; + std::string toString() const; + std::string toString(bool elemAreChar) const; /** * @deprecated Use {@link #toString(Vocabulary)} instead. */ - virtual std::string toString(const std::vector &tokenNames) const; - virtual std::string toString(const dfa::Vocabulary &vocabulary) const; + std::string toString(const std::vector &tokenNames) const; + std::string toString(const dfa::Vocabulary &vocabulary) const; protected: /** * @deprecated Use {@link #elementName(Vocabulary, int)} instead. */ - virtual std::string elementName(const std::vector &tokenNames, ssize_t a) const; - virtual std::string elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const; + std::string elementName(const std::vector &tokenNames, ssize_t a) const; + std::string elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const; public: - virtual size_t size() const; - virtual std::vector toList() const; - virtual std::set toSet() const; + size_t size() const; + std::vector toList() const; + std::set toSet() const; /// Get the ith element of ordered set. Used only by RandomPhrase so /// don't bother to implement if you're not doing that for a new /// ANTLR code gen target. - virtual ssize_t get(size_t i) const; - virtual void remove(size_t el); // For mapping of e.g. Token::EOF to -1 etc. - virtual void remove(ssize_t el); - virtual bool isReadOnly() const; - virtual void setReadOnly(bool readonly); + ssize_t get(size_t i) const; + void remove(size_t el); // For mapping of e.g. Token::EOF to -1 etc. + void remove(ssize_t el); private: - void InitializeInstanceFields(); + void addItems() { /* No-op */ } }; } // namespace atn