From 3c0dbd83dd23eea2ae1dce8470015c25f25c3921 Mon Sep 17 00:00:00 2001 From: irabeson Date: Thu, 4 Jun 2020 22:29:27 -0400 Subject: [PATCH 1/7] [C++ runtime] Optimizations of Vocabulary::fromTokenNames This commit doesn't change the behavior; it contains a few small improvements: - prevent useless copy when creating the variable tokenName - avoid checking whether tokenName is empty twice - use std::string::empty() instead of creating an empty string to compare with - use std::string::clear() instead of assigning an empty C string to clear a string --- contributors.txt | 3 ++- runtime/Cpp/runtime/src/Vocabulary.cpp | 30 ++++++++++---------------- runtime/Cpp/runtime/src/Vocabulary.h | 7 +++--- 3 files changed, 16 insertions(+), 24 deletions(-) diff --git a/contributors.txt b/contributors.txt index d1c759981..e1dc8347b 100644 --- a/contributors.txt +++ b/contributors.txt @@ -242,4 +242,5 @@ YYYY/MM/DD, github id, Full name, email 2020/02/21, StochasticTinkr, Daniel Pitts, github@coloraura.com 2020/03/17, XsongyangX, Song Yang, songyang1218@gmail.com 2020/04/07, deniskyashif, Denis Kyashif, denis.kyashif@gmail.com -2020/04/30, TristonianJones, Tristan Swadell, tswadell@google.com \ No newline at end of file +2020/04/30, TristonianJones, Tristan Swadell, tswadell@google.com +2020/06/04, IohannRabeson, Iohann Rabeson, iotaka6@gmail.com diff --git a/runtime/Cpp/runtime/src/Vocabulary.cpp b/runtime/Cpp/runtime/src/Vocabulary.cpp index dcfa45e4b..9bbf0b23a 100755 --- a/runtime/Cpp/runtime/src/Vocabulary.cpp +++ b/runtime/Cpp/runtime/src/Vocabulary.cpp @@ -22,8 +22,7 @@ Vocabulary::Vocabulary(const std::vector &literalNames, // See note here on -1 part: https://github.com/antlr/antlr4/pull/1146 } -Vocabulary::~Vocabulary() { -} +Vocabulary::~Vocabulary() = default; Vocabulary Vocabulary::fromTokenNames(const std::vector &tokenNames) { if (tokenNames.empty()) { @@ -34,25 +33,18 @@ Vocabulary Vocabulary::fromTokenNames(const std::vector &tokenNames std::vector symbolicNames = 
tokenNames; std::locale locale; for (size_t i = 0; i < tokenNames.size(); i++) { - std::string tokenName = tokenNames[i]; - if (tokenName == "") { + const std::string& tokenName = tokenNames[i]; + if (tokenName.empty()) { continue; + } else if (tokenName.front() == '\'') { + symbolicNames[i].clear(); + } else if (std::isupper(tokenName.front(), locale)) { + literalNames[i].clear(); + } else { + // wasn't a literal or symbolic name + literalNames[i].clear(); + symbolicNames[i].clear(); } - - if (!tokenName.empty()) { - char firstChar = tokenName[0]; - if (firstChar == '\'') { - symbolicNames[i] = ""; - continue; - } else if (std::isupper(firstChar, locale)) { - literalNames[i] = ""; - continue; - } - } - - // wasn't a literal or symbolic name - literalNames[i] = ""; - symbolicNames[i] = ""; } return Vocabulary(literalNames, symbolicNames, tokenNames); diff --git a/runtime/Cpp/runtime/src/Vocabulary.h b/runtime/Cpp/runtime/src/Vocabulary.h index 7dbf85cd3..f06ce6978 100755 --- a/runtime/Cpp/runtime/src/Vocabulary.h +++ b/runtime/Cpp/runtime/src/Vocabulary.h @@ -14,9 +14,6 @@ namespace dfa { /// interface. class ANTLR4CPP_PUBLIC Vocabulary { public: - Vocabulary(Vocabulary const&) = default; - virtual ~Vocabulary(); - /// Gets an empty instance. /// /// @@ -25,7 +22,9 @@ namespace dfa { /// except . static const Vocabulary EMPTY_VOCABULARY; - Vocabulary() {} + Vocabulary() = default; + Vocabulary(Vocabulary const&) = default; + virtual ~Vocabulary(); /// /// Constructs a new instance of from the specified From 254b144bf2be35996747520d2babc4ee3a0b5b65 Mon Sep 17 00:00:00 2001 From: Nathan Burles Date: Fri, 12 Jun 2020 12:55:42 +0100 Subject: [PATCH 2/7] Add AntlrInputStream(std::string_view) constructor Guarded by the __cplusplus macro so it is only available in C++17 or newer. 
--- runtime/Cpp/runtime/antlr4cpp-vs2019.vcxproj | 8 ++++++++ runtime/Cpp/runtime/src/ANTLRInputStream.cpp | 15 +++++++++++++++ runtime/Cpp/runtime/src/ANTLRInputStream.h | 8 ++++++++ 3 files changed, 31 insertions(+) diff --git a/runtime/Cpp/runtime/antlr4cpp-vs2019.vcxproj b/runtime/Cpp/runtime/antlr4cpp-vs2019.vcxproj index 42a81fc06..54f0aeb14 100644 --- a/runtime/Cpp/runtime/antlr4cpp-vs2019.vcxproj +++ b/runtime/Cpp/runtime/antlr4cpp-vs2019.vcxproj @@ -182,6 +182,7 @@ 4251 true false + /Zc:__cplusplus %(AdditionalOptions) Windows @@ -201,6 +202,7 @@ 4251 true false + /Zc:__cplusplus %(AdditionalOptions) Windows @@ -220,6 +222,7 @@ 4251 true false + /Zc:__cplusplus %(AdditionalOptions) Windows @@ -239,6 +242,7 @@ 4251 true false + /Zc:__cplusplus %(AdditionalOptions) Windows @@ -259,6 +263,7 @@ 4251 true + /Zc:__cplusplus %(AdditionalOptions) Windows @@ -281,6 +286,7 @@ 4251 true + /Zc:__cplusplus %(AdditionalOptions) Windows @@ -303,6 +309,7 @@ 4251 true + /Zc:__cplusplus %(AdditionalOptions) Windows @@ -325,6 +332,7 @@ 4251 true + /Zc:__cplusplus %(AdditionalOptions) Windows diff --git a/runtime/Cpp/runtime/src/ANTLRInputStream.cpp b/runtime/Cpp/runtime/src/ANTLRInputStream.cpp index a5e21618e..2e06c5efc 100755 --- a/runtime/Cpp/runtime/src/ANTLRInputStream.cpp +++ b/runtime/Cpp/runtime/src/ANTLRInputStream.cpp @@ -17,7 +17,11 @@ using namespace antlrcpp; using misc::Interval; +#if __cplusplus >= 201703L +ANTLRInputStream::ANTLRInputStream(std::string_view input) { +#else ANTLRInputStream::ANTLRInputStream(const std::string &input) { +#endif InitializeInstanceFields(); load(input); } @@ -31,6 +35,16 @@ ANTLRInputStream::ANTLRInputStream(std::istream &stream) { load(stream); } +#if __cplusplus >= 201703L +void ANTLRInputStream::load(std::string_view input) { + // Remove the UTF-8 BOM if present. 
+ constexpr std::string_view bom = "\xef\xbb\xbf"; + if (input.compare(0, 3, bom) == 0) + input.remove_prefix(3); + _data = antlrcpp::utf8_to_utf32(input.data(), input.data() + input.size()); + p = 0; +} +#else void ANTLRInputStream::load(const std::string &input) { // Remove the UTF-8 BOM if present. const char bom[4] = "\xef\xbb\xbf"; @@ -40,6 +54,7 @@ void ANTLRInputStream::load(const std::string &input) { _data = antlrcpp::utf8_to_utf32(input.data(), input.data() + input.size()); p = 0; } +#endif void ANTLRInputStream::load(std::istream &stream) { if (!stream.good() || stream.eof()) // No fail, bad or EOF. diff --git a/runtime/Cpp/runtime/src/ANTLRInputStream.h b/runtime/Cpp/runtime/src/ANTLRInputStream.h index e9850504d..7b575df2e 100755 --- a/runtime/Cpp/runtime/src/ANTLRInputStream.h +++ b/runtime/Cpp/runtime/src/ANTLRInputStream.h @@ -25,11 +25,19 @@ namespace antlr4 { /// What is name or source of this char stream? std::string name; +#if __cplusplus >= 201703L + ANTLRInputStream(std::string_view input = ""); +#else ANTLRInputStream(const std::string &input = ""); +#endif ANTLRInputStream(const char data_[], size_t numberOfActualCharsInArray); ANTLRInputStream(std::istream &stream); +#if __cplusplus >= 201703L + virtual void load(std::string_view input); +#else virtual void load(const std::string &input); +#endif virtual void load(std::istream &stream); /// Reset the stream so that it's in the same state it was From 09eb905332c3abece6ade0d9a51b7cc3cbeac536 Mon Sep 17 00:00:00 2001 From: Nathan Burles Date: Mon, 22 Jun 2020 10:07:01 +0100 Subject: [PATCH 3/7] Remove use of the u8"" string literal prefix Prior to C++20, the type of a "UTF-8 encoded string literal" is char const * - i.e. it is exactly the same as a non-prefixed string literal (they are semantically different, but technically the same). Since C++20, the type of a UTF-8 encoded string literal is changed to char8_t const * - which is not convertible to char const *. 
Even in C++20, there is no actual change to how characters are stored; only the type is changed (they are now semantically different, and supposed to be technically different - but nothing in the language actually uses the semantics). In short, removing the u8"" prefix has no effect prior to C++20, and simply allows compilation to succeed from C++20 onward. --- .../org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg index 4c367dda5..7cd5f2e63 100644 --- a/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg @@ -192,23 +192,23 @@ atn::ATN ::_atn; std::vector\ ::_serializedATN; std::vector\ ::_ruleNames = { - "}; separator = ", ", wrap, anchor> + "}; separator = ", ", wrap, anchor> }; std::vector\ ::_channelNames = { - "DEFAULT_TOKEN_CHANNEL", "HIDDEN", "}; separator = ", ", wrap, anchor> + "DEFAULT_TOKEN_CHANNEL", "HIDDEN", "}; separator = ", ", wrap, anchor> }; std::vector\ ::_modeNames = { - "}; separator = ", ", wrap, anchor> + "}; separator = ", ", wrap, anchor> }; std::vector\ ::_literalNames = { - }; null = "\"\"", separator = ", ", wrap, anchor> + }; null = "\"\"", separator = ", ", wrap, anchor> }; std::vector\ ::_symbolicNames = { - }; null = "\"\"", separator = ", ", wrap, anchor> + }; null = "\"\"", separator = ", ", wrap, anchor> }; dfa::Vocabulary ::_vocabulary(_literalNames, _symbolicNames); From 1692ed2a17321d63cef5778b592542f793699c7d Mon Sep 17 00:00:00 2001 From: Nathan Burles Date: Tue, 16 Jun 2020 13:47:38 +0100 Subject: [PATCH 4/7] Add explicit keyword to constructors To avoid unexpected implicit casts. 
--- .../resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg index 4c367dda5..a86f94eaf 100644 --- a/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg @@ -63,7 +63,7 @@ public: }; - (antlr4::CharStream *input); + explicit (antlr4::CharStream *input); ~(); @@ -290,7 +290,7 @@ public: }; - (antlr4::TokenStream *input); + explicit (antlr4::TokenStream *input); ~(); virtual std::string getGrammarFileName() const override; From 8b706e24272b6adc5f65b1fa5fd909715d66ed41 Mon Sep 17 00:00:00 2001 From: Nathan Burles Date: Wed, 15 Jul 2020 14:32:21 +0100 Subject: [PATCH 5/7] Add default cases to AltBlock and OptionalBlock All other switch statements have a default case (either "break" or ""), but these are missing. Does not change functionality, simply fixes a warning - and allows a project to build with -Werror. 
--- .../resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg index 4c367dda5..7159276f3 100644 --- a/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg @@ -710,6 +710,8 @@ switch (getInterpreter\()->adaptivePredict(_input, +default: + break; } >> @@ -724,6 +726,8 @@ switch (getInterpreter\()->adaptivePredict(_input, +default: + break; } >> From 8270f329ad2e683deb0bc9928ed6cc5d6587c94d Mon Sep 17 00:00:00 2001 From: Stevie Johnstone Date: Sat, 22 Aug 2020 17:48:15 +0100 Subject: [PATCH 6/7] Fix C++ target README example tree::ParseTreeWalker::DEFAULT is not a pointer --- contributors.txt | 3 ++- doc/cpp-target.md | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/contributors.txt b/contributors.txt index 5a341d3c2..23fd7d4f7 100644 --- a/contributors.txt +++ b/contributors.txt @@ -250,4 +250,5 @@ YYYY/MM/DD, github id, Full name, email 2020/04/30, TristonianJones, Tristan Swadell, tswadell@google.com 2020/05/25, graknol, Sindre van der Linden, graknol@gmail.com 2020/05/31, d-markey, David Markey, dmarkey@free.fr -2020/07/01, sha-N, Shan M Mathews, admin@bluestarqatar.com \ No newline at end of file +2020/07/01, sha-N, Shan M Mathews, admin@bluestarqatar.com +2020/08/22, stevenjohnstone, Steven Johnstone, steven.james.johnstone@gmail.com diff --git a/doc/cpp-target.md b/doc/cpp-target.md index eec7cf88b..1fc8e8684 100644 --- a/doc/cpp-target.md +++ b/doc/cpp-target.md @@ -65,7 +65,7 @@ int main(int argc, const char* argv[]) { tree::ParseTree *tree = parser.key(); TreeShapeListener listener; - tree::ParseTreeWalker::DEFAULT->walk(&listener, tree); + tree::ParseTreeWalker::DEFAULT.walk(&listener, tree); return 0; } From f68c47a9886c5180a85ef354e62f03f45e174fbd Mon Sep 17 00:00:00 2001 
From: ArthurSonzogni Date: Sun, 6 Sep 2020 15:30:54 +0200 Subject: [PATCH 7/7] Fix clang compile error with --stdlib=libc++ On linux. Most users haven't installed libc++-dev, but ANTLR is passing the "--stdlib=libc++" argument. As a result, this won't compile. Users will see: ``` /tmp/antlr4/runtime/Cpp/runtime/src/antlr4-common.h:8:10: fatal error: 'algorithm' file not found ``` This is caused by the "WITH_LIBCXX" option. It was introduced by: https://github.com/antlr/antlr4/commit/d46ef90aa03066300752895d57fc57a1c5aa8ee6 It causes the option "--stdlib=libc++" to be appended by default. I believe its default value should have been left as "Off". With "off" by default, clang will use its default C++ library, which is always available. The WITH_LIBCXX option is kept, being able to change the C++ library might be useful? BUG=https://github.com/antlr/antlr4/issues/2898 --- contributors.txt | 3 ++- runtime/Cpp/CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/contributors.txt b/contributors.txt index 5a341d3c2..1026b2e53 100644 --- a/contributors.txt +++ b/contributors.txt @@ -250,4 +250,5 @@ YYYY/MM/DD, github id, Full name, email 2020/04/30, TristonianJones, Tristan Swadell, tswadell@google.com 2020/05/25, graknol, Sindre van der Linden, graknol@gmail.com 2020/05/31, d-markey, David Markey, dmarkey@free.fr -2020/07/01, sha-N, Shan M Mathews, admin@bluestarqatar.com \ No newline at end of file +2020/07/01, sha-N, Shan M Mathews, admin@bluestarqatar.com +2020/09/06, ArthurSonzogni, Sonzogni Arthur, arthursonzogni@gmail.com diff --git a/runtime/Cpp/CMakeLists.txt b/runtime/Cpp/CMakeLists.txt index 9610d7b9b..e763f85d4 100644 --- a/runtime/Cpp/CMakeLists.txt +++ b/runtime/Cpp/CMakeLists.txt @@ -19,7 +19,7 @@ if(NOT WITH_DEMO) FORCE) endif(NOT WITH_DEMO) -option(WITH_LIBCXX "Building with clang++ and libc++(in Linux). To enable with: -DWITH_LIBCXX=On" On) +option(WITH_LIBCXX "Building with clang++ and libc++(in Linux). 
To enable with: -DWITH_LIBCXX=On" Off) option(WITH_STATIC_CRT "(Visual C++) Enable to statically link CRT, which avoids requiring users to install the redistribution package. To disable with: -DWITH_STATIC_CRT=Off" On)