From 2dc9fb50bee8f9b2f95a2b3a61b486197a672f85 Mon Sep 17 00:00:00 2001 From: Charles Ruan Date: Mon, 31 Aug 2020 20:02:49 +0800 Subject: [PATCH 1/7] 1. remove wstring_convert and codecvt 2. let clang::fallthrough and throw_with_nested be compatible to gcc 4.9 3. downgrade gcc version from 5 to 4.9 --- contributors.txt | 3 +- runtime/Cpp/CMakeLists.txt | 29 +++++++-------- runtime/Cpp/runtime/CMakeLists.txt | 35 +++++++++++++++--- runtime/Cpp/runtime/src/antlr4-common.h | 1 - runtime/Cpp/runtime/src/support/CPPUtils.cpp | 3 ++ .../Cpp/runtime/src/support/StringUtils.cpp | 10 ++--- runtime/Cpp/runtime/src/support/StringUtils.h | 37 +++++++------------ .../tree/pattern/ParseTreePatternMatcher.cpp | 5 +++ 8 files changed, 72 insertions(+), 51 deletions(-) diff --git a/contributors.txt b/contributors.txt index 48b6449d2..a146f9c02 100644 --- a/contributors.txt +++ b/contributors.txt @@ -255,4 +255,5 @@ YYYY/MM/DD, github id, Full name, email 2020/06/04, IohannRabeson, Iohann Rabeson, iotaka6@gmail.com 2020/07/01, sha-N, Shan M Mathews, admin@bluestarqatar.com 2020/08/22, stevenjohnstone, Steven Johnstone, steven.james.johnstone@gmail.com -2020/09/06, ArthurSonzogni, Sonzogni Arthur, arthursonzogni@gmail.com \ No newline at end of file +2020/09/06, ArthurSonzogni, Sonzogni Arthur, arthursonzogni@gmail.com +2020/09/12, Clcanny, Charles Ruan, a837940593@gmail.com diff --git a/runtime/Cpp/CMakeLists.txt b/runtime/Cpp/CMakeLists.txt index e763f85d4..94a57d5dc 100644 --- a/runtime/Cpp/CMakeLists.txt +++ b/runtime/Cpp/CMakeLists.txt @@ -100,14 +100,13 @@ endif() if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU" OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Intel") execute_process( COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) - # Just g++-5.0 and greater contain header. (test in ubuntu) - if(NOT (GCC_VERSION VERSION_GREATER 5.0 OR GCC_VERSION VERSION_EQUAL 5.0)) - message(FATAL_ERROR "${PROJECT_NAME} requires g++ 5.0 or greater.") + if(NOT (GCC_VERSION VERSION_GREATER 4.9 OR GCC_VERSION VERSION_EQUAL 4.9)) + message(FATAL_ERROR "${PROJECT_NAME} requires g++ 4.9 or greater.") endif () -elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND ANDROID) - # Need -Os cflag and cxxflags here to work with exception handling on armeabi. - # see https://github.com/android-ndk/ndk/issues/573 - # and without -stdlib=libc++ cxxflags +elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND ANDROID) + # Need -Os cflag and cxxflags here to work with exception handling on armeabi. + # see https://github.com/android-ndk/ndk/issues/573 + # and without -stdlib=libc++ cxxflags elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND APPLE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -stdlib=libc++") elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND ( CMAKE_SYSTEM_NAME MATCHES "Linux" OR CMAKE_SYSTEM_NAME MATCHES "FreeBSD") ) @@ -121,7 +120,7 @@ elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND ( CMAKE_SYSTEM_NAME MATCH set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") endif() elseif(MSVC_VERSION GREATER 1800 OR MSVC_VERSION EQUAL 1800) - # Visual Studio 2012+ supports c++11 features + # Visual Studio 2012+ supports c++11 features elseif(CMAKE_SYSTEM_NAME MATCHES "Emscripten") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -stdlib=libc++") else() @@ -153,7 +152,7 @@ if (ANTLR4_INSTALL) set(ANTLR4_LIB_DIR ${CMAKE_INSTALL_LIBDIR} CACHE STRING "Installation directory for libraries, relative to ${CMAKE_INSTALL_PREFIX}.") - + set(ANTLR4_INCLUDE_DIR ${CMAKE_INSTALL_INCLUDEDIR}/antlr4-runtime CACHE STRING "Installation directory for include files, relative to ${CMAKE_INSTALL_PREFIX}.") @@ -161,18 +160,18 @@ if (ANTLR4_INSTALL) cmake/antlr4-runtime.cmake.in ${project_runtime_config} INSTALL_DESTINATION ${ANTLR4_CMAKE_DIR}/antlr4-runtime - PATH_VARS + PATH_VARS ANTLR4_INCLUDE_DIR ANTLR4_LIB_DIR ) - + configure_package_config_file( cmake/antlr4-generator.cmake.in ${project_generator_config} INSTALL_DESTINATION ${ANTLR4_CMAKE_DIR}/antlr4-generator - PATH_VARS + PATH_VARS ANTLR4_INCLUDE_DIR ANTLR4_LIB_DIR ) - + write_basic_package_version_file( ${version_runtime_config} VERSION ${ANTLR_VERSION} @@ -199,12 +198,12 @@ endif(ANTLR4_INSTALL) if(EXISTS LICENSE.txt) install(FILES LICENSE.txt DESTINATION "share/doc/libantlr4") -elseif(EXISTS ../../LICENSE.txt) +elseif(EXISTS ../../LICENSE.txt) install(FILES ../../LICENSE.txt DESTINATION "share/doc/libantlr4") endif() -install(FILES README.md VERSION +install(FILES README.md VERSION DESTINATION "share/doc/libantlr4") set(CPACK_PACKAGE_CONTACT "antlr-discussion@googlegroups.com") diff --git a/runtime/Cpp/runtime/CMakeLists.txt b/runtime/Cpp/runtime/CMakeLists.txt index 2c5e7376f..624606656 100644 --- a/runtime/Cpp/runtime/CMakeLists.txt +++ b/runtime/Cpp/runtime/CMakeLists.txt @@ -1,4 +1,19 @@ +include(${CMAKE_ROOT}/Modules/ExternalProject.cmake) + +set(THIRDPARTY_DIR ${CMAKE_BINARY_DIR}/runtime/thirdparty) +set(UTFCPP_DIR ${THIRDPARTY_DIR}/utfcpp) +ExternalProject_Add( + utfcpp + GIT_REPOSITORY "git://github.com/nemtrif/utfcpp" + GIT_TAG "v3.1.1" + SOURCE_DIR ${UTFCPP_DIR} + UPDATE_DISCONNECTED 1 + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${UTFCPP_DIR}/install + TEST_AFTER_INSTALL 1 + STEP_TARGETS build) + + include_directories( ${PROJECT_SOURCE_DIR}/runtime/src ${PROJECT_SOURCE_DIR}/runtime/src/atn @@ -8,6 +23,8 @@ include_directories( ${PROJECT_SOURCE_DIR}/runtime/src/tree ${PROJECT_SOURCE_DIR}/runtime/src/tree/pattern ${PROJECT_SOURCE_DIR}/runtime/src/tree/xpath + ${UTFCPP_DIR}/install/include/utf8cpp + ${UTFCPP_DIR}/install/include/utf8cpp/utf8 ) @@ -33,8 +50,8 @@ add_custom_target(make_lib_output_dir ALL COMMAND ${CMAKE_COMMAND} -E make_directory ${LIB_OUTPUT_DIR} ) -add_dependencies(antlr4_shared make_lib_output_dir) -add_dependencies(antlr4_static make_lib_output_dir) +add_dependencies(antlr4_shared make_lib_output_dir utfcpp) +add_dependencies(antlr4_static make_lib_output_dir utfcpp) if(CMAKE_SYSTEM_NAME MATCHES "Linux") target_link_libraries(antlr4_shared ${UUID_LIBRARIES}) @@ -102,15 +119,23 @@ set_target_properties(antlr4_static COMPILE_FLAGS "${disabled_compile_warnings} ${extra_static_compile_flags}") install(TARGETS antlr4_shared - DESTINATION lib + DESTINATION lib EXPORT antlr4-targets) install(TARGETS antlr4_static DESTINATION lib EXPORT antlr4-targets) -install(DIRECTORY "${PROJECT_SOURCE_DIR}/runtime/src/" +install(DIRECTORY "${PROJECT_SOURCE_DIR}/runtime/src/" DESTINATION "include/antlr4-runtime" - COMPONENT dev + COMPONENT dev + FILES_MATCHING PATTERN "*.h" + ) + +install(FILES "${UTFCPP_DIR}/source/utf8.h" + DESTINATION "include/antlr4-runtime") +install(DIRECTORY "${UTFCPP_DIR}/source/utf8" + DESTINATION "include/antlr4-runtime" + COMPONENT dev FILES_MATCHING PATTERN "*.h" ) diff --git a/runtime/Cpp/runtime/src/antlr4-common.h b/runtime/Cpp/runtime/src/antlr4-common.h index 25d890b3f..1366e60b6 100644 --- a/runtime/Cpp/runtime/src/antlr4-common.h +++ b/runtime/Cpp/runtime/src/antlr4-common.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/runtime/Cpp/runtime/src/support/CPPUtils.cpp b/runtime/Cpp/runtime/src/support/CPPUtils.cpp index 86a3751a2..f7e9e3672 100755 --- a/runtime/Cpp/runtime/src/support/CPPUtils.cpp +++ b/runtime/Cpp/runtime/src/support/CPPUtils.cpp @@ -51,9 +51,12 @@ namespace antlrcpp { } // else fall through #ifndef _MSC_VER +// GCC 4.9 can't recognize clang::fallthrough. +#if defined(__clang__) || (defined(__GNUC__) && (__GNUC___ >= 5)) #if __has_cpp_attribute(clang::fallthrough) [[clang::fallthrough]]; #endif +#endif #endif default: diff --git a/runtime/Cpp/runtime/src/support/StringUtils.cpp b/runtime/Cpp/runtime/src/support/StringUtils.cpp index 552f1031a..101a9b3f5 100644 --- a/runtime/Cpp/runtime/src/support/StringUtils.cpp +++ b/runtime/Cpp/runtime/src/support/StringUtils.cpp @@ -20,16 +20,16 @@ void replaceAll(std::string& str, std::string const& from, std::string const& to } std::string ws2s(std::wstring const& wstr) { - std::wstring_convert> converter; - std::string narrow = converter.to_bytes(wstr); + std::string narrow; + utf8::utf32to8(wstr.begin(), wstr.end(), std::back_inserter(narrow)); return narrow; } std::wstring s2ws(const std::string &str) { - std::wstring_convert> converter; - std::wstring wide = converter.from_bytes(str); - + std::wstring wide; + utf8::utf8to32(str.begin(), str.end(), std::back_inserter(wide)); + return wide; } diff --git a/runtime/Cpp/runtime/src/support/StringUtils.h b/runtime/Cpp/runtime/src/support/StringUtils.h index af6cb7b64..bb5bb2fc0 100644 --- a/runtime/Cpp/runtime/src/support/StringUtils.h +++ b/runtime/Cpp/runtime/src/support/StringUtils.h @@ -6,44 +6,33 @@ #pragma once #include "antlr4-common.h" +#include "utf8.h" namespace antlrcpp { - // For all conversions utf8 <-> utf32. - // VS 2015 and VS 2017 have different bugs in std::codecvt_utf8 (VS 2013 works fine). -#if defined(_MSC_VER) && _MSC_VER >= 1900 && _MSC_VER < 2000 - typedef std::wstring_convert, __int32> UTF32Converter; -#else - typedef std::wstring_convert, char32_t> UTF32Converter; -#endif + // I wouldn't prefer wstring_convert for two reasons: + // 1. According to https://en.cppreference.com/w/cpp/locale/wstring_convert, + // wstring_convert is deprecated in C++17. + // 2. GCC 4.9 doesn't support codecvt header. And many projects still use + // GCC 4.9 as compiler. + // utfcpp (https://github.com/nemtrif/utfcpp) is a substitution. // The conversion functions fails in VS2017, so we explicitly use a workaround. template inline std::string utf32_to_utf8(T const& data) { - // Don't make the converter static or we have to serialize access to it. - thread_local UTF32Converter converter; + std::string narrow; + utf8::utf32to8(data.begin(), data.end(), std::back_inserter(narrow)); - #if defined(_MSC_VER) && _MSC_VER >= 1900 && _MSC_VER < 2000 - const auto p = reinterpret_cast(data.data()); - return converter.to_bytes(p, p + data.size()); - #else - return converter.to_bytes(data); - #endif + return narrow; } inline UTF32String utf8_to_utf32(const char* first, const char* last) { - thread_local UTF32Converter converter; + UTF32String wide; + utf8::utf8to32(first, last, std::back_inserter(wide)); - #if defined(_MSC_VER) && _MSC_VER >= 1900 && _MSC_VER < 2000 - auto r = converter.from_bytes(first, last); - i32string s = reinterpret_cast(r.data()); - #else - std::u32string s = converter.from_bytes(first, last); - #endif - - return s; + return wide; } void replaceAll(std::string &str, std::string const& from, std::string const& to); diff --git a/runtime/Cpp/runtime/src/tree/pattern/ParseTreePatternMatcher.cpp b/runtime/Cpp/runtime/src/tree/pattern/ParseTreePatternMatcher.cpp index 2e58a9625..1b5fa72b3 100755 --- a/runtime/Cpp/runtime/src/tree/pattern/ParseTreePatternMatcher.cpp +++ b/runtime/Cpp/runtime/src/tree/pattern/ParseTreePatternMatcher.cpp @@ -109,7 +109,12 @@ ParseTreePattern ParseTreePatternMatcher::compile(const std::string &pattern, in throw e; #else } catch (std::exception & /*e*/) { +#if defined(__clang__) || (defined(__GNUC__) && (__GNUC___ >= 5)) std::throw_with_nested((const char*)"Cannot invoke start rule"); // Wrap any other exception. We should however probably use one of the ANTLR exceptions here. +#else + // throw_with_nested doesn't accept const char* as argument in GCC 4.9. + std::throw_with_nested(std::runtime_error("Cannot invoke start rule")); +#endif #endif } From 72d86f91edaf8c27a63537a0f80474014eb43d3f Mon Sep 17 00:00:00 2001 From: Charles Ruan Date: Sat, 12 Sep 2020 01:09:50 +0800 Subject: [PATCH 2/7] 1. Revert change: downgrade gcc version from 5 to 4.9. 2. Add macro USE_UTF8_INSTEAD_OF_CODECVT to judge use codecvt or utf8. --- runtime/Cpp/CMakeLists.txt | 29 +++++----- runtime/Cpp/runtime/src/antlr4-common.h | 4 ++ runtime/Cpp/runtime/src/support/CPPUtils.cpp | 3 - .../Cpp/runtime/src/support/StringUtils.cpp | 10 ++++ runtime/Cpp/runtime/src/support/StringUtils.h | 55 +++++++++++++++---- .../tree/pattern/ParseTreePatternMatcher.cpp | 5 -- 6 files changed, 73 insertions(+), 33 deletions(-) diff --git a/runtime/Cpp/CMakeLists.txt b/runtime/Cpp/CMakeLists.txt index 94a57d5dc..e763f85d4 100644 --- a/runtime/Cpp/CMakeLists.txt +++ b/runtime/Cpp/CMakeLists.txt @@ -100,13 +100,14 @@ endif() if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU" OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Intel") execute_process( COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) - if(NOT (GCC_VERSION VERSION_GREATER 4.9 OR GCC_VERSION VERSION_EQUAL 4.9)) - message(FATAL_ERROR "${PROJECT_NAME} requires g++ 4.9 or greater.") + # Just g++-5.0 and greater contain header. (test in ubuntu) + if(NOT (GCC_VERSION VERSION_GREATER 5.0 OR GCC_VERSION VERSION_EQUAL 5.0)) + message(FATAL_ERROR "${PROJECT_NAME} requires g++ 5.0 or greater.") endif () -elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND ANDROID) - # Need -Os cflag and cxxflags here to work with exception handling on armeabi. - # see https://github.com/android-ndk/ndk/issues/573 - # and without -stdlib=libc++ cxxflags +elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND ANDROID) + # Need -Os cflag and cxxflags here to work with exception handling on armeabi. + # see https://github.com/android-ndk/ndk/issues/573 + # and without -stdlib=libc++ cxxflags elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND APPLE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -stdlib=libc++") elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND ( CMAKE_SYSTEM_NAME MATCHES "Linux" OR CMAKE_SYSTEM_NAME MATCHES "FreeBSD") ) @@ -120,7 +121,7 @@ elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND ( CMAKE_SYSTEM_NAME MATCH set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") endif() elseif(MSVC_VERSION GREATER 1800 OR MSVC_VERSION EQUAL 1800) - # Visual Studio 2012+ supports c++11 features + # Visual Studio 2012+ supports c++11 features elseif(CMAKE_SYSTEM_NAME MATCHES "Emscripten") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -stdlib=libc++") else() @@ -152,7 +153,7 @@ if (ANTLR4_INSTALL) set(ANTLR4_LIB_DIR ${CMAKE_INSTALL_LIBDIR} CACHE STRING "Installation directory for libraries, relative to ${CMAKE_INSTALL_PREFIX}.") - + set(ANTLR4_INCLUDE_DIR ${CMAKE_INSTALL_INCLUDEDIR}/antlr4-runtime CACHE STRING "Installation directory for include files, relative to ${CMAKE_INSTALL_PREFIX}.") @@ -160,18 +161,18 @@ if (ANTLR4_INSTALL) cmake/antlr4-runtime.cmake.in ${project_runtime_config} INSTALL_DESTINATION ${ANTLR4_CMAKE_DIR}/antlr4-runtime - PATH_VARS + PATH_VARS ANTLR4_INCLUDE_DIR ANTLR4_LIB_DIR ) - + configure_package_config_file( cmake/antlr4-generator.cmake.in ${project_generator_config} INSTALL_DESTINATION ${ANTLR4_CMAKE_DIR}/antlr4-generator - PATH_VARS + PATH_VARS ANTLR4_INCLUDE_DIR ANTLR4_LIB_DIR ) - + write_basic_package_version_file( ${version_runtime_config} VERSION ${ANTLR_VERSION} @@ -198,12 +199,12 @@ endif(ANTLR4_INSTALL) if(EXISTS LICENSE.txt) install(FILES LICENSE.txt DESTINATION "share/doc/libantlr4") -elseif(EXISTS ../../LICENSE.txt) +elseif(EXISTS ../../LICENSE.txt) install(FILES ../../LICENSE.txt DESTINATION "share/doc/libantlr4") endif() -install(FILES README.md VERSION +install(FILES README.md VERSION DESTINATION "share/doc/libantlr4") set(CPACK_PACKAGE_CONTACT "antlr-discussion@googlegroups.com") diff --git a/runtime/Cpp/runtime/src/antlr4-common.h b/runtime/Cpp/runtime/src/antlr4-common.h index 1366e60b6..47312978a 100644 --- a/runtime/Cpp/runtime/src/antlr4-common.h +++ b/runtime/Cpp/runtime/src/antlr4-common.h @@ -36,6 +36,10 @@ #include #include +#ifndef USE_UTF8_INSTEAD_OF_CODECVT + #include +#endif + // Defines for the Guid class and other platform dependent stuff. #ifdef _WIN32 #ifdef _MSC_VER diff --git a/runtime/Cpp/runtime/src/support/CPPUtils.cpp b/runtime/Cpp/runtime/src/support/CPPUtils.cpp index f7e9e3672..86a3751a2 100755 --- a/runtime/Cpp/runtime/src/support/CPPUtils.cpp +++ b/runtime/Cpp/runtime/src/support/CPPUtils.cpp @@ -51,12 +51,9 @@ namespace antlrcpp { } // else fall through #ifndef _MSC_VER -// GCC 4.9 can't recognize clang::fallthrough. -#if defined(__clang__) || (defined(__GNUC__) && (__GNUC___ >= 5)) #if __has_cpp_attribute(clang::fallthrough) [[clang::fallthrough]]; #endif -#endif #endif default: diff --git a/runtime/Cpp/runtime/src/support/StringUtils.cpp b/runtime/Cpp/runtime/src/support/StringUtils.cpp index 101a9b3f5..15e7b8368 100644 --- a/runtime/Cpp/runtime/src/support/StringUtils.cpp +++ b/runtime/Cpp/runtime/src/support/StringUtils.cpp @@ -20,15 +20,25 @@ void replaceAll(std::string& str, std::string const& from, std::string const& to } std::string ws2s(std::wstring const& wstr) { +#ifndef USE_UTF8_INSTEAD_OF_CODECVT + std::wstring_convert> converter; + std::string narrow = converter.to_bytes(wstr); +#else std::string narrow; utf8::utf32to8(wstr.begin(), wstr.end(), std::back_inserter(narrow)); +#endif return narrow; } std::wstring s2ws(const std::string &str) { +#ifndef USE_UTF8_INSTEAD_OF_CODECVT + std::wstring_convert> converter; + std::wstring wide = converter.from_bytes(str); +#else std::wstring wide; utf8::utf8to32(str.begin(), str.end(), std::back_inserter(wide)); +#endif return wide; } diff --git a/runtime/Cpp/runtime/src/support/StringUtils.h b/runtime/Cpp/runtime/src/support/StringUtils.h index bb5bb2fc0..acd916a8c 100644 --- a/runtime/Cpp/runtime/src/support/StringUtils.h +++ b/runtime/Cpp/runtime/src/support/StringUtils.h @@ -6,33 +6,66 @@ #pragma once #include "antlr4-common.h" + +#ifdef USE_UTF8_INSTEAD_OF_CODECVT #include "utf8.h" +#endif namespace antlrcpp { - // I wouldn't prefer wstring_convert for two reasons: - // 1. According to https://en.cppreference.com/w/cpp/locale/wstring_convert, - // wstring_convert is deprecated in C++17. - // 2. GCC 4.9 doesn't support codecvt header. And many projects still use - // GCC 4.9 as compiler. + // For all conversions utf8 <-> utf32. + // I wouldn't prefer wstring_convert because: according to + // https://en.cppreference.com/w/cpp/locale/wstring_convert, + // wstring_convert is deprecated in C++17. // utfcpp (https://github.com/nemtrif/utfcpp) is a substitution. +#ifndef USE_UTF8_INSTEAD_OF_CODECVT + // VS 2015 and VS 2017 have different bugs in std::codecvt_utf8 (VS 2013 works fine). + #if defined(_MSC_VER) && _MSC_VER >= 1900 && _MSC_VER < 2000 + typedef std::wstring_convert, __int32> UTF32Converter; + #else + typedef std::wstring_convert, char32_t> UTF32Converter; + #endif +#endif // The conversion functions fails in VS2017, so we explicitly use a workaround. template inline std::string utf32_to_utf8(T const& data) { - std::string narrow; - utf8::utf32to8(data.begin(), data.end(), std::back_inserter(narrow)); + #ifndef USE_UTF8_INSTEAD_OF_CODECVT + // Don't make the converter static or we have to serialize access to it. + thread_local UTF32Converter converter; - return narrow; + #if defined(_MSC_VER) && _MSC_VER >= 1900 && _MSC_VER < 2000 + auto p = reinterpret_cast(data.data()); + return converter.to_bytes(p, p + data.size()); + #else + return converter.to_bytes(data); + #endif + #else + std::string narrow; + utf8::utf32to8(data.begin(), data.end(), std::back_inserter(narrow)); + return narrow; + #endif } inline UTF32String utf8_to_utf32(const char* first, const char* last) { - UTF32String wide; - utf8::utf8to32(first, last, std::back_inserter(wide)); + #ifndef USE_UTF8_INSTEAD_OF_CODECVT + thread_local UTF32Converter converter; - return wide; + #if defined(_MSC_VER) && _MSC_VER >= 1900 && _MSC_VER < 2000 + auto r = converter.from_bytes(first, last); + i32string s = reinterpret_cast(r.data()); + return s; + #else + std::u32string s = converter.from_bytes(first, last); + return s; + #endif + #else + UTF32String wide; + utf8::utf8to32(first, last, std::back_inserter(wide)); + return wide; + #endif } void replaceAll(std::string &str, std::string const& from, std::string const& to); diff --git a/runtime/Cpp/runtime/src/tree/pattern/ParseTreePatternMatcher.cpp b/runtime/Cpp/runtime/src/tree/pattern/ParseTreePatternMatcher.cpp index 1b5fa72b3..2e58a9625 100755 --- a/runtime/Cpp/runtime/src/tree/pattern/ParseTreePatternMatcher.cpp +++ b/runtime/Cpp/runtime/src/tree/pattern/ParseTreePatternMatcher.cpp @@ -109,12 +109,7 @@ ParseTreePattern ParseTreePatternMatcher::compile(const std::string &pattern, in throw e; #else } catch (std::exception & /*e*/) { -#if defined(__clang__) || (defined(__GNUC__) && (__GNUC___ >= 5)) std::throw_with_nested((const char*)"Cannot invoke start rule"); // Wrap any other exception. We should however probably use one of the ANTLR exceptions here. -#else - // throw_with_nested doesn't accept const char* as argument in GCC 4.9. - std::throw_with_nested(std::runtime_error("Cannot invoke start rule")); -#endif #endif } From 2d86b34c4016b012a0a434be96e6be7693db4605 Mon Sep 17 00:00:00 2001 From: Charles Ruan Date: Sat, 12 Sep 2020 17:19:59 +0800 Subject: [PATCH 3/7] Compile with utf8 on macOS. --- runtime/Cpp/deploy-macos.sh | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/runtime/Cpp/deploy-macos.sh b/runtime/Cpp/deploy-macos.sh index 55528470f..cf977652d 100755 --- a/runtime/Cpp/deploy-macos.sh +++ b/runtime/Cpp/deploy-macos.sh @@ -4,9 +4,27 @@ rm -f -R antlr4-runtime build lib 2> /dev/null rm antlr4-cpp-runtime-macos.zip 2> /dev/null +# Get utf8 dependency. +mkdir -p runtime/thirdparty 2> /dev/null +pushd runtime/thirdparty +if [ ! -d utfcpp ] +then + git clone https://github.com/nemtrif/utfcpp.git utfcpp + pushd utfcpp + git checkout tags/v3.1.1 + popd +fi +popd + # Binaries -xcodebuild -project runtime/antlrcpp.xcodeproj -target antlr4 -configuration Release -xcodebuild -project runtime/antlrcpp.xcodeproj -target antlr4_static -configuration Release +xcodebuild -project runtime/antlrcpp.xcodeproj \ + -target antlr4 \ + # GCC_PREPROCESSOR_DEFINITIONS='$GCC_PREPROCESSOR_DEFINITIONS USE_UTF8_INSTEAD_OF_CODECVT' \ + -configuration Release +xcodebuild -project runtime/antlrcpp.xcodeproj \ + -target antlr4_static \ + # GCC_PREPROCESSOR_DEFINITIONS='$GCC_PREPROCESSOR_DEFINITIONS USE_UTF8_INSTEAD_OF_CODECVT' \ + -configuration Release rm -f -R lib mkdir lib mv runtime/build/Release/libantlr4-runtime.a lib/ @@ -17,6 +35,9 @@ rm -f -R antlr4-runtime pushd runtime/src find . -name '*.h' | cpio -pdm ../../antlr4-runtime popd +pushd runtime/thirdparty/utfcpp/source +find . -name '*.h' | cpio -pdm ../../../../antlr4-runtime +popd # Zip up and clean up zip -r antlr4-cpp-runtime-macos.zip antlr4-runtime lib From 4bacd99255bb566b66ee8483717e397d9c6bc6fa Mon Sep 17 00:00:00 2001 From: Charles Ruan Date: Sat, 12 Sep 2020 20:41:02 +0800 Subject: [PATCH 4/7] add prompt for USE_UTF8_INSTEAD_OF_CODECVT macro in CMakeLists.txt. --- runtime/Cpp/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/runtime/Cpp/CMakeLists.txt b/runtime/Cpp/CMakeLists.txt index e763f85d4..27f0742f9 100644 --- a/runtime/Cpp/CMakeLists.txt +++ b/runtime/Cpp/CMakeLists.txt @@ -83,6 +83,9 @@ else() set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -std=c++11") endif() +# Define USE_UTF8_INSTEAD_OF_CODECVT macro. +# add_compile_definitions(USE_UTF8_INSTEAD_OF_CODECVT) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_CXX_WARNING_FLAGS}") if(MSVC_VERSION) set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Od /Zi /MP ${MY_CXX_WARNING_FLAGS}") From 73f157fb91a69cef00388e38bedbedad69c58f2b Mon Sep 17 00:00:00 2001 From: Charles Ruan Date: Sat, 12 Sep 2020 20:51:09 +0800 Subject: [PATCH 5/7] Add utfcpp include paths in project.pbxproj for macOS compilation. --- runtime/Cpp/runtime/antlrcpp.xcodeproj/project.pbxproj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/Cpp/runtime/antlrcpp.xcodeproj/project.pbxproj b/runtime/Cpp/runtime/antlrcpp.xcodeproj/project.pbxproj index a256e089a..277186ce0 100644 --- a/runtime/Cpp/runtime/antlrcpp.xcodeproj/project.pbxproj +++ b/runtime/Cpp/runtime/antlrcpp.xcodeproj/project.pbxproj @@ -2889,7 +2889,7 @@ GCC_WARN_UNUSED_LABEL = YES; GCC_WARN_UNUSED_PARAMETER = YES; GCC_WARN_UNUSED_VARIABLE = YES; - HEADER_SEARCH_PATHS = src/; + HEADER_SEARCH_PATHS = "src/ thirdparty/utfcpp/source/ thirdparty/utfcpp/source/utf8/"; MACOSX_DEPLOYMENT_TARGET = 10.9; ONLY_ACTIVE_ARCH = YES; SDKROOT = macosx; @@ -2945,7 +2945,7 @@ GCC_WARN_UNUSED_LABEL = YES; GCC_WARN_UNUSED_PARAMETER = YES; GCC_WARN_UNUSED_VARIABLE = YES; - HEADER_SEARCH_PATHS = src/; + HEADER_SEARCH_PATHS = "src/ thirdparty/utfcpp/source/ thirdparty/utfcpp/source/utf8/"; MACOSX_DEPLOYMENT_TARGET = 10.9; SDKROOT = macosx; }; From ae0c7a31f83a4c2ee01bbfc1dc32a3500e415936 Mon Sep 17 00:00:00 2001 From: Charles Ruan Date: Sat, 12 Sep 2020 21:29:00 +0800 Subject: [PATCH 6/7] Implement USE_UTF8_INSTEAD_OF_CODECVT macro in CMakeLists.txt in another way. --- runtime/Cpp/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/runtime/Cpp/CMakeLists.txt b/runtime/Cpp/CMakeLists.txt index 27f0742f9..32b34bfee 100644 --- a/runtime/Cpp/CMakeLists.txt +++ b/runtime/Cpp/CMakeLists.txt @@ -71,6 +71,9 @@ else() set(MY_CXX_WARNING_FLAGS " -Wall -pedantic -W") endif() +# Define USE_UTF8_INSTEAD_OF_CODECVT macro. +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_UTF8_INSTEAD_OF_CODECVT") + # Initialize CXXFLAGS. if("${CMAKE_VERSION}" VERSION_GREATER 3.1.0) set(CMAKE_CXX_STANDARD 11) @@ -83,9 +86,6 @@ else() set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -std=c++11") endif() -# Define USE_UTF8_INSTEAD_OF_CODECVT macro. -# add_compile_definitions(USE_UTF8_INSTEAD_OF_CODECVT) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_CXX_WARNING_FLAGS}") if(MSVC_VERSION) set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Od /Zi /MP ${MY_CXX_WARNING_FLAGS}") From 734d18cb35b50ffb4c1b9a2f95efec0c727297a6 Mon Sep 17 00:00:00 2001 From: Charles Ruan Date: Sat, 12 Sep 2020 23:13:52 +0800 Subject: [PATCH 7/7] Add missing const in StringUtils.h --- runtime/Cpp/runtime/src/support/StringUtils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/Cpp/runtime/src/support/StringUtils.h b/runtime/Cpp/runtime/src/support/StringUtils.h index acd916a8c..d00cc52d9 100644 --- a/runtime/Cpp/runtime/src/support/StringUtils.h +++ b/runtime/Cpp/runtime/src/support/StringUtils.h @@ -36,7 +36,7 @@ namespace antlrcpp { thread_local UTF32Converter converter; #if defined(_MSC_VER) && _MSC_VER >= 1900 && _MSC_VER < 2000 - auto p = reinterpret_cast(data.data()); + const auto p = reinterpret_cast(data.data()); return converter.to_bytes(p, p + data.size()); #else return converter.to_bytes(data);