From de6f04be0beebd17ea6232f554635c5262127aaa Mon Sep 17 00:00:00 2001 From: "shravanrn@gmail.com" Date: Sun, 30 Apr 2017 16:40:58 -0700 Subject: [PATCH] Fix the utf8 convertor in VS2017 --- contributors.txt | 1 + runtime/Cpp/runtime/src/ANTLRInputStream.cpp | 8 +++--- .../Cpp/runtime/src/UnbufferedCharStream.cpp | 2 +- runtime/Cpp/runtime/src/support/StringUtils.h | 27 +++++++++++++++++-- 4 files changed, 31 insertions(+), 7 deletions(-) diff --git a/contributors.txt b/contributors.txt index 88e5140c4..9713008b5 100644 --- a/contributors.txt +++ b/contributors.txt @@ -144,3 +144,4 @@ YYYY/MM/DD, github id, Full name, email 2017/03/15, robertvanderhulst, Robert van der Hulst, robert@xsharp.eu 2017/03/28, cmd-johnson, Jonas Auer, jonas.auer.94@gmail.com 2017/04/12, lys0716, Yishuang Lu, luyscmu@gmail.com +2017/04/30, shravanrn, Shravan Narayan, shravanrn@gmail.com diff --git a/runtime/Cpp/runtime/src/ANTLRInputStream.cpp b/runtime/Cpp/runtime/src/ANTLRInputStream.cpp index df9cb29e6..714ea3ab1 100755 --- a/runtime/Cpp/runtime/src/ANTLRInputStream.cpp +++ b/runtime/Cpp/runtime/src/ANTLRInputStream.cpp @@ -35,9 +35,9 @@ void ANTLRInputStream::load(const std::string &input) { // Remove the UTF-8 BOM if present. const char bom[4] = "\xef\xbb\xbf"; if (input.compare(0, 3, bom, 3) == 0) - _data = antlrcpp::utfConverter.from_bytes(input.data() + 3, input.data() + input.size()); + _data = antlrcpp::utf8_to_utf32(input.data() + 3, input.data() + input.size()); else - _data = antlrcpp::utfConverter.from_bytes(input); + _data = antlrcpp::utf8_to_utf32(input.data(), input.data() + input.size()); p = 0; } @@ -136,7 +136,7 @@ std::string ANTLRInputStream::getText(const Interval &interval) { return ""; } - return antlrcpp::utfConverter.to_bytes(_data.substr(start, count)); + return antlrcpp::utf32_to_utf8(_data.substr(start, count)); } std::string ANTLRInputStream::getSourceName() const { @@ -147,7 +147,7 @@ std::string ANTLRInputStream::getSourceName() const { } std::string ANTLRInputStream::toString() const { - return antlrcpp::utfConverter.to_bytes(_data); + return antlrcpp::utf32_to_utf8(_data); } void ANTLRInputStream::InitializeInstanceFields() { diff --git a/runtime/Cpp/runtime/src/UnbufferedCharStream.cpp b/runtime/Cpp/runtime/src/UnbufferedCharStream.cpp index 5c2c9e9d2..6a9152b50 100755 --- a/runtime/Cpp/runtime/src/UnbufferedCharStream.cpp +++ b/runtime/Cpp/runtime/src/UnbufferedCharStream.cpp @@ -195,7 +195,7 @@ std::string UnbufferedCharStream::getText(const misc::Interval &interval) { } // convert from absolute to local index size_t i = interval.a - bufferStartIndex; - return utfConverter.to_bytes(_data.substr(i, interval.length())); + return utf32_to_utf8(_data.substr(i, interval.length())); } size_t UnbufferedCharStream::getBufferStartIndex() const { diff --git a/runtime/Cpp/runtime/src/support/StringUtils.h b/runtime/Cpp/runtime/src/support/StringUtils.h index 16c7ee646..cd4d81c1d 100644 --- a/runtime/Cpp/runtime/src/support/StringUtils.h +++ b/runtime/Cpp/runtime/src/support/StringUtils.h @@ -9,13 +9,36 @@ namespace antlrcpp { // For all conversions utf8 <-> utf32. - // VS 2015 has a bug in std::codecvt_utf8 (VS 2013 works fine). -#if defined(_MSC_VER) && _MSC_VER == 1900 + // VS 2015 and VS 2017 have different bugs in std::codecvt_utf8 (VS 2013 works fine). +#if defined(_MSC_VER) && _MSC_VER >= 1900 && _MSC_VER < 2000 static std::wstring_convert, __int32> utfConverter; #else static std::wstring_convert, char32_t> utfConverter; #endif + //the conversion functions fails in VS2017, so we explicitly use a workaround + template + inline std::string utf32_to_utf8(T _data) + { + #if _MSC_VER > 1900 && _MSC_VER < 2000 + auto p = reinterpret_cast(_data.data()); + return antlrcpp::utfConverter.to_bytes(p, p + _data.size()); + #else + return antlrcpp::utfConverter.to_bytes(_data); + #endif + } + + inline auto utf8_to_utf32(const char* first, const char* last) + { + #if _MSC_VER > 1900 && _MSC_VER < 2000 + auto r = antlrcpp::utfConverter.from_bytes(first, last); + std::u32string s = reinterpret_cast(r.data()); + return s; + #else + return antlrcpp::utfConverter.from_bytes(first, last); + #endif + } + void replaceAll(std::string& str, const std::string& from, const std::string& to); // string <-> wstring conversion (UTF-16), e.g. for use with Window's wide APIs.