Fix the utf8 convertor in VS2017

This commit is contained in:
shravanrn@gmail.com 2017-04-30 16:40:58 -07:00
parent b404abb11e
commit de6f04be0b
4 changed files with 31 additions and 7 deletions

View File

@ -144,3 +144,4 @@ YYYY/MM/DD, github id, Full name, email
2017/03/15, robertvanderhulst, Robert van der Hulst, robert@xsharp.eu
2017/03/28, cmd-johnson, Jonas Auer, jonas.auer.94@gmail.com
2017/04/12, lys0716, Yishuang Lu, luyscmu@gmail.com
2017/04/30, shravanrn, Shravan Narayan, shravanrn@gmail.com

View File

@ -35,9 +35,9 @@ void ANTLRInputStream::load(const std::string &input) {
// Remove the UTF-8 BOM if present.
const char bom[4] = "\xef\xbb\xbf";
if (input.compare(0, 3, bom, 3) == 0)
_data = antlrcpp::utfConverter.from_bytes(input.data() + 3, input.data() + input.size());
_data = antlrcpp::utf8_to_utf32(input.data() + 3, input.data() + input.size());
else
_data = antlrcpp::utfConverter.from_bytes(input);
_data = antlrcpp::utf8_to_utf32(input.data(), input.data() + input.size());
p = 0;
}
@ -136,7 +136,7 @@ std::string ANTLRInputStream::getText(const Interval &interval) {
return "";
}
return antlrcpp::utfConverter.to_bytes(_data.substr(start, count));
return antlrcpp::utf32_to_utf8(_data.substr(start, count));
}
std::string ANTLRInputStream::getSourceName() const {
@ -147,7 +147,7 @@ std::string ANTLRInputStream::getSourceName() const {
}
std::string ANTLRInputStream::toString() const {
return antlrcpp::utfConverter.to_bytes(_data);
return antlrcpp::utf32_to_utf8(_data);
}
void ANTLRInputStream::InitializeInstanceFields() {

View File

@ -195,7 +195,7 @@ std::string UnbufferedCharStream::getText(const misc::Interval &interval) {
}
// convert from absolute to local index
size_t i = interval.a - bufferStartIndex;
return utfConverter.to_bytes(_data.substr(i, interval.length()));
return utf32_to_utf8(_data.substr(i, interval.length()));
}
size_t UnbufferedCharStream::getBufferStartIndex() const {

View File

@ -9,13 +9,36 @@
namespace antlrcpp {
// For all conversions utf8 <-> utf32.
// VS 2015 has a bug in std::codecvt_utf8<char32_t> (VS 2013 works fine).
#if defined(_MSC_VER) && _MSC_VER == 1900
// VS 2015 and VS 2017 have different bugs in std::codecvt_utf8<char32_t> (VS 2013 works fine).
#if defined(_MSC_VER) && _MSC_VER >= 1900 && _MSC_VER < 2000
static std::wstring_convert<std::codecvt_utf8<__int32>, __int32> utfConverter;
#else
static std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> utfConverter;
#endif
// The std::codecvt_utf8 based conversion fails in VS 2017, so that compiler goes through an explicit workaround.
//
// Converts UTF-32 text to a UTF-8 encoded std::string using antlrcpp::utfConverter.
// _data: the UTF-32 text. On the VS 2017 path it must expose data() and size();
//        on every other path it is handed to utfConverter.to_bytes unchanged.
// Taken by const reference so that callers do not pay for a copy of the whole string.
template<typename T>
inline std::string utf32_to_utf8(const T &_data)
{
#if defined(_MSC_VER) && _MSC_VER > 1900 && _MSC_VER < 2000
  // Feed the code points to the converter as an explicit [begin, end) range of 32-bit integers.
  const auto *begin = reinterpret_cast<const int32_t *>(_data.data());
  return antlrcpp::utfConverter.to_bytes(begin, begin + _data.size());
#else
  return antlrcpp::utfConverter.to_bytes(_data);
#endif
}
// Converts the UTF-8 bytes in [first, last) to UTF-32 using antlrcpp::utfConverter.
// first/last: begin and one-past-the-end of the UTF-8 input.
// Returns a std::u32string on the VS 2017 path, otherwise the result of utfConverter.from_bytes.
inline auto utf8_to_utf32(const char* first, const char* last)
{
#if defined(_MSC_VER) && _MSC_VER > 1900 && _MSC_VER < 2000
  const auto converted = antlrcpp::utfConverter.from_bytes(first, last);
  // Copy with an explicit length: the pointer-only constructor stops at the first U+0000
  // and would silently truncate input that contains embedded NUL code points.
  return std::u32string(reinterpret_cast<const char32_t *>(converted.data()), converted.size());
#else
  return antlrcpp::utfConverter.from_bytes(first, last);
#endif
}
void replaceAll(std::string& str, const std::string& from, const std::string& to);
// string <-> wstring conversion (UTF-16), e.g. for use with Windows wide APIs.