forked from jasder/antlr
Fix the utf8 convertor in VS2017
This commit is contained in:
parent
b404abb11e
commit
de6f04be0b
|
@ -144,3 +144,4 @@ YYYY/MM/DD, github id, Full name, email
|
||||||
2017/03/15, robertvanderhulst, Robert van der Hulst, robert@xsharp.eu
|
2017/03/15, robertvanderhulst, Robert van der Hulst, robert@xsharp.eu
|
||||||
2017/03/28, cmd-johnson, Jonas Auer, jonas.auer.94@gmail.com
|
2017/03/28, cmd-johnson, Jonas Auer, jonas.auer.94@gmail.com
|
||||||
2017/04/12, lys0716, Yishuang Lu, luyscmu@gmail.com
|
2017/04/12, lys0716, Yishuang Lu, luyscmu@gmail.com
|
||||||
|
2017/04/30, shravanrn, Shravan Narayan, shravanrn@gmail.com
|
||||||
|
|
|
@ -35,9 +35,9 @@ void ANTLRInputStream::load(const std::string &input) {
|
||||||
// Remove the UTF-8 BOM if present.
|
// Remove the UTF-8 BOM if present.
|
||||||
const char bom[4] = "\xef\xbb\xbf";
|
const char bom[4] = "\xef\xbb\xbf";
|
||||||
if (input.compare(0, 3, bom, 3) == 0)
|
if (input.compare(0, 3, bom, 3) == 0)
|
||||||
_data = antlrcpp::utfConverter.from_bytes(input.data() + 3, input.data() + input.size());
|
_data = antlrcpp::utf8_to_utf32(input.data() + 3, input.data() + input.size());
|
||||||
else
|
else
|
||||||
_data = antlrcpp::utfConverter.from_bytes(input);
|
_data = antlrcpp::utf8_to_utf32(input.data(), input.data() + input.size());
|
||||||
p = 0;
|
p = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -136,7 +136,7 @@ std::string ANTLRInputStream::getText(const Interval &interval) {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
return antlrcpp::utfConverter.to_bytes(_data.substr(start, count));
|
return antlrcpp::utf32_to_utf8(_data.substr(start, count));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string ANTLRInputStream::getSourceName() const {
|
std::string ANTLRInputStream::getSourceName() const {
|
||||||
|
@ -147,7 +147,7 @@ std::string ANTLRInputStream::getSourceName() const {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string ANTLRInputStream::toString() const {
|
std::string ANTLRInputStream::toString() const {
|
||||||
return antlrcpp::utfConverter.to_bytes(_data);
|
return antlrcpp::utf32_to_utf8(_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ANTLRInputStream::InitializeInstanceFields() {
|
void ANTLRInputStream::InitializeInstanceFields() {
|
||||||
|
|
|
@ -195,7 +195,7 @@ std::string UnbufferedCharStream::getText(const misc::Interval &interval) {
|
||||||
}
|
}
|
||||||
// convert from absolute to local index
|
// convert from absolute to local index
|
||||||
size_t i = interval.a - bufferStartIndex;
|
size_t i = interval.a - bufferStartIndex;
|
||||||
return utfConverter.to_bytes(_data.substr(i, interval.length()));
|
return utf32_to_utf8(_data.substr(i, interval.length()));
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t UnbufferedCharStream::getBufferStartIndex() const {
|
size_t UnbufferedCharStream::getBufferStartIndex() const {
|
||||||
|
|
|
@ -9,13 +9,36 @@
|
||||||
|
|
||||||
namespace antlrcpp {
|
namespace antlrcpp {
|
||||||
// For all conversions utf8 <-> utf32.
|
// For all conversions utf8 <-> utf32.
|
||||||
// VS 2015 has a bug in std::codecvt_utf8<char32_t> (VS 2013 works fine).
|
// VS 2015 and VS 2017 have different bugs in std::codecvt_utf8<char32_t> (VS 2013 works fine).
|
||||||
#if defined(_MSC_VER) && _MSC_VER == 1900
|
#if defined(_MSC_VER) && _MSC_VER >= 1900 && _MSC_VER < 2000
|
||||||
static std::wstring_convert<std::codecvt_utf8<__int32>, __int32> utfConverter;
|
static std::wstring_convert<std::codecvt_utf8<__int32>, __int32> utfConverter;
|
||||||
#else
|
#else
|
||||||
static std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> utfConverter;
|
static std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> utfConverter;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// The conversion functions fail in VS2017, so we explicitly use a workaround.
|
||||||
|
template<typename T>
|
||||||
|
inline std::string utf32_to_utf8(T _data)
|
||||||
|
{
|
||||||
|
#if _MSC_VER > 1900 && _MSC_VER < 2000
|
||||||
|
auto p = reinterpret_cast<const int32_t *>(_data.data());
|
||||||
|
return antlrcpp::utfConverter.to_bytes(p, p + _data.size());
|
||||||
|
#else
|
||||||
|
return antlrcpp::utfConverter.to_bytes(_data);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
inline auto utf8_to_utf32(const char* first, const char* last)
|
||||||
|
{
|
||||||
|
#if _MSC_VER > 1900 && _MSC_VER < 2000
|
||||||
|
auto r = antlrcpp::utfConverter.from_bytes(first, last);
|
||||||
|
std::u32string s = reinterpret_cast<const char32_t *>(r.data());
|
||||||
|
return s;
|
||||||
|
#else
|
||||||
|
return antlrcpp::utfConverter.from_bytes(first, last);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
void replaceAll(std::string& str, const std::string& from, const std::string& to);
|
void replaceAll(std::string& str, const std::string& from, const std::string& to);
|
||||||
|
|
||||||
// string <-> wstring conversion (UTF-16), e.g. for use with Windows' wide APIs.
|
// string <-> wstring conversion (UTF-16), e.g. for use with Windows' wide APIs.
|
||||||
|
|
Loading…
Reference in New Issue