Fixed and simplified loading of the text to parse from a file.

- The previous approach of loading and converting UTF-8 data via a stream didn't work well, so I replaced it with a simpler two-step approach: load the file into a buffer, then convert that buffer from UTF-8 to UTF-32.
- Removed the deleted Token.cpp file from the Xcode project.
This commit is contained in:
Mike Lischke 2016-06-21 16:23:28 +02:00
parent 8c62740f16
commit a2f5cf12fd
6 changed files with 17 additions and 31 deletions

View File

@ -657,9 +657,6 @@
276E5FC81CDB57AA003FF4B4 /* StringUtils.h in Headers */ = {isa = PBXBuildFile; fileRef = 276E5CEE1CDB57AA003FF4B4 /* StringUtils.h */; };
276E5FC91CDB57AA003FF4B4 /* StringUtils.h in Headers */ = {isa = PBXBuildFile; fileRef = 276E5CEE1CDB57AA003FF4B4 /* StringUtils.h */; };
276E5FCA1CDB57AA003FF4B4 /* StringUtils.h in Headers */ = {isa = PBXBuildFile; fileRef = 276E5CEE1CDB57AA003FF4B4 /* StringUtils.h */; settings = {ATTRIBUTES = (Public, ); }; };
276E5FCB1CDB57AA003FF4B4 /* Token.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 276E5CEF1CDB57AA003FF4B4 /* Token.cpp */; };
276E5FCC1CDB57AA003FF4B4 /* Token.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 276E5CEF1CDB57AA003FF4B4 /* Token.cpp */; };
276E5FCD1CDB57AA003FF4B4 /* Token.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 276E5CEF1CDB57AA003FF4B4 /* Token.cpp */; };
276E5FCE1CDB57AA003FF4B4 /* Token.h in Headers */ = {isa = PBXBuildFile; fileRef = 276E5CF01CDB57AA003FF4B4 /* Token.h */; };
276E5FCF1CDB57AA003FF4B4 /* Token.h in Headers */ = {isa = PBXBuildFile; fileRef = 276E5CF01CDB57AA003FF4B4 /* Token.h */; };
276E5FD01CDB57AA003FF4B4 /* Token.h in Headers */ = {isa = PBXBuildFile; fileRef = 276E5CF01CDB57AA003FF4B4 /* Token.h */; settings = {ATTRIBUTES = (Public, ); }; };
@ -1107,7 +1104,6 @@
276E5CEC1CDB57AA003FF4B4 /* guid.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = guid.h; sourceTree = "<group>"; };
276E5CED1CDB57AA003FF4B4 /* StringUtils.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = StringUtils.cpp; sourceTree = "<group>"; };
276E5CEE1CDB57AA003FF4B4 /* StringUtils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = StringUtils.h; sourceTree = "<group>"; };
276E5CEF1CDB57AA003FF4B4 /* Token.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Token.cpp; sourceTree = "<group>"; wrapsLines = 0; };
276E5CF01CDB57AA003FF4B4 /* Token.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Token.h; sourceTree = "<group>"; };
276E5CF21CDB57AA003FF4B4 /* TokenFactory.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TokenFactory.h; sourceTree = "<group>"; };
276E5CF41CDB57AA003FF4B4 /* TokenSource.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TokenSource.h; sourceTree = "<group>"; };
@ -1299,7 +1295,6 @@
27745EFA1CE49C000067C6A3 /* RuleContextWithAltNum.h */,
27745EFB1CE49C000067C6A3 /* RuntimeMetaData.cpp */,
27745EFC1CE49C000067C6A3 /* RuntimeMetaData.h */,
276E5CEF1CDB57AA003FF4B4 /* Token.cpp */,
276E5CF01CDB57AA003FF4B4 /* Token.h */,
276E5CF21CDB57AA003FF4B4 /* TokenFactory.h */,
276E5CF41CDB57AA003FF4B4 /* TokenSource.h */,
@ -2286,7 +2281,6 @@
276E5E8C1CDB57AA003FF4B4 /* RuleStartState.cpp in Sources */,
276E5EA41CDB57AA003FF4B4 /* SetTransition.cpp in Sources */,
276E5D841CDB57AA003FF4B4 /* ATNState.cpp in Sources */,
276E5FCD1CDB57AA003FF4B4 /* Token.cpp in Sources */,
276E60241CDB57AA003FF4B4 /* RuleTagToken.cpp in Sources */,
276E5E501CDB57AA003FF4B4 /* ParserATNSimulator.cpp in Sources */,
276E602A1CDB57AA003FF4B4 /* TagChunk.cpp in Sources */,
@ -2426,7 +2420,6 @@
276E5E8B1CDB57AA003FF4B4 /* RuleStartState.cpp in Sources */,
276E5EA31CDB57AA003FF4B4 /* SetTransition.cpp in Sources */,
276E5D831CDB57AA003FF4B4 /* ATNState.cpp in Sources */,
276E5FCC1CDB57AA003FF4B4 /* Token.cpp in Sources */,
276E60231CDB57AA003FF4B4 /* RuleTagToken.cpp in Sources */,
276E5E4F1CDB57AA003FF4B4 /* ParserATNSimulator.cpp in Sources */,
276E60291CDB57AA003FF4B4 /* TagChunk.cpp in Sources */,
@ -2566,7 +2559,6 @@
276E5E8A1CDB57AA003FF4B4 /* RuleStartState.cpp in Sources */,
276E5EA21CDB57AA003FF4B4 /* SetTransition.cpp in Sources */,
276E5D821CDB57AA003FF4B4 /* ATNState.cpp in Sources */,
276E5FCB1CDB57AA003FF4B4 /* Token.cpp in Sources */,
276E60221CDB57AA003FF4B4 /* RuleTagToken.cpp in Sources */,
276E5E4E1CDB57AA003FF4B4 /* ParserATNSimulator.cpp in Sources */,
276E60281CDB57AA003FF4B4 /* TagChunk.cpp in Sources */,

View File

@ -29,6 +29,8 @@
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "StringUtils.h"
#include "ANTLRFileStream.h"
using namespace antlr4;
@ -44,7 +46,12 @@ void ANTLRFileStream::loadFromFile(const std::string &fileName) {
return;
}
std::wifstream stream(fileName, std::ios::binary);
#ifdef _WIN32
std::ifstream stream(antlrcpp::s2ws(fileName), std::ios::binary);
#else
std::ifstream stream(fileName, std::ios::binary);
#endif
ANTLRInputStream::load(stream);
}

View File

@ -52,7 +52,7 @@ ANTLRInputStream::ANTLRInputStream(const char data_[], size_t numberOfActualChar
: ANTLRInputStream(std::string(data_, numberOfActualCharsInArray)) {
}
ANTLRInputStream::ANTLRInputStream(std::wistream &stream) {
// Constructs the input stream by reading its content from the given byte stream.
ANTLRInputStream::ANTLRInputStream(std::istream &stream) {
  // load() is declared virtual; inside the constructor the call resolves to
  // this class's own implementation, which the qualified name makes explicit.
  ANTLRInputStream::load(stream);
}
@ -61,29 +61,15 @@ void ANTLRInputStream::load(const std::string &input) {
p = 0;
}
void ANTLRInputStream::load(std::wistream &stream) {
void ANTLRInputStream::load(std::istream &stream) {
if (!stream.good() || stream.eof()) // No fail, bad or EOF.
return;
_data.clear();
p = 0;
std::streampos startPosition = stream.tellg();
stream.seekg(0, std::ios::end);
_data.reserve(size_t(stream.tellg() - startPosition));
stream.seekg(startPosition, std::ios::beg);
#if defined(_MSC_VER) && _MSC_VER == 1900
stream.imbue(std::locale(stream.getloc(), new std::codecvt_utf8<__int32>));
#else
stream.imbue(std::locale(stream.getloc(), new std::codecvt_utf8<char32_t>));
#endif
wchar_t c = 0xFFFE;
stream >> std::noskipws >> c;
if (c != 0xFFFE) // Ignore BOM if theres one.
_data += c;
for ( ; stream >> c; )
_data += c;
std::string s((std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>());
_data = antlrcpp::utfConverter.from_bytes(s);
}
void ANTLRInputStream::reset() {

View File

@ -57,10 +57,10 @@ namespace antlr4 {
ANTLRInputStream(const std::string &input = "");
ANTLRInputStream(const char data_[], size_t numberOfActualCharsInArray);
ANTLRInputStream(std::wistream &stream);
ANTLRInputStream(std::istream &stream);
virtual void load(const std::string &input);
virtual void load(std::wistream &stream);
virtual void load(std::istream &stream);
/// Reset the stream so that it's in the same state it was
/// when the object was created *except* the data array is not

View File

@ -44,14 +44,14 @@ void replaceAll(std::string& str, const std::string& from, const std::string& to
}
}
static std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
// Converts a wide string into a narrow (byte) string using
// std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>::to_bytes.
std::string ws2s(const std::wstring &wstr) {
  // A single converter instance is created on first use and reused afterwards.
  static std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> wideToNarrow;
  return wideToNarrow.to_bytes(wstr);
}
// Converts a narrow (byte) string into a wide string using
// std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>::from_bytes.
std::wstring s2ws(const std::string &str) {
  // A single converter instance is created on first use and reused afterwards.
  static std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> narrowToWide;
  return narrowToWide.from_bytes(str);
}

View File

@ -43,6 +43,7 @@ namespace antlrcpp {
void replaceAll(std::string& str, const std::string& from, const std::string& to);
// string <-> wstring conversion (UTF-16), e.g. for use with Windows wide-character APIs.
ANTLR4CPP_PUBLIC std::string ws2s(const std::wstring &wstr);
ANTLR4CPP_PUBLIC std::wstring s2ws(const std::string &str);
}