Lowercase Architecture and ErrorValues (#487)

ErrorValues -> error_code, Architecture -> architecture
This commit is contained in:
John Keiser 2020-02-14 15:21:28 -08:00 committed by GitHub
parent 083569fca8
commit bc8bc7d1a8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
33 changed files with 232 additions and 214 deletions

View File

@ -70,10 +70,10 @@ LIBHEADERS_HASWELL= src/haswell/bitmanipulation.h src/haswell/bitmask.h src/h
LIBHEADERS_WESTMERE=src/westmere/bitmanipulation.h src/westmere/bitmask.h src/westmere/intrinsics.h src/westmere/numberparsing.h src/westmere/simd.h src/westmere/stage1_find_marks.h src/westmere/stage2_build_tape.h src/westmere/stringparsing.h LIBHEADERS_WESTMERE=src/westmere/bitmanipulation.h src/westmere/bitmask.h src/westmere/intrinsics.h src/westmere/numberparsing.h src/westmere/simd.h src/westmere/stage1_find_marks.h src/westmere/stage2_build_tape.h src/westmere/stringparsing.h
LIBHEADERS=src/jsoncharutils.h src/simdprune_tables.h $(LIBHEADERS_GENERIC) $(LIBHEADERS_ARM64) $(LIBHEADERS_HASWELL) $(LIBHEADERS_WESTMERE) LIBHEADERS=src/jsoncharutils.h src/simdprune_tables.h $(LIBHEADERS_GENERIC) $(LIBHEADERS_ARM64) $(LIBHEADERS_HASWELL) $(LIBHEADERS_WESTMERE)
PUBHEADERS=include/simdjson/common_defs.h include/simdjson/isadetection.h include/simdjson/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/padded_string.h include/simdjson/document.h include/simdjson/document/iterator.h include/simdjson/document/parser.h include/simdjson/parsedjson.h include/simdjson/jsonstream.h include/simdjson/portability.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_build_tape.h PUBHEADERS=include/simdjson/common_defs.h include/simdjson/isadetection.h include/simdjson/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/padded_string.h include/simdjson/document.h include/simdjson/document/iterator.h include/simdjson/document/parser.h include/simdjson/parsedjson.h include/simdjson/jsonstream.h include/simdjson/portability.h include/simdjson/architecture.h include/simdjson/error.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_build_tape.h
HEADERS=$(PUBHEADERS) $(LIBHEADERS) HEADERS=$(PUBHEADERS) $(LIBHEADERS)
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/document.cpp src/document/parser.cpp LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/document.cpp src/document/parser.cpp
MINIFIERHEADERS=include/simdjson/jsonminifier.h MINIFIERHEADERS=include/simdjson/jsonminifier.h
MINIFIERLIBFILES=src/jsonminifier.cpp MINIFIERLIBFILES=src/jsonminifier.cpp
@ -205,7 +205,7 @@ basictests:tests/basictests.cpp $(HEADERS) $(LIBFILES)
numberparsingcheck:tests/numberparsingcheck.cpp $(HEADERS) $(LIBFILES) numberparsingcheck:tests/numberparsingcheck.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o numberparsingcheck src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/document.cpp src/document/parser.cpp tests/numberparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_NUMBERS $(CXX) $(CXXFLAGS) -o numberparsingcheck src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/document.cpp src/document/parser.cpp tests/numberparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_NUMBERS
integer_tests:tests/integer_tests.cpp $(HEADERS) $(LIBFILES) integer_tests:tests/integer_tests.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o integer_tests $(LIBFILES) tests/integer_tests.cpp -I. $(LIBFLAGS) $(CXX) $(CXXFLAGS) -o integer_tests $(LIBFILES) tests/integer_tests.cpp -I. $(LIBFLAGS)
@ -213,7 +213,7 @@ integer_tests:tests/integer_tests.cpp $(HEADERS) $(LIBFILES)
stringparsingcheck:tests/stringparsingcheck.cpp $(HEADERS) $(LIBFILES) stringparsingcheck:tests/stringparsingcheck.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o stringparsingcheck src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/document.cpp src/document/parser.cpp tests/stringparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_STRINGS $(CXX) $(CXXFLAGS) -o stringparsingcheck src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/document.cpp src/document/parser.cpp tests/stringparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_STRINGS
pointercheck:tests/pointercheck.cpp $(HEADERS) $(LIBFILES) pointercheck:tests/pointercheck.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o pointercheck $(LIBFILES) tests/pointercheck.cpp -I. $(LIBFLAGS) $(CXX) $(CXXFLAGS) -o pointercheck $(LIBFILES) tests/pointercheck.cpp -I. $(LIBFLAGS)

View File

@ -16,7 +16,7 @@ INCLUDEPATH="$SCRIPTPATH/include"
# this list excludes the "src/generic headers" # this list excludes the "src/generic headers"
ALLCFILES=" ALLCFILES="
simdjson.cpp error.cpp
jsonioutil.cpp jsonioutil.cpp
jsonminifier.cpp jsonminifier.cpp
jsonparser.cpp jsonparser.cpp

View File

@ -82,7 +82,7 @@ void exit_usage(string message) {
} }
struct option_struct { struct option_struct {
Architecture architecture = Architecture::UNSUPPORTED; architecture arch = architecture::UNSUPPORTED;
bool stage1_only = false; bool stage1_only = false;
int32_t iterations = 400; int32_t iterations = 400;
@ -106,8 +106,8 @@ struct option_struct {
verbose = true; verbose = true;
break; break;
case 'a': case 'a':
architecture = parse_architecture(optarg); arch = parse_architecture(optarg);
if (architecture == Architecture::UNSUPPORTED) { if (arch == architecture::UNSUPPORTED) {
exit_usage(string("Unsupported option value -a ") + optarg + ": expected -a HASWELL, WESTMERE or ARM64"); exit_usage(string("Unsupported option value -a ") + optarg + ": expected -a HASWELL, WESTMERE or ARM64");
} }
break; break;
@ -129,8 +129,8 @@ struct option_struct {
#endif #endif
// If architecture is not specified, pick the best supported architecture by default // If architecture is not specified, pick the best supported architecture by default
if (architecture == Architecture::UNSUPPORTED) { if (arch == architecture::UNSUPPORTED) {
architecture = find_best_supported_architecture(); arch = find_best_supported_architecture();
} }
} }
@ -410,7 +410,7 @@ int main(int argc, char *argv[]) {
event_collector collector; event_collector collector;
// Set up benchmarkers by reading all files // Set up benchmarkers by reading all files
json_parser parser(options.architecture); json_parser parser(options.arch);
feature_benchmarker features(parser, collector); feature_benchmarker features(parser, collector);
benchmarker gsoc_2018("jsonexamples/gsoc-2018.json", parser, collector); benchmarker gsoc_2018("jsonexamples/gsoc-2018.json", parser, collector);

View File

@ -45,17 +45,17 @@ using stage2_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj);
using stage1_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj); using stage1_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj);
using jsonparse_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj, bool streaming); using jsonparse_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj, bool streaming);
stage1_functype* get_stage1_func(const Architecture architecture) { stage1_functype* get_stage1_func(const architecture arch) {
switch (architecture) { switch (arch) {
#ifdef IS_X86_64 #ifdef IS_X86_64
case Architecture::HASWELL: case architecture::HASWELL:
return &find_structural_bits<Architecture::HASWELL>; return &find_structural_bits<architecture::HASWELL>;
case Architecture::WESTMERE: case architecture::WESTMERE:
return &find_structural_bits<Architecture::WESTMERE>; return &find_structural_bits<architecture::WESTMERE>;
#endif #endif
#ifdef IS_ARM64 #ifdef IS_ARM64
case Architecture::ARM64: case architecture::ARM64:
return &find_structural_bits<Architecture::ARM64>; return &find_structural_bits<architecture::ARM64>;
#endif #endif
default: default:
std::cerr << "The processor is not supported by simdjson." << std::endl; std::cerr << "The processor is not supported by simdjson." << std::endl;
@ -63,19 +63,19 @@ stage1_functype* get_stage1_func(const Architecture architecture) {
} }
} }
stage2_functype* get_stage2_func(const Architecture architecture) { stage2_functype* get_stage2_func(const architecture arch) {
switch (architecture) { switch (arch) {
#ifdef IS_X86_64 #ifdef IS_X86_64
case Architecture::HASWELL: case architecture::HASWELL:
return &unified_machine<Architecture::HASWELL>; return &unified_machine<architecture::HASWELL>;
break; break;
case Architecture::WESTMERE: case architecture::WESTMERE:
return &unified_machine<Architecture::WESTMERE>; return &unified_machine<architecture::WESTMERE>;
break; break;
#endif #endif
#ifdef IS_ARM64 #ifdef IS_ARM64
case Architecture::ARM64: case architecture::ARM64:
return &unified_machine<Architecture::ARM64>; return &unified_machine<architecture::ARM64>;
break; break;
#endif #endif
default: default:
@ -84,19 +84,19 @@ stage2_functype* get_stage2_func(const Architecture architecture) {
} }
} }
jsonparse_functype* get_jsonparse_func(const Architecture architecture) { jsonparse_functype* get_jsonparse_func(const architecture arch) {
switch (architecture) { switch (arch) {
#ifdef IS_X86_64 #ifdef IS_X86_64
case Architecture::HASWELL: case architecture::HASWELL:
return &json_parse_implementation<Architecture::HASWELL>; return &json_parse_implementation<architecture::HASWELL>;
break; break;
case Architecture::WESTMERE: case architecture::WESTMERE:
return &json_parse_implementation<Architecture::WESTMERE>; return &json_parse_implementation<architecture::WESTMERE>;
break; break;
#endif #endif
#ifdef IS_ARM64 #ifdef IS_ARM64
case Architecture::ARM64: case architecture::ARM64:
return &json_parse_implementation<Architecture::ARM64>; return &json_parse_implementation<architecture::ARM64>;
break; break;
#endif #endif
default: default:
@ -106,15 +106,15 @@ jsonparse_functype* get_jsonparse_func(const Architecture architecture) {
} }
struct json_parser { struct json_parser {
const Architecture architecture; const architecture arch;
stage1_functype *stage1_func; stage1_functype *stage1_func;
stage2_functype *stage2_func; stage2_functype *stage2_func;
jsonparse_functype *jsonparse_func; jsonparse_functype *jsonparse_func;
json_parser(const Architecture _architecture) : architecture(_architecture) { json_parser(const architecture _arch) : arch(_arch) {
this->stage1_func = get_stage1_func(architecture); this->stage1_func = get_stage1_func(arch);
this->stage2_func = get_stage2_func(architecture); this->stage2_func = get_stage2_func(arch);
this->jsonparse_func = get_jsonparse_func(architecture); this->jsonparse_func = get_jsonparse_func(arch);
} }
json_parser() : json_parser(find_best_supported_architecture()) {} json_parser() : json_parser(find_best_supported_architecture()) {}

View File

@ -85,7 +85,7 @@ void exit_usage(string message) {
struct option_struct { struct option_struct {
vector<char*> files; vector<char*> files;
Architecture architecture = Architecture::UNSUPPORTED; architecture arch = architecture::UNSUPPORTED;
bool stage1_only = false; bool stage1_only = false;
int32_t iterations = 200; int32_t iterations = 200;
@ -114,8 +114,8 @@ struct option_struct {
verbose = true; verbose = true;
break; break;
case 'a': case 'a':
architecture = parse_architecture(optarg); arch = parse_architecture(optarg);
if (architecture == Architecture::UNSUPPORTED) { if (arch == architecture::UNSUPPORTED) {
exit_usage(string("Unsupported option value -a ") + optarg + ": expected -a HASWELL, WESTMERE or ARM64"); exit_usage(string("Unsupported option value -a ") + optarg + ": expected -a HASWELL, WESTMERE or ARM64");
} }
break; break;
@ -143,8 +143,8 @@ struct option_struct {
#endif #endif
// If architecture is not specified, pick the best supported architecture by default // If architecture is not specified, pick the best supported architecture by default
if (architecture == Architecture::UNSUPPORTED) { if (arch == architecture::UNSUPPORTED) {
architecture = find_best_supported_architecture(); arch = find_best_supported_architecture();
} }
// All remaining arguments are considered to be files // All remaining arguments are considered to be files
@ -186,7 +186,7 @@ int main(int argc, char *argv[]) {
} }
// Set up benchmarkers by reading all files // Set up benchmarkers by reading all files
json_parser parser(options.architecture); json_parser parser(options.arch);
vector<benchmarker*> benchmarkers; vector<benchmarker*> benchmarkers;
for (size_t i=0; i<options.files.size(); i++) { for (size_t i=0; i<options.files.size(); i++) {
benchmarkers.push_back(new benchmarker(options.files[i], parser, collector)); benchmarkers.push_back(new benchmarker(options.files[i], parser, collector));

View File

@ -185,7 +185,7 @@ int main(int argc, char *argv[]) {
results.resize(evts.size()); results.resize(evts.size());
for (uint32_t i = 0; i < iterations; i++) { for (uint32_t i = 0; i < iterations; i++) {
unified.start(); unified.start();
// The default template is simdjson::Architecture::NATIVE. // The default template is simdjson::architecture::NATIVE.
bool isok = (simdjson::find_structural_bits<>(p.data(), p.size(), pj) == bool isok = (simdjson::find_structural_bits<>(p.data(), p.size(), pj) ==
simdjson::SUCCESS); simdjson::SUCCESS);
unified.end(results); unified.end(results);

View File

@ -0,0 +1,30 @@
#ifndef SIMDJSON_ARCHITECTURE_H
#define SIMDJSON_ARCHITECTURE_H
namespace simdjson {
// Identifies the minimal architecture that would support an implementation.
enum class architecture {
  UNSUPPORTED,
  WESTMERE,
  HASWELL,
  ARM64,
// TODO remove 'native' in favor of runtime dispatch?
// NATIVE is meant to name a good default for the current machine: it aliases
// WESTMERE when IS_X86_64 is defined and ARM64 when IS_ARM64 is defined.
// When neither macro is defined, no NATIVE enumerator is declared at all.
#if defined(IS_X86_64)
  NATIVE = WESTMERE
#elif defined(IS_ARM64)
  NATIVE = ARM64
#endif
};
// Returns the architecture that callers fall back to when none was requested
// explicitly (the benchmark tools use it as their default).
// NOTE(review): defined elsewhere; the selection criteria are not visible here.
architecture find_best_supported_architecture();
// Converts an architecture name (e.g. the value of a "-a" command-line option)
// into the matching enum value.
// NOTE(review): callers treat architecture::UNSUPPORTED as "name not
// recognized" -- confirm against the definition.
architecture parse_architecture(char *arch_name);
// backcompat: keeps the previous capitalized spelling `Architecture` usable
// as an alias for the renamed `architecture` enum.
using Architecture = architecture;
} // namespace simdjson
#endif // SIMDJSON_ARCHITECTURE_H

View File

@ -72,14 +72,14 @@ public:
// //
// Returns != SUCCESS if the JSON is invalid. // Returns != SUCCESS if the JSON is invalid.
// //
static WARN_UNUSED ErrorValues try_parse(const uint8_t *buf, size_t len, document &dst, bool realloc_if_needed = true) noexcept; static WARN_UNUSED error_code try_parse(const uint8_t *buf, size_t len, document &dst, bool realloc_if_needed = true) noexcept;
static WARN_UNUSED ErrorValues try_parse(const char *buf, size_t len, document &dst, bool realloc_if_needed = true) { static WARN_UNUSED error_code try_parse(const char *buf, size_t len, document &dst, bool realloc_if_needed = true) {
return try_parse((const uint8_t *)buf, len, dst, realloc_if_needed); return try_parse((const uint8_t *)buf, len, dst, realloc_if_needed);
} }
static WARN_UNUSED ErrorValues try_parse(const std::string &s, document &dst, bool realloc_if_needed = true) { static WARN_UNUSED error_code try_parse(const std::string &s, document &dst, bool realloc_if_needed = true) {
return try_parse(s.data(), s.length(), dst, realloc_if_needed); return try_parse(s.data(), s.length(), dst, realloc_if_needed);
} }
static WARN_UNUSED ErrorValues try_parse(const padded_string &s, document &dst) { static WARN_UNUSED error_code try_parse(const padded_string &s, document &dst) {
return try_parse(s.data(), s.length(), dst, false); return try_parse(s.data(), s.length(), dst, false);
} }
@ -101,15 +101,15 @@ namespace simdjson {
inline WARN_UNUSED document document::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) { inline WARN_UNUSED document document::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) {
document::parser parser; document::parser parser;
if (!parser.allocate_capacity(len)) { if (!parser.allocate_capacity(len)) {
throw invalid_json(ErrorValues(parser.error_code = MEMALLOC)); throw invalid_json(parser.error = MEMALLOC);
} }
return parser.parse_new(buf, len, realloc_if_needed); return parser.parse_new(buf, len, realloc_if_needed);
} }
inline WARN_UNUSED ErrorValues document::try_parse(const uint8_t *buf, size_t len, document &dst, bool realloc_if_needed) noexcept { inline WARN_UNUSED error_code document::try_parse(const uint8_t *buf, size_t len, document &dst, bool realloc_if_needed) noexcept {
document::parser parser; document::parser parser;
if (!parser.allocate_capacity(len)) { if (!parser.allocate_capacity(len)) {
return ErrorValues(parser.error_code = MEMALLOC); return parser.error = MEMALLOC;
} }
return parser.try_parse_into(buf, len, dst, realloc_if_needed); return parser.try_parse_into(buf, len, dst, realloc_if_needed);
} }

View File

@ -71,14 +71,14 @@ public:
// //
// Returns != SUCCESS if the JSON is invalid. // Returns != SUCCESS if the JSON is invalid.
// //
WARN_UNUSED ErrorValues try_parse(const uint8_t *buf, size_t len, const document *& dst, bool realloc_if_needed = true) noexcept; WARN_UNUSED error_code try_parse(const uint8_t *buf, size_t len, const document *& dst, bool realloc_if_needed = true) noexcept;
WARN_UNUSED ErrorValues try_parse(const char *buf, size_t len, const document *& dst, bool realloc_if_needed = true) noexcept { WARN_UNUSED error_code try_parse(const char *buf, size_t len, const document *& dst, bool realloc_if_needed = true) noexcept {
return try_parse((const uint8_t *)buf, len, dst, realloc_if_needed); return try_parse((const uint8_t *)buf, len, dst, realloc_if_needed);
} }
WARN_UNUSED ErrorValues try_parse(const std::string &s, const document *&dst, bool realloc_if_needed = true) noexcept { WARN_UNUSED error_code try_parse(const std::string &s, const document *&dst, bool realloc_if_needed = true) noexcept {
return try_parse(s.data(), s.length(), dst, realloc_if_needed); return try_parse(s.data(), s.length(), dst, realloc_if_needed);
} }
WARN_UNUSED ErrorValues try_parse(const padded_string &s, const document *&dst) noexcept { WARN_UNUSED error_code try_parse(const padded_string &s, const document *&dst) noexcept {
return try_parse(s.data(), s.length(), dst, false); return try_parse(s.data(), s.length(), dst, false);
} }
@ -89,14 +89,14 @@ public:
// //
// Returns != SUCCESS if the JSON is invalid. // Returns != SUCCESS if the JSON is invalid.
// //
WARN_UNUSED ErrorValues try_parse_into(const uint8_t *buf, size_t len, document &dst, bool realloc_if_needed = true) noexcept; WARN_UNUSED error_code try_parse_into(const uint8_t *buf, size_t len, document &dst, bool realloc_if_needed = true) noexcept;
WARN_UNUSED ErrorValues try_parse_into(const char *buf, size_t len, document &dst, bool realloc_if_needed = true) noexcept { WARN_UNUSED error_code try_parse_into(const char *buf, size_t len, document &dst, bool realloc_if_needed = true) noexcept {
return try_parse_into((const uint8_t *)buf, len, dst, realloc_if_needed); return try_parse_into((const uint8_t *)buf, len, dst, realloc_if_needed);
} }
WARN_UNUSED ErrorValues try_parse_into(const std::string &s, document &dst, bool realloc_if_needed = true) noexcept { WARN_UNUSED error_code try_parse_into(const std::string &s, document &dst, bool realloc_if_needed = true) noexcept {
return try_parse_into(s.data(), s.length(), dst, realloc_if_needed); return try_parse_into(s.data(), s.length(), dst, realloc_if_needed);
} }
WARN_UNUSED ErrorValues try_parse_into(const padded_string &s, document &dst) noexcept { WARN_UNUSED error_code try_parse_into(const padded_string &s, document &dst) noexcept {
return try_parse_into(s.data(), s.length(), dst, false); return try_parse_into(s.data(), s.length(), dst, false);
} }
@ -143,7 +143,7 @@ public:
uint8_t *current_string_buf_loc; uint8_t *current_string_buf_loc;
bool valid{false}; bool valid{false};
int error_code{simdjson::UNINITIALIZED}; error_code error{simdjson::UNINITIALIZED};
// Document we're writing to // Document we're writing to
document doc; document doc;
@ -173,12 +173,12 @@ public:
// this should be called when parsing (right before writing the tapes) // this should be called when parsing (right before writing the tapes)
void init_stage2(); void init_stage2();
really_inline ErrorValues on_error(ErrorValues new_error_code) { really_inline error_code on_error(error_code new_error_code) {
error_code = new_error_code; error = new_error_code;
return new_error_code; return new_error_code;
} }
really_inline ErrorValues on_success(ErrorValues success_code) { really_inline error_code on_success(error_code success_code) {
error_code = success_code; error = success_code;
valid = true; valid = true;
return success_code; return success_code;
} }
@ -276,11 +276,11 @@ public:
// - Returns CAPACITY if the document is too large // - Returns CAPACITY if the document is too large
// - Returns MEMALLOC if we needed to allocate memory and could not // - Returns MEMALLOC if we needed to allocate memory and could not
// //
WARN_UNUSED ErrorValues init_parse(size_t len); WARN_UNUSED error_code init_parse(size_t len);
const document &get_document() const { const document &get_document() const {
if (!is_valid()) { if (!is_valid()) {
throw invalid_json(ErrorValues(error_code)); throw invalid_json(error);
} }
return doc; return doc;
} }
@ -323,7 +323,7 @@ private:
doc.tape[saved_loc] |= val; doc.tape[saved_loc] |= val;
} }
WARN_UNUSED ErrorValues try_parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept; WARN_UNUSED error_code try_parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept;
// //
// Set the current capacity: the largest document this parser can support without reallocating. // Set the current capacity: the largest document this parser can support without reallocating.

43
include/simdjson/error.h Normal file
View File

@ -0,0 +1,43 @@
#ifndef SIMDJSON_ERROR_H
#define SIMDJSON_ERROR_H
#include <exception>
#include <string>
namespace simdjson {
// Status codes reported by the parser. SUCCESS is explicitly 0; the remaining
// enumerators take consecutive values in declaration order.
enum error_code {
SUCCESS = 0,
SUCCESS_AND_HAS_MORE, // No errors and the buffer still has more data
CAPACITY, // This parser can't support a document that big
MEMALLOC, // Error allocating memory, most likely out of memory
TAPE_ERROR, // Something went wrong while writing to the tape (stage 2); this
// is a generic error
DEPTH_ERROR, // Your document exceeds the user-specified depth limitation
STRING_ERROR, // Problem while parsing a string
T_ATOM_ERROR, // Problem while parsing an atom starting with the letter 't'
F_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'f'
N_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'n'
NUMBER_ERROR, // Problem while parsing a number
UTF8_ERROR, // The input is not valid UTF-8
UNINITIALIZED, // Unknown error, or uninitialized document
EMPTY, // No structural element found
UNESCAPED_CHARS, // Found unescaped characters in a string
UNCLOSED_STRING, // Missing quote at the end
UNEXPECTED_ERROR // Indicative of a bug in simdjson
};
// Looks up the message string associated with `error`.
// NOTE(review): defined out of line. invalid_json::what() returns c_str() of
// the referenced string, so it presumably has static lifetime -- confirm.
const std::string &error_message(error_code error);
struct invalid_json : public std::exception {
invalid_json(error_code _error) : error{_error} {}
const char *what() const noexcept { return error_message(error).c_str(); }
error_code error;
};
// backcompat
using ErrorValues = error_code;
inline const std::string &error_message(int error) { return error_message(error_code(error)); }
} // namespace simdjson
#endif // SIMDJSON_ERROR_H

View File

@ -12,9 +12,9 @@
namespace simdjson { namespace simdjson {
// json_parse_implementation is the generic function, it is specialized for // json_parse_implementation is the generic function, it is specialized for
// various architectures, e.g., as // various architectures, e.g., as
// json_parse_implementation<Architecture::HASWELL> or // json_parse_implementation<architecture::HASWELL> or
// json_parse_implementation<Architecture::ARM64> // json_parse_implementation<architecture::ARM64>
template <Architecture T> template <architecture T>
int json_parse_implementation(const uint8_t *buf, size_t len, document::parser &parser, int json_parse_implementation(const uint8_t *buf, size_t len, document::parser &parser,
bool realloc_if_needed = true) { bool realloc_if_needed = true) {
int result = parser.init_parse(len); int result = parser.init_parse(len);

View File

@ -145,7 +145,7 @@ private:
size_t n_parsed_docs{0}; size_t n_parsed_docs{0};
size_t n_bytes_parsed{0}; size_t n_bytes_parsed{0};
#ifdef SIMDJSON_THREADS_ENABLED #ifdef SIMDJSON_THREADS_ENABLED
int stage1_is_ok_thread{0}; error_code stage1_is_ok_thread{SUCCESS};
std::thread stage_1_thread; std::thread stage_1_thread;
document::parser parser_thread; document::parser parser_thread;
#endif #endif
@ -246,21 +246,21 @@ void find_the_best_supported_implementation() {
simdjson::instruction_set::SSE42 | simdjson::instruction_set::PCLMULQDQ; simdjson::instruction_set::SSE42 | simdjson::instruction_set::PCLMULQDQ;
if ((haswell_flags & supports) == haswell_flags) { if ((haswell_flags & supports) == haswell_flags) {
best_stage1 = best_stage1 =
simdjson::find_structural_bits<simdjson::Architecture::HASWELL>; simdjson::find_structural_bits<simdjson::architecture::HASWELL>;
best_stage2 = simdjson::unified_machine<simdjson::Architecture::HASWELL>; best_stage2 = simdjson::unified_machine<simdjson::architecture::HASWELL>;
return; return;
} }
if ((westmere_flags & supports) == westmere_flags) { if ((westmere_flags & supports) == westmere_flags) {
best_stage1 = best_stage1 =
simdjson::find_structural_bits<simdjson::Architecture::WESTMERE>; simdjson::find_structural_bits<simdjson::architecture::WESTMERE>;
best_stage2 = simdjson::unified_machine<simdjson::Architecture::WESTMERE>; best_stage2 = simdjson::unified_machine<simdjson::architecture::WESTMERE>;
return; return;
} }
#endif #endif
#ifdef IS_ARM64 #ifdef IS_ARM64
if (supports & instruction_set::NEON) { if (supports & instruction_set::NEON) {
best_stage1 = simdjson::find_structural_bits<Architecture::ARM64>; best_stage1 = simdjson::find_structural_bits<architecture::ARM64>;
best_stage2 = simdjson::unified_machine<Architecture::ARM64>; best_stage2 = simdjson::unified_machine<architecture::ARM64>;
return; return;
} }
#endif #endif
@ -293,18 +293,15 @@ int JsonStream<string_container>::json_parse(document::parser &parser) {
if (unlikely(parser.capacity() == 0)) { if (unlikely(parser.capacity() == 0)) {
const bool allocok = parser.allocate_capacity(_batch_size); const bool allocok = parser.allocate_capacity(_batch_size);
if (!allocok) { if (!allocok) {
parser.error_code = simdjson::MEMALLOC; return parser.error = simdjson::MEMALLOC;
return parser.error_code;
} }
} else if (unlikely(parser.capacity() < _batch_size)) { } else if (unlikely(parser.capacity() < _batch_size)) {
parser.error_code = simdjson::CAPACITY; return parser.error = simdjson::CAPACITY;
return parser.error_code;
} }
if (unlikely(parser_thread.capacity() < _batch_size)) { if (unlikely(parser_thread.capacity() < _batch_size)) {
const bool allocok_thread = parser_thread.allocate_capacity(_batch_size); const bool allocok_thread = parser_thread.allocate_capacity(_batch_size);
if (!allocok_thread) { if (!allocok_thread) {
parser.error_code = simdjson::MEMALLOC; return parser.error = simdjson::MEMALLOC;
return parser.error_code;
} }
} }
if (unlikely(load_next_batch)) { if (unlikely(load_next_batch)) {
@ -313,19 +310,16 @@ int JsonStream<string_container>::json_parse(document::parser &parser) {
_batch_size = (std::min)(_batch_size, remaining()); _batch_size = (std::min)(_batch_size, remaining());
_batch_size = trimmed_length_safe_utf8((const char *)buf(), _batch_size); _batch_size = trimmed_length_safe_utf8((const char *)buf(), _batch_size);
if (_batch_size == 0) { if (_batch_size == 0) {
parser.error_code = simdjson::UTF8_ERROR; return parser.error = simdjson::UTF8_ERROR;
return parser.error_code;
} }
int stage1_is_ok = best_stage1(buf(), _batch_size, parser, true); auto stage1_is_ok = error_code(best_stage1(buf(), _batch_size, parser, true));
if (stage1_is_ok != simdjson::SUCCESS) { if (stage1_is_ok != simdjson::SUCCESS) {
parser.error_code = stage1_is_ok; return parser.error = stage1_is_ok;
return parser.error_code;
} }
size_t last_index = find_last_json_buf_idx(buf(), _batch_size, parser); size_t last_index = find_last_json_buf_idx(buf(), _batch_size, parser);
if (last_index == 0) { if (last_index == 0) {
if (parser.n_structural_indexes == 0) { if (parser.n_structural_indexes == 0) {
parser.error_code = simdjson::EMPTY; return parser.error = simdjson::EMPTY;
return parser.error_code;
} }
} else { } else {
parser.n_structural_indexes = last_index + 1; parser.n_structural_indexes = last_index + 1;
@ -335,8 +329,7 @@ int JsonStream<string_container>::json_parse(document::parser &parser) {
else { else {
stage_1_thread.join(); stage_1_thread.join();
if (stage1_is_ok_thread != simdjson::SUCCESS) { if (stage1_is_ok_thread != simdjson::SUCCESS) {
parser.error_code = stage1_is_ok_thread; return parser.error = stage1_is_ok_thread;
return parser.error_code;
} }
std::swap(parser.structural_indexes, parser_thread.structural_indexes); std::swap(parser.structural_indexes, parser_thread.structural_indexes);
parser.n_structural_indexes = parser_thread.n_structural_indexes; parser.n_structural_indexes = parser_thread.n_structural_indexes;
@ -352,8 +345,7 @@ int JsonStream<string_container>::json_parse(document::parser &parser) {
_batch_size = trimmed_length_safe_utf8( _batch_size = trimmed_length_safe_utf8(
(const char *)(buf() + last_json_buffer_loc), _batch_size); (const char *)(buf() + last_json_buffer_loc), _batch_size);
if (_batch_size == 0) { if (_batch_size == 0) {
parser.error_code = simdjson::UTF8_ERROR; return parser.error = simdjson::UTF8_ERROR;
return parser.error_code;
} }
// let us capture read-only variables // let us capture read-only variables
const char *const b = buf() + last_json_buffer_loc; const char *const b = buf() + last_json_buffer_loc;
@ -362,7 +354,7 @@ int JsonStream<string_container>::json_parse(document::parser &parser) {
// this->stage1_is_ok_thread // this->stage1_is_ok_thread
// there is only one thread that may write to this value // there is only one thread that may write to this value
stage_1_thread = std::thread([this, b, bs] { stage_1_thread = std::thread([this, b, bs] {
this->stage1_is_ok_thread = best_stage1(b, bs, this->parser_thread, true); this->stage1_is_ok_thread = error_code(best_stage1(b, bs, this->parser_thread, true));
}); });
} }
} }
@ -403,7 +395,7 @@ int JsonStream<string_container>::json_parse(document::parser &parser) {
n_bytes_parsed += current_buffer_loc; n_bytes_parsed += current_buffer_loc;
_batch_size = (std::min)(_batch_size, remaining()); _batch_size = (std::min)(_batch_size, remaining());
_batch_size = trimmed_length_safe_utf8((const char *)buf(), _batch_size); _batch_size = trimmed_length_safe_utf8((const char *)buf(), _batch_size);
auto stage1_is_ok = (ErrorValues)best_stage1(buf(), _batch_size, parser, true); auto stage1_is_ok = (error_code)best_stage1(buf(), _batch_size, parser, true);
if (stage1_is_ok != simdjson::SUCCESS) { if (stage1_is_ok != simdjson::SUCCESS) {
return parser.on_error(stage1_is_ok); return parser.on_error(stage1_is_ok);
} }

View File

@ -17,54 +17,7 @@
#error simdjson requires a compiler compliant with the C++17 standard #error simdjson requires a compiler compliant with the C++17 standard
#endif #endif
#include <string> #include "simdjson/architecture.h"
#include "simdjson/error.h"
namespace simdjson {
// Represents the minimal architecture that would support an implementation
enum class Architecture {
UNSUPPORTED,
WESTMERE,
HASWELL,
ARM64,
// TODO remove 'native' in favor of runtime dispatch?
// the 'native' enum class value should point at a good default on the current
// machine
#ifdef IS_X86_64
NATIVE = WESTMERE
#elif defined(IS_ARM64)
NATIVE = ARM64
#endif
};
Architecture find_best_supported_architecture();
Architecture parse_architecture(char *architecture);
enum ErrorValues {
SUCCESS = 0,
SUCCESS_AND_HAS_MORE, //No errors and buffer still has more data
CAPACITY, // This parser can't support a document that big
MEMALLOC, // Error allocating memory, most likely out of memory
TAPE_ERROR, // Something went wrong while writing to the tape (stage 2), this
// is a generic error
DEPTH_ERROR, // Your document exceeds the user-specified depth limitation
STRING_ERROR, // Problem while parsing a string
T_ATOM_ERROR, // Problem while parsing an atom starting with the letter 't'
F_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'f'
N_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'n'
NUMBER_ERROR, // Problem while parsing a number
UTF8_ERROR, // the input is not valid UTF-8
UNINITIALIZED, // unknown error, or uninitialized document
EMPTY, // no structural element found
UNESCAPED_CHARS, // found unescaped characters in a string.
UNCLOSED_STRING, // missing quote at the end
UNEXPECTED_ERROR // indicative of a bug in simdjson
};
const std::string &error_message(const int);
struct invalid_json : public std::exception {
invalid_json(ErrorValues _error_code) : error_code{_error_code} {}
const char *what() const noexcept { return error_message(error_code).c_str(); }
ErrorValues error_code;
};
} // namespace simdjson
#endif // SIMDJSON_H #endif // SIMDJSON_H

View File

@ -10,26 +10,26 @@ namespace simdjson {
// The caller should still ensure that the input is valid UTF-8. If you are processing substrings, // The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
// you may want to call on a function like trimmed_length_safe_utf8. // you may want to call on a function like trimmed_length_safe_utf8.
// A function like find_last_json_buf_idx may also prove useful. // A function like find_last_json_buf_idx may also prove useful.
template <Architecture T = Architecture::NATIVE> template <architecture T = architecture::NATIVE>
int find_structural_bits(const uint8_t *buf, size_t len, document::parser &parser, bool streaming); int find_structural_bits(const uint8_t *buf, size_t len, document::parser &parser, bool streaming);
// Setting the streaming parameter to true allows the find_structural_bits to tolerate unclosed strings. // Setting the streaming parameter to true allows the find_structural_bits to tolerate unclosed strings.
// The caller should still ensure that the input is valid UTF-8. If you are processing substrings, // The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
// you may want to call on a function like trimmed_length_safe_utf8. // you may want to call on a function like trimmed_length_safe_utf8.
// A function like find_last_json_buf_idx may also prove useful. // A function like find_last_json_buf_idx may also prove useful.
template <Architecture T = Architecture::NATIVE> template <architecture T = architecture::NATIVE>
int find_structural_bits(const char *buf, size_t len, document::parser &parser, bool streaming) { int find_structural_bits(const char *buf, size_t len, document::parser &parser, bool streaming) {
return find_structural_bits<T>((const uint8_t *)buf, len, parser, streaming); return find_structural_bits<T>((const uint8_t *)buf, len, parser, streaming);
} }
template <Architecture T = Architecture::NATIVE> template <architecture T = architecture::NATIVE>
int find_structural_bits(const uint8_t *buf, size_t len, document::parser &parser) { int find_structural_bits(const uint8_t *buf, size_t len, document::parser &parser) {
return find_structural_bits<T>(buf, len, parser, false); return find_structural_bits<T>(buf, len, parser, false);
} }
template <Architecture T = Architecture::NATIVE> template <architecture T = architecture::NATIVE>
int find_structural_bits(const char *buf, size_t len, document::parser &parser) { int find_structural_bits(const char *buf, size_t len, document::parser &parser) {
return find_structural_bits<T>((const uint8_t *)buf, len, parser); return find_structural_bits<T>((const uint8_t *)buf, len, parser);
} }

View File

@ -7,13 +7,11 @@
namespace simdjson { namespace simdjson {
void init_state_machine(); template <architecture T = architecture::NATIVE>
template <Architecture T = Architecture::NATIVE>
WARN_UNUSED int WARN_UNUSED int
unified_machine(const uint8_t *buf, size_t len, document::parser &parser); unified_machine(const uint8_t *buf, size_t len, document::parser &parser);
template <Architecture T = Architecture::NATIVE> template <architecture T = architecture::NATIVE>
WARN_UNUSED int WARN_UNUSED int
unified_machine(const char *buf, size_t len, document::parser &parser) { unified_machine(const char *buf, size_t len, document::parser &parser) {
return unified_machine<T>(reinterpret_cast<const uint8_t *>(buf), len, parser); return unified_machine<T>(reinterpret_cast<const uint8_t *>(buf), len, parser);
@ -22,11 +20,11 @@ unified_machine(const char *buf, size_t len, document::parser &parser) {
// Streaming // Streaming
template <Architecture T = Architecture::NATIVE> template <architecture T = architecture::NATIVE>
WARN_UNUSED int WARN_UNUSED int
unified_machine(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json); unified_machine(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json);
template <Architecture T = Architecture::NATIVE> template <architecture T = architecture::NATIVE>
int unified_machine(const char *buf, size_t len, document::parser &parser, size_t &next_json) { int unified_machine(const char *buf, size_t len, document::parser &parser, size_t &next_json) {
return unified_machine<T>(reinterpret_cast<const uint8_t *>(buf), len, parser, next_json); return unified_machine<T>(reinterpret_cast<const uint8_t *>(buf), len, parser, next_json);
} }

View File

@ -29,7 +29,7 @@ set(SIMDJSON_SRC
stage2_build_tape.cpp stage2_build_tape.cpp
document.cpp document.cpp
document/parser.cpp document/parser.cpp
simdjson.cpp error.cpp
) )
# Load headers and sources # Load headers and sources

View File

@ -55,7 +55,7 @@ really_inline simd8<bool> must_be_continuation(simd8<uint8_t> prev1, simd8<uint8
namespace simdjson { namespace simdjson {
template <> template <>
int find_structural_bits<Architecture::ARM64>(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) { int find_structural_bits<architecture::ARM64>(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) {
return arm64::stage1::find_structural_bits<64>(buf, len, parser, streaming); return arm64::stage1::find_structural_bits<64>(buf, len, parser, streaming);
} }

View File

@ -20,13 +20,13 @@ namespace simdjson {
template <> template <>
WARN_UNUSED int WARN_UNUSED int
unified_machine<Architecture::ARM64>(const uint8_t *buf, size_t len, ParsedJson &pj) { unified_machine<architecture::ARM64>(const uint8_t *buf, size_t len, ParsedJson &pj) {
return arm64::stage2::unified_machine(buf, len, pj); return arm64::stage2::unified_machine(buf, len, pj);
} }
template <> template <>
WARN_UNUSED int WARN_UNUSED int
unified_machine<Architecture::ARM64>(const uint8_t *buf, size_t len, ParsedJson &pj, size_t &next_json) { unified_machine<architecture::ARM64>(const uint8_t *buf, size_t len, ParsedJson &pj, size_t &next_json) {
return arm64::stage2::unified_machine(buf, len, pj, next_json); return arm64::stage2::unified_machine(buf, len, pj, next_json);
} }

View File

@ -4,17 +4,17 @@
namespace simdjson { namespace simdjson {
// This is the internal one all others end up calling // This is the internal one all others end up calling
ErrorValues document::parser::try_parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept { error_code document::parser::try_parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept {
return (ErrorValues)json_parse(buf, len, *this, realloc_if_needed); return (error_code)json_parse(buf, len, *this, realloc_if_needed);
} }
ErrorValues document::parser::try_parse(const uint8_t *buf, size_t len, const document *& dst, bool realloc_if_needed) noexcept { error_code document::parser::try_parse(const uint8_t *buf, size_t len, const document *& dst, bool realloc_if_needed) noexcept {
auto result = try_parse(buf, len, realloc_if_needed); auto result = try_parse(buf, len, realloc_if_needed);
dst = result == SUCCESS ? &doc : nullptr; dst = result == SUCCESS ? &doc : nullptr;
return result; return result;
} }
ErrorValues document::parser::try_parse_into(const uint8_t *buf, size_t len, document & dst, bool realloc_if_needed) noexcept { error_code document::parser::try_parse_into(const uint8_t *buf, size_t len, document & dst, bool realloc_if_needed) noexcept {
auto result = try_parse(buf, len, realloc_if_needed); auto result = try_parse(buf, len, realloc_if_needed);
if (result != SUCCESS) { if (result != SUCCESS) {
return result; return result;
@ -22,13 +22,13 @@ ErrorValues document::parser::try_parse_into(const uint8_t *buf, size_t len, doc
// Take the document // Take the document
dst = (document&&)doc; dst = (document&&)doc;
valid = false; // Document has been taken; there is no valid document anymore valid = false; // Document has been taken; there is no valid document anymore
error_code = UNINITIALIZED; error = UNINITIALIZED;
return result; return result;
} }
const document &document::parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) { const document &document::parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) {
const document *dst; const document *dst;
ErrorValues result = try_parse(buf, len, dst, realloc_if_needed); error_code result = try_parse(buf, len, dst, realloc_if_needed);
if (result) { if (result) {
throw invalid_json(result); throw invalid_json(result);
} }
@ -37,7 +37,7 @@ const document &document::parser::parse(const uint8_t *buf, size_t len, bool rea
document document::parser::parse_new(const uint8_t *buf, size_t len, bool realloc_if_needed) { document document::parser::parse_new(const uint8_t *buf, size_t len, bool realloc_if_needed) {
document dst; document dst;
ErrorValues result = try_parse_into(buf, len, dst, realloc_if_needed); error_code result = try_parse_into(buf, len, dst, realloc_if_needed);
if (result) { if (result) {
throw invalid_json(result); throw invalid_json(result);
} }
@ -45,14 +45,14 @@ document document::parser::parse_new(const uint8_t *buf, size_t len, bool reallo
} }
WARN_UNUSED WARN_UNUSED
ErrorValues document::parser::init_parse(size_t len) { error_code document::parser::init_parse(size_t len) {
if (len > capacity()) { if (len > capacity()) {
return ErrorValues(error_code = CAPACITY); return error = CAPACITY;
} }
// If the last doc was taken, we need to allocate a new one // If the last doc was taken, we need to allocate a new one
if (!doc.tape) { if (!doc.tape) {
if (!doc.set_capacity(len)) { if (!doc.set_capacity(len)) {
return ErrorValues(error_code = MEMALLOC); return error = MEMALLOC;
} }
} }
return SUCCESS; return SUCCESS;
@ -128,15 +128,15 @@ void document::parser::init_stage2() {
current_string_buf_loc = doc.string_buf.get(); current_string_buf_loc = doc.string_buf.get();
current_loc = 0; current_loc = 0;
valid = false; valid = false;
error_code = UNINITIALIZED; error = UNINITIALIZED;
} }
bool document::parser::is_valid() const { return valid; } bool document::parser::is_valid() const { return valid; }
int document::parser::get_error_code() const { return error_code; } int document::parser::get_error_code() const { return error; }
std::string document::parser::get_error_message() const { std::string document::parser::get_error_message() const {
return error_message(error_code); return error_message(error);
} }
WARN_UNUSED WARN_UNUSED

View File

@ -1,7 +1,8 @@
#include "simdjson/simdjson.h" #include "simdjson/error.h"
#include <map> #include <map>
namespace simdjson { namespace simdjson {
const std::map<int, const std::string> error_strings = { const std::map<int, const std::string> error_strings = {
{SUCCESS, "No error"}, {SUCCESS, "No error"},
{SUCCESS_AND_HAS_MORE, "No error and buffer still has more data"}, {SUCCESS_AND_HAS_MORE, "No error and buffer still has more data"},
@ -30,11 +31,12 @@ const std::map<int, const std::string> error_strings = {
const std::string unexpected_error_msg {"Unexpected error"}; const std::string unexpected_error_msg {"Unexpected error"};
// returns a string matching the error code // returns a string matching the error code
const std::string &error_message(const int error_code) { const std::string &error_message(error_code code) {
auto keyvalue = error_strings.find(error_code); auto keyvalue = error_strings.find(code);
if(keyvalue == error_strings.end()) { if(keyvalue == error_strings.end()) {
return unexpected_error_msg; return unexpected_error_msg;
} }
return keyvalue->second; return keyvalue->second;
} }
} // namespace simdjson } // namespace simdjson

View File

@ -80,7 +80,7 @@ public:
// This may detect errors as well, such as unclosed string and certain UTF-8 errors. // This may detect errors as well, such as unclosed string and certain UTF-8 errors.
// if streaming is set to true, an unclosed string is allowed. // if streaming is set to true, an unclosed string is allowed.
// //
really_inline ErrorValues detect_errors_on_eof(bool streaming = false); really_inline error_code detect_errors_on_eof(bool streaming = false);
// //
// Return a mask of all string characters plus end quotes. // Return a mask of all string characters plus end quotes.
@ -213,7 +213,7 @@ really_inline uint64_t follows(const uint64_t match, const uint64_t filler, uint
return result; return result;
} }
really_inline ErrorValues json_structural_scanner::detect_errors_on_eof(bool streaming) { really_inline error_code json_structural_scanner::detect_errors_on_eof(bool streaming) {
if ((prev_in_string) and (not streaming)) { if ((prev_in_string) and (not streaming)) {
return UNCLOSED_STRING; return UNCLOSED_STRING;
} }
@ -399,7 +399,7 @@ int find_structural_bits(const uint8_t *buf, size_t len, document::parser &parse
json_structural_scanner scanner{parser.structural_indexes.get()}; json_structural_scanner scanner{parser.structural_indexes.get()};
scanner.scan<STEP_SIZE>(buf, len, utf8_checker); scanner.scan<STEP_SIZE>(buf, len, utf8_checker);
// we might tolerate an unclosed string if streaming is true // we might tolerate an unclosed string if streaming is true
ErrorValues error = scanner.detect_errors_on_eof(streaming); error_code error = scanner.detect_errors_on_eof(streaming);
if (unlikely(error != SUCCESS)) { if (unlikely(error != SUCCESS)) {
return error; return error;
} }

View File

@ -198,7 +198,7 @@ struct structural_parser {
} }
} }
WARN_UNUSED really_inline ErrorValues finish() { WARN_UNUSED really_inline error_code finish() {
// the string might not be NULL terminated. // the string might not be NULL terminated.
if ( i + 1 != doc_parser.n_structural_indexes ) { if ( i + 1 != doc_parser.n_structural_indexes ) {
return doc_parser.on_error(TAPE_ERROR); return doc_parser.on_error(TAPE_ERROR);
@ -214,7 +214,7 @@ struct structural_parser {
return doc_parser.on_success(SUCCESS); return doc_parser.on_success(SUCCESS);
} }
WARN_UNUSED really_inline ErrorValues error() { WARN_UNUSED really_inline error_code error() {
/* We do not need the next line because this is done by doc_parser.init_stage2(), /* We do not need the next line because this is done by doc_parser.init_stage2(),
* pessimistically. * pessimistically.
* doc_parser.is_valid = false; * doc_parser.is_valid = false;
@ -254,7 +254,7 @@ struct structural_parser {
} }
} }
WARN_UNUSED really_inline ErrorValues start(ret_address finish_state) { WARN_UNUSED really_inline error_code start(ret_address finish_state) {
doc_parser.init_stage2(); // sets is_valid to false doc_parser.init_stage2(); // sets is_valid to false
if (len > doc_parser.capacity()) { if (len > doc_parser.capacity()) {
return CAPACITY; return CAPACITY;

View File

@ -4,7 +4,7 @@ struct streaming_structural_parser: structural_parser {
really_inline streaming_structural_parser(const uint8_t *_buf, size_t _len, ParsedJson &_pj, size_t _i) : structural_parser(_buf, _len, _pj, _i) {} really_inline streaming_structural_parser(const uint8_t *_buf, size_t _len, ParsedJson &_pj, size_t _i) : structural_parser(_buf, _len, _pj, _i) {}
// override to add streaming // override to add streaming
WARN_UNUSED really_inline ErrorValues start(ret_address finish_parser) { WARN_UNUSED really_inline error_code start(ret_address finish_parser) {
doc_parser.init_stage2(); // sets is_valid to false doc_parser.init_stage2(); // sets is_valid to false
// Capacity ain't no thang for streaming, so we don't check it. // Capacity ain't no thang for streaming, so we don't check it.
// Advance to the first character as soon as possible // Advance to the first character as soon as possible
@ -17,7 +17,7 @@ struct streaming_structural_parser: structural_parser {
} }
// override to add streaming // override to add streaming
WARN_UNUSED really_inline ErrorValues finish() { WARN_UNUSED really_inline error_code finish() {
if ( i + 1 > doc_parser.n_structural_indexes ) { if ( i + 1 > doc_parser.n_structural_indexes ) {
return doc_parser.on_error(TAPE_ERROR); return doc_parser.on_error(TAPE_ERROR);
} }

View File

@ -172,7 +172,7 @@ struct utf8_checker {
} }
} }
really_inline ErrorValues errors() { really_inline error_code errors() {
return this->has_error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS; return this->has_error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
} }
}; // struct utf8_checker }; // struct utf8_checker

View File

@ -415,7 +415,7 @@ namespace utf8_validation {
} }
} }
really_inline ErrorValues errors() { really_inline error_code errors() {
return this->error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS; return this->error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
} }

View File

@ -292,7 +292,7 @@ struct utf8_checker {
} }
} }
really_inline ErrorValues errors() { really_inline error_code errors() {
return this->error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS; return this->error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
} }

View File

@ -174,7 +174,7 @@ struct utf8_checker {
} }
} }
really_inline ErrorValues errors() { really_inline error_code errors() {
return this->has_error.any() ? simdjson::UTF8_ERROR : simdjson::SUCCESS; return this->has_error.any() ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
} }
}; // struct utf8_checker }; // struct utf8_checker

View File

@ -354,7 +354,7 @@ struct utf8_checker {
} }
} }
really_inline ErrorValues errors() { really_inline error_code errors() {
return (this->special_case_errors.any_bits_set_anywhere() | this->length_errors) ? simdjson::UTF8_ERROR : simdjson::SUCCESS; return (this->special_case_errors.any_bits_set_anywhere() | this->length_errors) ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
} }
}; // struct utf8_checker }; // struct utf8_checker

View File

@ -55,7 +55,7 @@ TARGET_HASWELL
namespace simdjson { namespace simdjson {
template <> template <>
int find_structural_bits<Architecture::HASWELL>(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) { int find_structural_bits<architecture::HASWELL>(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) {
return haswell::stage1::find_structural_bits<128>(buf, len, parser, streaming); return haswell::stage1::find_structural_bits<128>(buf, len, parser, streaming);
} }

View File

@ -23,13 +23,13 @@ namespace simdjson {
template <> template <>
WARN_UNUSED int WARN_UNUSED int
unified_machine<Architecture::HASWELL>(const uint8_t *buf, size_t len, ParsedJson &pj) { unified_machine<architecture::HASWELL>(const uint8_t *buf, size_t len, ParsedJson &pj) {
return haswell::stage2::unified_machine(buf, len, pj); return haswell::stage2::unified_machine(buf, len, pj);
} }
template <> template <>
WARN_UNUSED int WARN_UNUSED int
unified_machine<Architecture::HASWELL>(const uint8_t *buf, size_t len, ParsedJson &pj, UNUSED size_t &next_json) { unified_machine<architecture::HASWELL>(const uint8_t *buf, size_t len, ParsedJson &pj, UNUSED size_t &next_json) {
return haswell::stage2::unified_machine(buf, len, pj, next_json); return haswell::stage2::unified_machine(buf, len, pj, next_json);
} }

View File

@ -26,7 +26,7 @@ int json_parse(const char *buf, size_t len, ParsedJson &pj, bool realloc) {
realloc); realloc);
} }
Architecture find_best_supported_architecture() { architecture find_best_supported_architecture() {
constexpr uint32_t haswell_flags = constexpr uint32_t haswell_flags =
instruction_set::AVX2 | instruction_set::PCLMULQDQ | instruction_set::AVX2 | instruction_set::PCLMULQDQ |
instruction_set::BMI1 | instruction_set::BMI2; instruction_set::BMI1 | instruction_set::BMI2;
@ -36,38 +36,38 @@ Architecture find_best_supported_architecture() {
uint32_t supports = detect_supported_architectures(); uint32_t supports = detect_supported_architectures();
// Order from best to worst (within architecture) // Order from best to worst (within architecture)
if ((haswell_flags & supports) == haswell_flags) if ((haswell_flags & supports) == haswell_flags)
return Architecture::HASWELL; return architecture::HASWELL;
if ((westmere_flags & supports) == westmere_flags) if ((westmere_flags & supports) == westmere_flags)
return Architecture::WESTMERE; return architecture::WESTMERE;
if (supports & instruction_set::NEON) if (supports & instruction_set::NEON)
return Architecture::ARM64; return architecture::ARM64;
return Architecture::UNSUPPORTED; return architecture::UNSUPPORTED;
} }
Architecture parse_architecture(char *architecture) { architecture parse_architecture(char *arch) {
if (!strcmp(architecture, "HASWELL")) { return Architecture::HASWELL; } if (!strcmp(arch, "HASWELL")) { return architecture::HASWELL; }
if (!strcmp(architecture, "WESTMERE")) { return Architecture::WESTMERE; } if (!strcmp(arch, "WESTMERE")) { return architecture::WESTMERE; }
if (!strcmp(architecture, "ARM64")) { return Architecture::ARM64; } if (!strcmp(arch, "ARM64")) { return architecture::ARM64; }
return Architecture::UNSUPPORTED; return architecture::UNSUPPORTED;
} }
// Responsible to select the best json_parse implementation // Responsible to select the best json_parse implementation
int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj, bool realloc) { int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj, bool realloc) {
Architecture best_implementation = find_best_supported_architecture(); architecture best_implementation = find_best_supported_architecture();
// Selecting the best implementation // Selecting the best implementation
switch (best_implementation) { switch (best_implementation) {
#ifdef IS_X86_64 #ifdef IS_X86_64
case Architecture::HASWELL: case architecture::HASWELL:
json_parse_ptr.store(&json_parse_implementation<Architecture::HASWELL>, std::memory_order_relaxed); json_parse_ptr.store(&json_parse_implementation<architecture::HASWELL>, std::memory_order_relaxed);
break; break;
case Architecture::WESTMERE: case architecture::WESTMERE:
json_parse_ptr.store(&json_parse_implementation<Architecture::WESTMERE>, std::memory_order_relaxed); json_parse_ptr.store(&json_parse_implementation<architecture::WESTMERE>, std::memory_order_relaxed);
break; break;
#endif #endif
#ifdef IS_ARM64 #ifdef IS_ARM64
case Architecture::ARM64: case architecture::ARM64:
json_parse_ptr.store(&json_parse_implementation<Architecture::ARM64>, std::memory_order_relaxed); json_parse_ptr.store(&json_parse_implementation<architecture::ARM64>, std::memory_order_relaxed);
break; break;
#endif #endif
default: default:

View File

@ -57,7 +57,7 @@ TARGET_WESTMERE
namespace simdjson { namespace simdjson {
template <> template <>
int find_structural_bits<Architecture::WESTMERE>(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) { int find_structural_bits<architecture::WESTMERE>(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) {
return westmere::stage1::find_structural_bits<64>(buf, len, parser, streaming); return westmere::stage1::find_structural_bits<64>(buf, len, parser, streaming);
} }

View File

@ -23,13 +23,13 @@ namespace simdjson {
template <> template <>
WARN_UNUSED int WARN_UNUSED int
unified_machine<Architecture::WESTMERE>(const uint8_t *buf, size_t len, ParsedJson &pj) { unified_machine<architecture::WESTMERE>(const uint8_t *buf, size_t len, ParsedJson &pj) {
return westmere::stage2::unified_machine(buf, len, pj); return westmere::stage2::unified_machine(buf, len, pj);
} }
template <> template <>
WARN_UNUSED int WARN_UNUSED int
unified_machine<Architecture::WESTMERE>(const uint8_t *buf, size_t len, ParsedJson &pj, size_t &next_json) { unified_machine<architecture::WESTMERE>(const uint8_t *buf, size_t len, ParsedJson &pj, size_t &next_json) {
return westmere::stage2::unified_machine(buf, len, pj, next_json); return westmere::stage2::unified_machine(buf, len, pj, next_json);
} }