Lowercase Architecture and ErrorValues (#487)

ErrorValues -> error_code, Architecture -> architecture
This commit is contained in:
John Keiser 2020-02-14 15:21:28 -08:00 committed by GitHub
parent 083569fca8
commit bc8bc7d1a8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
33 changed files with 232 additions and 214 deletions

View File

@ -70,10 +70,10 @@ LIBHEADERS_HASWELL= src/haswell/bitmanipulation.h src/haswell/bitmask.h src/h
LIBHEADERS_WESTMERE=src/westmere/bitmanipulation.h src/westmere/bitmask.h src/westmere/intrinsics.h src/westmere/numberparsing.h src/westmere/simd.h src/westmere/stage1_find_marks.h src/westmere/stage2_build_tape.h src/westmere/stringparsing.h
LIBHEADERS=src/jsoncharutils.h src/simdprune_tables.h $(LIBHEADERS_GENERIC) $(LIBHEADERS_ARM64) $(LIBHEADERS_HASWELL) $(LIBHEADERS_WESTMERE)
PUBHEADERS=include/simdjson/common_defs.h include/simdjson/isadetection.h include/simdjson/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/padded_string.h include/simdjson/document.h include/simdjson/document/iterator.h include/simdjson/document/parser.h include/simdjson/parsedjson.h include/simdjson/jsonstream.h include/simdjson/portability.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_build_tape.h
PUBHEADERS=include/simdjson/common_defs.h include/simdjson/isadetection.h include/simdjson/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/padded_string.h include/simdjson/document.h include/simdjson/document/iterator.h include/simdjson/document/parser.h include/simdjson/parsedjson.h include/simdjson/jsonstream.h include/simdjson/portability.h include/simdjson/architecture.h include/simdjson/error.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_build_tape.h
HEADERS=$(PUBHEADERS) $(LIBHEADERS)
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/document.cpp src/document/parser.cpp
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/document.cpp src/document/parser.cpp
MINIFIERHEADERS=include/simdjson/jsonminifier.h
MINIFIERLIBFILES=src/jsonminifier.cpp
@ -205,7 +205,7 @@ basictests:tests/basictests.cpp $(HEADERS) $(LIBFILES)
numberparsingcheck:tests/numberparsingcheck.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o numberparsingcheck src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/document.cpp src/document/parser.cpp tests/numberparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_NUMBERS
$(CXX) $(CXXFLAGS) -o numberparsingcheck src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/document.cpp src/document/parser.cpp tests/numberparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_NUMBERS
integer_tests:tests/integer_tests.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o integer_tests $(LIBFILES) tests/integer_tests.cpp -I. $(LIBFLAGS)
@ -213,7 +213,7 @@ integer_tests:tests/integer_tests.cpp $(HEADERS) $(LIBFILES)
stringparsingcheck:tests/stringparsingcheck.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o stringparsingcheck src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/document.cpp src/document/parser.cpp tests/stringparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_STRINGS
$(CXX) $(CXXFLAGS) -o stringparsingcheck src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/document.cpp src/document/parser.cpp tests/stringparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_STRINGS
pointercheck:tests/pointercheck.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o pointercheck $(LIBFILES) tests/pointercheck.cpp -I. $(LIBFLAGS)

View File

@ -16,7 +16,7 @@ INCLUDEPATH="$SCRIPTPATH/include"
# this list excludes the "src/generic headers"
ALLCFILES="
simdjson.cpp
error.cpp
jsonioutil.cpp
jsonminifier.cpp
jsonparser.cpp

View File

@ -82,7 +82,7 @@ void exit_usage(string message) {
}
struct option_struct {
Architecture architecture = Architecture::UNSUPPORTED;
architecture arch = architecture::UNSUPPORTED;
bool stage1_only = false;
int32_t iterations = 400;
@ -106,8 +106,8 @@ struct option_struct {
verbose = true;
break;
case 'a':
architecture = parse_architecture(optarg);
if (architecture == Architecture::UNSUPPORTED) {
arch = parse_architecture(optarg);
if (arch == architecture::UNSUPPORTED) {
exit_usage(string("Unsupported option value -a ") + optarg + ": expected -a HASWELL, WESTMERE or ARM64");
}
break;
@ -129,8 +129,8 @@ struct option_struct {
#endif
// If architecture is not specified, pick the best supported architecture by default
if (architecture == Architecture::UNSUPPORTED) {
architecture = find_best_supported_architecture();
if (arch == architecture::UNSUPPORTED) {
arch = find_best_supported_architecture();
}
}
@ -410,7 +410,7 @@ int main(int argc, char *argv[]) {
event_collector collector;
// Set up benchmarkers by reading all files
json_parser parser(options.architecture);
json_parser parser(options.arch);
feature_benchmarker features(parser, collector);
benchmarker gsoc_2018("jsonexamples/gsoc-2018.json", parser, collector);

View File

@ -45,17 +45,17 @@ using stage2_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj);
using stage1_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj);
using jsonparse_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj, bool streaming);
stage1_functype* get_stage1_func(const Architecture architecture) {
switch (architecture) {
stage1_functype* get_stage1_func(const architecture arch) {
switch (arch) {
#ifdef IS_X86_64
case Architecture::HASWELL:
return &find_structural_bits<Architecture::HASWELL>;
case Architecture::WESTMERE:
return &find_structural_bits<Architecture::WESTMERE>;
case architecture::HASWELL:
return &find_structural_bits<architecture::HASWELL>;
case architecture::WESTMERE:
return &find_structural_bits<architecture::WESTMERE>;
#endif
#ifdef IS_ARM64
case Architecture::ARM64:
return &find_structural_bits<Architecture::ARM64>;
case architecture::ARM64:
return &find_structural_bits<architecture::ARM64>;
#endif
default:
std::cerr << "The processor is not supported by simdjson." << std::endl;
@ -63,19 +63,19 @@ stage1_functype* get_stage1_func(const Architecture architecture) {
}
}
stage2_functype* get_stage2_func(const Architecture architecture) {
switch (architecture) {
stage2_functype* get_stage2_func(const architecture arch) {
switch (arch) {
#ifdef IS_X86_64
case Architecture::HASWELL:
return &unified_machine<Architecture::HASWELL>;
case architecture::HASWELL:
return &unified_machine<architecture::HASWELL>;
break;
case Architecture::WESTMERE:
return &unified_machine<Architecture::WESTMERE>;
case architecture::WESTMERE:
return &unified_machine<architecture::WESTMERE>;
break;
#endif
#ifdef IS_ARM64
case Architecture::ARM64:
return &unified_machine<Architecture::ARM64>;
case architecture::ARM64:
return &unified_machine<architecture::ARM64>;
break;
#endif
default:
@ -84,19 +84,19 @@ stage2_functype* get_stage2_func(const Architecture architecture) {
}
}
jsonparse_functype* get_jsonparse_func(const Architecture architecture) {
switch (architecture) {
jsonparse_functype* get_jsonparse_func(const architecture arch) {
switch (arch) {
#ifdef IS_X86_64
case Architecture::HASWELL:
return &json_parse_implementation<Architecture::HASWELL>;
case architecture::HASWELL:
return &json_parse_implementation<architecture::HASWELL>;
break;
case Architecture::WESTMERE:
return &json_parse_implementation<Architecture::WESTMERE>;
case architecture::WESTMERE:
return &json_parse_implementation<architecture::WESTMERE>;
break;
#endif
#ifdef IS_ARM64
case Architecture::ARM64:
return &json_parse_implementation<Architecture::ARM64>;
case architecture::ARM64:
return &json_parse_implementation<architecture::ARM64>;
break;
#endif
default:
@ -106,15 +106,15 @@ jsonparse_functype* get_jsonparse_func(const Architecture architecture) {
}
struct json_parser {
const Architecture architecture;
const architecture arch;
stage1_functype *stage1_func;
stage2_functype *stage2_func;
jsonparse_functype *jsonparse_func;
json_parser(const Architecture _architecture) : architecture(_architecture) {
this->stage1_func = get_stage1_func(architecture);
this->stage2_func = get_stage2_func(architecture);
this->jsonparse_func = get_jsonparse_func(architecture);
json_parser(const architecture _arch) : arch(_arch) {
this->stage1_func = get_stage1_func(arch);
this->stage2_func = get_stage2_func(arch);
this->jsonparse_func = get_jsonparse_func(arch);
}
json_parser() : json_parser(find_best_supported_architecture()) {}

View File

@ -85,7 +85,7 @@ void exit_usage(string message) {
struct option_struct {
vector<char*> files;
Architecture architecture = Architecture::UNSUPPORTED;
architecture arch = architecture::UNSUPPORTED;
bool stage1_only = false;
int32_t iterations = 200;
@ -114,8 +114,8 @@ struct option_struct {
verbose = true;
break;
case 'a':
architecture = parse_architecture(optarg);
if (architecture == Architecture::UNSUPPORTED) {
arch = parse_architecture(optarg);
if (arch == architecture::UNSUPPORTED) {
exit_usage(string("Unsupported option value -a ") + optarg + ": expected -a HASWELL, WESTMERE or ARM64");
}
break;
@ -143,8 +143,8 @@ struct option_struct {
#endif
// If architecture is not specified, pick the best supported architecture by default
if (architecture == Architecture::UNSUPPORTED) {
architecture = find_best_supported_architecture();
if (arch == architecture::UNSUPPORTED) {
arch = find_best_supported_architecture();
}
// All remaining arguments are considered to be files
@ -186,7 +186,7 @@ int main(int argc, char *argv[]) {
}
// Set up benchmarkers by reading all files
json_parser parser(options.architecture);
json_parser parser(options.arch);
vector<benchmarker*> benchmarkers;
for (size_t i=0; i<options.files.size(); i++) {
benchmarkers.push_back(new benchmarker(options.files[i], parser, collector));

View File

@ -185,7 +185,7 @@ int main(int argc, char *argv[]) {
results.resize(evts.size());
for (uint32_t i = 0; i < iterations; i++) {
unified.start();
// The default template is simdjson::Architecture::NATIVE.
// The default template is simdjson::architecture::NATIVE.
bool isok = (simdjson::find_structural_bits<>(p.data(), p.size(), pj) ==
simdjson::SUCCESS);
unified.end(results);

View File

@ -0,0 +1,30 @@
#ifndef SIMDJSON_ARCHITECTURE_H
#define SIMDJSON_ARCHITECTURE_H
namespace simdjson {
// Represents the minimal architecture that would support an implementation.
// Values of this enum are used both at runtime (to pick an implementation) and
// as a non-type template argument selecting a per-architecture specialization.
enum class architecture {
UNSUPPORTED, // no usable implementation; callers also use it as a "not specified yet" sentinel
WESTMERE,
HASWELL,
ARM64,
// TODO remove 'native' in favor of runtime dispatch?
// the 'native' enum class value should point at a good default on the current
// machine
// NOTE(review): if neither IS_X86_64 nor IS_ARM64 is defined, no NATIVE
// enumerator is declared at all, so any use of architecture::NATIVE will not
// compile on such a platform. This header includes nothing itself, so it
// assumes those macros were defined before it was included - confirm.
#ifdef IS_X86_64
NATIVE = WESTMERE
#elif defined(IS_ARM64)
NATIVE = ARM64
#endif
};
// Returns the architecture to use when the caller did not request one.
// NOTE(review): defined outside this header; presumably based on runtime CPU
// feature detection - confirm against the implementation.
architecture find_best_supported_architecture();
// Converts an architecture name (e.g. a command-line argument) to its enum
// value. Callers treat a result of architecture::UNSUPPORTED as "name not
// recognized".
// NOTE(review): the parameter is a non-const char*; presumably the string is
// not modified - confirm before passing string literals.
architecture parse_architecture(char *arch_name);
// backcompat: keeps the pre-rename spelling `Architecture` usable by existing code
using Architecture = architecture;
} // namespace simdjson
#endif // SIMDJSON_ARCHITECTURE_H

View File

@ -72,14 +72,14 @@ public:
//
// Returns != SUCCESS if the JSON is invalid.
//
static WARN_UNUSED ErrorValues try_parse(const uint8_t *buf, size_t len, document &dst, bool realloc_if_needed = true) noexcept;
static WARN_UNUSED ErrorValues try_parse(const char *buf, size_t len, document &dst, bool realloc_if_needed = true) {
static WARN_UNUSED error_code try_parse(const uint8_t *buf, size_t len, document &dst, bool realloc_if_needed = true) noexcept;
static WARN_UNUSED error_code try_parse(const char *buf, size_t len, document &dst, bool realloc_if_needed = true) {
return try_parse((const uint8_t *)buf, len, dst, realloc_if_needed);
}
static WARN_UNUSED ErrorValues try_parse(const std::string &s, document &dst, bool realloc_if_needed = true) {
static WARN_UNUSED error_code try_parse(const std::string &s, document &dst, bool realloc_if_needed = true) {
return try_parse(s.data(), s.length(), dst, realloc_if_needed);
}
static WARN_UNUSED ErrorValues try_parse(const padded_string &s, document &dst) {
static WARN_UNUSED error_code try_parse(const padded_string &s, document &dst) {
return try_parse(s.data(), s.length(), dst, false);
}
@ -101,15 +101,15 @@ namespace simdjson {
inline WARN_UNUSED document document::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) {
document::parser parser;
if (!parser.allocate_capacity(len)) {
throw invalid_json(ErrorValues(parser.error_code = MEMALLOC));
throw invalid_json(parser.error = MEMALLOC);
}
return parser.parse_new(buf, len, realloc_if_needed);
}
inline WARN_UNUSED ErrorValues document::try_parse(const uint8_t *buf, size_t len, document &dst, bool realloc_if_needed) noexcept {
inline WARN_UNUSED error_code document::try_parse(const uint8_t *buf, size_t len, document &dst, bool realloc_if_needed) noexcept {
document::parser parser;
if (!parser.allocate_capacity(len)) {
return ErrorValues(parser.error_code = MEMALLOC);
return parser.error = MEMALLOC;
}
return parser.try_parse_into(buf, len, dst, realloc_if_needed);
}

View File

@ -71,14 +71,14 @@ public:
//
// Returns != SUCCESS if the JSON is invalid.
//
WARN_UNUSED ErrorValues try_parse(const uint8_t *buf, size_t len, const document *& dst, bool realloc_if_needed = true) noexcept;
WARN_UNUSED ErrorValues try_parse(const char *buf, size_t len, const document *& dst, bool realloc_if_needed = true) noexcept {
WARN_UNUSED error_code try_parse(const uint8_t *buf, size_t len, const document *& dst, bool realloc_if_needed = true) noexcept;
WARN_UNUSED error_code try_parse(const char *buf, size_t len, const document *& dst, bool realloc_if_needed = true) noexcept {
return try_parse((const uint8_t *)buf, len, dst, realloc_if_needed);
}
WARN_UNUSED ErrorValues try_parse(const std::string &s, const document *&dst, bool realloc_if_needed = true) noexcept {
WARN_UNUSED error_code try_parse(const std::string &s, const document *&dst, bool realloc_if_needed = true) noexcept {
return try_parse(s.data(), s.length(), dst, realloc_if_needed);
}
WARN_UNUSED ErrorValues try_parse(const padded_string &s, const document *&dst) noexcept {
WARN_UNUSED error_code try_parse(const padded_string &s, const document *&dst) noexcept {
return try_parse(s.data(), s.length(), dst, false);
}
@ -89,14 +89,14 @@ public:
//
// Returns != SUCCESS if the JSON is invalid.
//
WARN_UNUSED ErrorValues try_parse_into(const uint8_t *buf, size_t len, document &dst, bool realloc_if_needed = true) noexcept;
WARN_UNUSED ErrorValues try_parse_into(const char *buf, size_t len, document &dst, bool realloc_if_needed = true) noexcept {
WARN_UNUSED error_code try_parse_into(const uint8_t *buf, size_t len, document &dst, bool realloc_if_needed = true) noexcept;
WARN_UNUSED error_code try_parse_into(const char *buf, size_t len, document &dst, bool realloc_if_needed = true) noexcept {
return try_parse_into((const uint8_t *)buf, len, dst, realloc_if_needed);
}
WARN_UNUSED ErrorValues try_parse_into(const std::string &s, document &dst, bool realloc_if_needed = true) noexcept {
WARN_UNUSED error_code try_parse_into(const std::string &s, document &dst, bool realloc_if_needed = true) noexcept {
return try_parse_into(s.data(), s.length(), dst, realloc_if_needed);
}
WARN_UNUSED ErrorValues try_parse_into(const padded_string &s, document &dst) noexcept {
WARN_UNUSED error_code try_parse_into(const padded_string &s, document &dst) noexcept {
return try_parse_into(s.data(), s.length(), dst, false);
}
@ -143,7 +143,7 @@ public:
uint8_t *current_string_buf_loc;
bool valid{false};
int error_code{simdjson::UNINITIALIZED};
error_code error{simdjson::UNINITIALIZED};
// Document we're writing to
document doc;
@ -173,12 +173,12 @@ public:
// this should be called when parsing (right before writing the tapes)
void init_stage2();
really_inline ErrorValues on_error(ErrorValues new_error_code) {
error_code = new_error_code;
really_inline error_code on_error(error_code new_error_code) {
error = new_error_code;
return new_error_code;
}
really_inline ErrorValues on_success(ErrorValues success_code) {
error_code = success_code;
really_inline error_code on_success(error_code success_code) {
error = success_code;
valid = true;
return success_code;
}
@ -276,11 +276,11 @@ public:
// - Returns CAPACITY if the document is too large
// - Returns MEMALLOC if we needed to allocate memory and could not
//
WARN_UNUSED ErrorValues init_parse(size_t len);
WARN_UNUSED error_code init_parse(size_t len);
const document &get_document() const {
if (!is_valid()) {
throw invalid_json(ErrorValues(error_code));
throw invalid_json(error);
}
return doc;
}
@ -323,7 +323,7 @@ private:
doc.tape[saved_loc] |= val;
}
WARN_UNUSED ErrorValues try_parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept;
WARN_UNUSED error_code try_parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept;
//
// Set the current capacity: the largest document this parser can support without reallocating.

43
include/simdjson/error.h Normal file
View File

@ -0,0 +1,43 @@
#ifndef SIMDJSON_ERROR_H
#define SIMDJSON_ERROR_H
#include <exception>
#include <string>
namespace simdjson {
// Status codes reported by the parser.
// SUCCESS is 0, so a code can be tested directly in a boolean context
// (non-zero means "not plain success"). This is an unscoped enum, so the
// enumerators are reachable as simdjson::SUCCESS, simdjson::MEMALLOC, etc.
// The numeric values follow declaration order; do not reorder the enumerators.
enum error_code {
SUCCESS = 0,
SUCCESS_AND_HAS_MORE, //No errors and buffer still has more data
CAPACITY, // This parser can't support a document that big
MEMALLOC, // Error allocating memory, most likely out of memory
TAPE_ERROR, // Something went wrong while writing to the tape (stage 2), this
// is a generic error
DEPTH_ERROR, // Your document exceeds the user-specified depth limitation
STRING_ERROR, // Problem while parsing a string
T_ATOM_ERROR, // Problem while parsing an atom starting with the letter 't'
F_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'f'
N_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'n'
NUMBER_ERROR, // Problem while parsing a number
UTF8_ERROR, // the input is not valid UTF-8
UNINITIALIZED, // unknown error, or uninitialized document
EMPTY, // no structural element found
UNESCAPED_CHARS, // found unescaped characters in a string.
UNCLOSED_STRING, // missing quote at the end
UNEXPECTED_ERROR // indicative of a bug in simdjson
};
// Returns the human-readable description of the given error code.
// The result is a reference rather than a copy; invalid_json::what() hands out
// a pointer into it, so the referenced string must outlive the call.
// NOTE(review): the definition lives outside this header; how codes without a
// registered message are handled should be confirmed there.
const std::string &error_message(error_code error);
struct invalid_json : public std::exception {
invalid_json(error_code _error) : error{_error} {}
const char *what() const noexcept { return error_message(error).c_str(); }
error_code error;
};
// backcompat
using ErrorValues = error_code;
inline const std::string &error_message(int error) { return error_message(error_code(error)); }
} // namespace simdjson
#endif // SIMDJSON_ERROR_H

View File

@ -12,9 +12,9 @@
namespace simdjson {
// json_parse_implementation is the generic function, it is specialized for
// various architectures, e.g., as
// json_parse_implementation<Architecture::HASWELL> or
// json_parse_implementation<Architecture::ARM64>
template <Architecture T>
// json_parse_implementation<architecture::HASWELL> or
// json_parse_implementation<architecture::ARM64>
template <architecture T>
int json_parse_implementation(const uint8_t *buf, size_t len, document::parser &parser,
bool realloc_if_needed = true) {
int result = parser.init_parse(len);

View File

@ -145,7 +145,7 @@ private:
size_t n_parsed_docs{0};
size_t n_bytes_parsed{0};
#ifdef SIMDJSON_THREADS_ENABLED
int stage1_is_ok_thread{0};
error_code stage1_is_ok_thread{SUCCESS};
std::thread stage_1_thread;
document::parser parser_thread;
#endif
@ -246,21 +246,21 @@ void find_the_best_supported_implementation() {
simdjson::instruction_set::SSE42 | simdjson::instruction_set::PCLMULQDQ;
if ((haswell_flags & supports) == haswell_flags) {
best_stage1 =
simdjson::find_structural_bits<simdjson::Architecture::HASWELL>;
best_stage2 = simdjson::unified_machine<simdjson::Architecture::HASWELL>;
simdjson::find_structural_bits<simdjson::architecture::HASWELL>;
best_stage2 = simdjson::unified_machine<simdjson::architecture::HASWELL>;
return;
}
if ((westmere_flags & supports) == westmere_flags) {
best_stage1 =
simdjson::find_structural_bits<simdjson::Architecture::WESTMERE>;
best_stage2 = simdjson::unified_machine<simdjson::Architecture::WESTMERE>;
simdjson::find_structural_bits<simdjson::architecture::WESTMERE>;
best_stage2 = simdjson::unified_machine<simdjson::architecture::WESTMERE>;
return;
}
#endif
#ifdef IS_ARM64
if (supports & instruction_set::NEON) {
best_stage1 = simdjson::find_structural_bits<Architecture::ARM64>;
best_stage2 = simdjson::unified_machine<Architecture::ARM64>;
best_stage1 = simdjson::find_structural_bits<architecture::ARM64>;
best_stage2 = simdjson::unified_machine<architecture::ARM64>;
return;
}
#endif
@ -293,18 +293,15 @@ int JsonStream<string_container>::json_parse(document::parser &parser) {
if (unlikely(parser.capacity() == 0)) {
const bool allocok = parser.allocate_capacity(_batch_size);
if (!allocok) {
parser.error_code = simdjson::MEMALLOC;
return parser.error_code;
return parser.error = simdjson::MEMALLOC;
}
} else if (unlikely(parser.capacity() < _batch_size)) {
parser.error_code = simdjson::CAPACITY;
return parser.error_code;
return parser.error = simdjson::CAPACITY;
}
if (unlikely(parser_thread.capacity() < _batch_size)) {
const bool allocok_thread = parser_thread.allocate_capacity(_batch_size);
if (!allocok_thread) {
parser.error_code = simdjson::MEMALLOC;
return parser.error_code;
return parser.error = simdjson::MEMALLOC;
}
}
if (unlikely(load_next_batch)) {
@ -313,19 +310,16 @@ int JsonStream<string_container>::json_parse(document::parser &parser) {
_batch_size = (std::min)(_batch_size, remaining());
_batch_size = trimmed_length_safe_utf8((const char *)buf(), _batch_size);
if (_batch_size == 0) {
parser.error_code = simdjson::UTF8_ERROR;
return parser.error_code;
return parser.error = simdjson::UTF8_ERROR;
}
int stage1_is_ok = best_stage1(buf(), _batch_size, parser, true);
auto stage1_is_ok = error_code(best_stage1(buf(), _batch_size, parser, true));
if (stage1_is_ok != simdjson::SUCCESS) {
parser.error_code = stage1_is_ok;
return parser.error_code;
return parser.error = stage1_is_ok;
}
size_t last_index = find_last_json_buf_idx(buf(), _batch_size, parser);
if (last_index == 0) {
if (parser.n_structural_indexes == 0) {
parser.error_code = simdjson::EMPTY;
return parser.error_code;
return parser.error = simdjson::EMPTY;
}
} else {
parser.n_structural_indexes = last_index + 1;
@ -335,8 +329,7 @@ int JsonStream<string_container>::json_parse(document::parser &parser) {
else {
stage_1_thread.join();
if (stage1_is_ok_thread != simdjson::SUCCESS) {
parser.error_code = stage1_is_ok_thread;
return parser.error_code;
return parser.error = stage1_is_ok_thread;
}
std::swap(parser.structural_indexes, parser_thread.structural_indexes);
parser.n_structural_indexes = parser_thread.n_structural_indexes;
@ -352,8 +345,7 @@ int JsonStream<string_container>::json_parse(document::parser &parser) {
_batch_size = trimmed_length_safe_utf8(
(const char *)(buf() + last_json_buffer_loc), _batch_size);
if (_batch_size == 0) {
parser.error_code = simdjson::UTF8_ERROR;
return parser.error_code;
return parser.error = simdjson::UTF8_ERROR;
}
// let us capture read-only variables
const char *const b = buf() + last_json_buffer_loc;
@ -362,7 +354,7 @@ int JsonStream<string_container>::json_parse(document::parser &parser) {
// this->stage1_is_ok_thread
// there is only one thread that may write to this value
stage_1_thread = std::thread([this, b, bs] {
this->stage1_is_ok_thread = best_stage1(b, bs, this->parser_thread, true);
this->stage1_is_ok_thread = error_code(best_stage1(b, bs, this->parser_thread, true));
});
}
}
@ -403,7 +395,7 @@ int JsonStream<string_container>::json_parse(document::parser &parser) {
n_bytes_parsed += current_buffer_loc;
_batch_size = (std::min)(_batch_size, remaining());
_batch_size = trimmed_length_safe_utf8((const char *)buf(), _batch_size);
auto stage1_is_ok = (ErrorValues)best_stage1(buf(), _batch_size, parser, true);
auto stage1_is_ok = (error_code)best_stage1(buf(), _batch_size, parser, true);
if (stage1_is_ok != simdjson::SUCCESS) {
return parser.on_error(stage1_is_ok);
}

View File

@ -17,54 +17,7 @@
#error simdjson requires a compiler compliant with the C++17 standard
#endif
#include <string>
#include "simdjson/architecture.h"
#include "simdjson/error.h"
namespace simdjson {
// Represents the minimal architecture that would support an implementation
enum class Architecture {
UNSUPPORTED,
WESTMERE,
HASWELL,
ARM64,
// TODO remove 'native' in favor of runtime dispatch?
// the 'native' enum class value should point at a good default on the current
// machine
#ifdef IS_X86_64
NATIVE = WESTMERE
#elif defined(IS_ARM64)
NATIVE = ARM64
#endif
};
Architecture find_best_supported_architecture();
Architecture parse_architecture(char *architecture);
enum ErrorValues {
SUCCESS = 0,
SUCCESS_AND_HAS_MORE, //No errors and buffer still has more data
CAPACITY, // This parser can't support a document that big
MEMALLOC, // Error allocating memory, most likely out of memory
TAPE_ERROR, // Something went wrong while writing to the tape (stage 2), this
// is a generic error
DEPTH_ERROR, // Your document exceeds the user-specified depth limitation
STRING_ERROR, // Problem while parsing a string
T_ATOM_ERROR, // Problem while parsing an atom starting with the letter 't'
F_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'f'
N_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'n'
NUMBER_ERROR, // Problem while parsing a number
UTF8_ERROR, // the input is not valid UTF-8
UNINITIALIZED, // unknown error, or uninitialized document
EMPTY, // no structural element found
UNESCAPED_CHARS, // found unescaped characters in a string.
UNCLOSED_STRING, // missing quote at the end
UNEXPECTED_ERROR // indicative of a bug in simdjson
};
const std::string &error_message(const int);
struct invalid_json : public std::exception {
invalid_json(ErrorValues _error_code) : error_code{_error_code} {}
const char *what() const noexcept { return error_message(error_code).c_str(); }
ErrorValues error_code;
};
} // namespace simdjson
#endif // SIMDJSON_H

View File

@ -10,26 +10,26 @@ namespace simdjson {
// The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
// you may want to call on a function like trimmed_length_safe_utf8.
// A function like find_last_json_buf_idx may also prove useful.
template <Architecture T = Architecture::NATIVE>
template <architecture T = architecture::NATIVE>
int find_structural_bits(const uint8_t *buf, size_t len, document::parser &parser, bool streaming);
// Setting the streaming parameter to true allows the find_structural_bits to tolerate unclosed strings.
// The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
// you may want to call on a function like trimmed_length_safe_utf8.
// A function like find_last_json_buf_idx may also prove useful.
template <Architecture T = Architecture::NATIVE>
template <architecture T = architecture::NATIVE>
int find_structural_bits(const char *buf, size_t len, document::parser &parser, bool streaming) {
return find_structural_bits<T>((const uint8_t *)buf, len, parser, streaming);
}
template <Architecture T = Architecture::NATIVE>
template <architecture T = architecture::NATIVE>
int find_structural_bits(const uint8_t *buf, size_t len, document::parser &parser) {
return find_structural_bits<T>(buf, len, parser, false);
}
template <Architecture T = Architecture::NATIVE>
template <architecture T = architecture::NATIVE>
int find_structural_bits(const char *buf, size_t len, document::parser &parser) {
return find_structural_bits<T>((const uint8_t *)buf, len, parser);
}

View File

@ -7,13 +7,11 @@
namespace simdjson {
void init_state_machine();
template <Architecture T = Architecture::NATIVE>
template <architecture T = architecture::NATIVE>
WARN_UNUSED int
unified_machine(const uint8_t *buf, size_t len, document::parser &parser);
template <Architecture T = Architecture::NATIVE>
template <architecture T = architecture::NATIVE>
WARN_UNUSED int
unified_machine(const char *buf, size_t len, document::parser &parser) {
return unified_machine<T>(reinterpret_cast<const uint8_t *>(buf), len, parser);
@ -22,11 +20,11 @@ unified_machine(const char *buf, size_t len, document::parser &parser) {
// Streaming
template <Architecture T = Architecture::NATIVE>
template <architecture T = architecture::NATIVE>
WARN_UNUSED int
unified_machine(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json);
template <Architecture T = Architecture::NATIVE>
template <architecture T = architecture::NATIVE>
int unified_machine(const char *buf, size_t len, document::parser &parser, size_t &next_json) {
return unified_machine<T>(reinterpret_cast<const uint8_t *>(buf), len, parser, next_json);
}

View File

@ -29,7 +29,7 @@ set(SIMDJSON_SRC
stage2_build_tape.cpp
document.cpp
document/parser.cpp
simdjson.cpp
error.cpp
)
# Load headers and sources

View File

@ -55,7 +55,7 @@ really_inline simd8<bool> must_be_continuation(simd8<uint8_t> prev1, simd8<uint8
namespace simdjson {
template <>
int find_structural_bits<Architecture::ARM64>(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) {
int find_structural_bits<architecture::ARM64>(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) {
return arm64::stage1::find_structural_bits<64>(buf, len, parser, streaming);
}

View File

@ -20,13 +20,13 @@ namespace simdjson {
template <>
WARN_UNUSED int
unified_machine<Architecture::ARM64>(const uint8_t *buf, size_t len, ParsedJson &pj) {
unified_machine<architecture::ARM64>(const uint8_t *buf, size_t len, ParsedJson &pj) {
return arm64::stage2::unified_machine(buf, len, pj);
}
template <>
WARN_UNUSED int
unified_machine<Architecture::ARM64>(const uint8_t *buf, size_t len, ParsedJson &pj, size_t &next_json) {
unified_machine<architecture::ARM64>(const uint8_t *buf, size_t len, ParsedJson &pj, size_t &next_json) {
return arm64::stage2::unified_machine(buf, len, pj, next_json);
}

View File

@ -4,17 +4,17 @@
namespace simdjson {
// This is the internal one all others end up calling
ErrorValues document::parser::try_parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept {
return (ErrorValues)json_parse(buf, len, *this, realloc_if_needed);
error_code document::parser::try_parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept {
return (error_code)json_parse(buf, len, *this, realloc_if_needed);
}
ErrorValues document::parser::try_parse(const uint8_t *buf, size_t len, const document *& dst, bool realloc_if_needed) noexcept {
error_code document::parser::try_parse(const uint8_t *buf, size_t len, const document *& dst, bool realloc_if_needed) noexcept {
auto result = try_parse(buf, len, realloc_if_needed);
dst = result == SUCCESS ? &doc : nullptr;
return result;
}
ErrorValues document::parser::try_parse_into(const uint8_t *buf, size_t len, document & dst, bool realloc_if_needed) noexcept {
error_code document::parser::try_parse_into(const uint8_t *buf, size_t len, document & dst, bool realloc_if_needed) noexcept {
auto result = try_parse(buf, len, realloc_if_needed);
if (result != SUCCESS) {
return result;
@ -22,13 +22,13 @@ ErrorValues document::parser::try_parse_into(const uint8_t *buf, size_t len, doc
// Take the document
dst = (document&&)doc;
valid = false; // Document has been taken; there is no valid document anymore
error_code = UNINITIALIZED;
error = UNINITIALIZED;
return result;
}
const document &document::parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) {
const document *dst;
ErrorValues result = try_parse(buf, len, dst, realloc_if_needed);
error_code result = try_parse(buf, len, dst, realloc_if_needed);
if (result) {
throw invalid_json(result);
}
@ -37,7 +37,7 @@ const document &document::parser::parse(const uint8_t *buf, size_t len, bool rea
document document::parser::parse_new(const uint8_t *buf, size_t len, bool realloc_if_needed) {
document dst;
ErrorValues result = try_parse_into(buf, len, dst, realloc_if_needed);
error_code result = try_parse_into(buf, len, dst, realloc_if_needed);
if (result) {
throw invalid_json(result);
}
@ -45,14 +45,14 @@ document document::parser::parse_new(const uint8_t *buf, size_t len, bool reallo
}
WARN_UNUSED
ErrorValues document::parser::init_parse(size_t len) {
error_code document::parser::init_parse(size_t len) {
if (len > capacity()) {
return ErrorValues(error_code = CAPACITY);
return error = CAPACITY;
}
// If the last doc was taken, we need to allocate a new one
if (!doc.tape) {
if (!doc.set_capacity(len)) {
return ErrorValues(error_code = MEMALLOC);
return error = MEMALLOC;
}
}
return SUCCESS;
@ -128,15 +128,15 @@ void document::parser::init_stage2() {
current_string_buf_loc = doc.string_buf.get();
current_loc = 0;
valid = false;
error_code = UNINITIALIZED;
error = UNINITIALIZED;
}
bool document::parser::is_valid() const { return valid; }
int document::parser::get_error_code() const { return error_code; }
int document::parser::get_error_code() const { return error; }
std::string document::parser::get_error_message() const {
return error_message(error_code);
return error_message(error);
}
WARN_UNUSED

View File

@ -1,7 +1,8 @@
#include "simdjson/simdjson.h"
#include "simdjson/error.h"
#include <map>
namespace simdjson {
const std::map<int, const std::string> error_strings = {
{SUCCESS, "No error"},
{SUCCESS_AND_HAS_MORE, "No error and buffer still has more data"},
@ -30,11 +31,12 @@ const std::map<int, const std::string> error_strings = {
const std::string unexpected_error_msg {"Unexpected error"};
// returns a string matching the error code
const std::string &error_message(const int error_code) {
auto keyvalue = error_strings.find(error_code);
const std::string &error_message(error_code code) {
auto keyvalue = error_strings.find(code);
if(keyvalue == error_strings.end()) {
return unexpected_error_msg;
}
return keyvalue->second;
}
} // namespace simdjson

View File

@ -80,7 +80,7 @@ public:
// This may detect errors as well, such as unclosed string and certain UTF-8 errors.
// if streaming is set to true, an unclosed string is allowed.
//
really_inline ErrorValues detect_errors_on_eof(bool streaming = false);
really_inline error_code detect_errors_on_eof(bool streaming = false);
//
// Return a mask of all string characters plus end quotes.
@ -213,7 +213,7 @@ really_inline uint64_t follows(const uint64_t match, const uint64_t filler, uint
return result;
}
really_inline ErrorValues json_structural_scanner::detect_errors_on_eof(bool streaming) {
really_inline error_code json_structural_scanner::detect_errors_on_eof(bool streaming) {
if ((prev_in_string) and (not streaming)) {
return UNCLOSED_STRING;
}
@ -399,7 +399,7 @@ int find_structural_bits(const uint8_t *buf, size_t len, document::parser &parse
json_structural_scanner scanner{parser.structural_indexes.get()};
scanner.scan<STEP_SIZE>(buf, len, utf8_checker);
// we might tolerate an unclosed string if streaming is true
ErrorValues error = scanner.detect_errors_on_eof(streaming);
error_code error = scanner.detect_errors_on_eof(streaming);
if (unlikely(error != SUCCESS)) {
return error;
}

View File

@ -198,7 +198,7 @@ struct structural_parser {
}
}
WARN_UNUSED really_inline ErrorValues finish() {
WARN_UNUSED really_inline error_code finish() {
// the string might not be NULL terminated.
if ( i + 1 != doc_parser.n_structural_indexes ) {
return doc_parser.on_error(TAPE_ERROR);
@ -214,7 +214,7 @@ struct structural_parser {
return doc_parser.on_success(SUCCESS);
}
WARN_UNUSED really_inline ErrorValues error() {
WARN_UNUSED really_inline error_code error() {
/* We do not need the next line because this is done by doc_parser.init_stage2(),
* pessimistically.
* doc_parser.is_valid = false;
@ -254,7 +254,7 @@ struct structural_parser {
}
}
WARN_UNUSED really_inline ErrorValues start(ret_address finish_state) {
WARN_UNUSED really_inline error_code start(ret_address finish_state) {
doc_parser.init_stage2(); // sets is_valid to false
if (len > doc_parser.capacity()) {
return CAPACITY;

View File

@ -4,7 +4,7 @@ struct streaming_structural_parser: structural_parser {
really_inline streaming_structural_parser(const uint8_t *_buf, size_t _len, ParsedJson &_pj, size_t _i) : structural_parser(_buf, _len, _pj, _i) {}
// override to add streaming
WARN_UNUSED really_inline ErrorValues start(ret_address finish_parser) {
WARN_UNUSED really_inline error_code start(ret_address finish_parser) {
doc_parser.init_stage2(); // sets is_valid to false
// Capacity ain't no thang for streaming, so we don't check it.
// Advance to the first character as soon as possible
@ -17,7 +17,7 @@ struct streaming_structural_parser: structural_parser {
}
// override to add streaming
WARN_UNUSED really_inline ErrorValues finish() {
WARN_UNUSED really_inline error_code finish() {
if ( i + 1 > doc_parser.n_structural_indexes ) {
return doc_parser.on_error(TAPE_ERROR);
}

View File

@ -172,7 +172,7 @@ struct utf8_checker {
}
}
really_inline ErrorValues errors() {
really_inline error_code errors() {
return this->has_error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
}
}; // struct utf8_checker

View File

@ -415,7 +415,7 @@ namespace utf8_validation {
}
}
really_inline ErrorValues errors() {
really_inline error_code errors() {
return this->error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
}

View File

@ -292,7 +292,7 @@ struct utf8_checker {
}
}
really_inline ErrorValues errors() {
really_inline error_code errors() {
return this->error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
}

View File

@ -174,7 +174,7 @@ struct utf8_checker {
}
}
really_inline ErrorValues errors() {
really_inline error_code errors() {
return this->has_error.any() ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
}
}; // struct utf8_checker

View File

@ -354,7 +354,7 @@ struct utf8_checker {
}
}
really_inline ErrorValues errors() {
really_inline error_code errors() {
return (this->special_case_errors.any_bits_set_anywhere() | this->length_errors) ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
}
}; // struct utf8_checker

View File

@ -55,7 +55,7 @@ TARGET_HASWELL
namespace simdjson {
template <>
int find_structural_bits<Architecture::HASWELL>(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) {
int find_structural_bits<architecture::HASWELL>(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) {
return haswell::stage1::find_structural_bits<128>(buf, len, parser, streaming);
}

View File

@ -23,13 +23,13 @@ namespace simdjson {
template <>
WARN_UNUSED int
unified_machine<Architecture::HASWELL>(const uint8_t *buf, size_t len, ParsedJson &pj) {
unified_machine<architecture::HASWELL>(const uint8_t *buf, size_t len, ParsedJson &pj) {
return haswell::stage2::unified_machine(buf, len, pj);
}
template <>
WARN_UNUSED int
unified_machine<Architecture::HASWELL>(const uint8_t *buf, size_t len, ParsedJson &pj, UNUSED size_t &next_json) {
unified_machine<architecture::HASWELL>(const uint8_t *buf, size_t len, ParsedJson &pj, UNUSED size_t &next_json) {
return haswell::stage2::unified_machine(buf, len, pj, next_json);
}

View File

@ -26,7 +26,7 @@ int json_parse(const char *buf, size_t len, ParsedJson &pj, bool realloc) {
realloc);
}
Architecture find_best_supported_architecture() {
architecture find_best_supported_architecture() {
constexpr uint32_t haswell_flags =
instruction_set::AVX2 | instruction_set::PCLMULQDQ |
instruction_set::BMI1 | instruction_set::BMI2;
@ -36,38 +36,38 @@ Architecture find_best_supported_architecture() {
uint32_t supports = detect_supported_architectures();
// Order from best to worst (within architecture)
if ((haswell_flags & supports) == haswell_flags)
return Architecture::HASWELL;
return architecture::HASWELL;
if ((westmere_flags & supports) == westmere_flags)
return Architecture::WESTMERE;
return architecture::WESTMERE;
if (supports & instruction_set::NEON)
return Architecture::ARM64;
return architecture::ARM64;
return Architecture::UNSUPPORTED;
return architecture::UNSUPPORTED;
}
Architecture parse_architecture(char *architecture) {
if (!strcmp(architecture, "HASWELL")) { return Architecture::HASWELL; }
if (!strcmp(architecture, "WESTMERE")) { return Architecture::WESTMERE; }
if (!strcmp(architecture, "ARM64")) { return Architecture::ARM64; }
return Architecture::UNSUPPORTED;
architecture parse_architecture(char *arch) {
if (!strcmp(arch, "HASWELL")) { return architecture::HASWELL; }
if (!strcmp(arch, "WESTMERE")) { return architecture::WESTMERE; }
if (!strcmp(arch, "ARM64")) { return architecture::ARM64; }
return architecture::UNSUPPORTED;
}
// Responsible to select the best json_parse implementation
int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj, bool realloc) {
Architecture best_implementation = find_best_supported_architecture();
architecture best_implementation = find_best_supported_architecture();
// Selecting the best implementation
switch (best_implementation) {
#ifdef IS_X86_64
case Architecture::HASWELL:
json_parse_ptr.store(&json_parse_implementation<Architecture::HASWELL>, std::memory_order_relaxed);
case architecture::HASWELL:
json_parse_ptr.store(&json_parse_implementation<architecture::HASWELL>, std::memory_order_relaxed);
break;
case Architecture::WESTMERE:
json_parse_ptr.store(&json_parse_implementation<Architecture::WESTMERE>, std::memory_order_relaxed);
case architecture::WESTMERE:
json_parse_ptr.store(&json_parse_implementation<architecture::WESTMERE>, std::memory_order_relaxed);
break;
#endif
#ifdef IS_ARM64
case Architecture::ARM64:
json_parse_ptr.store(&json_parse_implementation<Architecture::ARM64>, std::memory_order_relaxed);
case architecture::ARM64:
json_parse_ptr.store(&json_parse_implementation<architecture::ARM64>, std::memory_order_relaxed);
break;
#endif
default:

View File

@ -57,7 +57,7 @@ TARGET_WESTMERE
namespace simdjson {
template <>
int find_structural_bits<Architecture::WESTMERE>(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) {
int find_structural_bits<architecture::WESTMERE>(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) {
return westmere::stage1::find_structural_bits<64>(buf, len, parser, streaming);
}

View File

@ -23,13 +23,13 @@ namespace simdjson {
template <>
WARN_UNUSED int
unified_machine<Architecture::WESTMERE>(const uint8_t *buf, size_t len, ParsedJson &pj) {
unified_machine<architecture::WESTMERE>(const uint8_t *buf, size_t len, ParsedJson &pj) {
return westmere::stage2::unified_machine(buf, len, pj);
}
template <>
WARN_UNUSED int
unified_machine<Architecture::WESTMERE>(const uint8_t *buf, size_t len, ParsedJson &pj, size_t &next_json) {
unified_machine<architecture::WESTMERE>(const uint8_t *buf, size_t len, ParsedJson &pj, size_t &next_json) {
return westmere::stage2::unified_machine(buf, len, pj, next_json);
}