Merge branch 'master' of github.com:simdjson/simdjson

This commit is contained in:
Daniel Lemire 2020-08-19 10:36:01 -04:00
commit d5a44f9ad4
104 changed files with 1530 additions and 1169 deletions

View File

@ -26,7 +26,7 @@ static void recover_one_string(State& state) {
cerr << "could not parse string" << error << endl; cerr << "could not parse string" << error << endl;
return; return;
} }
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
std::string_view v; std::string_view v;
error = doc.get(v); error = doc.get(v);
if (error) { if (error) {
@ -59,7 +59,7 @@ static void serialize_twitter(State& state) {
return; return;
} }
size_t bytes = 0; size_t bytes = 0;
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
std::string serial = simdjson::minify(doc); std::string serial = simdjson::minify(doc);
bytes += serial.size(); bytes += serial.size();
benchmark::DoNotOptimize(serial); benchmark::DoNotOptimize(serial);
@ -83,7 +83,7 @@ static void numbers_scan(State& state) {
cerr << "could not read " << NUMBERS_JSON << " as an array: " << error << endl; cerr << "could not read " << NUMBERS_JSON << " as an array: " << error << endl;
return; return;
} }
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
std::vector<double> container; std::vector<double> container;
for (auto e : arr) { for (auto e : arr) {
double x; double x;
@ -105,7 +105,7 @@ static void numbers_size_scan(State& state) {
cerr << "could not read " << NUMBERS_JSON << " as an array: " << error << endl; cerr << "could not read " << NUMBERS_JSON << " as an array: " << error << endl;
return; return;
} }
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
std::vector<double> container; std::vector<double> container;
container.resize(arr.size()); container.resize(arr.size());
size_t pos = 0; size_t pos = 0;
@ -131,7 +131,7 @@ static void numbers_type_scan(State& state) {
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl; cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
return; return;
} }
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
std::vector<double> container; std::vector<double> container;
for (auto e : arr) { for (auto e : arr) {
dom::element_type actual_type = e.type(); dom::element_type actual_type = e.type();
@ -157,7 +157,7 @@ static void numbers_type_size_scan(State& state) {
cerr << "could not read " << NUMBERS_JSON << " as an array: " << error << endl; cerr << "could not read " << NUMBERS_JSON << " as an array: " << error << endl;
return; return;
} }
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
std::vector<double> container; std::vector<double> container;
container.resize(arr.size()); container.resize(arr.size());
size_t pos = 0; size_t pos = 0;
@ -182,7 +182,7 @@ static void numbers_load_scan(State& state) {
dom::parser parser; dom::parser parser;
dom::array arr; dom::array arr;
simdjson::error_code error; simdjson::error_code error;
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
// this may hit the disk, but probably just once // this may hit the disk, but probably just once
if ((error = parser.load(NUMBERS_JSON).get(arr))) { if ((error = parser.load(NUMBERS_JSON).get(arr))) {
cerr << "could not read " << NUMBERS_JSON << " as an array: " << error << endl; cerr << "could not read " << NUMBERS_JSON << " as an array: " << error << endl;
@ -205,7 +205,7 @@ static void numbers_load_size_scan(State& state) {
dom::parser parser; dom::parser parser;
dom::array arr; dom::array arr;
simdjson::error_code error; simdjson::error_code error;
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
// this may hit the disk, but probably just once // this may hit the disk, but probably just once
if ((error = parser.load(NUMBERS_JSON).get(arr))) { if ((error = parser.load(NUMBERS_JSON).get(arr))) {
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl; cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
@ -234,7 +234,7 @@ static void numbers_exceptions_scan(State& state) {
// Prints the number of results in twitter.json // Prints the number of results in twitter.json
dom::parser parser; dom::parser parser;
dom::array arr = parser.load(NUMBERS_JSON); dom::array arr = parser.load(NUMBERS_JSON);
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
std::vector<double> container; std::vector<double> container;
for (double x : arr) { for (double x : arr) {
container.push_back(x); container.push_back(x);
@ -249,7 +249,7 @@ static void numbers_exceptions_size_scan(State& state) {
// Prints the number of results in twitter.json // Prints the number of results in twitter.json
dom::parser parser; dom::parser parser;
dom::array arr = parser.load(NUMBERS_JSON); dom::array arr = parser.load(NUMBERS_JSON);
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
std::vector<double> container; std::vector<double> container;
container.resize(arr.size()); container.resize(arr.size());
size_t pos = 0; size_t pos = 0;
@ -269,7 +269,7 @@ static void numbers_type_exceptions_scan(State& state) {
// Prints the number of results in twitter.json // Prints the number of results in twitter.json
dom::parser parser; dom::parser parser;
dom::array arr = parser.load(NUMBERS_JSON); dom::array arr = parser.load(NUMBERS_JSON);
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
std::vector<double> container; std::vector<double> container;
for (auto e : arr) { for (auto e : arr) {
dom::element_type actual_type = e.type(); dom::element_type actual_type = e.type();
@ -288,7 +288,7 @@ static void numbers_type_exceptions_size_scan(State& state) {
// Prints the number of results in twitter.json // Prints the number of results in twitter.json
dom::parser parser; dom::parser parser;
dom::array arr = parser.load(NUMBERS_JSON); dom::array arr = parser.load(NUMBERS_JSON);
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
std::vector<double> container; std::vector<double> container;
container.resize(arr.size()); container.resize(arr.size());
size_t pos = 0; size_t pos = 0;
@ -309,7 +309,7 @@ BENCHMARK(numbers_type_exceptions_size_scan);
static void numbers_exceptions_load_scan(State& state) { static void numbers_exceptions_load_scan(State& state) {
// Prints the number of results in twitter.json // Prints the number of results in twitter.json
dom::parser parser; dom::parser parser;
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
// this may hit the disk, but probably just once // this may hit the disk, but probably just once
dom::array arr = parser.load(NUMBERS_JSON); dom::array arr = parser.load(NUMBERS_JSON);
std::vector<double> container; std::vector<double> container;
@ -325,7 +325,7 @@ BENCHMARK(numbers_exceptions_load_scan);
static void numbers_exceptions_load_size_scan(State& state) { static void numbers_exceptions_load_size_scan(State& state) {
// Prints the number of results in twitter.json // Prints the number of results in twitter.json
dom::parser parser; dom::parser parser;
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
// this may hit the disk, but probably just once // this may hit the disk, but probably just once
dom::array arr = parser.load(NUMBERS_JSON); dom::array arr = parser.load(NUMBERS_JSON);
std::vector<double> container; std::vector<double> container;
@ -346,7 +346,7 @@ static void twitter_count(State& state) {
// Prints the number of results in twitter.json // Prints the number of results in twitter.json
dom::parser parser; dom::parser parser;
dom::element doc = parser.load(TWITTER_JSON); dom::element doc = parser.load(TWITTER_JSON);
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
uint64_t result_count = doc["search_metadata"]["count"]; uint64_t result_count = doc["search_metadata"]["count"];
if (result_count != 100) { return; } if (result_count != 100) { return; }
} }
@ -359,7 +359,7 @@ static void iterator_twitter_count(State& state) {
// Prints the number of results in twitter.json // Prints the number of results in twitter.json
padded_string json = padded_string::load(TWITTER_JSON); padded_string json = padded_string::load(TWITTER_JSON);
ParsedJson pj = build_parsed_json(json); ParsedJson pj = build_parsed_json(json);
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
ParsedJson::Iterator iter(pj); ParsedJson::Iterator iter(pj);
// uint64_t result_count = doc["search_metadata"]["count"]; // uint64_t result_count = doc["search_metadata"]["count"];
if (!iter.move_to_key("search_metadata")) { return; } if (!iter.move_to_key("search_metadata")) { return; }
@ -377,7 +377,7 @@ static void twitter_default_profile(State& state) {
// Count unique users with a default profile. // Count unique users with a default profile.
dom::parser parser; dom::parser parser;
dom::element doc = parser.load(TWITTER_JSON); dom::element doc = parser.load(TWITTER_JSON);
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
set<string_view> default_users; set<string_view> default_users;
for (dom::object tweet : doc["statuses"]) { for (dom::object tweet : doc["statuses"]) {
dom::object user = tweet["user"]; dom::object user = tweet["user"];
@ -395,7 +395,7 @@ static void twitter_image_sizes(State& state) {
dom::parser parser; dom::parser parser;
dom::element doc = parser.load(TWITTER_JSON); dom::element doc = parser.load(TWITTER_JSON);
simdjson::error_code error; simdjson::error_code error;
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
set<tuple<uint64_t, uint64_t>> image_sizes; set<tuple<uint64_t, uint64_t>> image_sizes;
for (dom::object tweet : doc["statuses"]) { for (dom::object tweet : doc["statuses"]) {
dom::array media; dom::array media;
@ -420,7 +420,7 @@ static void error_code_twitter_count(State& state) noexcept {
simdjson::error_code error; simdjson::error_code error;
dom::element doc; dom::element doc;
if ((error = parser.load(TWITTER_JSON).get(doc))) { return; } if ((error = parser.load(TWITTER_JSON).get(doc))) { return; }
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
uint64_t value; uint64_t value;
if ((error = doc["search_metadata"]["count"].get(value))) { return; } if ((error = doc["search_metadata"]["count"].get(value))) { return; }
if (value != 100) { return; } if (value != 100) { return; }
@ -434,7 +434,7 @@ static void error_code_twitter_default_profile(State& state) noexcept {
simdjson::error_code error; simdjson::error_code error;
dom::element doc; dom::element doc;
if ((error = parser.load(TWITTER_JSON).get(doc))) { std::cerr << error << std::endl; return; } if ((error = parser.load(TWITTER_JSON).get(doc))) { std::cerr << error << std::endl; return; }
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
set<string_view> default_users; set<string_view> default_users;
dom::array tweets; dom::array tweets;
@ -464,7 +464,7 @@ static void iterator_twitter_default_profile(State& state) {
auto error = padded_string::load(TWITTER_JSON).get(json); auto error = padded_string::load(TWITTER_JSON).get(json);
if (error) { std::cerr << error << std::endl; return; } if (error) { std::cerr << error << std::endl; return; }
ParsedJson pj = build_parsed_json(json); ParsedJson pj = build_parsed_json(json);
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
set<string_view> default_users; set<string_view> default_users;
ParsedJson::Iterator iter(pj); ParsedJson::Iterator iter(pj);
@ -505,7 +505,7 @@ static void error_code_twitter_image_sizes(State& state) noexcept {
simdjson::error_code error; simdjson::error_code error;
dom::element doc; dom::element doc;
if ((error = parser.load(TWITTER_JSON).get(doc))) { std::cerr << error << std::endl; return; } if ((error = parser.load(TWITTER_JSON).get(doc))) { std::cerr << error << std::endl; return; }
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
set<tuple<uint64_t, uint64_t>> image_sizes; set<tuple<uint64_t, uint64_t>> image_sizes;
dom::array statuses; dom::array statuses;
if ((error = doc["statuses"].get(statuses))) { return; } if ((error = doc["statuses"].get(statuses))) { return; }
@ -537,7 +537,7 @@ static void iterator_twitter_image_sizes(State& state) {
auto error = padded_string::load(TWITTER_JSON).get(json); auto error = padded_string::load(TWITTER_JSON).get(json);
if (error) { std::cerr << error << std::endl; return; } if (error) { std::cerr << error << std::endl; return; }
ParsedJson pj = build_parsed_json(json); ParsedJson pj = build_parsed_json(json);
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
set<tuple<uint64_t, uint64_t>> image_sizes; set<tuple<uint64_t, uint64_t>> image_sizes;
ParsedJson::Iterator iter(pj); ParsedJson::Iterator iter(pj);
@ -602,7 +602,7 @@ static void print_json(State& state) noexcept {
int code = json_parse(json, parser); int code = json_parse(json, parser);
if (code) { cerr << error_message(code) << endl; return; } if (code) { cerr << error_message(code) << endl; return; }
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
std::stringstream s; std::stringstream s;
if (!parser.print_json(s)) { cerr << "print_json failed" << endl; return; } if (!parser.print_json(s)) { cerr << "print_json failed" << endl; return; }
} }

View File

@ -25,7 +25,7 @@ static void unicode_validate_twitter(State& state) {
return; return;
} }
size_t bytes = 0; size_t bytes = 0;
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
bool is_ok = simdjson::validate_utf8(docdata.data(), docdata.size()); bool is_ok = simdjson::validate_utf8(docdata.data(), docdata.size());
bytes += docdata.size(); bytes += docdata.size();
benchmark::DoNotOptimize(is_ok); benchmark::DoNotOptimize(is_ok);
@ -55,7 +55,7 @@ static void parse_twitter(State& state) {
return; return;
} }
size_t bytes = 0; size_t bytes = 0;
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
dom::element doc; dom::element doc;
bytes += docdata.size(); bytes += docdata.size();
if ((error = parser.parse(docdata).get(doc))) { if ((error = parser.parse(docdata).get(doc))) {
@ -90,7 +90,7 @@ static void parse_gsoc(State& state) {
return; return;
} }
size_t bytes = 0; size_t bytes = 0;
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
bytes += docdata.size(); bytes += docdata.size();
dom::element doc; dom::element doc;
if ((error = parser.parse(docdata).get(doc))) { if ((error = parser.parse(docdata).get(doc))) {
@ -116,7 +116,7 @@ SIMDJSON_DISABLE_DEPRECATED_WARNING
static void json_parse(State& state) { static void json_parse(State& state) {
ParsedJson pj; ParsedJson pj;
if (!pj.allocate_capacity(EMPTY_ARRAY.length())) { return; } if (!pj.allocate_capacity(EMPTY_ARRAY.length())) { return; }
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
auto error = json_parse(EMPTY_ARRAY, pj); auto error = json_parse(EMPTY_ARRAY, pj);
if (error) { return; } if (error) { return; }
} }
@ -126,7 +126,7 @@ BENCHMARK(json_parse);
static void parser_parse_error_code(State& state) { static void parser_parse_error_code(State& state) {
dom::parser parser; dom::parser parser;
if (parser.allocate(EMPTY_ARRAY.length())) { return; } if (parser.allocate(EMPTY_ARRAY.length())) { return; }
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
auto error = parser.parse(EMPTY_ARRAY).error(); auto error = parser.parse(EMPTY_ARRAY).error();
if (error) { return; } if (error) { return; }
} }
@ -138,9 +138,9 @@ BENCHMARK(parser_parse_error_code);
static void parser_parse_exception(State& state) { static void parser_parse_exception(State& state) {
dom::parser parser; dom::parser parser;
if (parser.allocate(EMPTY_ARRAY.length())) { return; } if (parser.allocate(EMPTY_ARRAY.length())) { return; }
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
try { try {
UNUSED dom::element doc = parser.parse(EMPTY_ARRAY); SIMDJSON_UNUSED dom::element doc = parser.parse(EMPTY_ARRAY);
} catch(simdjson_error &j) { } catch(simdjson_error &j) {
cout << j.what() << endl; cout << j.what() << endl;
return; return;
@ -154,7 +154,7 @@ BENCHMARK(parser_parse_exception);
SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_DEPRECATED_WARNING
static void build_parsed_json(State& state) { static void build_parsed_json(State& state) {
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
dom::parser parser = simdjson::build_parsed_json(EMPTY_ARRAY); dom::parser parser = simdjson::build_parsed_json(EMPTY_ARRAY);
if (!parser.valid) { return; } if (!parser.valid) { return; }
} }
@ -163,7 +163,7 @@ SIMDJSON_POP_DISABLE_WARNINGS
BENCHMARK(build_parsed_json); BENCHMARK(build_parsed_json);
static void document_parse_error_code(State& state) { static void document_parse_error_code(State& state) {
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
dom::parser parser; dom::parser parser;
auto error = parser.parse(EMPTY_ARRAY).error(); auto error = parser.parse(EMPTY_ARRAY).error();
if (error) { return; } if (error) { return; }
@ -174,10 +174,10 @@ BENCHMARK(document_parse_error_code);
#if SIMDJSON_EXCEPTIONS #if SIMDJSON_EXCEPTIONS
static void document_parse_exception(State& state) { static void document_parse_exception(State& state) {
for (UNUSED auto _ : state) { for (SIMDJSON_UNUSED auto _ : state) {
try { try {
dom::parser parser; dom::parser parser;
UNUSED dom::element doc = parser.parse(EMPTY_ARRAY); SIMDJSON_UNUSED dom::element doc = parser.parse(EMPTY_ARRAY);
} catch(simdjson_error &j) { } catch(simdjson_error &j) {
cout << j.what() << endl; cout << j.what() << endl;
return; return;

View File

@ -158,7 +158,7 @@ struct feature_benchmarker {
} }
really_inline void run_iterations(size_t iterations, bool stage1_only=false) { simdjson_really_inline void run_iterations(size_t iterations, bool stage1_only=false) {
struct7.run_iterations(iterations, stage1_only); struct7.run_iterations(iterations, stage1_only);
struct7_miss.run_iterations(iterations, stage1_only); struct7_miss.run_iterations(iterations, stage1_only);
struct7_full.run_iterations(iterations, stage1_only); struct7_full.run_iterations(iterations, stage1_only);

View File

@ -308,7 +308,7 @@ struct benchmarker {
return all_stages_without_allocation.iterations; return all_stages_without_allocation.iterations;
} }
really_inline void run_iteration(bool stage1_only, bool hotbuffers=false) { simdjson_really_inline void run_iteration(bool stage1_only, bool hotbuffers=false) {
// Allocate dom::parser // Allocate dom::parser
collector.start(); collector.start();
dom::parser parser; dom::parser parser;
@ -384,7 +384,7 @@ struct benchmarker {
loop << all_loop_count; loop << all_loop_count;
} }
really_inline void run_iterations(size_t iterations, bool stage1_only, bool hotbuffers=false) { simdjson_really_inline void run_iterations(size_t iterations, bool stage1_only, bool hotbuffers=false) {
for (size_t i = 0; i<iterations; i++) { for (size_t i = 0; i<iterations; i++) {
run_iteration(stage1_only, hotbuffers); run_iteration(stage1_only, hotbuffers);
} }

View File

@ -65,7 +65,7 @@ void print_vec(const std::vector<int64_t> &v) {
// clang-format on // clang-format on
really_inline void simdjson_recurse(std::vector<int64_t> & v, simdjson::dom::element element); simdjson_really_inline void simdjson_recurse(std::vector<int64_t> & v, simdjson::dom::element element);
void simdjson_recurse(std::vector<int64_t> & v, simdjson::dom::array array) { void simdjson_recurse(std::vector<int64_t> & v, simdjson::dom::array array) {
for (auto child : array) { for (auto child : array) {
simdjson_recurse(v, child); simdjson_recurse(v, child);
@ -97,8 +97,8 @@ void simdjson_recurse(std::vector<int64_t> & v, simdjson::dom::object object) {
} }
} }
} }
really_inline void simdjson_recurse(std::vector<int64_t> & v, simdjson::dom::element element) { simdjson_really_inline void simdjson_recurse(std::vector<int64_t> & v, simdjson::dom::element element) {
UNUSED simdjson::error_code error; SIMDJSON_UNUSED simdjson::error_code error;
simdjson::dom::array array; simdjson::dom::array array;
simdjson::dom::object object; simdjson::dom::object object;
if (not (error = element.get(array))) { if (not (error = element.get(array))) {
@ -108,7 +108,7 @@ really_inline void simdjson_recurse(std::vector<int64_t> & v, simdjson::dom::ele
} }
} }
really_inline std::vector<int64_t> simdjson_really_inline std::vector<int64_t>
simdjson_just_dom(simdjson::dom::element doc) { simdjson_just_dom(simdjson::dom::element doc) {
std::vector<int64_t> answer; std::vector<int64_t> answer;
simdjson_recurse(answer, doc); simdjson_recurse(answer, doc);
@ -116,7 +116,7 @@ simdjson_just_dom(simdjson::dom::element doc) {
return answer; return answer;
} }
really_inline std::vector<int64_t> simdjson_really_inline std::vector<int64_t>
simdjson_compute_stats(const simdjson::padded_string &p) { simdjson_compute_stats(const simdjson::padded_string &p) {
std::vector<int64_t> answer; std::vector<int64_t> answer;
simdjson::dom::parser parser; simdjson::dom::parser parser;
@ -129,7 +129,7 @@ simdjson_compute_stats(const simdjson::padded_string &p) {
return answer; return answer;
} }
really_inline simdjson::error_code simdjson_really_inline simdjson::error_code
simdjson_just_parse(const simdjson::padded_string &p) { simdjson_just_parse(const simdjson::padded_string &p) {
simdjson::dom::parser parser; simdjson::dom::parser parser;
return parser.parse(p).error(); return parser.parse(p).error();
@ -187,7 +187,7 @@ void sajson_traverse(std::vector<int64_t> &answer, const sajson::value &node) {
} }
} }
really_inline std::vector<int64_t> simdjson_really_inline std::vector<int64_t>
sasjon_just_dom(sajson::document &d) { sasjon_just_dom(sajson::document &d) {
std::vector<int64_t> answer; std::vector<int64_t> answer;
sajson_traverse(answer, d.get_root()); sajson_traverse(answer, d.get_root());
@ -195,7 +195,7 @@ sasjon_just_dom(sajson::document &d) {
return answer; return answer;
} }
really_inline std::vector<int64_t> simdjson_really_inline std::vector<int64_t>
sasjon_compute_stats(const simdjson::padded_string &p) { sasjon_compute_stats(const simdjson::padded_string &p) {
std::vector<int64_t> answer; std::vector<int64_t> answer;
char *buffer = (char *)malloc(p.size()); char *buffer = (char *)malloc(p.size());
@ -212,7 +212,7 @@ sasjon_compute_stats(const simdjson::padded_string &p) {
return answer; return answer;
} }
really_inline bool simdjson_really_inline bool
sasjon_just_parse(const simdjson::padded_string &p) { sasjon_just_parse(const simdjson::padded_string &p) {
char *buffer = (char *)malloc(p.size()); char *buffer = (char *)malloc(p.size());
memcpy(buffer, p.data(), p.size()); memcpy(buffer, p.data(), p.size());
@ -263,7 +263,7 @@ void rapid_traverse(std::vector<int64_t> &answer, const rapidjson::Value &v) {
} }
} }
really_inline std::vector<int64_t> simdjson_really_inline std::vector<int64_t>
rapid_just_dom(rapidjson::Document &d) { rapid_just_dom(rapidjson::Document &d) {
std::vector<int64_t> answer; std::vector<int64_t> answer;
rapid_traverse(answer, d); rapid_traverse(answer, d);
@ -271,7 +271,7 @@ rapid_just_dom(rapidjson::Document &d) {
return answer; return answer;
} }
really_inline std::vector<int64_t> simdjson_really_inline std::vector<int64_t>
rapid_compute_stats(const simdjson::padded_string &p) { rapid_compute_stats(const simdjson::padded_string &p) {
std::vector<int64_t> answer; std::vector<int64_t> answer;
char *buffer = (char *)malloc(p.size() + 1); char *buffer = (char *)malloc(p.size() + 1);
@ -289,7 +289,7 @@ rapid_compute_stats(const simdjson::padded_string &p) {
return answer; return answer;
} }
really_inline bool simdjson_really_inline bool
rapid_just_parse(const simdjson::padded_string &p) { rapid_just_parse(const simdjson::padded_string &p) {
char *buffer = (char *)malloc(p.size() + 1); char *buffer = (char *)malloc(p.size() + 1);
memcpy(buffer, p.data(), p.size()); memcpy(buffer, p.data(), p.size());

View File

@ -133,13 +133,13 @@ struct event_collector {
} }
#endif #endif
really_inline void start() { simdjson_really_inline void start() {
#if defined(__linux) #if defined(__linux)
linux_events.start(); linux_events.start();
#endif #endif
start_clock = steady_clock::now(); start_clock = steady_clock::now();
} }
really_inline event_count& end() { simdjson_really_inline event_count& end() {
time_point<steady_clock> end_clock = steady_clock::now(); time_point<steady_clock> end_clock = steady_clock::now();
#if defined(__linux) #if defined(__linux)
linux_events.end(count.event_counts); linux_events.end(count.event_counts);

View File

@ -5,7 +5,7 @@
#include <iostream> #include <iostream>
// Gigabyte: https://en.wikipedia.org/wiki/Gigabyte // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
never_inline simdjson_never_inline
double bench(std::string filename, simdjson::padded_string& p) { double bench(std::string filename, simdjson::padded_string& p) {
std::chrono::time_point<std::chrono::steady_clock> start_clock = std::chrono::time_point<std::chrono::steady_clock> start_clock =
std::chrono::steady_clock::now(); std::chrono::steady_clock::now();

View File

@ -48,7 +48,7 @@ void print_stat(const stat_t &s) {
s.true_count, s.false_count); s.true_count, s.false_count);
} }
really_inline void simdjson_process_atom(stat_t &s, simdjson_really_inline void simdjson_process_atom(stat_t &s,
simdjson::dom::element element) { simdjson::dom::element element) {
if (element.is<double>()) { if (element.is<double>()) {
s.number_count++; s.number_count++;
@ -102,7 +102,7 @@ void simdjson_recurse(stat_t &s, simdjson::dom::element element) {
} }
} }
never_inline stat_t simdjson_compute_stats(const simdjson::padded_string &p) { simdjson_never_inline stat_t simdjson_compute_stats(const simdjson::padded_string &p) {
stat_t s{}; stat_t s{};
simdjson::dom::parser parser; simdjson::dom::parser parser;
simdjson::dom::element doc; simdjson::dom::element doc;
@ -212,13 +212,13 @@ static void RapidGenStat(Stat &stat, const rapidjson::Value &v) {
break; break;
} }
} }
never_inline Stat rapidjson_compute_stats_ref(const rapidjson::Value &doc) { simdjson_never_inline Stat rapidjson_compute_stats_ref(const rapidjson::Value &doc) {
Stat s{}; Stat s{};
RapidGenStat(s, doc); RapidGenStat(s, doc);
return s; return s;
} }
never_inline Stat simdjson_never_inline Stat
simdjson_compute_stats_refplus(const simdjson::dom::element &doc) { simdjson_compute_stats_refplus(const simdjson::dom::element &doc) {
Stat s{}; Stat s{};
GenStatPlus(s, doc); GenStatPlus(s, doc);
@ -268,7 +268,7 @@ void sajson_traverse(stat_t &stats, const sajson::value &node) {
} }
} }
never_inline stat_t sasjon_compute_stats(const simdjson::padded_string &p) { simdjson_never_inline stat_t sasjon_compute_stats(const simdjson::padded_string &p) {
stat_t answer{}; stat_t answer{};
char *buffer = (char *)malloc(p.size()); char *buffer = (char *)malloc(p.size());
if (buffer == nullptr) { if (buffer == nullptr) {
@ -329,7 +329,7 @@ void rapid_traverse(stat_t &stats, const rapidjson::Value &v) {
} }
} }
never_inline stat_t rapid_compute_stats(const simdjson::padded_string &p) { simdjson_never_inline stat_t rapid_compute_stats(const simdjson::padded_string &p) {
stat_t answer{}; stat_t answer{};
char *buffer = (char *)malloc(p.size() + 1); char *buffer = (char *)malloc(p.size() + 1);
if (buffer == nullptr) { if (buffer == nullptr) {
@ -355,7 +355,7 @@ never_inline stat_t rapid_compute_stats(const simdjson::padded_string &p) {
return answer; return answer;
} }
never_inline stat_t simdjson_never_inline stat_t
rapid_accurate_compute_stats(const simdjson::padded_string &p) { rapid_accurate_compute_stats(const simdjson::padded_string &p) {
stat_t answer{}; stat_t answer{};
char *buffer = (char *)malloc(p.size() + 1); char *buffer = (char *)malloc(p.size() + 1);

View File

@ -53,7 +53,7 @@ using namespace rapidjson;
#ifdef ALLPARSER #ifdef ALLPARSER
// fastjson has a tricky interface // fastjson has a tricky interface
void on_json_error(void *, UNUSED const fastjson::ErrorContext &ec) { void on_json_error(void *, SIMDJSON_UNUSED const fastjson::ErrorContext &ec) {
// std::cerr<<"ERROR: "<<ec.mesg<<std::endl; // std::cerr<<"ERROR: "<<ec.mesg<<std::endl;
} }
bool fastjson_parse(const char *input) { bool fastjson_parse(const char *input) {
@ -65,7 +65,7 @@ bool fastjson_parse(const char *input) {
// end of fastjson stuff // end of fastjson stuff
#endif #endif
never_inline size_t sum_line_lengths(std::stringstream & is) { simdjson_never_inline size_t sum_line_lengths(std::stringstream & is) {
std::string line; std::string line;
size_t sumofalllinelengths{0}; size_t sumofalllinelengths{0};
while(std::getline(is, line)) { while(std::getline(is, line)) {

View File

@ -41,7 +41,7 @@ using stat_t = struct stat_s;
really_inline void simdjson_process_atom(stat_t &s, simdjson_really_inline void simdjson_process_atom(stat_t &s,
simdjson::dom::element element) { simdjson::dom::element element) {
if (element.is<int64_t>()) { if (element.is<int64_t>()) {
s.integer_count++; s.integer_count++;

View File

@ -287,7 +287,7 @@ JSON Pointer
------------ ------------
The simdjson library also supports [JSON pointer](https://tools.ietf.org/html/rfc6901) through the The simdjson library also supports [JSON pointer](https://tools.ietf.org/html/rfc6901) through the
at() method, letting you reach further down into the document in a single call: `at_pointer()` method, letting you reach further down into the document in a single call:
```c++ ```c++
auto cars_json = R"( [ auto cars_json = R"( [
@ -297,9 +297,39 @@ auto cars_json = R"( [
] )"_padded; ] )"_padded;
dom::parser parser; dom::parser parser;
dom::element cars = parser.parse(cars_json); dom::element cars = parser.parse(cars_json);
cout << cars.at("0/tire_pressure/1") << endl; // Prints 39.9 cout << cars.at_pointer("/0/tire_pressure/1") << endl; // Prints 39.9
``` ```
A JSON Pointer is a sequence of segments, each starting with the '/' character. Within arrays, an integer
index allows you to select the indexed node. Within objects, the string value of the key allows you to
select the value. If your keys contain the characters '/' or '~', they must be escaped as '~1' and
'~0' respectively. An empty JSON Pointer refers to the whole document.
We also extend the JSON Pointer support to include *relative* paths.
You can apply a JSON Pointer to any node, and the path is interpreted relative to that node, as if the current node were a whole JSON document.
Consider the following example:
```c++
auto cars_json = R"( [
{ "make": "Toyota", "model": "Camry", "year": 2018, "tire_pressure": [ 40.1, 39.9, 37.7, 40.4 ] },
{ "make": "Kia", "model": "Soul", "year": 2012, "tire_pressure": [ 30.1, 31.0, 28.6, 28.7 ] },
{ "make": "Toyota", "model": "Tercel", "year": 1999, "tire_pressure": [ 29.8, 30.0, 30.2, 30.5 ] }
] )"_padded;
dom::parser parser;
dom::element cars = parser.parse(cars_json);
cout << cars.at_pointer("/0/tire_pressure/1") << endl; // Prints 39.9
for (dom::element car_element : cars) {
dom::object car;
simdjson::error_code error;
if ((error = car_element.get(car))) { std::cerr << error << std::endl; return; }
double x = car.at_pointer("/tire_pressure/1");
cout << x << endl; // Prints 39.9, 31 and 30
}
```
Error Handling Error Handling
-------------- --------------

View File

@ -13,6 +13,6 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
if (error) { return 1; } if (error) { return 1; }
NulOStream os; NulOStream os;
UNUSED auto dumpstatus = elem.dump_raw_tape(os); SIMDJSON_UNUSED auto dumpstatus = elem.dump_raw_tape(os);
return 0; return 0;
} }

View File

@ -4,7 +4,7 @@
#include <string> #include <string>
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
simdjson::dom::parser parser; simdjson::dom::parser parser;
UNUSED simdjson::dom::element elem; SIMDJSON_UNUSED simdjson::dom::element elem;
UNUSED auto error = parser.parse(Data, Size).get(elem); SIMDJSON_UNUSED auto error = parser.parse(Data, Size).get(elem);
return 0; return 0;
} }

View File

@ -38,34 +38,34 @@ constexpr size_t DEFAULT_MAX_DEPTH = 1024;
#if defined(__GNUC__) #if defined(__GNUC__)
// Marks a block with a name so that MCA analysis can see it. // Marks a block with a name so that MCA analysis can see it.
#define BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name); #define SIMDJSON_BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name);
#define END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name); #define SIMDJSON_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name);
#define DEBUG_BLOCK(name, block) BEGIN_DEBUG_BLOCK(name); block; END_DEBUG_BLOCK(name); #define SIMDJSON_DEBUG_BLOCK(name, block) BEGIN_DEBUG_BLOCK(name); block; END_DEBUG_BLOCK(name);
#else #else
#define BEGIN_DEBUG_BLOCK(name) #define SIMDJSON_BEGIN_DEBUG_BLOCK(name)
#define END_DEBUG_BLOCK(name) #define SIMDJSON_END_DEBUG_BLOCK(name)
#define DEBUG_BLOCK(name, block) #define SIMDJSON_DEBUG_BLOCK(name, block)
#endif #endif
// Align to N-byte boundary // Align to N-byte boundary
#define ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1)) #define SIMDJSON_ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1))
#define ROUNDDOWN_N(a, n) ((a) & ~((n)-1)) #define SIMDJSON_ROUNDDOWN_N(a, n) ((a) & ~((n)-1))
#define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0) #define SIMDJSON_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0)
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) #if defined(SIMDJSON_REGULAR_VISUAL_STUDIO)
#define really_inline __forceinline #define simdjson_really_inline __forceinline
#define never_inline __declspec(noinline) #define simdjson_never_inline __declspec(noinline)
#define UNUSED #define SIMDJSON_UNUSED
#define WARN_UNUSED #define SIMDJSON_WARN_UNUSED
#ifndef likely #ifndef simdjson_likely
#define likely(x) x #define simdjson_likely(x) x
#endif #endif
#ifndef unlikely #ifndef simdjson_unlikely
#define unlikely(x) x #define simdjson_unlikely(x) x
#endif #endif
#define SIMDJSON_PUSH_DISABLE_WARNINGS __pragma(warning( push )) #define SIMDJSON_PUSH_DISABLE_WARNINGS __pragma(warning( push ))
@ -89,17 +89,17 @@ constexpr size_t DEFAULT_MAX_DEPTH = 1024;
#else // SIMDJSON_REGULAR_VISUAL_STUDIO #else // SIMDJSON_REGULAR_VISUAL_STUDIO
#define really_inline inline __attribute__((always_inline)) #define simdjson_really_inline inline __attribute__((always_inline))
#define never_inline inline __attribute__((noinline)) #define simdjson_never_inline inline __attribute__((noinline))
#define UNUSED __attribute__((unused)) #define SIMDJSON_UNUSED __attribute__((unused))
#define WARN_UNUSED __attribute__((warn_unused_result)) #define SIMDJSON_WARN_UNUSED __attribute__((warn_unused_result))
#ifndef likely #ifndef simdjson_likely
#define likely(x) __builtin_expect(!!(x), 1) #define simdjson_likely(x) __builtin_expect(!!(x), 1)
#endif #endif
#ifndef unlikely #ifndef simdjson_unlikely
#define unlikely(x) __builtin_expect(!!(x), 0) #define simdjson_unlikely(x) __builtin_expect(!!(x), 0)
#endif #endif
#define SIMDJSON_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push") #define SIMDJSON_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push")

View File

@ -19,7 +19,7 @@ class element;
class array { class array {
public: public:
/** Create a new, invalid array */ /** Create a new, invalid array */
really_inline array() noexcept; simdjson_really_inline array() noexcept;
class iterator { class iterator {
public: public:
@ -60,7 +60,7 @@ public:
iterator(const iterator&) noexcept = default; iterator(const iterator&) noexcept = default;
iterator& operator=(const iterator&) noexcept = default; iterator& operator=(const iterator&) noexcept = default;
private: private:
really_inline iterator(const internal::tape_ref &tape) noexcept; simdjson_really_inline iterator(const internal::tape_ref &tape) noexcept;
internal::tape_ref tape; internal::tape_ref tape;
friend class array; friend class array;
}; };
@ -84,12 +84,14 @@ public:
*/ */
inline size_t size() const noexcept; inline size_t size() const noexcept;
/** /**
* Get the value associated with the given JSON pointer. * Get the value associated with the given JSON pointer. We use the RFC 6901
* https://tools.ietf.org/html/rfc6901 standard, interpreting the current node
* as the root of its own JSON document.
* *
* dom::parser parser; * dom::parser parser;
* array a = parser.parse(R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded); * array a = parser.parse(R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded);
* a.at("0/foo/a/1") == 20 * a.at_pointer("/0/foo/a/1") == 20
* a.at("0")["foo"]["a"].at(1) == 20 * a.at_pointer("/0")["foo"]["a"].at(1) == 20
* *
* @return The value associated with the given JSON pointer, or: * @return The value associated with the given JSON pointer, or:
* - NO_SUCH_FIELD if a field does not exist in an object * - NO_SUCH_FIELD if a field does not exist in an object
@ -97,7 +99,7 @@ public:
* - INCORRECT_TYPE if a non-integer is used to access an array * - INCORRECT_TYPE if a non-integer is used to access an array
* - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed
*/ */
inline simdjson_result<element> at(const std::string_view &json_pointer) const noexcept; inline simdjson_result<element> at_pointer(std::string_view json_pointer) const noexcept;
/** /**
* Get the value at the given index. This function has linear-time complexity and * Get the value at the given index. This function has linear-time complexity and
@ -118,7 +120,7 @@ public:
inline simdjson_result<element> at(size_t index) const noexcept; inline simdjson_result<element> at(size_t index) const noexcept;
private: private:
really_inline array(const internal::tape_ref &tape) noexcept; simdjson_really_inline array(const internal::tape_ref &tape) noexcept;
internal::tape_ref tape; internal::tape_ref tape;
friend class element; friend class element;
friend struct simdjson_result<element>; friend struct simdjson_result<element>;
@ -143,11 +145,11 @@ inline std::ostream& operator<<(std::ostream& out, const array &value);
template<> template<>
struct simdjson_result<dom::array> : public internal::simdjson_result_base<dom::array> { struct simdjson_result<dom::array> : public internal::simdjson_result_base<dom::array> {
public: public:
really_inline simdjson_result() noexcept; ///< @private simdjson_really_inline simdjson_result() noexcept; ///< @private
really_inline simdjson_result(dom::array value) noexcept; ///< @private simdjson_really_inline simdjson_result(dom::array value) noexcept; ///< @private
really_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private
inline simdjson_result<dom::element> at(const std::string_view &json_pointer) const noexcept; inline simdjson_result<dom::element> at_pointer(std::string_view json_pointer) const noexcept;
inline simdjson_result<dom::element> at(size_t index) const noexcept; inline simdjson_result<dom::element> at(size_t index) const noexcept;
#if SIMDJSON_EXCEPTIONS #if SIMDJSON_EXCEPTIONS

View File

@ -80,13 +80,13 @@ public:
* error = parser.parse_many(json).get(docs); * error = parser.parse_many(json).get(docs);
* ``` * ```
*/ */
really_inline document_stream() noexcept; simdjson_really_inline document_stream() noexcept;
/** Move one document_stream to another. */ /** Move one document_stream to another. */
really_inline document_stream(document_stream &&other) noexcept = default; simdjson_really_inline document_stream(document_stream &&other) noexcept = default;
/** Move one document_stream to another. */ /** Move one document_stream to another. */
really_inline document_stream &operator=(document_stream &&other) noexcept = default; simdjson_really_inline document_stream &operator=(document_stream &&other) noexcept = default;
really_inline ~document_stream() noexcept; simdjson_really_inline ~document_stream() noexcept;
/** /**
* An iterator through a forward-only stream of documents. * An iterator through a forward-only stream of documents.
@ -96,7 +96,7 @@ public:
/** /**
* Get the current document (or error). * Get the current document (or error).
*/ */
really_inline simdjson_result<element> operator*() noexcept; simdjson_really_inline simdjson_result<element> operator*() noexcept;
/** /**
* Advance to the next document. * Advance to the next document.
*/ */
@ -105,7 +105,7 @@ public:
* Check if we're at the end yet. * Check if we're at the end yet.
* @param other the end iterator to compare to. * @param other the end iterator to compare to.
*/ */
really_inline bool operator!=(const iterator &other) const noexcept; simdjson_really_inline bool operator!=(const iterator &other) const noexcept;
/** /**
* @private * @private
* *
@ -121,7 +121,7 @@ public:
* may change in future versions of simdjson: we find the API somewhat * may change in future versions of simdjson: we find the API somewhat
* awkward and we would like to offer something friendlier. * awkward and we would like to offer something friendlier.
*/ */
really_inline size_t current_index() const noexcept; simdjson_really_inline size_t current_index() const noexcept;
/** /**
* @private * @private
* *
@ -141,10 +141,10 @@ public:
* may change in future versions of simdjson: we find the API somewhat * may change in future versions of simdjson: we find the API somewhat
* awkward and we would like to offer something friendlier. * awkward and we would like to offer something friendlier.
*/ */
really_inline std::string_view source() const noexcept; simdjson_really_inline std::string_view source() const noexcept;
private: private:
really_inline iterator(document_stream &s, bool finished) noexcept; simdjson_really_inline iterator(document_stream &s, bool finished) noexcept;
/** The document_stream we're iterating through. */ /** The document_stream we're iterating through. */
document_stream& stream; document_stream& stream;
/** Whether we're finished or not. */ /** Whether we're finished or not. */
@ -155,11 +155,11 @@ public:
/** /**
* Start iterating the documents in the stream. * Start iterating the documents in the stream.
*/ */
really_inline iterator begin() noexcept; simdjson_really_inline iterator begin() noexcept;
/** /**
* The end of the stream, for iterator comparison purposes. * The end of the stream, for iterator comparison purposes.
*/ */
really_inline iterator end() noexcept; simdjson_really_inline iterator end() noexcept;
private: private:
@ -169,8 +169,13 @@ private:
/** /**
* Construct a document_stream. Does not allocate or parse anything until the iterator is * Construct a document_stream. Does not allocate or parse anything until the iterator is
* used. * used.
*
* @param parser is a reference to the parser instance used to generate this document_stream
* @param buf is the raw byte buffer we need to process
* @param len is the length of the raw byte buffer in bytes
* @param batch_size is the size of the windows (must be greater than or equal to the size of the largest JSON document)
*/ */
really_inline document_stream( simdjson_really_inline document_stream(
dom::parser &parser, dom::parser &parser,
const uint8_t *buf, const uint8_t *buf,
size_t len, size_t len,
@ -231,6 +236,9 @@ private:
size_t doc_index{}; size_t doc_index{};
#ifdef SIMDJSON_THREADS_ENABLED #ifdef SIMDJSON_THREADS_ENABLED
/** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */
bool use_thread;
inline void load_from_stage1_thread() noexcept; inline void load_from_stage1_thread() noexcept;
/** Start a thread to run stage 1 on the next batch. */ /** Start a thread to run stage 1 on the next batch. */
@ -262,18 +270,18 @@ private:
template<> template<>
struct simdjson_result<dom::document_stream> : public internal::simdjson_result_base<dom::document_stream> { struct simdjson_result<dom::document_stream> : public internal::simdjson_result_base<dom::document_stream> {
public: public:
really_inline simdjson_result() noexcept; ///< @private simdjson_really_inline simdjson_result() noexcept; ///< @private
really_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private
really_inline simdjson_result(dom::document_stream &&value) noexcept; ///< @private simdjson_really_inline simdjson_result(dom::document_stream &&value) noexcept; ///< @private
#if SIMDJSON_EXCEPTIONS #if SIMDJSON_EXCEPTIONS
really_inline dom::document_stream::iterator begin() noexcept(false); simdjson_really_inline dom::document_stream::iterator begin() noexcept(false);
really_inline dom::document_stream::iterator end() noexcept(false); simdjson_really_inline dom::document_stream::iterator end() noexcept(false);
#else // SIMDJSON_EXCEPTIONS #else // SIMDJSON_EXCEPTIONS
[[deprecated("parse_many() and load_many() may return errors. Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] [[deprecated("parse_many() and load_many() may return errors. Use document_stream stream; error = parser.parse_many().get(doc); instead.")]]
really_inline dom::document_stream::iterator begin() noexcept; simdjson_really_inline dom::document_stream::iterator begin() noexcept;
[[deprecated("parse_many() and load_many() may return errors. Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] [[deprecated("parse_many() and load_many() may return errors. Use document_stream stream; error = parser.parse_many().get(doc); instead.")]]
really_inline dom::document_stream::iterator end() noexcept; simdjson_really_inline dom::document_stream::iterator end() noexcept;
#endif // SIMDJSON_EXCEPTIONS #endif // SIMDJSON_EXCEPTIONS
}; // struct simdjson_result<dom::document_stream> }; // struct simdjson_result<dom::document_stream>

View File

@ -38,10 +38,10 @@ enum class element_type {
class element { class element {
public: public:
/** Create a new, invalid element. */ /** Create a new, invalid element. */
really_inline element() noexcept; simdjson_really_inline element() noexcept;
/** The type of this element. */ /** The type of this element. */
really_inline element_type type() const noexcept; simdjson_really_inline element_type type() const noexcept;
/** /**
* Cast this element to an array. * Cast this element to an array.
@ -207,7 +207,7 @@ public:
* @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object
*/ */
template<typename T> template<typename T>
really_inline bool is() const noexcept; simdjson_really_inline bool is() const noexcept;
/** /**
* Get the value as the provided type (T). * Get the value as the provided type (T).
@ -244,7 +244,7 @@ public:
* @returns The error that occurred, or SUCCESS if there was no error. * @returns The error that occurred, or SUCCESS if there was no error.
*/ */
template<typename T> template<typename T>
WARN_UNUSED really_inline error_code get(T &value) const noexcept; SIMDJSON_WARN_UNUSED simdjson_really_inline error_code get(T &value) const noexcept;
/** /**
* Get the value as the provided type (T), setting error if it's not the given type. * Get the value as the provided type (T), setting error if it's not the given type.
@ -366,7 +366,7 @@ public:
* - NO_SUCH_FIELD if the field does not exist in the object * - NO_SUCH_FIELD if the field does not exist in the object
* - INCORRECT_TYPE if this is not an object * - INCORRECT_TYPE if this is not an object
*/ */
inline simdjson_result<element> operator[](const std::string_view &key) const noexcept; inline simdjson_result<element> operator[](std::string_view key) const noexcept;
/** /**
* Get the value associated with the given key. * Get the value associated with the given key.
@ -384,13 +384,20 @@ public:
inline simdjson_result<element> operator[](const char *key) const noexcept; inline simdjson_result<element> operator[](const char *key) const noexcept;
/** /**
* Get the value associated with the given JSON pointer. * Get the value associated with the given JSON pointer. We use the RFC 6901
* https://tools.ietf.org/html/rfc6901 standard.
* *
* dom::parser parser; * dom::parser parser;
* element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); * element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded);
* doc.at("/foo/a/1") == 20 * doc.at_pointer("/foo/a/1") == 20
* doc.at("/")["foo"]["a"].at(1) == 20 * doc.at_pointer("/foo")["a"].at(1) == 20
* doc.at("")["foo"]["a"].at(1) == 20 * doc.at_pointer("")["foo"]["a"].at(1) == 20
*
* It is allowed for a key to be the empty string:
*
* dom::parser parser;
* object obj = parser.parse(R"({ "": { "a": [ 10, 20, 30 ] }})"_padded);
* obj.at_pointer("//a/1") == 20
* *
* @return The value associated with the given JSON pointer, or: * @return The value associated with the given JSON pointer, or:
* - NO_SUCH_FIELD if a field does not exist in an object * - NO_SUCH_FIELD if a field does not exist in an object
@ -398,7 +405,30 @@ public:
* - INCORRECT_TYPE if a non-integer is used to access an array * - INCORRECT_TYPE if a non-integer is used to access an array
* - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed
*/ */
inline simdjson_result<element> at(const std::string_view &json_pointer) const noexcept; inline simdjson_result<element> at_pointer(const std::string_view json_pointer) const noexcept;
/**
*
* Version 0.4 of simdjson used an incorrect interpretation of the JSON Pointer standard
* and allowed the following:
*
* dom::parser parser;
* element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded);
* doc.at("foo/a/1") == 20
*
* Though it is intuitive, it is not compliant with RFC 6901
* https://tools.ietf.org/html/rfc6901
*
* For standard compliance, use the at_pointer function instead.
*
* @return The value associated with the given JSON pointer, or:
* - NO_SUCH_FIELD if a field does not exist in an object
* - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length
* - INCORRECT_TYPE if a non-integer is used to access an array
* - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed
*/
[[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]]
inline simdjson_result<element> at(const std::string_view json_pointer) const noexcept;
/** /**
* Get the value at the given index. * Get the value at the given index.
@ -420,7 +450,7 @@ public:
* @return The value associated with this field, or: * @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object * - NO_SUCH_FIELD if the field does not exist in the object
*/ */
inline simdjson_result<element> at_key(const std::string_view &key) const noexcept; inline simdjson_result<element> at_key(std::string_view key) const noexcept;
/** /**
* Get the value associated with the given key in a case-insensitive manner. * Get the value associated with the given key in a case-insensitive manner.
@ -430,13 +460,13 @@ public:
* @return The value associated with this field, or: * @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object * - NO_SUCH_FIELD if the field does not exist in the object
*/ */
inline simdjson_result<element> at_key_case_insensitive(const std::string_view &key) const noexcept; inline simdjson_result<element> at_key_case_insensitive(std::string_view key) const noexcept;
/** @private for debugging. Prints out the root element. */ /** @private for debugging. Prints out the root element. */
inline bool dump_raw_tape(std::ostream &out) const noexcept; inline bool dump_raw_tape(std::ostream &out) const noexcept;
private: private:
really_inline element(const internal::tape_ref &tape) noexcept; simdjson_really_inline element(const internal::tape_ref &tape) noexcept;
internal::tape_ref tape; internal::tape_ref tape;
friend class document; friend class document;
friend class object; friend class object;
@ -472,56 +502,58 @@ inline std::ostream& operator<<(std::ostream& out, element_type type);
template<> template<>
struct simdjson_result<dom::element> : public internal::simdjson_result_base<dom::element> { struct simdjson_result<dom::element> : public internal::simdjson_result_base<dom::element> {
public: public:
really_inline simdjson_result() noexcept; ///< @private simdjson_really_inline simdjson_result() noexcept; ///< @private
really_inline simdjson_result(dom::element &&value) noexcept; ///< @private simdjson_really_inline simdjson_result(dom::element &&value) noexcept; ///< @private
really_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private
really_inline simdjson_result<dom::element_type> type() const noexcept; simdjson_really_inline simdjson_result<dom::element_type> type() const noexcept;
template<typename T> template<typename T>
really_inline bool is() const noexcept; simdjson_really_inline bool is() const noexcept;
template<typename T> template<typename T>
really_inline simdjson_result<T> get() const noexcept; simdjson_really_inline simdjson_result<T> get() const noexcept;
template<typename T> template<typename T>
WARN_UNUSED really_inline error_code get(T &value) const noexcept; SIMDJSON_WARN_UNUSED simdjson_really_inline error_code get(T &value) const noexcept;
really_inline simdjson_result<dom::array> get_array() const noexcept; simdjson_really_inline simdjson_result<dom::array> get_array() const noexcept;
really_inline simdjson_result<dom::object> get_object() const noexcept; simdjson_really_inline simdjson_result<dom::object> get_object() const noexcept;
really_inline simdjson_result<const char *> get_c_str() const noexcept; simdjson_really_inline simdjson_result<const char *> get_c_str() const noexcept;
really_inline simdjson_result<size_t> get_string_length() const noexcept; simdjson_really_inline simdjson_result<size_t> get_string_length() const noexcept;
really_inline simdjson_result<std::string_view> get_string() const noexcept; simdjson_really_inline simdjson_result<std::string_view> get_string() const noexcept;
really_inline simdjson_result<int64_t> get_int64() const noexcept; simdjson_really_inline simdjson_result<int64_t> get_int64() const noexcept;
really_inline simdjson_result<uint64_t> get_uint64() const noexcept; simdjson_really_inline simdjson_result<uint64_t> get_uint64() const noexcept;
really_inline simdjson_result<double> get_double() const noexcept; simdjson_really_inline simdjson_result<double> get_double() const noexcept;
really_inline simdjson_result<bool> get_bool() const noexcept; simdjson_really_inline simdjson_result<bool> get_bool() const noexcept;
really_inline bool is_array() const noexcept; simdjson_really_inline bool is_array() const noexcept;
really_inline bool is_object() const noexcept; simdjson_really_inline bool is_object() const noexcept;
really_inline bool is_string() const noexcept; simdjson_really_inline bool is_string() const noexcept;
really_inline bool is_int64() const noexcept; simdjson_really_inline bool is_int64() const noexcept;
really_inline bool is_uint64() const noexcept; simdjson_really_inline bool is_uint64() const noexcept;
really_inline bool is_double() const noexcept; simdjson_really_inline bool is_double() const noexcept;
really_inline bool is_bool() const noexcept; simdjson_really_inline bool is_bool() const noexcept;
really_inline bool is_null() const noexcept; simdjson_really_inline bool is_null() const noexcept;
really_inline simdjson_result<dom::element> operator[](const std::string_view &key) const noexcept; simdjson_really_inline simdjson_result<dom::element> operator[](std::string_view key) const noexcept;
really_inline simdjson_result<dom::element> operator[](const char *key) const noexcept; simdjson_really_inline simdjson_result<dom::element> operator[](const char *key) const noexcept;
really_inline simdjson_result<dom::element> at(const std::string_view &json_pointer) const noexcept; simdjson_really_inline simdjson_result<dom::element> at_pointer(const std::string_view json_pointer) const noexcept;
really_inline simdjson_result<dom::element> at(size_t index) const noexcept; [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]]
really_inline simdjson_result<dom::element> at_key(const std::string_view &key) const noexcept; simdjson_really_inline simdjson_result<dom::element> at(const std::string_view json_pointer) const noexcept;
really_inline simdjson_result<dom::element> at_key_case_insensitive(const std::string_view &key) const noexcept; simdjson_really_inline simdjson_result<dom::element> at(size_t index) const noexcept;
simdjson_really_inline simdjson_result<dom::element> at_key(std::string_view key) const noexcept;
simdjson_really_inline simdjson_result<dom::element> at_key_case_insensitive(std::string_view key) const noexcept;
#if SIMDJSON_EXCEPTIONS #if SIMDJSON_EXCEPTIONS
really_inline operator bool() const noexcept(false); simdjson_really_inline operator bool() const noexcept(false);
really_inline explicit operator const char*() const noexcept(false); simdjson_really_inline explicit operator const char*() const noexcept(false);
really_inline operator std::string_view() const noexcept(false); simdjson_really_inline operator std::string_view() const noexcept(false);
really_inline operator uint64_t() const noexcept(false); simdjson_really_inline operator uint64_t() const noexcept(false);
really_inline operator int64_t() const noexcept(false); simdjson_really_inline operator int64_t() const noexcept(false);
really_inline operator double() const noexcept(false); simdjson_really_inline operator double() const noexcept(false);
really_inline operator dom::array() const noexcept(false); simdjson_really_inline operator dom::array() const noexcept(false);
really_inline operator dom::object() const noexcept(false); simdjson_really_inline operator dom::object() const noexcept(false);
really_inline dom::array::iterator begin() const noexcept(false); simdjson_really_inline dom::array::iterator begin() const noexcept(false);
really_inline dom::array::iterator end() const noexcept(false); simdjson_really_inline dom::array::iterator end() const noexcept(false);
#endif // SIMDJSON_EXCEPTIONS #endif // SIMDJSON_EXCEPTIONS
}; };
@ -537,7 +569,7 @@ public:
* underlying output stream, that error will be propagated (simdjson_error will not be * underlying output stream, that error will be propagated (simdjson_error will not be
* thrown). * thrown).
*/ */
really_inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::element> &value) noexcept(false); simdjson_really_inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::element> &value) noexcept(false);
#endif #endif
} // namespace simdjson } // namespace simdjson

View File

@ -59,7 +59,7 @@ inline int json_parse(const padded_string &s, dom::parser &parser) noexcept {
} }
[[deprecated("Use parser.parse() instead")]] [[deprecated("Use parser.parse() instead")]]
WARN_UNUSED inline dom::parser build_parsed_json(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept { SIMDJSON_WARN_UNUSED inline dom::parser build_parsed_json(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept {
dom::parser parser; dom::parser parser;
error_code code = parser.parse(buf, len, realloc_if_needed).error(); error_code code = parser.parse(buf, len, realloc_if_needed).error();
// The deprecated json_parse API is a signal that the user plans to *use* the error code / valid // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid
@ -71,7 +71,7 @@ WARN_UNUSED inline dom::parser build_parsed_json(const uint8_t *buf, size_t len,
return parser; return parser;
} }
[[deprecated("Use parser.parse() instead")]] [[deprecated("Use parser.parse() instead")]]
WARN_UNUSED inline dom::parser build_parsed_json(const char *buf, size_t len, bool realloc_if_needed = true) noexcept { SIMDJSON_WARN_UNUSED inline dom::parser build_parsed_json(const char *buf, size_t len, bool realloc_if_needed = true) noexcept {
dom::parser parser; dom::parser parser;
error_code code = parser.parse(buf, len, realloc_if_needed).error(); error_code code = parser.parse(buf, len, realloc_if_needed).error();
// The deprecated json_parse API is a signal that the user plans to *use* the error code / valid // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid
@ -83,7 +83,7 @@ WARN_UNUSED inline dom::parser build_parsed_json(const char *buf, size_t len, bo
return parser; return parser;
} }
[[deprecated("Use parser.parse() instead")]] [[deprecated("Use parser.parse() instead")]]
WARN_UNUSED inline dom::parser build_parsed_json(const std::string &s, bool realloc_if_needed = true) noexcept { SIMDJSON_WARN_UNUSED inline dom::parser build_parsed_json(const std::string &s, bool realloc_if_needed = true) noexcept {
dom::parser parser; dom::parser parser;
error_code code = parser.parse(s.data(), s.length(), realloc_if_needed).error(); error_code code = parser.parse(s.data(), s.length(), realloc_if_needed).error();
// The deprecated json_parse API is a signal that the user plans to *use* the error code / valid // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid
@ -95,7 +95,7 @@ WARN_UNUSED inline dom::parser build_parsed_json(const std::string &s, bool real
return parser; return parser;
} }
[[deprecated("Use parser.parse() instead")]] [[deprecated("Use parser.parse() instead")]]
WARN_UNUSED inline dom::parser build_parsed_json(const padded_string &s) noexcept { SIMDJSON_WARN_UNUSED inline dom::parser build_parsed_json(const padded_string &s) noexcept {
dom::parser parser; dom::parser parser;
error_code code = parser.parse(s).error(); error_code code = parser.parse(s).error();
// The deprecated json_parse API is a signal that the user plans to *use* the error code / valid // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid

View File

@ -20,7 +20,7 @@ class key_value_pair;
class object { class object {
public: public:
/** Create a new, invalid object */ /** Create a new, invalid object */
really_inline object() noexcept; simdjson_really_inline object() noexcept;
class iterator { class iterator {
public: public:
@ -70,13 +70,13 @@ public:
* Returns true if the key in this key/value pair is equal * Returns true if the key in this key/value pair is equal
* to the provided string_view. * to the provided string_view.
*/ */
inline bool key_equals(const std::string_view & o) const noexcept; inline bool key_equals(std::string_view o) const noexcept;
/** /**
* Returns true if the key in this key/value pair is equal * Returns true if the key in this key/value pair is equal
* to the provided string_view in a case-insensitive manner. * to the provided string_view in a case-insensitive manner.
* Case comparisons may only be handled correctly for ASCII strings. * Case comparisons may only be handled correctly for ASCII strings.
*/ */
inline bool key_equals_case_insensitive(const std::string_view & o) const noexcept; inline bool key_equals_case_insensitive(std::string_view o) const noexcept;
/** /**
* Get the key of this key/value pair. * Get the key of this key/value pair.
*/ */
@ -90,7 +90,7 @@ public:
iterator(const iterator&) noexcept = default; iterator(const iterator&) noexcept = default;
iterator& operator=(const iterator&) noexcept = default; iterator& operator=(const iterator&) noexcept = default;
private: private:
really_inline iterator(const internal::tape_ref &tape) noexcept; simdjson_really_inline iterator(const internal::tape_ref &tape) noexcept;
internal::tape_ref tape; internal::tape_ref tape;
@ -130,7 +130,7 @@ public:
* - NO_SUCH_FIELD if the field does not exist in the object * - NO_SUCH_FIELD if the field does not exist in the object
* - INCORRECT_TYPE if this is not an object * - INCORRECT_TYPE if this is not an object
*/ */
inline simdjson_result<element> operator[](const std::string_view &key) const noexcept; inline simdjson_result<element> operator[](std::string_view key) const noexcept;
/** /**
* Get the value associated with the given key. * Get the value associated with the given key.
@ -150,12 +150,21 @@ public:
inline simdjson_result<element> operator[](const char *key) const noexcept; inline simdjson_result<element> operator[](const char *key) const noexcept;
/** /**
* Get the value associated with the given JSON pointer. * Get the value associated with the given JSON pointer. We use the RFC 6901
* https://tools.ietf.org/html/rfc6901 standard, interpreting the current node
* as the root of its own JSON document.
* *
* dom::parser parser; * dom::parser parser;
* object obj = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); * object obj = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded);
* obj.at("foo/a/1") == 20 * obj.at_pointer("/foo/a/1") == 20
* obj.at("foo")["a"].at(1) == 20 * obj.at_pointer("/foo")["a"].at(1) == 20
*
* It is allowed for a key to be the empty string:
*
* dom::parser parser;
* object obj = parser.parse(R"({ "": { "a": [ 10, 20, 30 ] }})"_padded);
* obj.at_pointer("//a/1") == 20
* obj.at_pointer("/")["a"].at(1) == 20
* *
* @return The value associated with the given JSON pointer, or: * @return The value associated with the given JSON pointer, or:
* - NO_SUCH_FIELD if a field does not exist in an object * - NO_SUCH_FIELD if a field does not exist in an object
@ -163,7 +172,7 @@ public:
* - INCORRECT_TYPE if a non-integer is used to access an array * - INCORRECT_TYPE if a non-integer is used to access an array
* - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed
*/ */
inline simdjson_result<element> at(const std::string_view &json_pointer) const noexcept; inline simdjson_result<element> at_pointer(std::string_view json_pointer) const noexcept;
/** /**
* Get the value associated with the given key. * Get the value associated with the given key.
@ -179,7 +188,7 @@ public:
* @return The value associated with this field, or: * @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object * - NO_SUCH_FIELD if the field does not exist in the object
*/ */
inline simdjson_result<element> at_key(const std::string_view &key) const noexcept; inline simdjson_result<element> at_key(std::string_view key) const noexcept;
/** /**
* Get the value associated with the given key in a case-insensitive manner. * Get the value associated with the given key in a case-insensitive manner.
@ -192,10 +201,10 @@ public:
* @return The value associated with this field, or: * @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object * - NO_SUCH_FIELD if the field does not exist in the object
*/ */
inline simdjson_result<element> at_key_case_insensitive(const std::string_view &key) const noexcept; inline simdjson_result<element> at_key_case_insensitive(std::string_view key) const noexcept;
private: private:
really_inline object(const internal::tape_ref &tape) noexcept; simdjson_really_inline object(const internal::tape_ref &tape) noexcept;
internal::tape_ref tape; internal::tape_ref tape;
@ -216,7 +225,7 @@ public:
element value; element value;
private: private:
really_inline key_value_pair(const std::string_view &_key, element _value) noexcept; simdjson_really_inline key_value_pair(std::string_view _key, element _value) noexcept;
friend class object; friend class object;
}; };
@ -247,15 +256,15 @@ inline std::ostream& operator<<(std::ostream& out, const key_value_pair &value);
template<> template<>
struct simdjson_result<dom::object> : public internal::simdjson_result_base<dom::object> { struct simdjson_result<dom::object> : public internal::simdjson_result_base<dom::object> {
public: public:
really_inline simdjson_result() noexcept; ///< @private simdjson_really_inline simdjson_result() noexcept; ///< @private
really_inline simdjson_result(dom::object value) noexcept; ///< @private simdjson_really_inline simdjson_result(dom::object value) noexcept; ///< @private
really_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private
inline simdjson_result<dom::element> operator[](const std::string_view &key) const noexcept; inline simdjson_result<dom::element> operator[](std::string_view key) const noexcept;
inline simdjson_result<dom::element> operator[](const char *key) const noexcept; inline simdjson_result<dom::element> operator[](const char *key) const noexcept;
inline simdjson_result<dom::element> at(const std::string_view &json_pointer) const noexcept; inline simdjson_result<dom::element> at_pointer(std::string_view json_pointer) const noexcept;
inline simdjson_result<dom::element> at_key(const std::string_view &key) const noexcept; inline simdjson_result<dom::element> at_key(std::string_view key) const noexcept;
inline simdjson_result<dom::element> at_key_case_insensitive(const std::string_view &key) const noexcept; inline simdjson_result<dom::element> at_key_case_insensitive(std::string_view key) const noexcept;
#if SIMDJSON_EXCEPTIONS #if SIMDJSON_EXCEPTIONS
inline dom::object::iterator begin() const noexcept(false); inline dom::object::iterator begin() const noexcept(false);

View File

@ -46,20 +46,20 @@ public:
* to allocate an initial capacity, call allocate() after constructing the parser. * to allocate an initial capacity, call allocate() after constructing the parser.
* Defaults to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process). * Defaults to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process).
*/ */
really_inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; simdjson_really_inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept;
/** /**
* Take another parser's buffers and state. * Take another parser's buffers and state.
* *
* @param other The parser to take. Its capacity is zeroed. * @param other The parser to take. Its capacity is zeroed.
*/ */
really_inline parser(parser &&other) noexcept; simdjson_really_inline parser(parser &&other) noexcept;
parser(const parser &) = delete; ///< @private Disallow copying parser(const parser &) = delete; ///< @private Disallow copying
/** /**
* Take another parser's buffers and state. * Take another parser's buffers and state.
* *
* @param other The parser to take. Its capacity is zeroed. * @param other The parser to take. Its capacity is zeroed.
*/ */
really_inline parser &operator=(parser &&other) noexcept; simdjson_really_inline parser &operator=(parser &&other) noexcept;
parser &operator=(const parser &) = delete; ///< @private Disallow copying parser &operator=(const parser &) = delete; ///< @private Disallow copying
/** Deallocate the JSON parser. */ /** Deallocate the JSON parser. */
@ -129,17 +129,17 @@ public:
inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept; inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept;
inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete; inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete;
/** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
really_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) & noexcept; simdjson_really_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) & noexcept;
really_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) && =delete; simdjson_really_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) && =delete;
/** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
really_inline simdjson_result<element> parse(const std::string &s) & noexcept; simdjson_really_inline simdjson_result<element> parse(const std::string &s) & noexcept;
really_inline simdjson_result<element> parse(const std::string &s) && =delete; simdjson_really_inline simdjson_result<element> parse(const std::string &s) && =delete;
/** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
really_inline simdjson_result<element> parse(const padded_string &s) & noexcept; simdjson_really_inline simdjson_result<element> parse(const padded_string &s) & noexcept;
really_inline simdjson_result<element> parse(const padded_string &s) && =delete; simdjson_really_inline simdjson_result<element> parse(const padded_string &s) && =delete;
/** @private We do not want to allow implicit conversion from C string to std::string. */ /** @private We do not want to allow implicit conversion from C string to std::string. */
really_inline simdjson_result<element> parse(const char *buf) noexcept = delete; simdjson_really_inline simdjson_result<element> parse(const char *buf) noexcept = delete;
/** /**
* Load a file containing many JSON documents. * Load a file containing many JSON documents.
@ -291,7 +291,7 @@ public:
* @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH.
* @return The error, if there is one. * @return The error, if there is one.
*/ */
WARN_UNUSED inline error_code allocate(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; SIMDJSON_WARN_UNUSED inline error_code allocate(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept;
/** /**
* @private deprecated because it returns bool instead of error_code, which is our standard for * @private deprecated because it returns bool instead of error_code, which is our standard for
@ -305,14 +305,14 @@ public:
* @return true if successful, false if allocation failed. * @return true if successful, false if allocation failed.
*/ */
[[deprecated("Use allocate() instead.")]] [[deprecated("Use allocate() instead.")]]
WARN_UNUSED inline bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; SIMDJSON_WARN_UNUSED inline bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept;
/** /**
* The largest document this parser can support without reallocating. * The largest document this parser can support without reallocating.
* *
* @return Current capacity, in bytes. * @return Current capacity, in bytes.
*/ */
really_inline size_t capacity() const noexcept; simdjson_really_inline size_t capacity() const noexcept;
/** /**
* The largest document this parser can automatically support. * The largest document this parser can automatically support.
@ -321,14 +321,14 @@ public:
* *
* @return Maximum capacity, in bytes. * @return Maximum capacity, in bytes.
*/ */
really_inline size_t max_capacity() const noexcept; simdjson_really_inline size_t max_capacity() const noexcept;
/** /**
* The maximum level of nested objects and arrays supported by this parser. * The maximum level of nested objects and arrays supported by this parser.
* *
* @return Maximum depth, in nesting levels. * @return Maximum depth, in nesting levels.
*/ */
really_inline size_t max_depth() const noexcept; simdjson_really_inline size_t max_depth() const noexcept;
/** /**
* Set max_capacity. This is the largest document this parser can automatically support. * Set max_capacity. This is the largest document this parser can automatically support.
@ -340,8 +340,16 @@ public:
* *
* @param max_capacity The new maximum capacity, in bytes. * @param max_capacity The new maximum capacity, in bytes.
*/ */
really_inline void set_max_capacity(size_t max_capacity) noexcept; simdjson_really_inline void set_max_capacity(size_t max_capacity) noexcept;
#ifdef SIMDJSON_THREADS_ENABLED
/**
* The parser instance can use threads when they are available to speed up some
* operations. It is enabled by default. Changing this attribute will change the
* behavior of the parser for future operations.
*/
bool threaded{true};
#endif
/** @private Use the new DOM API instead */ /** @private Use the new DOM API instead */
class Iterator; class Iterator;
/** @private Use simdjson_error instead */ /** @private Use simdjson_error instead */
@ -380,6 +388,7 @@ public:
/** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()` instead */ /** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()` instead */
inline bool dump_raw_tape(std::ostream &os) const noexcept; inline bool dump_raw_tape(std::ostream &os) const noexcept;
private: private:
/** /**
* The maximum document length this parser will automatically support. * The maximum document length this parser will automatically support.
@ -421,6 +430,8 @@ private:
friend class parser::Iterator; friend class parser::Iterator;
friend class document_stream; friend class document_stream;
}; // class parser }; // class parser
} // namespace dom } // namespace dom

View File

@ -101,22 +101,22 @@ struct simdjson_result_base : public std::pair<T, error_code> {
/** /**
* Create a new empty result with error = UNINITIALIZED. * Create a new empty result with error = UNINITIALIZED.
*/ */
really_inline simdjson_result_base() noexcept; simdjson_really_inline simdjson_result_base() noexcept;
/** /**
* Create a new error result. * Create a new error result.
*/ */
really_inline simdjson_result_base(error_code error) noexcept; simdjson_really_inline simdjson_result_base(error_code error) noexcept;
/** /**
* Create a new successful result. * Create a new successful result.
*/ */
really_inline simdjson_result_base(T &&value) noexcept; simdjson_really_inline simdjson_result_base(T &&value) noexcept;
/** /**
* Create a new result with both things (use if you don't want to branch when creating the result). * Create a new result with both things (use if you don't want to branch when creating the result).
*/ */
really_inline simdjson_result_base(T &&value, error_code error) noexcept; simdjson_really_inline simdjson_result_base(T &&value, error_code error) noexcept;
/** /**
* Move the value and the error to the provided variables. * Move the value and the error to the provided variables.
@ -124,19 +124,19 @@ struct simdjson_result_base : public std::pair<T, error_code> {
* @param value The variable to assign the value to. May not be set if there is an error. * @param value The variable to assign the value to. May not be set if there is an error.
* @param error The variable to assign the error to. Set to SUCCESS if there is no error. * @param error The variable to assign the error to. Set to SUCCESS if there is no error.
*/ */
really_inline void tie(T &value, error_code &error) && noexcept; simdjson_really_inline void tie(T &value, error_code &error) && noexcept;
/** /**
* Move the value to the provided variable. * Move the value to the provided variable.
* *
* @param value The variable to assign the value to. May not be set if there is an error. * @param value The variable to assign the value to. May not be set if there is an error.
*/ */
really_inline error_code get(T &value) && noexcept; simdjson_really_inline error_code get(T &value) && noexcept;
/** /**
* The error. * The error.
*/ */
really_inline error_code error() const noexcept; simdjson_really_inline error_code error() const noexcept;
#if SIMDJSON_EXCEPTIONS #if SIMDJSON_EXCEPTIONS
@ -145,21 +145,21 @@ struct simdjson_result_base : public std::pair<T, error_code> {
* *
* @throw simdjson_error if there was an error. * @throw simdjson_error if there was an error.
*/ */
really_inline T& value() noexcept(false); simdjson_really_inline T& value() noexcept(false);
/** /**
* Take the result value (move it). * Take the result value (move it).
* *
* @throw simdjson_error if there was an error. * @throw simdjson_error if there was an error.
*/ */
really_inline T&& take_value() && noexcept(false); simdjson_really_inline T&& take_value() && noexcept(false);
/** /**
* Cast to the value (will throw on error). * Cast to the value (will throw on error).
* *
* @throw simdjson_error if there was an error. * @throw simdjson_error if there was an error.
*/ */
really_inline operator T&&() && noexcept(false); simdjson_really_inline operator T&&() && noexcept(false);
#endif // SIMDJSON_EXCEPTIONS #endif // SIMDJSON_EXCEPTIONS
}; // struct simdjson_result_base }; // struct simdjson_result_base
@ -176,19 +176,19 @@ struct simdjson_result : public internal::simdjson_result_base<T> {
/** /**
* @private Create a new empty result with error = UNINITIALIZED. * @private Create a new empty result with error = UNINITIALIZED.
*/ */
really_inline simdjson_result() noexcept; simdjson_really_inline simdjson_result() noexcept;
/** /**
* @private Create a new successful result. * @private Create a new successful result.
*/ */
really_inline simdjson_result(T &&value) noexcept; simdjson_really_inline simdjson_result(T &&value) noexcept;
/** /**
* @private Create a new error result. * @private Create a new error result.
*/ */
really_inline simdjson_result(error_code error_code) noexcept; simdjson_really_inline simdjson_result(error_code error_code) noexcept;
/** /**
* @private Create a new result with both things (use if you don't want to branch when creating the result). * @private Create a new result with both things (use if you don't want to branch when creating the result).
*/ */
really_inline simdjson_result(T &&value, error_code error) noexcept; simdjson_really_inline simdjson_result(T &&value, error_code error) noexcept;
/** /**
* Move the value and the error to the provided variables. * Move the value and the error to the provided variables.
@ -196,19 +196,19 @@ struct simdjson_result : public internal::simdjson_result_base<T> {
* @param value The variable to assign the value to. May not be set if there is an error. * @param value The variable to assign the value to. May not be set if there is an error.
* @param error The variable to assign the error to. Set to SUCCESS if there is no error. * @param error The variable to assign the error to. Set to SUCCESS if there is no error.
*/ */
really_inline void tie(T &value, error_code &error) && noexcept; simdjson_really_inline void tie(T &value, error_code &error) && noexcept;
/** /**
* Move the value to the provided variable. * Move the value to the provided variable.
* *
* @param value The variable to assign the value to. May not be set if there is an error. * @param value The variable to assign the value to. May not be set if there is an error.
*/ */
WARN_UNUSED really_inline error_code get(T &value) && noexcept; SIMDJSON_WARN_UNUSED simdjson_really_inline error_code get(T &value) && noexcept;
/** /**
* The error. * The error.
*/ */
really_inline error_code error() const noexcept; simdjson_really_inline error_code error() const noexcept;
#if SIMDJSON_EXCEPTIONS #if SIMDJSON_EXCEPTIONS
@ -217,21 +217,21 @@ struct simdjson_result : public internal::simdjson_result_base<T> {
* *
* @throw simdjson_error if there was an error. * @throw simdjson_error if there was an error.
*/ */
really_inline T& value() noexcept(false); simdjson_really_inline T& value() noexcept(false);
/** /**
* Take the result value (move it). * Take the result value (move it).
* *
* @throw simdjson_error if there was an error. * @throw simdjson_error if there was an error.
*/ */
really_inline T&& take_value() && noexcept(false); simdjson_really_inline T&& take_value() && noexcept(false);
/** /**
* Cast to the value (will throw on error). * Cast to the value (will throw on error).
* *
* @throw simdjson_error if there was an error. * @throw simdjson_error if there was an error.
*/ */
really_inline operator T&&() && noexcept(false); simdjson_really_inline operator T&&() && noexcept(false);
#endif // SIMDJSON_EXCEPTIONS #endif // SIMDJSON_EXCEPTIONS
}; // struct simdjson_result }; // struct simdjson_result

View File

@ -16,7 +16,7 @@ namespace simdjson {
* @param len the length of the string in bytes. * @param len the length of the string in bytes.
* @return true if the string is valid UTF-8. * @return true if the string is valid UTF-8.
*/ */
WARN_UNUSED bool validate_utf8(const char * buf, size_t len) noexcept; SIMDJSON_WARN_UNUSED bool validate_utf8(const char * buf, size_t len) noexcept;
/** /**
@ -25,7 +25,7 @@ WARN_UNUSED bool validate_utf8(const char * buf, size_t len) noexcept;
* @param sv the string_view to validate. * @param sv the string_view to validate.
* @return true if the string is valid UTF-8. * @return true if the string is valid UTF-8.
*/ */
really_inline WARN_UNUSED bool validate_utf8(const std::string_view sv) noexcept { simdjson_really_inline SIMDJSON_WARN_UNUSED bool validate_utf8(const std::string_view sv) noexcept {
return validate_utf8(sv.data(), sv.size()); return validate_utf8(sv.data(), sv.size());
} }
@ -35,7 +35,7 @@ really_inline WARN_UNUSED bool validate_utf8(const std::string_view sv) noexcept
* @param s the string to validate. * @param s the string to validate.
* @return true if the string is valid UTF-8. * @return true if the string is valid UTF-8.
*/ */
really_inline WARN_UNUSED bool validate_utf8(const std::string& s) noexcept { simdjson_really_inline SIMDJSON_WARN_UNUSED bool validate_utf8(const std::string& s) noexcept {
return validate_utf8(s.data(), s.size()); return validate_utf8(s.data(), s.size());
} }
@ -111,7 +111,7 @@ public:
* @param dst_len the number of bytes written. Output only. * @param dst_len the number of bytes written. Output only.
* @return the error code, or SUCCESS if there was no error. * @return the error code, or SUCCESS if there was no error.
*/ */
WARN_UNUSED virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0; SIMDJSON_WARN_UNUSED virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0;
/** /**
@ -123,11 +123,11 @@ public:
* @param len the length of the string in bytes. * @param len the length of the string in bytes.
* @return true if and only if the string is valid UTF-8. * @return true if and only if the string is valid UTF-8.
*/ */
WARN_UNUSED virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0; SIMDJSON_WARN_UNUSED virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0;
protected: protected:
/** @private Construct an implementation with the given name and description. For subclasses. */ /** @private Construct an implementation with the given name and description. For subclasses. */
really_inline implementation( simdjson_really_inline implementation(
std::string_view name, std::string_view name,
std::string_view description, std::string_view description,
uint32_t required_instruction_sets uint32_t required_instruction_sets
@ -165,7 +165,7 @@ namespace internal {
class available_implementation_list { class available_implementation_list {
public: public:
/** Get the list of available implementations compiled into simdjson */ /** Get the list of available implementations compiled into simdjson */
really_inline available_implementation_list() {} simdjson_really_inline available_implementation_list() {}
/** Number of implementations */ /** Number of implementations */
size_t size() const noexcept; size_t size() const noexcept;
/** STL const begin() iterator */ /** STL const begin() iterator */

View File

@ -12,11 +12,11 @@ namespace simdjson {
// //
// simdjson_result<dom::array> inline implementation // simdjson_result<dom::array> inline implementation
// //
really_inline simdjson_result<dom::array>::simdjson_result() noexcept simdjson_really_inline simdjson_result<dom::array>::simdjson_result() noexcept
: internal::simdjson_result_base<dom::array>() {} : internal::simdjson_result_base<dom::array>() {}
really_inline simdjson_result<dom::array>::simdjson_result(dom::array value) noexcept simdjson_really_inline simdjson_result<dom::array>::simdjson_result(dom::array value) noexcept
: internal::simdjson_result_base<dom::array>(std::forward<dom::array>(value)) {} : internal::simdjson_result_base<dom::array>(std::forward<dom::array>(value)) {}
really_inline simdjson_result<dom::array>::simdjson_result(error_code error) noexcept simdjson_really_inline simdjson_result<dom::array>::simdjson_result(error_code error) noexcept
: internal::simdjson_result_base<dom::array>(error) {} : internal::simdjson_result_base<dom::array>(error) {}
#if SIMDJSON_EXCEPTIONS #if SIMDJSON_EXCEPTIONS
@ -36,9 +36,9 @@ inline size_t simdjson_result<dom::array>::size() const noexcept(false) {
#endif // SIMDJSON_EXCEPTIONS #endif // SIMDJSON_EXCEPTIONS
inline simdjson_result<dom::element> simdjson_result<dom::array>::at(const std::string_view &json_pointer) const noexcept { inline simdjson_result<dom::element> simdjson_result<dom::array>::at_pointer(std::string_view json_pointer) const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first.at(json_pointer); return first.at_pointer(json_pointer);
} }
inline simdjson_result<dom::element> simdjson_result<dom::array>::at(size_t index) const noexcept { inline simdjson_result<dom::element> simdjson_result<dom::array>::at(size_t index) const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
@ -50,8 +50,8 @@ namespace dom {
// //
// array inline implementation // array inline implementation
// //
really_inline array::array() noexcept : tape{} {} simdjson_really_inline array::array() noexcept : tape{} {}
really_inline array::array(const internal::tape_ref &_tape) noexcept : tape{_tape} {} simdjson_really_inline array::array(const internal::tape_ref &_tape) noexcept : tape{_tape} {}
inline array::iterator array::begin() const noexcept { inline array::iterator array::begin() const noexcept {
return internal::tape_ref(tape.doc, tape.json_index + 1); return internal::tape_ref(tape.doc, tape.json_index + 1);
} }
@ -61,7 +61,15 @@ inline array::iterator array::end() const noexcept {
inline size_t array::size() const noexcept { inline size_t array::size() const noexcept {
return tape.scope_count(); return tape.scope_count();
} }
inline simdjson_result<element> array::at(const std::string_view &json_pointer) const noexcept { inline simdjson_result<element> array::at_pointer(std::string_view json_pointer) const noexcept {
if(json_pointer[0] != '/') {
if(json_pointer.size() == 0) { // an empty string means that we return the current node
return element(this->tape); // copy the current node
} else { // otherwise there is an error
return INVALID_JSON_POINTER;
}
}
json_pointer = json_pointer.substr(1);
// - means "the append position" or "the element after the end of the array" // - means "the append position" or "the element after the end of the array"
// We don't support this, because we're returning a real element, not a position. // We don't support this, because we're returning a real element, not a position.
if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; }
@ -84,9 +92,13 @@ inline simdjson_result<element> array::at(const std::string_view &json_pointer)
// Get the child // Get the child
auto child = array(tape).at(array_index); auto child = array(tape).at(array_index);
// If there is an error, it ends here
if(child.error()) {
return child;
}
// If there is a /, we're not done yet, call recursively. // If there is a /, we're not done yet, call recursively.
if (i < json_pointer.length()) { if (i < json_pointer.length()) {
child = child.at(json_pointer.substr(i+1)); child = child.at_pointer(json_pointer.substr(i));
} }
return child; return child;
} }
@ -102,7 +114,7 @@ inline simdjson_result<element> array::at(size_t index) const noexcept {
// //
// array::iterator inline implementation // array::iterator inline implementation
// //
really_inline array::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } simdjson_really_inline array::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { }
inline element array::iterator::operator*() const noexcept { inline element array::iterator::operator*() const noexcept {
return element(tape); return element(tape);
} }

View File

@ -20,7 +20,7 @@ inline element document::root() const noexcept {
return element(internal::tape_ref(this, 1)); return element(internal::tape_ref(this, 1));
} }
WARN_UNUSED SIMDJSON_WARN_UNUSED
inline error_code document::allocate(size_t capacity) noexcept { inline error_code document::allocate(size_t capacity) noexcept {
if (capacity == 0) { if (capacity == 0) {
string_buf.reset(); string_buf.reset();
@ -33,10 +33,10 @@ inline error_code document::allocate(size_t capacity) noexcept {
// worse with "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6" // worse with "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6"
//where len + 1 tape elements are //where len + 1 tape elements are
// generated, see issue https://github.com/lemire/simdjson/issues/345 // generated, see issue https://github.com/lemire/simdjson/issues/345
size_t tape_capacity = ROUNDUP_N(capacity + 3, 64); size_t tape_capacity = SIMDJSON_ROUNDUP_N(capacity + 3, 64);
// a document with only zero-length strings... could have len/3 string // a document with only zero-length strings... could have len/3 string
// and we would need len/3 * 5 bytes on the string buffer // and we would need len/3 * 5 bytes on the string buffer
size_t string_capacity = ROUNDUP_N(5 * capacity / 3 + 32, 64); size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * capacity / 3 + 32, 64);
string_buf.reset( new (std::nothrow) uint8_t[string_capacity]); string_buf.reset( new (std::nothrow) uint8_t[string_capacity]);
tape.reset(new (std::nothrow) uint64_t[tape_capacity]); tape.reset(new (std::nothrow) uint64_t[tape_capacity]);
return string_buf && tape ? SUCCESS : MEMALLOC; return string_buf && tape ? SUCCESS : MEMALLOC;

View File

@ -64,7 +64,7 @@ inline void stage1_worker::run(document_stream * ds, dom::parser * stage1, size_
} }
#endif #endif
really_inline document_stream::document_stream( simdjson_really_inline document_stream::document_stream(
dom::parser &_parser, dom::parser &_parser,
const uint8_t *_buf, const uint8_t *_buf,
size_t _len, size_t _len,
@ -75,6 +75,9 @@ really_inline document_stream::document_stream(
len{_len}, len{_len},
batch_size{_batch_size}, batch_size{_batch_size},
error{SUCCESS} error{SUCCESS}
#ifdef SIMDJSON_THREADS_ENABLED
, use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change
#endif
{ {
#ifdef SIMDJSON_THREADS_ENABLED #ifdef SIMDJSON_THREADS_ENABLED
if(worker.get() == nullptr) { if(worker.get() == nullptr) {
@ -83,45 +86,49 @@ really_inline document_stream::document_stream(
#endif #endif
} }
really_inline document_stream::document_stream() noexcept simdjson_really_inline document_stream::document_stream() noexcept
: parser{nullptr}, : parser{nullptr},
buf{nullptr}, buf{nullptr},
len{0}, len{0},
batch_size{0}, batch_size{0},
error{UNINITIALIZED} { error{UNINITIALIZED}
#ifdef SIMDJSON_THREADS_ENABLED
, use_thread(false)
#endif
{
} }
really_inline document_stream::~document_stream() noexcept { simdjson_really_inline document_stream::~document_stream() noexcept {
} }
really_inline document_stream::iterator document_stream::begin() noexcept { simdjson_really_inline document_stream::iterator document_stream::begin() noexcept {
start(); start();
// If there are no documents, we're finished. // If there are no documents, we're finished.
return iterator(*this, error == EMPTY); return iterator(*this, error == EMPTY);
} }
really_inline document_stream::iterator document_stream::end() noexcept { simdjson_really_inline document_stream::iterator document_stream::end() noexcept {
return iterator(*this, true); return iterator(*this, true);
} }
really_inline document_stream::iterator::iterator(document_stream& _stream, bool is_end) noexcept simdjson_really_inline document_stream::iterator::iterator(document_stream& _stream, bool is_end) noexcept
: stream{_stream}, finished{is_end} { : stream{_stream}, finished{is_end} {
} }
really_inline simdjson_result<element> document_stream::iterator::operator*() noexcept { simdjson_really_inline simdjson_result<element> document_stream::iterator::operator*() noexcept {
// Once we have yielded any errors, we're finished. // Once we have yielded any errors, we're finished.
if (stream.error) { finished = true; return stream.error; } if (stream.error) { finished = true; return stream.error; }
return stream.parser->doc.root(); return stream.parser->doc.root();
} }
really_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { simdjson_really_inline document_stream::iterator& document_stream::iterator::operator++() noexcept {
stream.next(); stream.next();
// If that was the last document, we're finished. // If that was the last document, we're finished.
if (stream.error == EMPTY) { finished = true; } if (stream.error == EMPTY) { finished = true; }
return *this; return *this;
} }
really_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { simdjson_really_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept {
return finished != other.finished; return finished != other.finished;
} }
@ -137,7 +144,7 @@ inline void document_stream::start() noexcept {
if (error) { return; } if (error) { return; }
#ifdef SIMDJSON_THREADS_ENABLED #ifdef SIMDJSON_THREADS_ENABLED
if (next_batch_start() < len) { if (use_thread && next_batch_start() < len) {
// Kick off the first thread if needed // Kick off the first thread if needed
error = stage1_thread_parser.ensure_capacity(batch_size); error = stage1_thread_parser.ensure_capacity(batch_size);
if (error) { return; } if (error) { return; }
@ -150,11 +157,11 @@ inline void document_stream::start() noexcept {
next(); next();
} }
really_inline size_t document_stream::iterator::current_index() const noexcept { simdjson_really_inline size_t document_stream::iterator::current_index() const noexcept {
return stream.doc_index; return stream.doc_index;
} }
really_inline std::string_view document_stream::iterator::source() const noexcept { simdjson_really_inline std::string_view document_stream::iterator::source() const noexcept {
size_t next_doc_index = stream.batch_start + stream.parser->implementation->structural_indexes[stream.parser->implementation->next_structural_index]; size_t next_doc_index = stream.batch_start + stream.parser->implementation->structural_indexes[stream.parser->implementation->next_structural_index];
return std::string_view(reinterpret_cast<const char*>(stream.buf) + current_index(), next_doc_index - current_index() - 1); return std::string_view(reinterpret_cast<const char*>(stream.buf) + current_index(), next_doc_index - current_index() - 1);
} }
@ -172,7 +179,11 @@ inline void document_stream::next() noexcept {
if (batch_start >= len) { break; } if (batch_start >= len) { break; }
#ifdef SIMDJSON_THREADS_ENABLED #ifdef SIMDJSON_THREADS_ENABLED
load_from_stage1_thread(); if(use_thread) {
load_from_stage1_thread();
} else {
error = run_stage1(*parser, batch_start);
}
#else #else
error = run_stage1(*parser, batch_start); error = run_stage1(*parser, batch_start);
#endif #endif
@ -228,31 +239,31 @@ inline void document_stream::start_stage1_thread() noexcept {
} // namespace dom } // namespace dom
really_inline simdjson_result<dom::document_stream>::simdjson_result() noexcept simdjson_really_inline simdjson_result<dom::document_stream>::simdjson_result() noexcept
: simdjson_result_base() { : simdjson_result_base() {
} }
really_inline simdjson_result<dom::document_stream>::simdjson_result(error_code error) noexcept simdjson_really_inline simdjson_result<dom::document_stream>::simdjson_result(error_code error) noexcept
: simdjson_result_base(error) { : simdjson_result_base(error) {
} }
really_inline simdjson_result<dom::document_stream>::simdjson_result(dom::document_stream &&value) noexcept simdjson_really_inline simdjson_result<dom::document_stream>::simdjson_result(dom::document_stream &&value) noexcept
: simdjson_result_base(std::forward<dom::document_stream>(value)) { : simdjson_result_base(std::forward<dom::document_stream>(value)) {
} }
#if SIMDJSON_EXCEPTIONS #if SIMDJSON_EXCEPTIONS
really_inline dom::document_stream::iterator simdjson_result<dom::document_stream>::begin() noexcept(false) { simdjson_really_inline dom::document_stream::iterator simdjson_result<dom::document_stream>::begin() noexcept(false) {
if (error()) { throw simdjson_error(error()); } if (error()) { throw simdjson_error(error()); }
return first.begin(); return first.begin();
} }
really_inline dom::document_stream::iterator simdjson_result<dom::document_stream>::end() noexcept(false) { simdjson_really_inline dom::document_stream::iterator simdjson_result<dom::document_stream>::end() noexcept(false) {
if (error()) { throw simdjson_error(error()); } if (error()) { throw simdjson_error(error()); }
return first.end(); return first.end();
} }
#else // SIMDJSON_EXCEPTIONS #else // SIMDJSON_EXCEPTIONS
really_inline dom::document_stream::iterator simdjson_result<dom::document_stream>::begin() noexcept { simdjson_really_inline dom::document_stream::iterator simdjson_result<dom::document_stream>::begin() noexcept {
first.error = error(); first.error = error();
return first.begin(); return first.begin();
} }
really_inline dom::document_stream::iterator simdjson_result<dom::document_stream>::end() noexcept { simdjson_really_inline dom::document_stream::iterator simdjson_result<dom::document_stream>::end() noexcept {
first.error = error(); first.error = error();
return first.end(); return first.end();
} }

View File

@ -12,11 +12,11 @@ namespace simdjson {
// //
// simdjson_result<dom::element> inline implementation // simdjson_result<dom::element> inline implementation
// //
really_inline simdjson_result<dom::element>::simdjson_result() noexcept simdjson_really_inline simdjson_result<dom::element>::simdjson_result() noexcept
: internal::simdjson_result_base<dom::element>() {} : internal::simdjson_result_base<dom::element>() {}
really_inline simdjson_result<dom::element>::simdjson_result(dom::element &&value) noexcept simdjson_really_inline simdjson_result<dom::element>::simdjson_result(dom::element &&value) noexcept
: internal::simdjson_result_base<dom::element>(std::forward<dom::element>(value)) {} : internal::simdjson_result_base<dom::element>(std::forward<dom::element>(value)) {}
really_inline simdjson_result<dom::element>::simdjson_result(error_code error) noexcept simdjson_really_inline simdjson_result<dom::element>::simdjson_result(error_code error) noexcept
: internal::simdjson_result_base<dom::element>(error) {} : internal::simdjson_result_base<dom::element>(error) {}
inline simdjson_result<dom::element_type> simdjson_result<dom::element>::type() const noexcept { inline simdjson_result<dom::element_type> simdjson_result<dom::element>::type() const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
@ -24,140 +24,148 @@ inline simdjson_result<dom::element_type> simdjson_result<dom::element>::type()
} }
template<typename T> template<typename T>
really_inline bool simdjson_result<dom::element>::is() const noexcept { simdjson_really_inline bool simdjson_result<dom::element>::is() const noexcept {
return !error() && first.is<T>(); return !error() && first.is<T>();
} }
template<typename T> template<typename T>
really_inline simdjson_result<T> simdjson_result<dom::element>::get() const noexcept { simdjson_really_inline simdjson_result<T> simdjson_result<dom::element>::get() const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first.get<T>(); return first.get<T>();
} }
template<typename T> template<typename T>
WARN_UNUSED really_inline error_code simdjson_result<dom::element>::get(T &value) const noexcept { SIMDJSON_WARN_UNUSED simdjson_really_inline error_code simdjson_result<dom::element>::get(T &value) const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first.get<T>(value); return first.get<T>(value);
} }
really_inline simdjson_result<dom::array> simdjson_result<dom::element>::get_array() const noexcept { simdjson_really_inline simdjson_result<dom::array> simdjson_result<dom::element>::get_array() const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first.get_array(); return first.get_array();
} }
really_inline simdjson_result<dom::object> simdjson_result<dom::element>::get_object() const noexcept { simdjson_really_inline simdjson_result<dom::object> simdjson_result<dom::element>::get_object() const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first.get_object(); return first.get_object();
} }
really_inline simdjson_result<const char *> simdjson_result<dom::element>::get_c_str() const noexcept { simdjson_really_inline simdjson_result<const char *> simdjson_result<dom::element>::get_c_str() const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first.get_c_str(); return first.get_c_str();
} }
really_inline simdjson_result<size_t> simdjson_result<dom::element>::get_string_length() const noexcept { simdjson_really_inline simdjson_result<size_t> simdjson_result<dom::element>::get_string_length() const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first.get_string_length(); return first.get_string_length();
} }
really_inline simdjson_result<std::string_view> simdjson_result<dom::element>::get_string() const noexcept { simdjson_really_inline simdjson_result<std::string_view> simdjson_result<dom::element>::get_string() const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first.get_string(); return first.get_string();
} }
really_inline simdjson_result<int64_t> simdjson_result<dom::element>::get_int64() const noexcept { simdjson_really_inline simdjson_result<int64_t> simdjson_result<dom::element>::get_int64() const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first.get_int64(); return first.get_int64();
} }
really_inline simdjson_result<uint64_t> simdjson_result<dom::element>::get_uint64() const noexcept { simdjson_really_inline simdjson_result<uint64_t> simdjson_result<dom::element>::get_uint64() const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first.get_uint64(); return first.get_uint64();
} }
really_inline simdjson_result<double> simdjson_result<dom::element>::get_double() const noexcept { simdjson_really_inline simdjson_result<double> simdjson_result<dom::element>::get_double() const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first.get_double(); return first.get_double();
} }
really_inline simdjson_result<bool> simdjson_result<dom::element>::get_bool() const noexcept { simdjson_really_inline simdjson_result<bool> simdjson_result<dom::element>::get_bool() const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first.get_bool(); return first.get_bool();
} }
really_inline bool simdjson_result<dom::element>::is_array() const noexcept { simdjson_really_inline bool simdjson_result<dom::element>::is_array() const noexcept {
return !error() && first.is_array(); return !error() && first.is_array();
} }
really_inline bool simdjson_result<dom::element>::is_object() const noexcept { simdjson_really_inline bool simdjson_result<dom::element>::is_object() const noexcept {
return !error() && first.is_object(); return !error() && first.is_object();
} }
really_inline bool simdjson_result<dom::element>::is_string() const noexcept { simdjson_really_inline bool simdjson_result<dom::element>::is_string() const noexcept {
return !error() && first.is_string(); return !error() && first.is_string();
} }
really_inline bool simdjson_result<dom::element>::is_int64() const noexcept { simdjson_really_inline bool simdjson_result<dom::element>::is_int64() const noexcept {
return !error() && first.is_int64(); return !error() && first.is_int64();
} }
really_inline bool simdjson_result<dom::element>::is_uint64() const noexcept { simdjson_really_inline bool simdjson_result<dom::element>::is_uint64() const noexcept {
return !error() && first.is_uint64(); return !error() && first.is_uint64();
} }
really_inline bool simdjson_result<dom::element>::is_double() const noexcept { simdjson_really_inline bool simdjson_result<dom::element>::is_double() const noexcept {
return !error() && first.is_double(); return !error() && first.is_double();
} }
really_inline bool simdjson_result<dom::element>::is_bool() const noexcept { simdjson_really_inline bool simdjson_result<dom::element>::is_bool() const noexcept {
return !error() && first.is_bool(); return !error() && first.is_bool();
} }
really_inline bool simdjson_result<dom::element>::is_null() const noexcept { simdjson_really_inline bool simdjson_result<dom::element>::is_null() const noexcept {
return !error() && first.is_null(); return !error() && first.is_null();
} }
really_inline simdjson_result<dom::element> simdjson_result<dom::element>::operator[](const std::string_view &key) const noexcept { simdjson_really_inline simdjson_result<dom::element> simdjson_result<dom::element>::operator[](std::string_view key) const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first[key]; return first[key];
} }
really_inline simdjson_result<dom::element> simdjson_result<dom::element>::operator[](const char *key) const noexcept { simdjson_really_inline simdjson_result<dom::element> simdjson_result<dom::element>::operator[](const char *key) const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first[key]; return first[key];
} }
really_inline simdjson_result<dom::element> simdjson_result<dom::element>::at(const std::string_view &json_pointer) const noexcept { simdjson_really_inline simdjson_result<dom::element> simdjson_result<dom::element>::at_pointer(const std::string_view json_pointer) const noexcept {
if (error()) { return error(); }
return first.at_pointer(json_pointer);
}
[[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]]
simdjson_really_inline simdjson_result<dom::element> simdjson_result<dom::element>::at(const std::string_view json_pointer) const noexcept {
SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_DEPRECATED_WARNING
if (error()) { return error(); } if (error()) { return error(); }
return first.at(json_pointer); return first.at(json_pointer);
SIMDJSON_POP_DISABLE_WARNINGS
} }
really_inline simdjson_result<dom::element> simdjson_result<dom::element>::at(size_t index) const noexcept { simdjson_really_inline simdjson_result<dom::element> simdjson_result<dom::element>::at(size_t index) const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first.at(index); return first.at(index);
} }
really_inline simdjson_result<dom::element> simdjson_result<dom::element>::at_key(const std::string_view &key) const noexcept { simdjson_really_inline simdjson_result<dom::element> simdjson_result<dom::element>::at_key(std::string_view key) const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first.at_key(key); return first.at_key(key);
} }
really_inline simdjson_result<dom::element> simdjson_result<dom::element>::at_key_case_insensitive(const std::string_view &key) const noexcept { simdjson_really_inline simdjson_result<dom::element> simdjson_result<dom::element>::at_key_case_insensitive(std::string_view key) const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first.at_key_case_insensitive(key); return first.at_key_case_insensitive(key);
} }
#if SIMDJSON_EXCEPTIONS #if SIMDJSON_EXCEPTIONS
really_inline simdjson_result<dom::element>::operator bool() const noexcept(false) { simdjson_really_inline simdjson_result<dom::element>::operator bool() const noexcept(false) {
return get<bool>(); return get<bool>();
} }
really_inline simdjson_result<dom::element>::operator const char *() const noexcept(false) { simdjson_really_inline simdjson_result<dom::element>::operator const char *() const noexcept(false) {
return get<const char *>(); return get<const char *>();
} }
really_inline simdjson_result<dom::element>::operator std::string_view() const noexcept(false) { simdjson_really_inline simdjson_result<dom::element>::operator std::string_view() const noexcept(false) {
return get<std::string_view>(); return get<std::string_view>();
} }
really_inline simdjson_result<dom::element>::operator uint64_t() const noexcept(false) { simdjson_really_inline simdjson_result<dom::element>::operator uint64_t() const noexcept(false) {
return get<uint64_t>(); return get<uint64_t>();
} }
really_inline simdjson_result<dom::element>::operator int64_t() const noexcept(false) { simdjson_really_inline simdjson_result<dom::element>::operator int64_t() const noexcept(false) {
return get<int64_t>(); return get<int64_t>();
} }
really_inline simdjson_result<dom::element>::operator double() const noexcept(false) { simdjson_really_inline simdjson_result<dom::element>::operator double() const noexcept(false) {
return get<double>(); return get<double>();
} }
really_inline simdjson_result<dom::element>::operator dom::array() const noexcept(false) { simdjson_really_inline simdjson_result<dom::element>::operator dom::array() const noexcept(false) {
return get<dom::array>(); return get<dom::array>();
} }
really_inline simdjson_result<dom::element>::operator dom::object() const noexcept(false) { simdjson_really_inline simdjson_result<dom::element>::operator dom::object() const noexcept(false) {
return get<dom::object>(); return get<dom::object>();
} }
really_inline dom::array::iterator simdjson_result<dom::element>::begin() const noexcept(false) { simdjson_really_inline dom::array::iterator simdjson_result<dom::element>::begin() const noexcept(false) {
if (error()) { throw simdjson_error(error()); } if (error()) { throw simdjson_error(error()); }
return first.begin(); return first.begin();
} }
really_inline dom::array::iterator simdjson_result<dom::element>::end() const noexcept(false) { simdjson_really_inline dom::array::iterator simdjson_result<dom::element>::end() const noexcept(false) {
if (error()) { throw simdjson_error(error()); } if (error()) { throw simdjson_error(error()); }
return first.end(); return first.end();
} }
@ -169,8 +177,8 @@ namespace dom {
// //
// element inline implementation // element inline implementation
// //
really_inline element::element() noexcept : tape{} {} simdjson_really_inline element::element() noexcept : tape{} {}
really_inline element::element(const internal::tape_ref &_tape) noexcept : tape{_tape} { } simdjson_really_inline element::element(const internal::tape_ref &_tape) noexcept : tape{_tape} { }
inline element_type element::type() const noexcept { inline element_type element::type() const noexcept {
auto tape_type = tape.tape_ref_type(); auto tape_type = tape.tape_ref_type();
@ -212,7 +220,7 @@ inline simdjson_result<std::string_view> element::get_string() const noexcept {
} }
} }
inline simdjson_result<uint64_t> element::get_uint64() const noexcept { inline simdjson_result<uint64_t> element::get_uint64() const noexcept {
if(unlikely(!tape.is_uint64())) { // branch rarely taken if(simdjson_unlikely(!tape.is_uint64())) { // branch rarely taken
if(tape.is_int64()) { if(tape.is_int64()) {
int64_t result = tape.next_tape_value<int64_t>(); int64_t result = tape.next_tape_value<int64_t>();
if (result < 0) { if (result < 0) {
@ -225,7 +233,7 @@ inline simdjson_result<uint64_t> element::get_uint64() const noexcept {
return tape.next_tape_value<int64_t>(); return tape.next_tape_value<int64_t>();
} }
inline simdjson_result<int64_t> element::get_int64() const noexcept { inline simdjson_result<int64_t> element::get_int64() const noexcept {
if(unlikely(!tape.is_int64())) { // branch rarely taken if(simdjson_unlikely(!tape.is_int64())) { // branch rarely taken
if(tape.is_uint64()) { if(tape.is_uint64()) {
uint64_t result = tape.next_tape_value<uint64_t>(); uint64_t result = tape.next_tape_value<uint64_t>();
// Wrapping max in parens to handle Windows issue: https://stackoverflow.com/questions/11544073/how-do-i-deal-with-the-max-macro-in-windows-h-colliding-with-max-in-std // Wrapping max in parens to handle Windows issue: https://stackoverflow.com/questions/11544073/how-do-i-deal-with-the-max-macro-in-windows-h-colliding-with-max-in-std
@ -248,7 +256,7 @@ inline simdjson_result<double> element::get_double() const noexcept {
// We can expect get<double> to refer to a double type almost all the time. // We can expect get<double> to refer to a double type almost all the time.
// It is important to craft the code accordingly so that the compiler can use this // It is important to craft the code accordingly so that the compiler can use this
// information. (This could also be solved with profile-guided optimization.) // information. (This could also be solved with profile-guided optimization.)
if(unlikely(!tape.is_double())) { // branch rarely taken if(simdjson_unlikely(!tape.is_double())) { // branch rarely taken
if(tape.is_uint64()) { if(tape.is_uint64()) {
return double(tape.next_tape_value<uint64_t>()); return double(tape.next_tape_value<uint64_t>());
} else if(tape.is_int64()) { } else if(tape.is_int64()) {
@ -277,18 +285,18 @@ inline simdjson_result<object> element::get_object() const noexcept {
} }
template<typename T> template<typename T>
WARN_UNUSED really_inline error_code element::get(T &value) const noexcept { SIMDJSON_WARN_UNUSED simdjson_really_inline error_code element::get(T &value) const noexcept {
return get<T>().get(value); return get<T>().get(value);
} }
// An element-specific version prevents recursion with simdjson_result::get<element>(value) // An element-specific version prevents recursion with simdjson_result::get<element>(value)
template<> template<>
WARN_UNUSED really_inline error_code element::get<element>(element &value) const noexcept { SIMDJSON_WARN_UNUSED simdjson_really_inline error_code element::get<element>(element &value) const noexcept {
value = element(tape); value = element(tape);
return SUCCESS; return SUCCESS;
} }
template<typename T> template<typename T>
really_inline bool element::is() const noexcept { simdjson_really_inline bool element::is() const noexcept {
auto result = get<T>(); auto result = get<T>();
return !result.error(); return !result.error();
} }
@ -334,29 +342,43 @@ inline array::iterator element::end() const noexcept(false) {
#endif // SIMDJSON_EXCEPTIONS #endif // SIMDJSON_EXCEPTIONS
inline simdjson_result<element> element::operator[](const std::string_view &key) const noexcept { inline simdjson_result<element> element::operator[](std::string_view key) const noexcept {
return at_key(key); return at_key(key);
} }
inline simdjson_result<element> element::operator[](const char *key) const noexcept { inline simdjson_result<element> element::operator[](const char *key) const noexcept {
return at_key(key); return at_key(key);
} }
inline simdjson_result<element> element::at(const std::string_view &json_pointer) const noexcept {
inline simdjson_result<element> element::at_pointer(std::string_view json_pointer) const noexcept {
switch (tape.tape_ref_type()) { switch (tape.tape_ref_type()) {
case internal::tape_type::START_OBJECT: case internal::tape_type::START_OBJECT:
return object(tape).at(json_pointer); return object(tape).at_pointer(json_pointer);
case internal::tape_type::START_ARRAY: case internal::tape_type::START_ARRAY:
return array(tape).at(json_pointer); return array(tape).at_pointer(json_pointer);
default: default: {
return INCORRECT_TYPE; if(json_pointer.empty()) { // an empty string means that we return the current node
return INVALID_JSON_POINTER;
}
dom::element copy(*this);
return simdjson_result<element>(std::move(copy));
}
} }
} }
[[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]]
inline simdjson_result<element> element::at(std::string_view json_pointer) const noexcept {
// version 0.4 of simdjson allowed non-compliant pointers
auto std_pointer = (json_pointer.empty() ? "" : "/") + std::string(json_pointer.begin(), json_pointer.end());
return at_pointer(std_pointer);
}
inline simdjson_result<element> element::at(size_t index) const noexcept { inline simdjson_result<element> element::at(size_t index) const noexcept {
return get<array>().at(index); return get<array>().at(index);
} }
inline simdjson_result<element> element::at_key(const std::string_view &key) const noexcept { inline simdjson_result<element> element::at_key(std::string_view key) const noexcept {
return get<object>().at_key(key); return get<object>().at_key(key);
} }
inline simdjson_result<element> element::at_key_case_insensitive(const std::string_view &key) const noexcept { inline simdjson_result<element> element::at_key_case_insensitive(std::string_view key) const noexcept {
return get<object>().at_key_case_insensitive(key); return get<object>().at_key_case_insensitive(key);
} }
@ -419,7 +441,7 @@ inline std::ostream& minifier<dom::element>::print(std::ostream& out) {
case tape_type::START_ARRAY: { case tape_type::START_ARRAY: {
// If we're too deep, we need to recurse to go deeper. // If we're too deep, we need to recurse to go deeper.
depth++; depth++;
if (unlikely(depth >= MAX_DEPTH)) { if (simdjson_unlikely(depth >= MAX_DEPTH)) {
out << minify<dom::array>(dom::array(iter)); out << minify<dom::array>(dom::array(iter));
iter.json_index = iter.matching_brace_index() - 1; // Jump to the ] iter.json_index = iter.matching_brace_index() - 1; // Jump to the ]
depth--; depth--;
@ -446,7 +468,7 @@ inline std::ostream& minifier<dom::element>::print(std::ostream& out) {
case tape_type::START_OBJECT: { case tape_type::START_OBJECT: {
// If we're too deep, we need to recurse to go deeper. // If we're too deep, we need to recurse to go deeper.
depth++; depth++;
if (unlikely(depth >= MAX_DEPTH)) { if (simdjson_unlikely(depth >= MAX_DEPTH)) {
out << minify<dom::object>(dom::object(iter)); out << minify<dom::object>(dom::object(iter));
iter.json_index = iter.matching_brace_index() - 1; // Jump to the } iter.json_index = iter.matching_brace_index() - 1; // Jump to the }
depth--; depth--;
@ -520,12 +542,12 @@ inline std::ostream& minifier<dom::element>::print(std::ostream& out) {
#if SIMDJSON_EXCEPTIONS #if SIMDJSON_EXCEPTIONS
template<> template<>
really_inline std::ostream& minifier<simdjson_result<dom::element>>::print(std::ostream& out) { simdjson_really_inline std::ostream& minifier<simdjson_result<dom::element>>::print(std::ostream& out) {
if (value.error()) { throw simdjson_error(value.error()); } if (value.error()) { throw simdjson_error(value.error()); }
return out << minify<dom::element>(value.first); return out << minify<dom::element>(value.first);
} }
really_inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::element> &value) noexcept(false) { simdjson_really_inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::element> &value) noexcept(false) {
return out << minify<simdjson_result<dom::element>>(value); return out << minify<simdjson_result<dom::element>>(value);
} }
#endif #endif

View File

@ -42,7 +42,7 @@ namespace internal {
// //
template<typename T> template<typename T>
really_inline void simdjson_result_base<T>::tie(T &value, error_code &error) && noexcept { simdjson_really_inline void simdjson_result_base<T>::tie(T &value, error_code &error) && noexcept {
// on the clang compiler that comes with current macOS (Apple clang version 11.0.0), // on the clang compiler that comes with current macOS (Apple clang version 11.0.0),
// tie(width, error) = size["w"].get<uint64_t>(); // tie(width, error) = size["w"].get<uint64_t>();
// fails with "error: no viable overloaded '='"" // fails with "error: no viable overloaded '='""
@ -53,49 +53,49 @@ really_inline void simdjson_result_base<T>::tie(T &value, error_code &error) &&
} }
template<typename T> template<typename T>
WARN_UNUSED really_inline error_code simdjson_result_base<T>::get(T &value) && noexcept { SIMDJSON_WARN_UNUSED simdjson_really_inline error_code simdjson_result_base<T>::get(T &value) && noexcept {
error_code error; error_code error;
std::forward<simdjson_result_base<T>>(*this).tie(value, error); std::forward<simdjson_result_base<T>>(*this).tie(value, error);
return error; return error;
} }
template<typename T> template<typename T>
really_inline error_code simdjson_result_base<T>::error() const noexcept { simdjson_really_inline error_code simdjson_result_base<T>::error() const noexcept {
return this->second; return this->second;
} }
#if SIMDJSON_EXCEPTIONS #if SIMDJSON_EXCEPTIONS
template<typename T> template<typename T>
really_inline T& simdjson_result_base<T>::value() noexcept(false) { simdjson_really_inline T& simdjson_result_base<T>::value() noexcept(false) {
if (error()) { throw simdjson_error(error()); } if (error()) { throw simdjson_error(error()); }
return this->first; return this->first;
} }
template<typename T> template<typename T>
really_inline T&& simdjson_result_base<T>::take_value() && noexcept(false) { simdjson_really_inline T&& simdjson_result_base<T>::take_value() && noexcept(false) {
if (error()) { throw simdjson_error(error()); } if (error()) { throw simdjson_error(error()); }
return std::forward<T>(this->first); return std::forward<T>(this->first);
} }
template<typename T> template<typename T>
really_inline simdjson_result_base<T>::operator T&&() && noexcept(false) { simdjson_really_inline simdjson_result_base<T>::operator T&&() && noexcept(false) {
return std::forward<simdjson_result_base<T>>(*this).take_value(); return std::forward<simdjson_result_base<T>>(*this).take_value();
} }
#endif // SIMDJSON_EXCEPTIONS #endif // SIMDJSON_EXCEPTIONS
template<typename T> template<typename T>
really_inline simdjson_result_base<T>::simdjson_result_base(T &&value, error_code error) noexcept simdjson_really_inline simdjson_result_base<T>::simdjson_result_base(T &&value, error_code error) noexcept
: std::pair<T, error_code>(std::forward<T>(value), error) {} : std::pair<T, error_code>(std::forward<T>(value), error) {}
template<typename T> template<typename T>
really_inline simdjson_result_base<T>::simdjson_result_base(error_code error) noexcept simdjson_really_inline simdjson_result_base<T>::simdjson_result_base(error_code error) noexcept
: simdjson_result_base(T{}, error) {} : simdjson_result_base(T{}, error) {}
template<typename T> template<typename T>
really_inline simdjson_result_base<T>::simdjson_result_base(T &&value) noexcept simdjson_really_inline simdjson_result_base<T>::simdjson_result_base(T &&value) noexcept
: simdjson_result_base(std::forward<T>(value), SUCCESS) {} : simdjson_result_base(std::forward<T>(value), SUCCESS) {}
template<typename T> template<typename T>
really_inline simdjson_result_base<T>::simdjson_result_base() noexcept simdjson_really_inline simdjson_result_base<T>::simdjson_result_base() noexcept
: simdjson_result_base(T{}, UNINITIALIZED) {} : simdjson_result_base(T{}, UNINITIALIZED) {}
} // namespace internal } // namespace internal
@ -105,50 +105,50 @@ really_inline simdjson_result_base<T>::simdjson_result_base() noexcept
/// ///
template<typename T> template<typename T>
really_inline void simdjson_result<T>::tie(T &value, error_code &error) && noexcept { simdjson_really_inline void simdjson_result<T>::tie(T &value, error_code &error) && noexcept {
std::forward<internal::simdjson_result_base<T>>(*this).tie(value, error); std::forward<internal::simdjson_result_base<T>>(*this).tie(value, error);
} }
template<typename T> template<typename T>
WARN_UNUSED really_inline error_code simdjson_result<T>::get(T &value) && noexcept { SIMDJSON_WARN_UNUSED simdjson_really_inline error_code simdjson_result<T>::get(T &value) && noexcept {
return std::forward<internal::simdjson_result_base<T>>(*this).get(value); return std::forward<internal::simdjson_result_base<T>>(*this).get(value);
} }
template<typename T> template<typename T>
really_inline error_code simdjson_result<T>::error() const noexcept { simdjson_really_inline error_code simdjson_result<T>::error() const noexcept {
return internal::simdjson_result_base<T>::error(); return internal::simdjson_result_base<T>::error();
} }
#if SIMDJSON_EXCEPTIONS #if SIMDJSON_EXCEPTIONS
template<typename T> template<typename T>
really_inline T& simdjson_result<T>::value() noexcept(false) { simdjson_really_inline T& simdjson_result<T>::value() noexcept(false) {
return internal::simdjson_result_base<T>::value(); return internal::simdjson_result_base<T>::value();
} }
template<typename T> template<typename T>
really_inline T&& simdjson_result<T>::take_value() && noexcept(false) { simdjson_really_inline T&& simdjson_result<T>::take_value() && noexcept(false) {
return std::forward<internal::simdjson_result_base<T>>(*this).take_value(); return std::forward<internal::simdjson_result_base<T>>(*this).take_value();
} }
template<typename T> template<typename T>
really_inline simdjson_result<T>::operator T&&() && noexcept(false) { simdjson_really_inline simdjson_result<T>::operator T&&() && noexcept(false) {
return std::forward<internal::simdjson_result_base<T>>(*this).take_value(); return std::forward<internal::simdjson_result_base<T>>(*this).take_value();
} }
#endif // SIMDJSON_EXCEPTIONS #endif // SIMDJSON_EXCEPTIONS
template<typename T> template<typename T>
really_inline simdjson_result<T>::simdjson_result(T &&value, error_code error) noexcept simdjson_really_inline simdjson_result<T>::simdjson_result(T &&value, error_code error) noexcept
: internal::simdjson_result_base<T>(std::forward<T>(value), error) {} : internal::simdjson_result_base<T>(std::forward<T>(value), error) {}
template<typename T> template<typename T>
really_inline simdjson_result<T>::simdjson_result(error_code error) noexcept simdjson_really_inline simdjson_result<T>::simdjson_result(error_code error) noexcept
: internal::simdjson_result_base<T>(error) {} : internal::simdjson_result_base<T>(error) {}
template<typename T> template<typename T>
really_inline simdjson_result<T>::simdjson_result(T &&value) noexcept simdjson_really_inline simdjson_result<T>::simdjson_result(T &&value) noexcept
: internal::simdjson_result_base<T>(std::forward<T>(value)) {} : internal::simdjson_result_base<T>(std::forward<T>(value)) {}
template<typename T> template<typename T>
really_inline simdjson_result<T>::simdjson_result() noexcept simdjson_really_inline simdjson_result<T>::simdjson_result() noexcept
: internal::simdjson_result_base<T>() {} : internal::simdjson_result_base<T>() {}
} // namespace simdjson } // namespace simdjson

View File

@ -12,14 +12,14 @@ namespace simdjson {
// //
// simdjson_result<dom::object> inline implementation // simdjson_result<dom::object> inline implementation
// //
really_inline simdjson_result<dom::object>::simdjson_result() noexcept simdjson_really_inline simdjson_result<dom::object>::simdjson_result() noexcept
: internal::simdjson_result_base<dom::object>() {} : internal::simdjson_result_base<dom::object>() {}
really_inline simdjson_result<dom::object>::simdjson_result(dom::object value) noexcept simdjson_really_inline simdjson_result<dom::object>::simdjson_result(dom::object value) noexcept
: internal::simdjson_result_base<dom::object>(std::forward<dom::object>(value)) {} : internal::simdjson_result_base<dom::object>(std::forward<dom::object>(value)) {}
really_inline simdjson_result<dom::object>::simdjson_result(error_code error) noexcept simdjson_really_inline simdjson_result<dom::object>::simdjson_result(error_code error) noexcept
: internal::simdjson_result_base<dom::object>(error) {} : internal::simdjson_result_base<dom::object>(error) {}
inline simdjson_result<dom::element> simdjson_result<dom::object>::operator[](const std::string_view &key) const noexcept { inline simdjson_result<dom::element> simdjson_result<dom::object>::operator[](std::string_view key) const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first[key]; return first[key];
} }
@ -27,15 +27,15 @@ inline simdjson_result<dom::element> simdjson_result<dom::object>::operator[](co
if (error()) { return error(); } if (error()) { return error(); }
return first[key]; return first[key];
} }
inline simdjson_result<dom::element> simdjson_result<dom::object>::at(const std::string_view &json_pointer) const noexcept { inline simdjson_result<dom::element> simdjson_result<dom::object>::at_pointer(std::string_view json_pointer) const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first.at(json_pointer); return first.at_pointer(json_pointer);
} }
inline simdjson_result<dom::element> simdjson_result<dom::object>::at_key(const std::string_view &key) const noexcept { inline simdjson_result<dom::element> simdjson_result<dom::object>::at_key(std::string_view key) const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first.at_key(key); return first.at_key(key);
} }
inline simdjson_result<dom::element> simdjson_result<dom::object>::at_key_case_insensitive(const std::string_view &key) const noexcept { inline simdjson_result<dom::element> simdjson_result<dom::object>::at_key_case_insensitive(std::string_view key) const noexcept {
if (error()) { return error(); } if (error()) { return error(); }
return first.at_key_case_insensitive(key); return first.at_key_case_insensitive(key);
} }
@ -62,8 +62,8 @@ namespace dom {
// //
// object inline implementation // object inline implementation
// //
really_inline object::object() noexcept : tape{} {} simdjson_really_inline object::object() noexcept : tape{} {}
really_inline object::object(const internal::tape_ref &_tape) noexcept : tape{_tape} { } simdjson_really_inline object::object(const internal::tape_ref &_tape) noexcept : tape{_tape} { }
inline object::iterator object::begin() const noexcept { inline object::iterator object::begin() const noexcept {
return internal::tape_ref(tape.doc, tape.json_index + 1); return internal::tape_ref(tape.doc, tape.json_index + 1);
} }
@ -74,16 +74,23 @@ inline size_t object::size() const noexcept {
return tape.scope_count(); return tape.scope_count();
} }
inline simdjson_result<element> object::operator[](const std::string_view &key) const noexcept { inline simdjson_result<element> object::operator[](std::string_view key) const noexcept {
return at_key(key); return at_key(key);
} }
inline simdjson_result<element> object::operator[](const char *key) const noexcept { inline simdjson_result<element> object::operator[](const char *key) const noexcept {
return at_key(key); return at_key(key);
} }
inline simdjson_result<element> object::at(const std::string_view &json_pointer) const noexcept { inline simdjson_result<element> object::at_pointer(std::string_view json_pointer) const noexcept {
if(json_pointer[0] != '/') {
if(json_pointer.size() == 0) { // an empty string means that we return the current node
return element(this->tape); // copy the current node
} else { // otherwise there is an error
return INVALID_JSON_POINTER;
}
}
json_pointer = json_pointer.substr(1);
size_t slash = json_pointer.find('/'); size_t slash = json_pointer.find('/');
std::string_view key = json_pointer.substr(0, slash); std::string_view key = json_pointer.substr(0, slash);
// Grab the child with the given key // Grab the child with the given key
simdjson_result<element> child; simdjson_result<element> child;
@ -109,15 +116,17 @@ inline simdjson_result<element> object::at(const std::string_view &json_pointer)
} else { } else {
child = at_key(key); child = at_key(key);
} }
if(child.error()) {
return child; // we do not continue if there was an error
}
// If there is a /, we have to recurse and look up more of the path // If there is a /, we have to recurse and look up more of the path
if (slash != std::string_view::npos) { if (slash != std::string_view::npos) {
child = child.at(json_pointer.substr(slash+1)); child = child.at_pointer(json_pointer.substr(slash));
} }
return child; return child;
} }
inline simdjson_result<element> object::at_key(const std::string_view &key) const noexcept {
inline simdjson_result<element> object::at_key(std::string_view key) const noexcept {
iterator end_field = end(); iterator end_field = end();
for (iterator field = begin(); field != end_field; ++field) { for (iterator field = begin(); field != end_field; ++field) {
if (field.key_equals(key)) { if (field.key_equals(key)) {
@ -129,7 +138,7 @@ inline simdjson_result<element> object::at_key(const std::string_view &key) cons
// In case you wonder why we need this, please see // In case you wonder why we need this, please see
// https://github.com/simdjson/simdjson/issues/323 // https://github.com/simdjson/simdjson/issues/323
// People do seek keys in a case-insensitive manner. // People do seek keys in a case-insensitive manner.
inline simdjson_result<element> object::at_key_case_insensitive(const std::string_view &key) const noexcept { inline simdjson_result<element> object::at_key_case_insensitive(std::string_view key) const noexcept {
iterator end_field = end(); iterator end_field = end();
for (iterator field = begin(); field != end_field; ++field) { for (iterator field = begin(); field != end_field; ++field) {
if (field.key_equals_case_insensitive(key)) { if (field.key_equals_case_insensitive(key)) {
@ -142,7 +151,7 @@ inline simdjson_result<element> object::at_key_case_insensitive(const std::strin
// //
// object::iterator inline implementation // object::iterator inline implementation
// //
really_inline object::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } simdjson_really_inline object::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { }
inline const key_value_pair object::iterator::operator*() const noexcept { inline const key_value_pair object::iterator::operator*() const noexcept {
return key_value_pair(key(), value()); return key_value_pair(key(), value());
} }
@ -200,7 +209,7 @@ inline element object::iterator::value() const noexcept {
* on the long run. * on the long run.
*/ */
inline bool object::iterator::key_equals(const std::string_view & o) const noexcept { inline bool object::iterator::key_equals(std::string_view o) const noexcept {
// We use the fact that the key length can be computed quickly // We use the fact that the key length can be computed quickly
// without access to the string buffer. // without access to the string buffer.
const uint32_t len = key_length(); const uint32_t len = key_length();
@ -211,7 +220,7 @@ inline bool object::iterator::key_equals(const std::string_view & o) const noexc
return false; return false;
} }
inline bool object::iterator::key_equals_case_insensitive(const std::string_view & o) const noexcept { inline bool object::iterator::key_equals_case_insensitive(std::string_view o) const noexcept {
// We use the fact that the key length can be computed quickly // We use the fact that the key length can be computed quickly
// without access to the string buffer. // without access to the string buffer.
const uint32_t len = key_length(); const uint32_t len = key_length();
@ -226,7 +235,7 @@ inline bool object::iterator::key_equals_case_insensitive(const std::string_view
// //
// key_value_pair inline implementation // key_value_pair inline implementation
// //
inline key_value_pair::key_value_pair(const std::string_view &_key, element _value) noexcept : inline key_value_pair::key_value_pair(std::string_view _key, element _value) noexcept :
key(_key), value(_value) {} key(_key), value(_value) {}
inline std::ostream& operator<<(std::ostream& out, const object &value) { inline std::ostream& operator<<(std::ostream& out, const object &value) {

View File

@ -13,7 +13,7 @@ SIMDJSON_DISABLE_DEPRECATED_WARNING
// Because of template weirdness, the actual class definition is inline in the document class // Because of template weirdness, the actual class definition is inline in the document class
WARN_UNUSED bool dom::parser::Iterator::is_ok() const { SIMDJSON_WARN_UNUSED bool dom::parser::Iterator::is_ok() const {
return location < tape_length; return location < tape_length;
} }

View File

@ -15,12 +15,12 @@ namespace dom {
// //
// parser inline implementation // parser inline implementation
// //
really_inline parser::parser(size_t max_capacity) noexcept simdjson_really_inline parser::parser(size_t max_capacity) noexcept
: _max_capacity{max_capacity}, : _max_capacity{max_capacity},
loaded_bytes(nullptr) { loaded_bytes(nullptr) {
} }
really_inline parser::parser(parser &&other) noexcept = default; simdjson_really_inline parser::parser(parser &&other) noexcept = default;
really_inline parser &parser::operator=(parser &&other) noexcept = default; simdjson_really_inline parser &parser::operator=(parser &&other) noexcept = default;
inline bool parser::is_valid() const noexcept { return valid; } inline bool parser::is_valid() const noexcept { return valid; }
inline int parser::get_error_code() const noexcept { return error; } inline int parser::get_error_code() const noexcept { return error; }
@ -105,13 +105,13 @@ inline simdjson_result<element> parser::parse(const uint8_t *buf, size_t len, bo
return doc.root(); return doc.root();
} }
really_inline simdjson_result<element> parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept { simdjson_really_inline simdjson_result<element> parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept {
return parse((const uint8_t *)buf, len, realloc_if_needed); return parse((const uint8_t *)buf, len, realloc_if_needed);
} }
really_inline simdjson_result<element> parser::parse(const std::string &s) & noexcept { simdjson_really_inline simdjson_result<element> parser::parse(const std::string &s) & noexcept {
return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING);
} }
really_inline simdjson_result<element> parser::parse(const padded_string &s) & noexcept { simdjson_really_inline simdjson_result<element> parser::parse(const padded_string &s) & noexcept {
return parse(s.data(), s.length(), false); return parse(s.data(), s.length(), false);
} }
@ -128,17 +128,17 @@ inline simdjson_result<document_stream> parser::parse_many(const padded_string &
return parse_many(s.data(), s.length(), batch_size); return parse_many(s.data(), s.length(), batch_size);
} }
really_inline size_t parser::capacity() const noexcept { simdjson_really_inline size_t parser::capacity() const noexcept {
return implementation ? implementation->capacity() : 0; return implementation ? implementation->capacity() : 0;
} }
really_inline size_t parser::max_capacity() const noexcept { simdjson_really_inline size_t parser::max_capacity() const noexcept {
return _max_capacity; return _max_capacity;
} }
really_inline size_t parser::max_depth() const noexcept { simdjson_really_inline size_t parser::max_depth() const noexcept {
return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH; return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH;
} }
WARN_UNUSED SIMDJSON_WARN_UNUSED
inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept { inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept {
// //
// Reallocate implementation and document if needed // Reallocate implementation and document if needed
@ -164,7 +164,7 @@ inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept {
return SUCCESS; return SUCCESS;
} }
WARN_UNUSED SIMDJSON_WARN_UNUSED
inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept { inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept {
return !allocate(capacity, max_depth); return !allocate(capacity, max_depth);
} }
@ -173,7 +173,7 @@ inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept {
// If we don't have enough capacity, (try to) automatically bump it. // If we don't have enough capacity, (try to) automatically bump it.
// If the document was taken, reallocate that too. // If the document was taken, reallocate that too.
// Both in one if statement to minimize unlikely branching. // Both in one if statement to minimize unlikely branching.
if (unlikely(capacity() < desired_capacity || !doc.tape)) { if (simdjson_unlikely(capacity() < desired_capacity || !doc.tape)) {
if (desired_capacity > max_capacity()) { if (desired_capacity > max_capacity()) {
return error = CAPACITY; return error = CAPACITY;
} }
@ -183,7 +183,7 @@ inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept {
return SUCCESS; return SUCCESS;
} }
really_inline void parser::set_max_capacity(size_t max_capacity) noexcept { simdjson_really_inline void parser::set_max_capacity(size_t max_capacity) noexcept {
_max_capacity = max_capacity; _max_capacity = max_capacity;
} }

View File

@ -10,34 +10,39 @@ namespace internal {
// //
// tape_ref inline implementation // tape_ref inline implementation
// //
really_inline tape_ref::tape_ref() noexcept : doc{nullptr}, json_index{0} {} simdjson_really_inline tape_ref::tape_ref() noexcept : doc{nullptr}, json_index{0} {}
really_inline tape_ref::tape_ref(const dom::document *_doc, size_t _json_index) noexcept : doc{_doc}, json_index{_json_index} {} simdjson_really_inline tape_ref::tape_ref(const dom::document *_doc, size_t _json_index) noexcept : doc{_doc}, json_index{_json_index} {}
simdjson_really_inline bool tape_ref::is_document_root() const noexcept {
return json_index == 1; // should we ever change the structure of the tape, this should get updated.
}
// Some value types have a specific on-tape word value. It can be faster // Some value types have a specific on-tape word value. It can be faster
// to check the type by doing a word-to-word comparison instead of extracting the // to check the type by doing a word-to-word comparison instead of extracting the
// most significant 8 bits. // most significant 8 bits.
really_inline bool tape_ref::is_double() const noexcept { simdjson_really_inline bool tape_ref::is_double() const noexcept {
constexpr uint64_t tape_double = uint64_t(tape_type::DOUBLE)<<56; constexpr uint64_t tape_double = uint64_t(tape_type::DOUBLE)<<56;
return doc->tape[json_index] == tape_double; return doc->tape[json_index] == tape_double;
} }
really_inline bool tape_ref::is_int64() const noexcept { simdjson_really_inline bool tape_ref::is_int64() const noexcept {
constexpr uint64_t tape_int64 = uint64_t(tape_type::INT64)<<56; constexpr uint64_t tape_int64 = uint64_t(tape_type::INT64)<<56;
return doc->tape[json_index] == tape_int64; return doc->tape[json_index] == tape_int64;
} }
really_inline bool tape_ref::is_uint64() const noexcept { simdjson_really_inline bool tape_ref::is_uint64() const noexcept {
constexpr uint64_t tape_uint64 = uint64_t(tape_type::UINT64)<<56; constexpr uint64_t tape_uint64 = uint64_t(tape_type::UINT64)<<56;
return doc->tape[json_index] == tape_uint64; return doc->tape[json_index] == tape_uint64;
} }
really_inline bool tape_ref::is_false() const noexcept { simdjson_really_inline bool tape_ref::is_false() const noexcept {
constexpr uint64_t tape_false = uint64_t(tape_type::FALSE_VALUE)<<56; constexpr uint64_t tape_false = uint64_t(tape_type::FALSE_VALUE)<<56;
return doc->tape[json_index] == tape_false; return doc->tape[json_index] == tape_false;
} }
really_inline bool tape_ref::is_true() const noexcept { simdjson_really_inline bool tape_ref::is_true() const noexcept {
constexpr uint64_t tape_true = uint64_t(tape_type::TRUE_VALUE)<<56; constexpr uint64_t tape_true = uint64_t(tape_type::TRUE_VALUE)<<56;
return doc->tape[json_index] == tape_true; return doc->tape[json_index] == tape_true;
} }
really_inline bool tape_ref::is_null_on_tape() const noexcept { simdjson_really_inline bool tape_ref::is_null_on_tape() const noexcept {
constexpr uint64_t tape_null = uint64_t(tape_type::NULL_VALUE)<<56; constexpr uint64_t tape_null = uint64_t(tape_type::NULL_VALUE)<<56;
return doc->tape[json_index] == tape_null; return doc->tape[json_index] == tape_null;
} }
@ -55,21 +60,21 @@ inline size_t tape_ref::after_element() const noexcept {
return json_index + 1; return json_index + 1;
} }
} }
really_inline tape_type tape_ref::tape_ref_type() const noexcept { simdjson_really_inline tape_type tape_ref::tape_ref_type() const noexcept {
return static_cast<tape_type>(doc->tape[json_index] >> 56); return static_cast<tape_type>(doc->tape[json_index] >> 56);
} }
really_inline uint64_t internal::tape_ref::tape_value() const noexcept { simdjson_really_inline uint64_t internal::tape_ref::tape_value() const noexcept {
return doc->tape[json_index] & internal::JSON_VALUE_MASK; return doc->tape[json_index] & internal::JSON_VALUE_MASK;
} }
really_inline uint32_t internal::tape_ref::matching_brace_index() const noexcept { simdjson_really_inline uint32_t internal::tape_ref::matching_brace_index() const noexcept {
return uint32_t(doc->tape[json_index]); return uint32_t(doc->tape[json_index]);
} }
really_inline uint32_t internal::tape_ref::scope_count() const noexcept { simdjson_really_inline uint32_t internal::tape_ref::scope_count() const noexcept {
return uint32_t((doc->tape[json_index] >> 32) & internal::JSON_COUNT_MASK); return uint32_t((doc->tape[json_index] >> 32) & internal::JSON_COUNT_MASK);
} }
template<typename T> template<typename T>
really_inline T tape_ref::next_tape_value() const noexcept { simdjson_really_inline T tape_ref::next_tape_value() const noexcept {
static_assert(sizeof(T) == sizeof(uint64_t), "next_tape_value() template parameter must be 64-bit"); static_assert(sizeof(T) == sizeof(uint64_t), "next_tape_value() template parameter must be 64-bit");
// Though the following is tempting... // Though the following is tempting...
// return *reinterpret_cast<const T*>(&doc->tape[json_index + 1]); // return *reinterpret_cast<const T*>(&doc->tape[json_index + 1]);
@ -80,14 +85,14 @@ really_inline T tape_ref::next_tape_value() const noexcept {
return x; return x;
} }
really_inline uint32_t internal::tape_ref::get_string_length() const noexcept { simdjson_really_inline uint32_t internal::tape_ref::get_string_length() const noexcept {
size_t string_buf_index = size_t(tape_value()); size_t string_buf_index = size_t(tape_value());
uint32_t len; uint32_t len;
memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len));
return len; return len;
} }
really_inline const char * internal::tape_ref::get_c_str() const noexcept { simdjson_really_inline const char * internal::tape_ref::get_c_str() const noexcept {
size_t string_buf_index = size_t(tape_value()); size_t string_buf_index = size_t(tape_value());
return reinterpret_cast<const char *>(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); return reinterpret_cast<const char *>(&doc->string_buf[string_buf_index + sizeof(uint32_t)]);
} }

View File

@ -35,7 +35,7 @@ public:
* @param len The length of the json document. * @param len The length of the json document.
* @return The error code, or SUCCESS if there was no error. * @return The error code, or SUCCESS if there was no error.
*/ */
WARN_UNUSED virtual error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept = 0; SIMDJSON_WARN_UNUSED virtual error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept = 0;
/** /**
* @private For internal implementation use * @private For internal implementation use
@ -51,7 +51,7 @@ public:
* @param streaming Whether this is being called by parser::parse_many. * @param streaming Whether this is being called by parser::parse_many.
* @return The error code, or SUCCESS if there was no error. * @return The error code, or SUCCESS if there was no error.
*/ */
WARN_UNUSED virtual error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept = 0; SIMDJSON_WARN_UNUSED virtual error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept = 0;
/** /**
* @private For internal implementation use * @private For internal implementation use
@ -65,7 +65,7 @@ public:
* @param doc The document to output to. * @param doc The document to output to.
* @return The error code, or SUCCESS if there was no error. * @return The error code, or SUCCESS if there was no error.
*/ */
WARN_UNUSED virtual error_code stage2(dom::document &doc) noexcept = 0; SIMDJSON_WARN_UNUSED virtual error_code stage2(dom::document &doc) noexcept = 0;
/** /**
* @private For internal implementation use * @private For internal implementation use
@ -78,7 +78,7 @@ public:
* @param doc The document to output to. * @param doc The document to output to.
* @return The error code, SUCCESS if there was no error, or EMPTY if all documents have been parsed. * @return The error code, SUCCESS if there was no error, or EMPTY if all documents have been parsed.
*/ */
WARN_UNUSED virtual error_code stage2_next(dom::document &doc) noexcept = 0; SIMDJSON_WARN_UNUSED virtual error_code stage2_next(dom::document &doc) noexcept = 0;
/** /**
* Change the capacity of this parser. * Change the capacity of this parser.
@ -119,14 +119,14 @@ public:
* *
* @return Current capacity, in bytes. * @return Current capacity, in bytes.
*/ */
really_inline size_t capacity() const noexcept; simdjson_really_inline size_t capacity() const noexcept;
/** /**
* The maximum level of nested object and arrays supported by this parser. * The maximum level of nested object and arrays supported by this parser.
* *
* @return Maximum depth, in bytes. * @return Maximum depth, in bytes.
*/ */
really_inline size_t max_depth() const noexcept; simdjson_really_inline size_t max_depth() const noexcept;
/** /**
* Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
@ -136,7 +136,7 @@ public:
* @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH.
* @return The error, if there is one. * @return The error, if there is one.
*/ */
WARN_UNUSED inline error_code allocate(size_t capacity, size_t max_depth) noexcept; SIMDJSON_WARN_UNUSED inline error_code allocate(size_t capacity, size_t max_depth) noexcept;
protected: protected:
/** /**
@ -154,15 +154,15 @@ protected:
size_t _max_depth{0}; size_t _max_depth{0};
}; // class dom_parser_implementation }; // class dom_parser_implementation
really_inline size_t dom_parser_implementation::capacity() const noexcept { simdjson_really_inline size_t dom_parser_implementation::capacity() const noexcept {
return _capacity; return _capacity;
} }
really_inline size_t dom_parser_implementation::max_depth() const noexcept { simdjson_really_inline size_t dom_parser_implementation::max_depth() const noexcept {
return _max_depth; return _max_depth;
} }
WARN_UNUSED SIMDJSON_WARN_UNUSED
inline error_code dom_parser_implementation::allocate(size_t capacity, size_t max_depth) noexcept { inline error_code dom_parser_implementation::allocate(size_t capacity, size_t max_depth) noexcept {
if (this->max_depth() != max_depth) { if (this->max_depth() != max_depth) {
error_code err = set_max_depth(max_depth); error_code err = set_max_depth(max_depth);

View File

@ -19,24 +19,25 @@ constexpr const uint32_t JSON_COUNT_MASK = 0xFFFFFF;
*/ */
class tape_ref { class tape_ref {
public: public:
really_inline tape_ref() noexcept; simdjson_really_inline tape_ref() noexcept;
really_inline tape_ref(const dom::document *doc, size_t json_index) noexcept; simdjson_really_inline tape_ref(const dom::document *doc, size_t json_index) noexcept;
inline size_t after_element() const noexcept; inline size_t after_element() const noexcept;
really_inline tape_type tape_ref_type() const noexcept; simdjson_really_inline tape_type tape_ref_type() const noexcept;
really_inline uint64_t tape_value() const noexcept; simdjson_really_inline uint64_t tape_value() const noexcept;
really_inline bool is_double() const noexcept; simdjson_really_inline bool is_double() const noexcept;
really_inline bool is_int64() const noexcept; simdjson_really_inline bool is_int64() const noexcept;
really_inline bool is_uint64() const noexcept; simdjson_really_inline bool is_uint64() const noexcept;
really_inline bool is_false() const noexcept; simdjson_really_inline bool is_false() const noexcept;
really_inline bool is_true() const noexcept; simdjson_really_inline bool is_true() const noexcept;
really_inline bool is_null_on_tape() const noexcept;// different name to avoid clash with is_null. simdjson_really_inline bool is_null_on_tape() const noexcept;// different name to avoid clash with is_null.
really_inline uint32_t matching_brace_index() const noexcept; simdjson_really_inline uint32_t matching_brace_index() const noexcept;
really_inline uint32_t scope_count() const noexcept; simdjson_really_inline uint32_t scope_count() const noexcept;
template<typename T> template<typename T>
really_inline T next_tape_value() const noexcept; simdjson_really_inline T next_tape_value() const noexcept;
really_inline uint32_t get_string_length() const noexcept; simdjson_really_inline uint32_t get_string_length() const noexcept;
really_inline const char * get_c_str() const noexcept; simdjson_really_inline const char * get_c_str() const noexcept;
inline std::string_view get_string_view() const noexcept; inline std::string_view get_string_view() const noexcept;
simdjson_really_inline bool is_document_root() const noexcept;
/** The document this element references. */ /** The document this element references. */
const dom::document *doc; const dom::document *doc;

View File

@ -24,7 +24,7 @@ namespace simdjson {
* @param dst_len the number of bytes written. Output only. * @param dst_len the number of bytes written. Output only.
* @return the error code, or SUCCESS if there was no error. * @return the error code, or SUCCESS if there was no error.
*/ */
WARN_UNUSED error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept; SIMDJSON_WARN_UNUSED error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept;
/** /**
* Minifies a JSON element or document, printing the smallest possible valid JSON. * Minifies a JSON element or document, printing the smallest possible valid JSON.

View File

@ -9,7 +9,7 @@ int main(int argc, char *argv[]) {
} }
const char * filename = argv[1]; const char * filename = argv[1];
simdjson::dom::parser parser; simdjson::dom::parser parser;
UNUSED simdjson::dom::element elem; simdjson::dom::element elem;
auto error = parser.load(filename).get(elem); // do the parsing auto error = parser.load(filename).get(elem); // do the parsing
if (error) { if (error) {
std::cout << "parse failed" << std::endl; std::cout << "parse failed" << std::endl;
@ -17,7 +17,7 @@ int main(int argc, char *argv[]) {
std::cout << error << std::endl; std::cout << error << std::endl;
return EXIT_FAILURE; return EXIT_FAILURE;
} else { } else {
std::cout << "parse valid" << std::endl; std::cout << "parse valid: " << elem << std::endl;
} }
if(argc == 2) { if(argc == 2) {
return EXIT_SUCCESS; return EXIT_SUCCESS;

View File

@ -8,7 +8,7 @@ namespace arm64 {
// but the algorithms do not end up using the returned value. // but the algorithms do not end up using the returned value.
// Sadly, sanitizers are not smart enough to figure it out. // Sadly, sanitizers are not smart enough to figure it out.
NO_SANITIZE_UNDEFINED NO_SANITIZE_UNDEFINED
really_inline int trailing_zeroes(uint64_t input_num) { simdjson_really_inline int trailing_zeroes(uint64_t input_num) {
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
unsigned long ret; unsigned long ret;
// Search the mask data from least significant bit (LSB) // Search the mask data from least significant bit (LSB)
@ -21,12 +21,12 @@ really_inline int trailing_zeroes(uint64_t input_num) {
} }
/* result might be undefined when input_num is zero */ /* result might be undefined when input_num is zero */
really_inline uint64_t clear_lowest_bit(uint64_t input_num) { simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) {
return input_num & (input_num-1); return input_num & (input_num-1);
} }
/* result might be undefined when input_num is zero */ /* result might be undefined when input_num is zero */
really_inline int leading_zeroes(uint64_t input_num) { simdjson_really_inline int leading_zeroes(uint64_t input_num) {
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
unsigned long leading_zero = 0; unsigned long leading_zero = 0;
// Search the mask data from most significant bit (MSB) // Search the mask data from most significant bit (MSB)
@ -41,11 +41,11 @@ really_inline int leading_zeroes(uint64_t input_num) {
} }
/* result might be undefined when input_num is zero */ /* result might be undefined when input_num is zero */
really_inline int count_ones(uint64_t input_num) { simdjson_really_inline int count_ones(uint64_t input_num) {
return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); return vaddv_u8(vcnt_u8(vcreate_u8(input_num)));
} }
really_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
*result = value1 + value2; *result = value1 + value2;
return *result < value1; return *result < value1;

View File

@ -9,7 +9,7 @@ namespace arm64 {
// //
// For example, prefix_xor(00100100) == 00011100 // For example, prefix_xor(00100100) == 00011100
// //
really_inline uint64_t prefix_xor(uint64_t bitmask) { simdjson_really_inline uint64_t prefix_xor(uint64_t bitmask) {
///////////// /////////////
// We could do this with PMULL, but it is apparently slow. // We could do this with PMULL, but it is apparently slow.
// //

View File

@ -11,17 +11,17 @@ namespace SIMDJSON_IMPLEMENTATION {
using namespace simd; using namespace simd;
struct json_character_block { struct json_character_block {
static really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in); static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in);
really_inline uint64_t whitespace() const { return _whitespace; } simdjson_really_inline uint64_t whitespace() const { return _whitespace; }
really_inline uint64_t op() const { return _op; } simdjson_really_inline uint64_t op() const { return _op; }
really_inline uint64_t scalar() { return ~(op() | whitespace()); } simdjson_really_inline uint64_t scalar() { return ~(op() | whitespace()); }
uint64_t _whitespace; uint64_t _whitespace;
uint64_t _op; uint64_t _op;
}; };
really_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t>& in) { simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t>& in) {
// Functional programming causes trouble with Visual Studio. // Functional programming causes trouble with Visual Studio.
// Keeping this version in comments since it is much nicer: // Keeping this version in comments since it is much nicer:
// auto v = in.map<uint8_t>([&](simd8<uint8_t> chunk) { // auto v = in.map<uint8_t>([&](simd8<uint8_t> chunk) {
@ -75,12 +75,12 @@ really_inline json_character_block json_character_block::classify(const simd::si
return { whitespace, op }; return { whitespace, op };
} }
really_inline bool is_ascii(const simd8x64<uint8_t>& input) { simdjson_really_inline bool is_ascii(const simd8x64<uint8_t>& input) {
simd8<uint8_t> bits = input.reduce_or(); simd8<uint8_t> bits = input.reduce_or();
return bits.max() < 0b10000000u; return bits.max() < 0b10000000u;
} }
UNUSED really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) { SIMDJSON_UNUSED simdjson_really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
simd8<bool> is_second_byte = prev1 >= uint8_t(0b11000000u); simd8<bool> is_second_byte = prev1 >= uint8_t(0b11000000u);
simd8<bool> is_third_byte = prev2 >= uint8_t(0b11100000u); simd8<bool> is_third_byte = prev2 >= uint8_t(0b11100000u);
simd8<bool> is_fourth_byte = prev3 >= uint8_t(0b11110000u); simd8<bool> is_fourth_byte = prev3 >= uint8_t(0b11110000u);
@ -92,7 +92,7 @@ UNUSED really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1
return is_second_byte ^ is_third_byte ^ is_fourth_byte; return is_second_byte ^ is_third_byte ^ is_fourth_byte;
} }
really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) { simdjson_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
simd8<bool> is_third_byte = prev2 >= uint8_t(0b11100000u); simd8<bool> is_third_byte = prev2 >= uint8_t(0b11100000u);
simd8<bool> is_fourth_byte = prev3 >= uint8_t(0b11110000u); simd8<bool> is_fourth_byte = prev3 >= uint8_t(0b11110000u);
return is_third_byte ^ is_fourth_byte; return is_third_byte ^ is_fourth_byte;
@ -121,7 +121,7 @@ namespace {
namespace SIMDJSON_IMPLEMENTATION { namespace SIMDJSON_IMPLEMENTATION {
namespace stage1 { namespace stage1 {
really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) {
// On ARM, we don't short-circuit this if there are no backslashes, because the branch gives us no // On ARM, we don't short-circuit this if there are no backslashes, because the branch gives us no
// benefit and therefore makes things worse. // benefit and therefore makes things worse.
// if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } // if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; }
@ -130,33 +130,33 @@ really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) {
} // namespace stage1 } // namespace stage1
WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { SIMDJSON_WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
return arm64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); return arm64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len);
} }
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept { SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
this->buf = _buf; this->buf = _buf;
this->len = _len; this->len = _len;
return arm64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); return arm64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming);
} }
WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { SIMDJSON_WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
return arm64::stage1::generic_validate_utf8(buf,len); return arm64::stage1::generic_validate_utf8(buf,len);
} }
WARN_UNUSED error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept {
doc = &_doc; doc = &_doc;
stage2::tape_builder builder(*doc); stage2::tape_builder builder(*doc);
return stage2::structural_parser::parse<false>(*this, builder); return stage2::structural_parser::parse<false>(*this, builder);
} }
WARN_UNUSED error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept {
doc = &_doc; doc = &_doc;
stage2::tape_builder builder(_doc); stage2::tape_builder builder(_doc);
return stage2::structural_parser::parse<true>(*this, builder); return stage2::structural_parser::parse<true>(*this, builder);
} }
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
auto error = stage1(_buf, _len, false); auto error = stage1(_buf, _len, false);
if (error) { return error; } if (error) { return error; }
return stage2(_doc); return stage2(_doc);

View File

@ -4,7 +4,7 @@
namespace { namespace {
namespace SIMDJSON_IMPLEMENTATION { namespace SIMDJSON_IMPLEMENTATION {
WARN_UNUSED error_code implementation::create_dom_parser_implementation( SIMDJSON_WARN_UNUSED error_code implementation::create_dom_parser_implementation(
size_t capacity, size_t capacity,
size_t max_depth, size_t max_depth,
std::unique_ptr<internal::dom_parser_implementation>& dst std::unique_ptr<internal::dom_parser_implementation>& dst

View File

@ -12,14 +12,14 @@ using namespace simdjson::dom;
class implementation final : public simdjson::implementation { class implementation final : public simdjson::implementation {
public: public:
really_inline implementation() : simdjson::implementation("arm64", "ARM NEON", instruction_set::NEON) {} simdjson_really_inline implementation() : simdjson::implementation("arm64", "ARM NEON", instruction_set::NEON) {}
WARN_UNUSED error_code create_dom_parser_implementation( SIMDJSON_WARN_UNUSED error_code create_dom_parser_implementation(
size_t capacity, size_t capacity,
size_t max_length, size_t max_length,
std::unique_ptr<internal::dom_parser_implementation>& dst std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept final; ) const noexcept final;
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; SIMDJSON_WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
WARN_UNUSED bool validate_utf8(const char *buf, size_t len) const noexcept final; SIMDJSON_WARN_UNUSED bool validate_utf8(const char *buf, size_t len) const noexcept final;
}; };
} // namespace arm64 } // namespace arm64

View File

@ -6,7 +6,7 @@ namespace arm64 {
// we don't have SSE, so let us use a scalar function // we don't have SSE, so let us use a scalar function
// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ // credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
static really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) {
uint64_t val; uint64_t val;
memcpy(&val, chars, sizeof(uint64_t)); memcpy(&val, chars, sizeof(uint64_t));
val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8;

View File

@ -27,7 +27,7 @@ namespace {
* You should not use this function except for compile-time constants: * You should not use this function except for compile-time constants:
* it is not efficient. * it is not efficient.
*/ */
really_inline uint8x16_t make_uint8x16_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, simdjson_really_inline uint8x16_t make_uint8x16_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4,
uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8, uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8,
uint8_t x9, uint8_t x10, uint8_t x11, uint8_t x12, uint8_t x9, uint8_t x10, uint8_t x11, uint8_t x12,
uint8_t x13, uint8_t x14, uint8_t x15, uint8_t x16) { uint8_t x13, uint8_t x14, uint8_t x15, uint8_t x16) {
@ -58,7 +58,7 @@ really_inline uint8x16_t make_uint8x16_t(uint8_t x1, uint8_t x2, uint8_t x3,
// We have to do the same work for make_int8x16_t // We have to do the same work for make_int8x16_t
really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_t x4, simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_t x4,
int8_t x5, int8_t x6, int8_t x7, int8_t x8, int8_t x5, int8_t x6, int8_t x7, int8_t x8,
int8_t x9, int8_t x10, int8_t x11, int8_t x12, int8_t x9, int8_t x10, int8_t x11, int8_t x12,
int8_t x13, int8_t x14, int8_t x15, int8_t x16) { int8_t x13, int8_t x14, int8_t x15, int8_t x16) {
@ -104,24 +104,24 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
static const int SIZE = sizeof(value); static const int SIZE = sizeof(value);
// Conversion from/to SIMD register // Conversion from/to SIMD register
really_inline base_u8(const uint8x16_t _value) : value(_value) {} simdjson_really_inline base_u8(const uint8x16_t _value) : value(_value) {}
really_inline operator const uint8x16_t&() const { return this->value; } simdjson_really_inline operator const uint8x16_t&() const { return this->value; }
really_inline operator uint8x16_t&() { return this->value; } simdjson_really_inline operator uint8x16_t&() { return this->value; }
// Bit operations // Bit operations
really_inline simd8<T> operator|(const simd8<T> other) const { return vorrq_u8(*this, other); } simdjson_really_inline simd8<T> operator|(const simd8<T> other) const { return vorrq_u8(*this, other); }
really_inline simd8<T> operator&(const simd8<T> other) const { return vandq_u8(*this, other); } simdjson_really_inline simd8<T> operator&(const simd8<T> other) const { return vandq_u8(*this, other); }
really_inline simd8<T> operator^(const simd8<T> other) const { return veorq_u8(*this, other); } simdjson_really_inline simd8<T> operator^(const simd8<T> other) const { return veorq_u8(*this, other); }
really_inline simd8<T> bit_andnot(const simd8<T> other) const { return vbicq_u8(*this, other); } simdjson_really_inline simd8<T> bit_andnot(const simd8<T> other) const { return vbicq_u8(*this, other); }
really_inline simd8<T> operator~() const { return *this ^ 0xFFu; } simdjson_really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
really_inline simd8<T>& operator|=(const simd8<T> other) { auto this_cast = (simd8<T>*)this; *this_cast = *this_cast | other; return *this_cast; } simdjson_really_inline simd8<T>& operator|=(const simd8<T> other) { auto this_cast = (simd8<T>*)this; *this_cast = *this_cast | other; return *this_cast; }
really_inline simd8<T>& operator&=(const simd8<T> other) { auto this_cast = (simd8<T>*)this; *this_cast = *this_cast & other; return *this_cast; } simdjson_really_inline simd8<T>& operator&=(const simd8<T> other) { auto this_cast = (simd8<T>*)this; *this_cast = *this_cast & other; return *this_cast; }
really_inline simd8<T>& operator^=(const simd8<T> other) { auto this_cast = (simd8<T>*)this; *this_cast = *this_cast ^ other; return *this_cast; } simdjson_really_inline simd8<T>& operator^=(const simd8<T> other) { auto this_cast = (simd8<T>*)this; *this_cast = *this_cast ^ other; return *this_cast; }
really_inline Mask operator==(const simd8<T> other) const { return vceqq_u8(*this, other); } simdjson_really_inline Mask operator==(const simd8<T> other) const { return vceqq_u8(*this, other); }
template<int N=1> template<int N=1>
really_inline simd8<T> prev(const simd8<T> prev_chunk) const { simdjson_really_inline simd8<T> prev(const simd8<T> prev_chunk) const {
return vextq_u8(prev_chunk, *this, 16 - N); return vextq_u8(prev_chunk, *this, 16 - N);
} }
}; };
@ -132,17 +132,17 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
typedef uint16_t bitmask_t; typedef uint16_t bitmask_t;
typedef uint32_t bitmask2_t; typedef uint32_t bitmask2_t;
static really_inline simd8<bool> splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } static simdjson_really_inline simd8<bool> splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); }
really_inline simd8(const uint8x16_t _value) : base_u8<bool>(_value) {} simdjson_really_inline simd8(const uint8x16_t _value) : base_u8<bool>(_value) {}
// False constructor // False constructor
really_inline simd8() : simd8(vdupq_n_u8(0)) {} simdjson_really_inline simd8() : simd8(vdupq_n_u8(0)) {}
// Splat constructor // Splat constructor
really_inline simd8(bool _value) : simd8(splat(_value)) {} simdjson_really_inline simd8(bool _value) : simd8(splat(_value)) {}
// We return uint32_t instead of uint16_t because that seems to be more efficient for most // We return uint32_t instead of uint16_t because that seems to be more efficient for most
// purposes (cutting it down to uint16_t costs performance in some compilers). // purposes (cutting it down to uint16_t costs performance in some compilers).
really_inline uint32_t to_bitmask() const { simdjson_really_inline uint32_t to_bitmask() const {
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
const uint8x16_t bit_mask = make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, const uint8x16_t bit_mask = make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80);
@ -156,26 +156,26 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
tmp = vpaddq_u8(tmp, tmp); tmp = vpaddq_u8(tmp, tmp);
return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0);
} }
really_inline bool any() const { return vmaxvq_u8(*this) != 0; } simdjson_really_inline bool any() const { return vmaxvq_u8(*this) != 0; }
}; };
// Unsigned bytes // Unsigned bytes
template<> template<>
struct simd8<uint8_t>: base_u8<uint8_t> { struct simd8<uint8_t>: base_u8<uint8_t> {
static really_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } static simdjson_really_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); }
static really_inline uint8x16_t zero() { return vdupq_n_u8(0); } static simdjson_really_inline uint8x16_t zero() { return vdupq_n_u8(0); }
static really_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } static simdjson_really_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); }
really_inline simd8(const uint8x16_t _value) : base_u8<uint8_t>(_value) {} simdjson_really_inline simd8(const uint8x16_t _value) : base_u8<uint8_t>(_value) {}
// Zero constructor // Zero constructor
really_inline simd8() : simd8(zero()) {} simdjson_really_inline simd8() : simd8(zero()) {}
// Array constructor // Array constructor
really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} simdjson_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {}
// Splat constructor // Splat constructor
really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
// Member-by-member initialization // Member-by-member initialization
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
really_inline simd8( simdjson_really_inline simd8(
uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
) : simd8(make_uint8x16_t( ) : simd8(make_uint8x16_t(
@ -183,7 +183,7 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
v8, v9, v10,v11,v12,v13,v14,v15 v8, v9, v10,v11,v12,v13,v14,v15
)) {} )) {}
#else #else
really_inline simd8( simdjson_really_inline simd8(
uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
) : simd8(uint8x16_t{ ) : simd8(uint8x16_t{
@ -193,7 +193,7 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
#endif #endif
// Repeat 16 values as many times as necessary (usually for lookup tables) // Repeat 16 values as many times as necessary (usually for lookup tables)
really_inline static simd8<uint8_t> repeat_16( simdjson_really_inline static simd8<uint8_t> repeat_16(
uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
) { ) {
@ -204,44 +204,44 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
} }
// Store to array // Store to array
really_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } simdjson_really_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); }
// Saturated math // Saturated math
really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return vqaddq_u8(*this, other); } simdjson_really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return vqaddq_u8(*this, other); }
really_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return vqsubq_u8(*this, other); } simdjson_really_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return vqsubq_u8(*this, other); }
// Addition/subtraction are the same for signed and unsigned // Addition/subtraction are the same for signed and unsigned
really_inline simd8<uint8_t> operator+(const simd8<uint8_t> other) const { return vaddq_u8(*this, other); } simdjson_really_inline simd8<uint8_t> operator+(const simd8<uint8_t> other) const { return vaddq_u8(*this, other); }
really_inline simd8<uint8_t> operator-(const simd8<uint8_t> other) const { return vsubq_u8(*this, other); } simdjson_really_inline simd8<uint8_t> operator-(const simd8<uint8_t> other) const { return vsubq_u8(*this, other); }
really_inline simd8<uint8_t>& operator+=(const simd8<uint8_t> other) { *this = *this + other; return *this; } simdjson_really_inline simd8<uint8_t>& operator+=(const simd8<uint8_t> other) { *this = *this + other; return *this; }
really_inline simd8<uint8_t>& operator-=(const simd8<uint8_t> other) { *this = *this - other; return *this; } simdjson_really_inline simd8<uint8_t>& operator-=(const simd8<uint8_t> other) { *this = *this - other; return *this; }
// Order-specific operations // Order-specific operations
really_inline uint8_t max() const { return vmaxvq_u8(*this); } simdjson_really_inline uint8_t max() const { return vmaxvq_u8(*this); }
really_inline uint8_t min() const { return vminvq_u8(*this); } simdjson_really_inline uint8_t min() const { return vminvq_u8(*this); }
really_inline simd8<uint8_t> max(const simd8<uint8_t> other) const { return vmaxq_u8(*this, other); } simdjson_really_inline simd8<uint8_t> max(const simd8<uint8_t> other) const { return vmaxq_u8(*this, other); }
really_inline simd8<uint8_t> min(const simd8<uint8_t> other) const { return vminq_u8(*this, other); } simdjson_really_inline simd8<uint8_t> min(const simd8<uint8_t> other) const { return vminq_u8(*this, other); }
really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return vcleq_u8(*this, other); } simdjson_really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return vcleq_u8(*this, other); }
really_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return vcgeq_u8(*this, other); } simdjson_really_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return vcgeq_u8(*this, other); }
really_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return vcltq_u8(*this, other); } simdjson_really_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return vcltq_u8(*this, other); }
really_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return vcgtq_u8(*this, other); } simdjson_really_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return vcgtq_u8(*this, other); }
// Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's.
really_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return simd8<uint8_t>(*this > other); } simdjson_really_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return simd8<uint8_t>(*this > other); }
// Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's.
really_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return simd8<uint8_t>(*this < other); } simdjson_really_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return simd8<uint8_t>(*this < other); }
// Bit-specific operations // Bit-specific operations
really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return vtstq_u8(*this, bits); } simdjson_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return vtstq_u8(*this, bits); }
really_inline bool any_bits_set_anywhere() const { return this->max() != 0; } simdjson_really_inline bool any_bits_set_anywhere() const { return this->max() != 0; }
really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return (*this & bits).any_bits_set_anywhere(); } simdjson_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return (*this & bits).any_bits_set_anywhere(); }
template<int N> template<int N>
really_inline simd8<uint8_t> shr() const { return vshrq_n_u8(*this, N); } simdjson_really_inline simd8<uint8_t> shr() const { return vshrq_n_u8(*this, N); }
template<int N> template<int N>
really_inline simd8<uint8_t> shl() const { return vshlq_n_u8(*this, N); } simdjson_really_inline simd8<uint8_t> shl() const { return vshlq_n_u8(*this, N); }
// Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values)
template<typename L> template<typename L>
really_inline simd8<L> lookup_16(simd8<L> lookup_table) const { simdjson_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
return lookup_table.apply_lookup_16_to(*this); return lookup_table.apply_lookup_16_to(*this);
} }
@ -254,7 +254,7 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
// signature simd8<L> compress(uint16_t mask) would be // signature simd8<L> compress(uint16_t mask) would be
// sensible, but the AVX ISA makes this kind of approach difficult. // sensible, but the AVX ISA makes this kind of approach difficult.
template<typename L> template<typename L>
really_inline void compress(uint16_t mask, L * output) const { simdjson_really_inline void compress(uint16_t mask, L * output) const {
// this particular implementation was inspired by work done by @animetosho // this particular implementation was inspired by work done by @animetosho
// we do it in two steps, first 8 bytes and then second 8 bytes // we do it in two steps, first 8 bytes and then second 8 bytes
uint8_t mask1 = uint8_t(mask); // least significant 8 bits uint8_t mask1 = uint8_t(mask); // least significant 8 bits
@ -286,7 +286,7 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
} }
template<typename L> template<typename L>
really_inline simd8<L> lookup_16( simdjson_really_inline simd8<L> lookup_16(
L replace0, L replace1, L replace2, L replace3, L replace0, L replace1, L replace2, L replace3,
L replace4, L replace5, L replace6, L replace7, L replace4, L replace5, L replace6, L replace7,
L replace8, L replace9, L replace10, L replace11, L replace8, L replace9, L replace10, L replace11,
@ -300,7 +300,7 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
} }
template<typename T> template<typename T>
really_inline simd8<uint8_t> apply_lookup_16_to(const simd8<T> original) { simdjson_really_inline simd8<uint8_t> apply_lookup_16_to(const simd8<T> original) {
return vqtbl1q_u8(*this, simd8<uint8_t>(original)); return vqtbl1q_u8(*this, simd8<uint8_t>(original));
} }
}; };
@ -310,24 +310,24 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
struct simd8<int8_t> { struct simd8<int8_t> {
int8x16_t value; int8x16_t value;
static really_inline simd8<int8_t> splat(int8_t _value) { return vmovq_n_s8(_value); } static simdjson_really_inline simd8<int8_t> splat(int8_t _value) { return vmovq_n_s8(_value); }
static really_inline simd8<int8_t> zero() { return vdupq_n_s8(0); } static simdjson_really_inline simd8<int8_t> zero() { return vdupq_n_s8(0); }
static really_inline simd8<int8_t> load(const int8_t values[16]) { return vld1q_s8(values); } static simdjson_really_inline simd8<int8_t> load(const int8_t values[16]) { return vld1q_s8(values); }
// Conversion from/to SIMD register // Conversion from/to SIMD register
really_inline simd8(const int8x16_t _value) : value{_value} {} simdjson_really_inline simd8(const int8x16_t _value) : value{_value} {}
really_inline operator const int8x16_t&() const { return this->value; } simdjson_really_inline operator const int8x16_t&() const { return this->value; }
really_inline operator int8x16_t&() { return this->value; } simdjson_really_inline operator int8x16_t&() { return this->value; }
// Zero constructor // Zero constructor
really_inline simd8() : simd8(zero()) {} simdjson_really_inline simd8() : simd8(zero()) {}
// Splat constructor // Splat constructor
really_inline simd8(int8_t _value) : simd8(splat(_value)) {} simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {}
// Array constructor // Array constructor
really_inline simd8(const int8_t* values) : simd8(load(values)) {} simdjson_really_inline simd8(const int8_t* values) : simd8(load(values)) {}
// Member-by-member initialization // Member-by-member initialization
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
really_inline simd8( simdjson_really_inline simd8(
int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
) : simd8(make_int8x16_t( ) : simd8(make_int8x16_t(
@ -335,7 +335,7 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
v8, v9, v10,v11,v12,v13,v14,v15 v8, v9, v10,v11,v12,v13,v14,v15
)) {} )) {}
#else #else
really_inline simd8( simdjson_really_inline simd8(
int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
) : simd8(int8x16_t{ ) : simd8(int8x16_t{
@ -344,7 +344,7 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
}) {} }) {}
#endif #endif
// Repeat 16 values as many times as necessary (usually for lookup tables) // Repeat 16 values as many times as necessary (usually for lookup tables)
really_inline static simd8<int8_t> repeat_16( simdjson_really_inline static simd8<int8_t> repeat_16(
int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
) { ) {
@ -355,7 +355,7 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
} }
// Store to array // Store to array
// Writes this vector to dst with vst1q_s8; dst is declared as int8_t[16].
// The `return` simply forwards the void result of the intrinsic call.
really_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } simdjson_really_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); }
// Explicit conversion to/from unsigned // Explicit conversion to/from unsigned
// //
@ -363,35 +363,35 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
// In theory, we could check this occurrence with std::same_as and std::enable_if but it is C++14 // In theory, we could check this occurrence with std::same_as and std::enable_if but it is C++14
// and relatively ugly and hard to read. // and relatively ugly and hard to read.
// Explicit constructor from an unsigned register: reinterprets the uint8x16_t
// as signed lanes (vreinterpretq_s8_u8). It is compiled out when
// SIMDJSON_REGULAR_VISUAL_STUDIO is defined.
#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO #ifndef SIMDJSON_REGULAR_VISUAL_STUDIO
really_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} simdjson_really_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {}
#endif #endif
// Explicit conversion to simd8<uint8_t>: reinterprets the stored value with
// vreinterpretq_u8_s8. Always available, regardless of the macro above.
really_inline explicit operator simd8<uint8_t>() const { return vreinterpretq_u8_s8(this->value); } simdjson_really_inline explicit operator simd8<uint8_t>() const { return vreinterpretq_u8_s8(this->value); }
// Math // Math
// operator+ / operator-: add and subtract via vaddq_s8 / vsubq_s8, returning a new vector.
really_inline simd8<int8_t> operator+(const simd8<int8_t> other) const { return vaddq_s8(*this, other); } simdjson_really_inline simd8<int8_t> operator+(const simd8<int8_t> other) const { return vaddq_s8(*this, other); }
really_inline simd8<int8_t> operator-(const simd8<int8_t> other) const { return vsubq_s8(*this, other); } simdjson_really_inline simd8<int8_t> operator-(const simd8<int8_t> other) const { return vsubq_s8(*this, other); }
// Compound forms reuse the binary operators, assign the result to *this and return *this.
really_inline simd8<int8_t>& operator+=(const simd8<int8_t> other) { *this = *this + other; return *this; } simdjson_really_inline simd8<int8_t>& operator+=(const simd8<int8_t> other) { *this = *this + other; return *this; }
really_inline simd8<int8_t>& operator-=(const simd8<int8_t> other) { *this = *this - other; return *this; } simdjson_really_inline simd8<int8_t>& operator-=(const simd8<int8_t> other) { *this = *this - other; return *this; }
// Order-sensitive comparisons // Order-sensitive comparisons
// max / min return a simd8<int8_t> (vmaxq_s8 / vminq_s8).
really_inline simd8<int8_t> max(const simd8<int8_t> other) const { return vmaxq_s8(*this, other); } simdjson_really_inline simd8<int8_t> max(const simd8<int8_t> other) const { return vmaxq_s8(*this, other); }
really_inline simd8<int8_t> min(const simd8<int8_t> other) const { return vminq_s8(*this, other); } simdjson_really_inline simd8<int8_t> min(const simd8<int8_t> other) const { return vminq_s8(*this, other); }
// >, < and == return a simd8<bool> built from vcgtq_s8 / vcltq_s8 / vceqq_s8.
really_inline simd8<bool> operator>(const simd8<int8_t> other) const { return vcgtq_s8(*this, other); } simdjson_really_inline simd8<bool> operator>(const simd8<int8_t> other) const { return vcgtq_s8(*this, other); }
really_inline simd8<bool> operator<(const simd8<int8_t> other) const { return vcltq_s8(*this, other); } simdjson_really_inline simd8<bool> operator<(const simd8<int8_t> other) const { return vcltq_s8(*this, other); }
really_inline simd8<bool> operator==(const simd8<int8_t> other) const { return vceqq_s8(*this, other); } simdjson_really_inline simd8<bool> operator==(const simd8<int8_t> other) const { return vceqq_s8(*this, other); }
// Returns vextq_s8(prev_chunk, *this, 16 - N); N defaults to 1.
// NOTE(review): per the NEON EXT semantics this should be the last N bytes of
// prev_chunk followed by the first 16 - N bytes of this chunk, i.e. the input
// shifted back by N bytes - confirm against the intrinsic reference.
template<int N=1> template<int N=1>
really_inline simd8<int8_t> prev(const simd8<int8_t> prev_chunk) const { simdjson_really_inline simd8<int8_t> prev(const simd8<int8_t> prev_chunk) const {
return vextq_s8(prev_chunk, *this, 16 - N); return vextq_s8(prev_chunk, *this, 16 - N);
} }
// Perform a lookup assuming no value is larger than 16 // Perform a lookup assuming no value is larger than 16
// Table-vector overload: uses this vector as the indices by handing *this to
// lookup_table.apply_lookup_16_to() and returning its result unchanged.
template<typename L> template<typename L>
really_inline simd8<L> lookup_16(simd8<L> lookup_table) const { simdjson_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
return lookup_table.apply_lookup_16_to(*this); return lookup_table.apply_lookup_16_to(*this);
} }
template<typename L> template<typename L>
really_inline simd8<L> lookup_16( simdjson_really_inline simd8<L> lookup_16(
L replace0, L replace1, L replace2, L replace3, L replace0, L replace1, L replace2, L replace3,
L replace4, L replace5, L replace6, L replace7, L replace4, L replace5, L replace6, L replace7,
L replace8, L replace9, L replace10, L replace11, L replace8, L replace9, L replace10, L replace11,
@ -405,7 +405,7 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
} }
// Uses *this as the 16-entry table and `original` (explicitly converted to
// simd8<uint8_t>) as the index vector for vqtbl1q_s8; returns the looked-up
// values as simd8<int8_t>.
template<typename T> template<typename T>
really_inline simd8<int8_t> apply_lookup_16_to(const simd8<T> original) { simdjson_really_inline simd8<int8_t> apply_lookup_16_to(const simd8<T> original) {
return vqtbl1q_s8(*this, simd8<uint8_t>(original)); return vqtbl1q_s8(*this, simd8<uint8_t>(original));
} }
}; };
@ -420,29 +420,29 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
simd8x64<T>& operator=(const simd8<T> other) = delete; // no assignment allowed simd8x64<T>& operator=(const simd8<T> other) = delete; // no assignment allowed
simd8x64() = delete; // no default constructor allowed simd8x64() = delete; // no default constructor allowed
// Builds the 64-byte block from four explicit chunks, stored in order as chunks[0..3].
really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1, const simd8<T> chunk2, const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} simdjson_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1, const simd8<T> chunk2, const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
// Loads the four chunks with simd8<T>::load from ptr at element offsets 0, 16, 32 and 48.
really_inline simd8x64(const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+16), simd8<T>::load(ptr+32), simd8<T>::load(ptr+48)} {} simdjson_really_inline simd8x64(const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+16), simd8<T>::load(ptr+32), simd8<T>::load(ptr+48)} {}
// Stores the four chunks back to back: chunk i is written at
// ptr + sizeof(simd8<T>) * i through that chunk's own store().
really_inline void store(T ptr[64]) const { simdjson_really_inline void store(T ptr[64]) const {
this->chunks[0].store(ptr+sizeof(simd8<T>)*0); this->chunks[0].store(ptr+sizeof(simd8<T>)*0);
this->chunks[1].store(ptr+sizeof(simd8<T>)*1); this->chunks[1].store(ptr+sizeof(simd8<T>)*1);
this->chunks[2].store(ptr+sizeof(simd8<T>)*2); this->chunks[2].store(ptr+sizeof(simd8<T>)*2);
this->chunks[3].store(ptr+sizeof(simd8<T>)*3); this->chunks[3].store(ptr+sizeof(simd8<T>)*3);
} }
// ORs the four chunks into a single simd8<T>. Chunks are paired as (0|1) and
// (2|3) before the final OR.
really_inline simd8<T> reduce_or() const { simdjson_really_inline simd8<T> reduce_or() const {
return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]);
} }
// Calls compress() on each chunk with that chunk's 16-bit slice of `mask`.
// Chunk k is written at output + 16*k - count_ones(low 16*k bits of mask), so
// every set bit in an earlier slice pulls the later chunks one element closer
// to `output`.
// NOTE(review): presumably set mask bits mark bytes that simd8::compress drops -
// confirm against its definition, which is not visible in this chunk.
really_inline void compress(uint64_t mask, T * output) const { simdjson_really_inline void compress(uint64_t mask, T * output) const {
this->chunks[0].compress(uint16_t(mask), output); this->chunks[0].compress(uint16_t(mask), output);
this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF));
this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF));
this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF));
} }
really_inline uint64_t to_bitmask() const { simdjson_really_inline uint64_t to_bitmask() const {
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
const uint8x16_t bit_mask = make_uint8x16_t( const uint8x16_t bit_mask = make_uint8x16_t(
0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
@ -462,7 +462,7 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0);
} }
really_inline simd8x64<T> bit_or(const T m) const { simdjson_really_inline simd8x64<T> bit_or(const T m) const {
const simd8<T> mask = simd8<T>::splat(m); const simd8<T> mask = simd8<T>::splat(m);
return simd8x64<T>( return simd8x64<T>(
this->chunks[0] | mask, this->chunks[0] | mask,
@ -472,7 +472,7 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
); );
} }
really_inline uint64_t eq(const T m) const { simdjson_really_inline uint64_t eq(const T m) const {
const simd8<T> mask = simd8<T>::splat(m); const simd8<T> mask = simd8<T>::splat(m);
return simd8x64<bool>( return simd8x64<bool>(
this->chunks[0] == mask, this->chunks[0] == mask,
@ -482,7 +482,7 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
).to_bitmask(); ).to_bitmask();
} }
really_inline uint64_t lteq(const T m) const { simdjson_really_inline uint64_t lteq(const T m) const {
const simd8<T> mask = simd8<T>::splat(m); const simd8<T> mask = simd8<T>::splat(m);
return simd8x64<bool>( return simd8x64<bool>(
this->chunks[0] <= mask, this->chunks[0] <= mask,

View File

@ -14,18 +14,18 @@ using namespace simd;
// Result of scanning one block of string bytes: two 32-bit masks, one for
// backslashes and one for quotes, plus helpers to query them.
// NOTE(review): presumably bit i of each mask corresponds to byte i of the
// block handled by copy_and_find - confirm against its definition.
struct backslash_and_quote { struct backslash_and_quote {
public: public:
// Block size in bytes associated with one copy_and_find call.
static constexpr uint32_t BYTES_PROCESSED = 32; static constexpr uint32_t BYTES_PROCESSED = 32;
// Declared here, defined outside the struct body.
really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst);
// (bs_bits - 1) clears the lowest set bit of bs_bits and sets every bit below
// it (all bits when bs_bits is 0, by unsigned wrap-around). Assuming no position
// is flagged in both masks, the AND is non-zero exactly when a quote bit lies
// below the lowest backslash bit.
really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; }
// True when any backslash bit is set.
really_inline bool has_backslash() { return bs_bits != 0; } simdjson_really_inline bool has_backslash() { return bs_bits != 0; }
// trailing_zeroes() of the respective mask, i.e. the position of its lowest set bit.
really_inline int quote_index() { return trailing_zeroes(quote_bits); } simdjson_really_inline int quote_index() { return trailing_zeroes(quote_bits); }
really_inline int backslash_index() { return trailing_zeroes(bs_bits); } simdjson_really_inline int backslash_index() { return trailing_zeroes(bs_bits); }
// Backslash mask and quote mask.
uint32_t bs_bits; uint32_t bs_bits;
uint32_t quote_bits; uint32_t quote_bits;
}; // struct backslash_and_quote }; // struct backslash_and_quote
really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) {
// this can read up to 31 bytes beyond the buffer size, but we require // this can read up to 31 bytes beyond the buffer size, but we require
// SIMDJSON_PADDING of padding // SIMDJSON_PADDING of padding
static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes");

View File

@ -25,7 +25,7 @@ static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) {
#endif #endif
/* result might be undefined when input_num is zero */ /* result might be undefined when input_num is zero */
really_inline int leading_zeroes(uint64_t input_num) { simdjson_really_inline int leading_zeroes(uint64_t input_num) {
#ifdef _MSC_VER #ifdef _MSC_VER
unsigned long leading_zero = 0; unsigned long leading_zero = 0;
// Search the mask data from most significant bit (MSB) // Search the mask data from most significant bit (MSB)

View File

@ -14,7 +14,7 @@ namespace stage1 {
class structural_scanner { class structural_scanner {
public: public:
really_inline structural_scanner(dom_parser_implementation &_parser, bool _partial) simdjson_really_inline structural_scanner(dom_parser_implementation &_parser, bool _partial)
: buf{_parser.buf}, : buf{_parser.buf},
next_structural_index{_parser.structural_indexes.get()}, next_structural_index{_parser.structural_indexes.get()},
parser{_parser}, parser{_parser},
@ -22,18 +22,18 @@ really_inline structural_scanner(dom_parser_implementation &_parser, bool _parti
partial{_partial} { partial{_partial} {
} }
// Records the current idx at *next_structural_index, then advances that write
// pointer by one slot.
really_inline void add_structural() { simdjson_really_inline void add_structural() {
*next_structural_index = idx; *next_structural_index = idx;
next_structural_index++; next_structural_index++;
} }
// True when the top two bits of c are 10 (c in 0x80..0xBF), which is the
// bit pattern of a UTF-8 continuation byte.
really_inline bool is_continuation(uint8_t c) { simdjson_really_inline bool is_continuation(uint8_t c) {
return (c & 0b11000000) == 0b10000000; return (c & 0b11000000) == 0b10000000;
} }
really_inline void validate_utf8_character() { simdjson_really_inline void validate_utf8_character() {
// Continuation // Continuation
if (unlikely((buf[idx] & 0b01000000) == 0)) { if (simdjson_unlikely((buf[idx] & 0b01000000) == 0)) {
// extra continuation // extra continuation
error = UTF8_ERROR; error = UTF8_ERROR;
idx++; idx++;
@ -43,7 +43,7 @@ really_inline void validate_utf8_character() {
// 2-byte // 2-byte
if ((buf[idx] & 0b00100000) == 0) { if ((buf[idx] & 0b00100000) == 0) {
// missing continuation // missing continuation
if (unlikely(idx+1 > len || !is_continuation(buf[idx+1]))) { if (simdjson_unlikely(idx+1 > len || !is_continuation(buf[idx+1]))) {
if (idx+1 > len && partial) { idx = len; return; } if (idx+1 > len && partial) { idx = len; return; }
error = UTF8_ERROR; error = UTF8_ERROR;
idx++; idx++;
@ -58,7 +58,7 @@ really_inline void validate_utf8_character() {
// 3-byte // 3-byte
if ((buf[idx] & 0b00010000) == 0) { if ((buf[idx] & 0b00010000) == 0) {
// missing continuation // missing continuation
if (unlikely(idx+2 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]))) { if (simdjson_unlikely(idx+2 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]))) {
if (idx+2 > len && partial) { idx = len; return; } if (idx+2 > len && partial) { idx = len; return; }
error = UTF8_ERROR; error = UTF8_ERROR;
idx++; idx++;
@ -74,7 +74,7 @@ really_inline void validate_utf8_character() {
// 4-byte // 4-byte
// missing continuation // missing continuation
if (unlikely(idx+3 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]) || !is_continuation(buf[idx+3]))) { if (simdjson_unlikely(idx+3 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]) || !is_continuation(buf[idx+3]))) {
if (idx+2 > len && partial) { idx = len; return; } if (idx+2 > len && partial) { idx = len; return; }
error = UTF8_ERROR; error = UTF8_ERROR;
idx++; idx++;
@ -92,12 +92,12 @@ really_inline void validate_utf8_character() {
idx += 4; idx += 4;
} }
really_inline void validate_string() { simdjson_really_inline void validate_string() {
idx++; // skip first quote idx++; // skip first quote
while (idx < len && buf[idx] != '"') { while (idx < len && buf[idx] != '"') {
if (buf[idx] == '\\') { if (buf[idx] == '\\') {
idx += 2; idx += 2;
} else if (unlikely(buf[idx] & 0b10000000)) { } else if (simdjson_unlikely(buf[idx] & 0b10000000)) {
validate_utf8_character(); validate_utf8_character();
} else { } else {
if (buf[idx] < 0x20) { error = UNESCAPED_CHARS; } if (buf[idx] < 0x20) { error = UNESCAPED_CHARS; }
@ -107,7 +107,7 @@ really_inline void validate_string() {
if (idx >= len && !partial) { error = UNCLOSED_STRING; } if (idx >= len && !partial) { error = UNCLOSED_STRING; }
} }
really_inline bool is_whitespace_or_operator(uint8_t c) { simdjson_really_inline bool is_whitespace_or_operator(uint8_t c) {
switch (c) { switch (c) {
case '{': case '}': case '[': case ']': case ',': case ':': case '{': case '}': case '[': case ']': case ',': case ':':
case ' ': case '\r': case '\n': case '\t': case ' ': case '\r': case '\n': case '\t':
@ -120,7 +120,7 @@ really_inline bool is_whitespace_or_operator(uint8_t c) {
// //
// Parse the entire input in STEP_SIZE-byte chunks. // Parse the entire input in STEP_SIZE-byte chunks.
// //
really_inline error_code scan() { simdjson_really_inline error_code scan() {
for (;idx<len;idx++) { for (;idx<len;idx++) {
switch (buf[idx]) { switch (buf[idx]) {
// String // String
@ -153,7 +153,7 @@ really_inline error_code scan() {
parser.n_structural_indexes = uint32_t(next_structural_index - parser.structural_indexes.get()); parser.n_structural_indexes = uint32_t(next_structural_index - parser.structural_indexes.get());
parser.next_structural_index = 0; parser.next_structural_index = 0;
if (unlikely(parser.n_structural_indexes == 0)) { if (simdjson_unlikely(parser.n_structural_indexes == 0)) {
return EMPTY; return EMPTY;
} }
@ -180,7 +180,7 @@ private:
} // namespace stage1 } // namespace stage1
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool partial) noexcept { SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool partial) noexcept {
this->buf = _buf; this->buf = _buf;
this->len = _len; this->len = _len;
stage1::structural_scanner scanner(*this, partial); stage1::structural_scanner scanner(*this, partial);
@ -222,7 +222,7 @@ static uint8_t jump_table[256 * 3] = {
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
}; };
WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { SIMDJSON_WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
size_t i = 0, pos = 0; size_t i = 0, pos = 0;
uint8_t quote = 0; uint8_t quote = 0;
uint8_t nonescape = 1; uint8_t nonescape = 1;
@ -244,7 +244,7 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui
} }
// credit: based on code from Google Fuchsia (Apache Licensed) // credit: based on code from Google Fuchsia (Apache Licensed)
WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { SIMDJSON_WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
const uint8_t *data = (const uint8_t *)buf; const uint8_t *data = (const uint8_t *)buf;
uint64_t pos = 0; uint64_t pos = 0;
uint64_t next_pos = 0; uint64_t next_pos = 0;
@ -321,19 +321,19 @@ WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t len) cons
namespace { namespace {
namespace SIMDJSON_IMPLEMENTATION { namespace SIMDJSON_IMPLEMENTATION {
// Stores the address of _doc in `doc`, builds a stage2::tape_builder over that
// document and returns the result of structural_parser::parse<false>.
// NOTE(review): the meaning of the bool template argument is defined by
// structural_parser, which is not visible in this chunk.
WARN_UNUSED error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept {
doc = &_doc; doc = &_doc;
stage2::tape_builder builder(*doc); stage2::tape_builder builder(*doc);
return stage2::structural_parser::parse<false>(*this, builder); return stage2::structural_parser::parse<false>(*this, builder);
} }
// Stores the address of _doc in `doc`, builds a stage2::tape_builder over _doc
// and returns the result of structural_parser::parse<true>.
// NOTE(review): the bool template argument is true here; its meaning is
// defined by structural_parser, which is not visible in this chunk.
WARN_UNUSED error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept {
doc = &_doc; doc = &_doc;
stage2::tape_builder builder(_doc); stage2::tape_builder builder(_doc);
return stage2::structural_parser::parse<true>(*this, builder); return stage2::structural_parser::parse<true>(*this, builder);
} }
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
auto error = stage1(_buf, _len, false); auto error = stage1(_buf, _len, false);
if (error) { return error; } if (error) { return error; }
return stage2(_doc); return stage2(_doc);

View File

@ -4,7 +4,7 @@
namespace { namespace {
namespace SIMDJSON_IMPLEMENTATION { namespace SIMDJSON_IMPLEMENTATION {
WARN_UNUSED error_code implementation::create_dom_parser_implementation( SIMDJSON_WARN_UNUSED error_code implementation::create_dom_parser_implementation(
size_t capacity, size_t capacity,
size_t max_depth, size_t max_depth,
std::unique_ptr<internal::dom_parser_implementation>& dst std::unique_ptr<internal::dom_parser_implementation>& dst

View File

@ -12,18 +12,18 @@ using namespace simdjson::dom;
// Fallback implementation class, derived from simdjson::implementation and
// marked final. It only declares its overrides; their bodies are defined elsewhere.
class implementation final : public simdjson::implementation { class implementation final : public simdjson::implementation {
public: public:
// Passes a name, a description and 0 to the base-class constructor.
// NOTE(review): the third argument is presumably the set of required CPU
// features (none for the fallback) - confirm in simdjson::implementation.
really_inline implementation() : simdjson::implementation( simdjson_really_inline implementation() : simdjson::implementation(
"fallback", "fallback",
"Generic fallback implementation", "Generic fallback implementation",
0 0
) {} ) {}
// NOTE(review): the second parameter is named max_length here, while a
// definition of this function elsewhere in this diff names it max_depth.
WARN_UNUSED error_code create_dom_parser_implementation( SIMDJSON_WARN_UNUSED error_code create_dom_parser_implementation(
size_t capacity, size_t capacity,
size_t max_length, size_t max_length,
std::unique_ptr<internal::dom_parser_implementation>& dst std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept final; ) const noexcept final;
// minify takes dst_len by reference; validate_utf8 returns a bool.
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; SIMDJSON_WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
WARN_UNUSED bool validate_utf8(const char *buf, size_t len) const noexcept final; SIMDJSON_WARN_UNUSED bool validate_utf8(const char *buf, size_t len) const noexcept final;
}; };
} // namespace fallback } // namespace fallback

View File

@ -10,14 +10,14 @@ void found_float(double result, const uint8_t *buf);
namespace { namespace {
namespace SIMDJSON_IMPLEMENTATION { namespace SIMDJSON_IMPLEMENTATION {
// Accumulates chars[0..7] as a base-10 number, most significant digit first,
// and returns it as uint32_t. Despite the name, this version is a plain loop.
// No validation is done: '0' is subtracted from each byte and the result is
// added as-is, so the value is only meaningful for eight ASCII digits.
static really_inline uint32_t parse_eight_digits_unrolled(const char *chars) { static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) {
uint32_t result = 0; uint32_t result = 0;
for (int i=0;i<8;i++) { for (int i=0;i<8;i++) {
result = result*10 + (chars[i] - '0'); result = result*10 + (chars[i] - '0');
} }
return result; return result;
} }
// uint8_t overload: casts the pointer to const char* and forwards to the
// const char* version.
static really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) {
return parse_eight_digits_unrolled((const char *)chars); return parse_eight_digits_unrolled((const char *)chars);
} }

View File

@ -10,17 +10,17 @@ namespace fallback {
// Single-byte variant: holds one byte `c` and answers the quote/backslash
// queries by comparing that byte directly.
struct backslash_and_quote { struct backslash_and_quote {
public: public:
// One byte per copy_and_find call.
static constexpr uint32_t BYTES_PROCESSED = 1; static constexpr uint32_t BYTES_PROCESSED = 1;
// Declared here, defined outside the struct body.
really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst);
// True when the stored byte is a double quote / a backslash respectively.
really_inline bool has_quote_first() { return c == '"'; } simdjson_really_inline bool has_quote_first() { return c == '"'; }
really_inline bool has_backslash() { return c == '\\'; } simdjson_really_inline bool has_backslash() { return c == '\\'; }
// Index is 0 when the stored byte matches, otherwise 1 (equal to BYTES_PROCESSED).
really_inline int quote_index() { return c == '"' ? 0 : 1; } simdjson_really_inline int quote_index() { return c == '"' ? 0 : 1; }
really_inline int backslash_index() { return c == '\\' ? 0 : 1; } simdjson_really_inline int backslash_index() { return c == '\\' ? 0 : 1; }
// The byte being examined.
uint8_t c; uint8_t c;
}; // struct backslash_and_quote }; // struct backslash_and_quote
really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) {
// store to dest unconditionally - we can overwrite the bits we don't like later // store to dest unconditionally - we can overwrite the bits we don't like later
dst[0] = src[0]; dst[0] = src[0];
return { src[0] }; return { src[0] };

View File

@ -23,17 +23,17 @@ public:
/** Document passed to stage 2 */ /** Document passed to stage 2 */
dom::document *doc{}; dom::document *doc{};
really_inline dom_parser_implementation(); simdjson_really_inline dom_parser_implementation();
dom_parser_implementation(const dom_parser_implementation &) = delete; dom_parser_implementation(const dom_parser_implementation &) = delete;
dom_parser_implementation & operator=(const dom_parser_implementation &) = delete; dom_parser_implementation & operator=(const dom_parser_implementation &) = delete;
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; SIMDJSON_WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final;
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool partial) noexcept final; SIMDJSON_WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool partial) noexcept final;
WARN_UNUSED error_code check_for_unclosed_array() noexcept; SIMDJSON_WARN_UNUSED error_code check_for_unclosed_array() noexcept;
WARN_UNUSED error_code stage2(dom::document &doc) noexcept final; SIMDJSON_WARN_UNUSED error_code stage2(dom::document &doc) noexcept final;
WARN_UNUSED error_code stage2_next(dom::document &doc) noexcept final; SIMDJSON_WARN_UNUSED error_code stage2_next(dom::document &doc) noexcept final;
WARN_UNUSED error_code set_capacity(size_t capacity) noexcept final; SIMDJSON_WARN_UNUSED error_code set_capacity(size_t capacity) noexcept final;
WARN_UNUSED error_code set_max_depth(size_t max_depth) noexcept final; SIMDJSON_WARN_UNUSED error_code set_max_depth(size_t max_depth) noexcept final;
}; };
} // namespace SIMDJSON_IMPLEMENTATION } // namespace SIMDJSON_IMPLEMENTATION
@ -45,17 +45,17 @@ public:
namespace { namespace {
namespace SIMDJSON_IMPLEMENTATION { namespace SIMDJSON_IMPLEMENTATION {
// Out-of-class definition of the default constructor; the body is empty.
really_inline dom_parser_implementation::dom_parser_implementation() {} simdjson_really_inline dom_parser_implementation::dom_parser_implementation() {}
// Leaving these here so they can be inlined if so desired // Leaving these here so they can be inlined if so desired
// Delegates the allocation to stage1::allocate::set_capacity. On error it
// resets _capacity to 0 and returns that error; otherwise it records the new
// capacity and returns SUCCESS.
WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept {
error_code err = stage1::allocate::set_capacity(*this, capacity); error_code err = stage1::allocate::set_capacity(*this, capacity);
if (err) { _capacity = 0; return err; } if (err) { _capacity = 0; return err; }
_capacity = capacity; _capacity = capacity;
return SUCCESS; return SUCCESS;
} }
WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept {
error_code err = stage2::allocate::set_max_depth(*this, max_depth); error_code err = stage2::allocate::set_max_depth(*this, max_depth);
if (err) { _max_depth = 0; return err; } if (err) { _max_depth = 0; return err; }
_max_depth = max_depth; _max_depth = max_depth;

View File

@ -6,8 +6,8 @@ namespace allocate {
// //
// Allocates stage 1 internal state and outputs in the parser // Allocates stage 1 internal state and outputs in the parser
// //
really_inline error_code set_capacity(internal::dom_parser_implementation &parser, size_t capacity) { simdjson_really_inline error_code set_capacity(internal::dom_parser_implementation &parser, size_t capacity) {
size_t max_structures = ROUNDUP_N(capacity, 64) + 2 + 7; size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7;
parser.structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); parser.structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] );
if (!parser.structural_indexes) { return MEMALLOC; } if (!parser.structural_indexes) { return MEMALLOC; }
parser.structural_indexes[0] = 0; parser.structural_indexes[0] = 0;

View File

@ -5,10 +5,10 @@ namespace SIMDJSON_IMPLEMENTATION {
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
struct buf_block_reader { struct buf_block_reader {
public: public:
really_inline buf_block_reader(const uint8_t *_buf, size_t _len); simdjson_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
really_inline size_t block_index(); simdjson_really_inline size_t block_index();
really_inline bool has_full_block() const; simdjson_really_inline bool has_full_block() const;
really_inline const uint8_t *full_block() const; simdjson_really_inline const uint8_t *full_block() const;
/** /**
* Get the last block, padded with spaces. * Get the last block, padded with spaces.
* *
@ -18,8 +18,8 @@ public:
* *
* @return the number of effective characters in the last block. * @return the number of effective characters in the last block.
*/ */
really_inline size_t get_remainder(uint8_t *dst) const; simdjson_really_inline size_t get_remainder(uint8_t *dst) const;
really_inline void advance(); simdjson_really_inline void advance();
private: private:
const uint8_t *buf; const uint8_t *buf;
const size_t len; const size_t len;
@ -28,7 +28,7 @@ private:
}; };
// Routines to print masks and text for debugging bitmask operations // Routines to print masks and text for debugging bitmask operations
UNUSED static char * format_input_text_64(const uint8_t *text) { SIMDJSON_UNUSED static char * format_input_text_64(const uint8_t *text) {
static char *buf = (char*)malloc(sizeof(simd8x64<uint8_t>) + 1); static char *buf = (char*)malloc(sizeof(simd8x64<uint8_t>) + 1);
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) { for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
@ -38,7 +38,7 @@ UNUSED static char * format_input_text_64(const uint8_t *text) {
} }
// Routines to print masks and text for debugging bitmask operations // Routines to print masks and text for debugging bitmask operations
UNUSED static char * format_input_text(const simd8x64<uint8_t>& in) { SIMDJSON_UNUSED static char * format_input_text(const simd8x64<uint8_t>& in) {
static char *buf = (char*)malloc(sizeof(simd8x64<uint8_t>) + 1); static char *buf = (char*)malloc(sizeof(simd8x64<uint8_t>) + 1);
in.store((uint8_t*)buf); in.store((uint8_t*)buf);
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) { for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
@ -48,7 +48,7 @@ UNUSED static char * format_input_text(const simd8x64<uint8_t>& in) {
return buf; return buf;
} }
UNUSED static char * format_mask(uint64_t mask) { SIMDJSON_UNUSED static char * format_mask(uint64_t mask) {
static char *buf = (char*)malloc(64 + 1); static char *buf = (char*)malloc(64 + 1);
for (size_t i=0; i<64; i++) { for (size_t i=0; i<64; i++) {
buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
@ -58,30 +58,30 @@ UNUSED static char * format_mask(uint64_t mask) {
} }
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
really_inline buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} simdjson_really_inline buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
really_inline size_t buf_block_reader<STEP_SIZE>::block_index() { return idx; } simdjson_really_inline size_t buf_block_reader<STEP_SIZE>::block_index() { return idx; }
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
really_inline bool buf_block_reader<STEP_SIZE>::has_full_block() const { simdjson_really_inline bool buf_block_reader<STEP_SIZE>::has_full_block() const {
return idx < lenminusstep; return idx < lenminusstep;
} }
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
really_inline const uint8_t *buf_block_reader<STEP_SIZE>::full_block() const { simdjson_really_inline const uint8_t *buf_block_reader<STEP_SIZE>::full_block() const {
return &buf[idx]; return &buf[idx];
} }
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
really_inline size_t buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *dst) const { simdjson_really_inline size_t buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *dst) const {
memset(dst, 0x20, STEP_SIZE); // memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. memset(dst, 0x20, STEP_SIZE); // memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once.
memcpy(dst, buf + idx, len - idx); memcpy(dst, buf + idx, len - idx);
return len - idx; return len - idx;
} }
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
really_inline void buf_block_reader<STEP_SIZE>::advance() { simdjson_really_inline void buf_block_reader<STEP_SIZE>::advance() {
idx += STEP_SIZE; idx += STEP_SIZE;
} }

View File

@ -26,7 +26,7 @@ namespace SIMDJSON_IMPLEMENTATION {
* complete document, therefore the last json buffer location is the end of the * complete document, therefore the last json buffer location is the end of the
* batch. * batch.
*/ */
really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) {
// TODO don't count separately, just figure out depth // TODO don't count separately, just figure out depth
auto arr_cnt = 0; auto arr_cnt = 0;
auto obj_cnt = 0; auto obj_cnt = 0;

View File

@ -13,24 +13,24 @@ public:
static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept;
private: private:
really_inline json_minifier(uint8_t *_dst) simdjson_really_inline json_minifier(uint8_t *_dst)
: dst{_dst} : dst{_dst}
{} {}
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept; simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept;
really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block); simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block);
really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len);
json_scanner scanner{}; json_scanner scanner{};
uint8_t *dst; uint8_t *dst;
}; };
really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, json_block block) { simdjson_really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, json_block block) {
uint64_t mask = block.whitespace(); uint64_t mask = block.whitespace();
in.compress(mask, dst); in.compress(mask, dst);
dst += 64 - count_ones(mask); dst += 64 - count_ones(mask);
} }
really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) {
*dst = '\0'; *dst = '\0';
error_code error = scanner.finish(false); error_code error = scanner.finish(false);
if (error) { dst_len = 0; return error; } if (error) { dst_len = 0; return error; }
@ -39,7 +39,7 @@ really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_l
} }
template<> template<>
really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept {
simd::simd8x64<uint8_t> in_1(block_buf); simd::simd8x64<uint8_t> in_1(block_buf);
simd::simd8x64<uint8_t> in_2(block_buf+64); simd::simd8x64<uint8_t> in_2(block_buf+64);
json_block block_1 = scanner.next(in_1); json_block block_1 = scanner.next(in_1);
@ -50,7 +50,7 @@ really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_
} }
template<> template<>
really_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { simdjson_really_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept {
simd::simd8x64<uint8_t> in_1(block_buf); simd::simd8x64<uint8_t> in_1(block_buf);
json_block block_1 = scanner.next(in_1); json_block block_1 = scanner.next(in_1);
this->next(block_buf, block_1); this->next(block_buf, block_1);
@ -69,7 +69,7 @@ error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, s
// Index the last (remainder) block, padded with spaces // Index the last (remainder) block, padded with spaces
uint8_t block[STEP_SIZE]; uint8_t block[STEP_SIZE];
if (likely(reader.get_remainder(block)) > 0) { if (simdjson_likely(reader.get_remainder(block)) > 0) {
minifier.step<STEP_SIZE>(block, reader); minifier.step<STEP_SIZE>(block, reader);
} }

View File

@ -25,16 +25,16 @@ public:
* The start of structurals. * The start of structurals.
* In simdjson prior to v0.3, these were called the pseudo-structural characters. * In simdjson prior to v0.3, these were called the pseudo-structural characters.
**/ **/
really_inline uint64_t structural_start() { return potential_structural_start() & ~_string.string_tail(); } simdjson_really_inline uint64_t structural_start() { return potential_structural_start() & ~_string.string_tail(); }
/** All JSON whitespace (i.e. not in a string) */ /** All JSON whitespace (i.e. not in a string) */
really_inline uint64_t whitespace() { return non_quote_outside_string(_characters.whitespace()); } simdjson_really_inline uint64_t whitespace() { return non_quote_outside_string(_characters.whitespace()); }
// Helpers // Helpers
/** Whether the given characters are inside a string (only works on non-quotes) */ /** Whether the given characters are inside a string (only works on non-quotes) */
really_inline uint64_t non_quote_inside_string(uint64_t mask) { return _string.non_quote_inside_string(mask); } simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) { return _string.non_quote_inside_string(mask); }
/** Whether the given characters are outside a string (only works on non-quotes) */ /** Whether the given characters are outside a string (only works on non-quotes) */
really_inline uint64_t non_quote_outside_string(uint64_t mask) { return _string.non_quote_outside_string(mask); } simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) { return _string.non_quote_outside_string(mask); }
// string and escape characters // string and escape characters
json_string_block _string; json_string_block _string;
@ -49,12 +49,12 @@ private:
* structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc".
* They may reside inside a string. * They may reside inside a string.
**/ **/
really_inline uint64_t potential_structural_start() { return _characters.op() | potential_scalar_start(); } simdjson_really_inline uint64_t potential_structural_start() { return _characters.op() | potential_scalar_start(); }
/** /**
* The start of non-operator runs, like 123, true and "abc". * The start of non-operator runs, like 123, true and "abc".
* It may reside inside a string. * It may reside inside a string.
**/ **/
really_inline uint64_t potential_scalar_start() { simdjson_really_inline uint64_t potential_scalar_start() {
// The term "scalar" refers to anything except structural characters and white space // The term "scalar" refers to anything except structural characters and white space
// (so letters, numbers, quotes). // (so letters, numbers, quotes).
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space
@ -65,7 +65,7 @@ private:
* Whether the given character is immediately after a non-operator like 123, true. * Whether the given character is immediately after a non-operator like 123, true.
* The characters following a quote are not included. * The characters following a quote are not included.
*/ */
really_inline uint64_t follows_potential_scalar() { simdjson_really_inline uint64_t follows_potential_scalar() {
// _follows_potential_nonquote_scalar: is defined as marking any character that follows a character // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character
// that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a
// white space. // white space.
@ -90,8 +90,8 @@ private:
class json_scanner { class json_scanner {
public: public:
json_scanner() {} json_scanner() {}
really_inline json_block next(const simd::simd8x64<uint8_t>& in); simdjson_really_inline json_block next(const simd::simd8x64<uint8_t>& in);
really_inline error_code finish(bool streaming); simdjson_really_inline error_code finish(bool streaming);
private: private:
// Whether the last character of the previous iteration is part of a scalar token // Whether the last character of the previous iteration is part of a scalar token
@ -108,13 +108,13 @@ private:
// //
// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); // const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash);
// //
really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) {
const uint64_t result = match << 1 | overflow; const uint64_t result = match << 1 | overflow;
overflow = match >> 63; overflow = match >> 63;
return result; return result;
} }
really_inline json_block json_scanner::next(const simd::simd8x64<uint8_t>& in) { simdjson_really_inline json_block json_scanner::next(const simd::simd8x64<uint8_t>& in) {
json_string_block strings = string_scanner.next(in); json_string_block strings = string_scanner.next(in);
// identifies the white-space and the structural characters // identifies the white-space and the structural characters
json_character_block characters = json_character_block::classify(in); json_character_block characters = json_character_block::classify(in);
@ -137,7 +137,7 @@ really_inline json_block json_scanner::next(const simd::simd8x64<uint8_t>& in) {
}; };
} }
really_inline error_code json_scanner::finish(bool streaming) { simdjson_really_inline error_code json_scanner::finish(bool streaming) {
return string_scanner.finish(streaming); return string_scanner.finish(streaming);
} }

View File

@ -4,23 +4,23 @@ namespace stage1 {
struct json_string_block { struct json_string_block {
// Escaped characters (characters following an escape() character) // Escaped characters (characters following an escape() character)
really_inline uint64_t escaped() const { return _escaped; } simdjson_really_inline uint64_t escaped() const { return _escaped; }
// Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \)
really_inline uint64_t escape() const { return _backslash & ~_escaped; } simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; }
// Real (non-backslashed) quotes // Real (non-backslashed) quotes
really_inline uint64_t quote() const { return _quote; } simdjson_really_inline uint64_t quote() const { return _quote; }
// Start quotes of strings // Start quotes of strings
really_inline uint64_t string_start() const { return _quote & _in_string; } simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; }
// End quotes of strings // End quotes of strings
really_inline uint64_t string_end() const { return _quote & ~_in_string; } simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; }
// Only characters inside the string (not including the quotes) // Only characters inside the string (not including the quotes)
really_inline uint64_t string_content() const { return _in_string & ~_quote; } simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; }
// Return a mask of whether the given characters are inside a string (only works on non-quotes) // Return a mask of whether the given characters are inside a string (only works on non-quotes)
really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; }
// Return a mask of whether the given characters are outside a string (only works on non-quotes) // Return a mask of whether the given characters are outside a string (only works on non-quotes)
really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; }
// Tail of string (everything except the start quote) // Tail of string (everything except the start quote)
really_inline uint64_t string_tail() const { return _in_string ^ _quote; } simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; }
// backslash characters // backslash characters
uint64_t _backslash; uint64_t _backslash;
@ -35,13 +35,13 @@ struct json_string_block {
// Scans blocks for string characters, storing the state necessary to do so // Scans blocks for string characters, storing the state necessary to do so
class json_string_scanner { class json_string_scanner {
public: public:
really_inline json_string_block next(const simd::simd8x64<uint8_t>& in); simdjson_really_inline json_string_block next(const simd::simd8x64<uint8_t>& in);
really_inline error_code finish(bool streaming); simdjson_really_inline error_code finish(bool streaming);
private: private:
// Intended to be defined by the implementation // Intended to be defined by the implementation
really_inline uint64_t find_escaped(uint64_t escape); simdjson_really_inline uint64_t find_escaped(uint64_t escape);
really_inline uint64_t find_escaped_branchless(uint64_t escape); simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape);
// Whether the last iteration was still inside a string (all 1's = true, all 0's = false). // Whether the last iteration was still inside a string (all 1's = true, all 0's = false).
uint64_t prev_in_string = 0ULL; uint64_t prev_in_string = 0ULL;
@ -76,7 +76,7 @@ private:
// desired | x | x x x x x x x x | // desired | x | x x x x x x x x |
// text | \\\ | \\\"\\\" \\\" \\"\\" | // text | \\\ | \\\"\\\" \\\" \\"\\" |
// //
really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) {
// If there was overflow, pretend the first character isn't a backslash // If there was overflow, pretend the first character isn't a backslash
backslash &= ~prev_escaped; backslash &= ~prev_escaped;
uint64_t follows_escape = backslash << 1 | prev_escaped; uint64_t follows_escape = backslash << 1 | prev_escaped;
@ -101,7 +101,7 @@ really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t bac
// //
// Backslash sequences outside of quotes will be detected in stage 2. // Backslash sequences outside of quotes will be detected in stage 2.
// //
really_inline json_string_block json_string_scanner::next(const simd::simd8x64<uint8_t>& in) { simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64<uint8_t>& in) {
const uint64_t backslash = in.eq('\\'); const uint64_t backslash = in.eq('\\');
const uint64_t escaped = find_escaped(backslash); const uint64_t escaped = find_escaped(backslash);
const uint64_t quote = in.eq('"') & ~escaped; const uint64_t quote = in.eq('"') & ~escaped;
@ -131,7 +131,7 @@ really_inline json_string_block json_string_scanner::next(const simd::simd8x64<u
}; };
} }
really_inline error_code json_string_scanner::finish(bool streaming) { simdjson_really_inline error_code json_string_scanner::finish(bool streaming) {
if (prev_in_string and (not streaming)) { if (prev_in_string and (not streaming)) {
return UNCLOSED_STRING; return UNCLOSED_STRING;
} }

View File

@ -17,14 +17,14 @@ class bit_indexer {
public: public:
uint32_t *tail; uint32_t *tail;
really_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} simdjson_really_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {}
// flatten out values in 'bits' assuming that they are to have values of idx // flatten out values in 'bits' assuming that they are to have values of idx
// plus their position in the bitvector, and store these indexes at // plus their position in the bitvector, and store these indexes at
// base_ptr[base] incrementing base as we go // base_ptr[base] incrementing base as we go
// will potentially store extra values beyond end of valid bits, so base_ptr // will potentially store extra values beyond end of valid bits, so base_ptr
// needs to be large enough to handle this // needs to be large enough to handle this
really_inline void write(uint32_t idx, uint64_t bits) { simdjson_really_inline void write(uint32_t idx, uint64_t bits) {
// In some instances, the next branch is expensive because it is mispredicted. // In some instances, the next branch is expensive because it is mispredicted.
// Unfortunately, in other cases, // Unfortunately, in other cases,
// it helps tremendously. // it helps tremendously.
@ -40,7 +40,7 @@ public:
// Do the next 8 all together (we hope in most cases it won't happen at all // Do the next 8 all together (we hope in most cases it won't happen at all
// and the branch is easily predicted). // and the branch is easily predicted).
if (unlikely(cnt > 8)) { if (simdjson_unlikely(cnt > 8)) {
for (int i=8; i<16; i++) { for (int i=8; i<16; i++) {
this->tail[i] = idx + trailing_zeroes(bits); this->tail[i] = idx + trailing_zeroes(bits);
bits = clear_lowest_bit(bits); bits = clear_lowest_bit(bits);
@ -49,7 +49,7 @@ public:
// Most files don't have 16+ structurals per block, so we take several basically guaranteed // Most files don't have 16+ structurals per block, so we take several basically guaranteed
// branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :)
// or the start of a value ("abc" true 123) every four characters. // or the start of a value ("abc" true 123) every four characters.
if (unlikely(cnt > 16)) { if (simdjson_unlikely(cnt > 16)) {
int i = 16; int i = 16;
do { do {
this->tail[i] = idx + trailing_zeroes(bits); this->tail[i] = idx + trailing_zeroes(bits);
@ -76,11 +76,11 @@ public:
static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool partial) noexcept; static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool partial) noexcept;
private: private:
really_inline json_structural_indexer(uint32_t *structural_indexes); simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes);
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept; simdjson_really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx); simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx);
really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial); simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial);
json_scanner scanner{}; json_scanner scanner{};
utf8_checker checker{}; utf8_checker checker{};
@ -89,11 +89,11 @@ private:
uint64_t unescaped_chars_error = 0; uint64_t unescaped_chars_error = 0;
}; };
really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {}
// Skip the last character if it is partial // Skip the last character if it is partial
really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) {
if (unlikely(len < 3)) { if (simdjson_unlikely(len < 3)) {
switch (len) { switch (len) {
case 2: case 2:
if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left
@ -131,7 +131,7 @@ really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) {
// //
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool partial) noexcept { error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool partial) noexcept {
if (unlikely(len > parser.capacity())) { return CAPACITY; } if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; }
if (partial) { len = trim_partial_utf8(buf, len); } if (partial) { len = trim_partial_utf8(buf, len); }
buf_block_reader<STEP_SIZE> reader(buf, len); buf_block_reader<STEP_SIZE> reader(buf, len);
@ -144,14 +144,14 @@ error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_pa
// Take care of the last block (will always be there unless file is empty) // Take care of the last block (will always be there unless file is empty)
uint8_t block[STEP_SIZE]; uint8_t block[STEP_SIZE];
if (unlikely(reader.get_remainder(block) == 0)) { return EMPTY; } if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return EMPTY; }
indexer.step<STEP_SIZE>(block, reader); indexer.step<STEP_SIZE>(block, reader);
return indexer.finish(parser, reader.block_index(), len, partial); return indexer.finish(parser, reader.block_index(), len, partial);
} }
template<> template<>
really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept {
simd::simd8x64<uint8_t> in_1(block); simd::simd8x64<uint8_t> in_1(block);
simd::simd8x64<uint8_t> in_2(block+64); simd::simd8x64<uint8_t> in_2(block+64);
json_block block_1 = scanner.next(in_1); json_block block_1 = scanner.next(in_1);
@ -162,14 +162,14 @@ really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_
} }
template<> template<>
really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept {
simd::simd8x64<uint8_t> in_1(block); simd::simd8x64<uint8_t> in_1(block);
json_block block_1 = scanner.next(in_1); json_block block_1 = scanner.next(in_1);
this->next(in_1, block_1, reader.block_index()); this->next(in_1, block_1, reader.block_index());
reader.advance(); reader.advance();
} }
really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx) { simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx) {
uint64_t unescaped = in.lteq(0x1F); uint64_t unescaped = in.lteq(0x1F);
checker.check_next_input(in); checker.check_next_input(in);
indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser
@ -177,12 +177,12 @@ really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>&
unescaped_chars_error |= block.non_quote_inside_string(unescaped); unescaped_chars_error |= block.non_quote_inside_string(unescaped);
} }
really_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial) { simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial) {
// Write out the final iteration's structurals // Write out the final iteration's structurals
indexer.write(uint32_t(idx-64), prev_structurals); indexer.write(uint32_t(idx-64), prev_structurals);
error_code error = scanner.finish(partial); error_code error = scanner.finish(partial);
if (unlikely(error != SUCCESS)) { return error; } if (simdjson_unlikely(error != SUCCESS)) { return error; }
if (unescaped_chars_error) { if (unescaped_chars_error) {
return UNESCAPED_CHARS; return UNESCAPED_CHARS;
@ -208,10 +208,10 @@ really_inline error_code json_structural_indexer::finish(dom_parser_implementati
parser.structural_indexes[parser.n_structural_indexes + 2] = 0; parser.structural_indexes[parser.n_structural_indexes + 2] = 0;
parser.next_structural_index = 0; parser.next_structural_index = 0;
// a valid JSON file cannot have zero structural indexes - we should have found something // a valid JSON file cannot have zero structural indexes - we should have found something
if (unlikely(parser.n_structural_indexes == 0u)) { if (simdjson_unlikely(parser.n_structural_indexes == 0u)) {
return EMPTY; return EMPTY;
} }
if (unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) {
return UNEXPECTED_ERROR; return UNEXPECTED_ERROR;
} }
if (partial) { if (partial) {

View File

@ -33,12 +33,12 @@ struct utf8_checker {
processed_utf_bytes previous; processed_utf_bytes previous;
// all byte values must be no larger than 0xF4 // all byte values must be no larger than 0xF4
really_inline void check_smaller_than_0xF4(const simd8<uint8_t> current_bytes) { simdjson_really_inline void check_smaller_than_0xF4(const simd8<uint8_t> current_bytes) {
// unsigned, saturates to 0 below max // unsigned, saturates to 0 below max
this->has_error |= current_bytes.saturating_sub(0xF4u); this->has_error |= current_bytes.saturating_sub(0xF4u);
} }
really_inline simd8<int8_t> continuation_lengths(const simd8<int8_t> high_nibbles) { simdjson_really_inline simd8<int8_t> continuation_lengths(const simd8<int8_t> high_nibbles) {
return high_nibbles.lookup_16<int8_t>( return high_nibbles.lookup_16<int8_t>(
1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII) 1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII)
0, 0, 0, 0, // 10xx (continuation) 0, 0, 0, 0, // 10xx (continuation)
@ -47,7 +47,7 @@ struct utf8_checker {
4); // 1111, next should be 0 (not checked here) 4); // 1111, next should be 0 (not checked here)
} }
really_inline simd8<int8_t> carry_continuations(const simd8<int8_t>& initial_lengths) { simdjson_really_inline simd8<int8_t> carry_continuations(const simd8<int8_t>& initial_lengths) {
simd8<int8_t> prev_carried_continuations = initial_lengths.prev(this->previous.carried_continuations); simd8<int8_t> prev_carried_continuations = initial_lengths.prev(this->previous.carried_continuations);
simd8<int8_t> right1 = simd8<int8_t>(simd8<uint8_t>(prev_carried_continuations).saturating_sub(1)); simd8<int8_t> right1 = simd8<int8_t>(simd8<uint8_t>(prev_carried_continuations).saturating_sub(1));
simd8<int8_t> sum = initial_lengths + right1; simd8<int8_t> sum = initial_lengths + right1;
@ -57,7 +57,7 @@ struct utf8_checker {
return sum + right2; return sum + right2;
} }
really_inline void check_continuations(const simd8<int8_t>& initial_lengths, const simd8<int8_t>& carries) { simdjson_really_inline void check_continuations(const simd8<int8_t>& initial_lengths, const simd8<int8_t>& carries) {
// overlap || underlap // overlap || underlap
// carry > length && length > 0 || !(carry > length) && !(length > 0) // carry > length && length > 0 || !(carry > length) && !(length > 0)
// (carries > length) == (lengths > 0) // (carries > length) == (lengths > 0)
@ -66,7 +66,7 @@ struct utf8_checker {
(carries > initial_lengths) == (initial_lengths > simd8<int8_t>::zero())); (carries > initial_lengths) == (initial_lengths > simd8<int8_t>::zero()));
} }
really_inline void check_carried_continuations() { simdjson_really_inline void check_carried_continuations() {
static const int8_t last_1[32] = { static const int8_t last_1[32] = {
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
@ -79,7 +79,7 @@ struct utf8_checker {
// when 0xED is found, next byte must be no larger than 0x9F // when 0xED is found, next byte must be no larger than 0x9F
// when 0xF4 is found, next byte must be no larger than 0x8F // when 0xF4 is found, next byte must be no larger than 0x8F
// next byte must be continuation, ie sign bit is set, so signed < is ok // next byte must be continuation, ie sign bit is set, so signed < is ok
really_inline void check_first_continuation_max(const simd8<uint8_t> current_bytes, simdjson_really_inline void check_first_continuation_max(const simd8<uint8_t> current_bytes,
const simd8<uint8_t> off1_current_bytes) { const simd8<uint8_t> off1_current_bytes) {
simd8<bool> prev_ED = off1_current_bytes == 0xEDu; simd8<bool> prev_ED = off1_current_bytes == 0xEDu;
simd8<bool> prev_F4 = off1_current_bytes == 0xF4u; simd8<bool> prev_F4 = off1_current_bytes == 0xF4u;
@ -97,7 +97,7 @@ struct utf8_checker {
// E => < E1 && < A0 // E => < E1 && < A0
// F => < F1 && < 90 // F => < F1 && < 90
// else false && false // else false && false
really_inline void check_overlong(const simd8<uint8_t> current_bytes, simdjson_really_inline void check_overlong(const simd8<uint8_t> current_bytes,
const simd8<uint8_t> off1_current_bytes, const simd8<uint8_t> off1_current_bytes,
const simd8<int8_t>& high_nibbles) { const simd8<int8_t>& high_nibbles) {
simd8<int8_t> off1_high_nibbles = high_nibbles.prev(this->previous.high_nibbles); simd8<int8_t> off1_high_nibbles = high_nibbles.prev(this->previous.high_nibbles);
@ -128,14 +128,14 @@ struct utf8_checker {
this->has_error |= simd8<uint8_t>(initial_under & second_under); this->has_error |= simd8<uint8_t>(initial_under & second_under);
} }
really_inline void count_nibbles(simd8<uint8_t> bytes, struct processed_utf_bytes *answer) { simdjson_really_inline void count_nibbles(simd8<uint8_t> bytes, struct processed_utf_bytes *answer) {
answer->raw_bytes = bytes; answer->raw_bytes = bytes;
answer->high_nibbles = simd8<int8_t>(bytes.shr<4>()); answer->high_nibbles = simd8<int8_t>(bytes.shr<4>());
} }
// check whether the current bytes are valid UTF-8 // check whether the current bytes are valid UTF-8
// at the end of the function, previous gets updated // at the end of the function, previous gets updated
really_inline void check_utf8_bytes(const simd8<uint8_t> current_bytes) { simdjson_really_inline void check_utf8_bytes(const simd8<uint8_t> current_bytes) {
struct processed_utf_bytes pb {}; struct processed_utf_bytes pb {};
this->count_nibbles(current_bytes, &pb); this->count_nibbles(current_bytes, &pb);
@ -154,17 +154,17 @@ struct utf8_checker {
this->previous = pb; this->previous = pb;
} }
really_inline void check_next_input(Dconst simd8<uint8_t> in) { simdjson_really_inline void check_next_input(Dconst simd8<uint8_t> in) {
if (likely(!in.any_bits_set_anywhere(0x80u))) { if (simdjson_likely(!in.any_bits_set_anywhere(0x80u))) {
this->check_carried_continuations(); this->check_carried_continuations();
} else { } else {
this->check_utf8_bytes(in); this->check_utf8_bytes(in);
} }
} }
really_inline void check_next_input(const simd8x64<uint8_t>& in) { simdjson_really_inline void check_next_input(const simd8x64<uint8_t>& in) {
simd8<uint8_t> bits = in.reduce_or(); simd8<uint8_t> bits = in.reduce_or();
if (likely(!bits.any_bits_set_anywhere(0x80u))) { if (simdjson_likely(!bits.any_bits_set_anywhere(0x80u))) {
// it is ascii, we just check carried continuations. // it is ascii, we just check carried continuations.
this->check_carried_continuations(); this->check_carried_continuations();
} else { } else {
@ -175,7 +175,7 @@ struct utf8_checker {
} }
} }
really_inline error_code errors() { simdjson_really_inline error_code errors() {
return this->has_error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS; return this->has_error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
} }
}; // struct utf8_checker }; // struct utf8_checker

View File

@ -80,7 +80,7 @@ using namespace simd;
// 3 4-bit table lookups, identifying which errors that 4 bits could match, and then &'s them together. // 3 4-bit table lookups, identifying which errors that 4 bits could match, and then &'s them together.
// If all 3 lookups detect the same error, it's an error. // If all 3 lookups detect the same error, it's an error.
// //
really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) { simdjson_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
// //
// These are the errors we're going to match for bytes 1-2, by looking at the first three // These are the errors we're going to match for bytes 1-2, by looking at the first three
// nibbles of the character: <high bits of byte 1>> & <low bits of byte 1> & <high bits of byte 2> // nibbles of the character: <high bits of byte 1>> & <low bits of byte 1> & <high bits of byte 2>
@ -143,7 +143,7 @@ using namespace simd;
return byte_1_high & byte_1_low & byte_2_high; return byte_1_high & byte_1_low & byte_2_high;
} }
really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input, const simd8<uint8_t> prev_input, const simd8<uint8_t> prev1) { simdjson_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input, const simd8<uint8_t> prev_input, const simd8<uint8_t> prev1) {
simd8<uint8_t> prev2 = input.prev<2>(prev_input); simd8<uint8_t> prev2 = input.prev<2>(prev_input);
simd8<uint8_t> prev3 = input.prev<3>(prev_input); simd8<uint8_t> prev3 = input.prev<3>(prev_input);
@ -157,7 +157,7 @@ using namespace simd;
// Return nonzero if there are incomplete multibyte characters at the end of the block: // Return nonzero if there are incomplete multibyte characters at the end of the block:
// e.g. if there is a 4-byte character, but it's 3 bytes from the end. // e.g. if there is a 4-byte character, but it's 3 bytes from the end.
// //
really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) { simdjson_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
// If the previous input's last 3 bytes match this, they're too short (they ended at EOF): // If the previous input's last 3 bytes match this, they're too short (they ended at EOF):
// ... 1111____ 111_____ 11______ // ... 1111____ 111_____ 11______
static const uint8_t max_array[32] = { static const uint8_t max_array[32] = {
@ -181,7 +181,7 @@ using namespace simd;
// //
// Check whether the current bytes are valid UTF-8. // Check whether the current bytes are valid UTF-8.
// //
really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) { simdjson_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
// Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes
// (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers)
simd8<uint8_t> prev1 = input.prev<1>(prev_input); simd8<uint8_t> prev1 = input.prev<1>(prev_input);
@ -190,13 +190,13 @@ using namespace simd;
} }
// The only problem that can happen at EOF is that a multibyte character is too short. // The only problem that can happen at EOF is that a multibyte character is too short.
really_inline void check_eof() { simdjson_really_inline void check_eof() {
// If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
// possibly finish them. // possibly finish them.
this->error |= this->prev_incomplete; this->error |= this->prev_incomplete;
} }
really_inline void check_next_input(const simd8x64<uint8_t>& input) { simdjson_really_inline void check_next_input(const simd8x64<uint8_t>& input) {
if (likely(is_ascii(input))) { if (likely(is_ascii(input))) {
// If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
// possibly finish them. // possibly finish them.
@ -211,7 +211,7 @@ using namespace simd;
} }
} }
really_inline error_code errors() { simdjson_really_inline error_code errors() {
return this->error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS; return this->error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
} }

View File

@ -80,7 +80,7 @@ using namespace simd;
// 3 4-bit table lookups, identifying which errors that 4 bits could match, and then &'s them together. // 3 4-bit table lookups, identifying which errors that 4 bits could match, and then &'s them together.
// If all 3 lookups detect the same error, it's an error. // If all 3 lookups detect the same error, it's an error.
// //
really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) { simdjson_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
// //
// These are the errors we're going to match for bytes 1-2, by looking at the first three // These are the errors we're going to match for bytes 1-2, by looking at the first three
// nibbles of the character: <high bits of byte 1>> & <low bits of byte 1> & <high bits of byte 2> // nibbles of the character: <high bits of byte 1>> & <low bits of byte 1> & <high bits of byte 2>
@ -154,7 +154,7 @@ using namespace simd;
return byte_1_high & byte_1_low & byte_2_high; return byte_1_high & byte_1_low & byte_2_high;
} }
really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input, const simd8<uint8_t> prev_input, simdjson_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input, const simd8<uint8_t> prev_input,
simd8<uint8_t> prev1) { simd8<uint8_t> prev1) {
simd8<uint8_t> prev2 = input.prev<2>(prev_input); simd8<uint8_t> prev2 = input.prev<2>(prev_input);
simd8<uint8_t> prev3 = input.prev<3>(prev_input); simd8<uint8_t> prev3 = input.prev<3>(prev_input);
@ -169,7 +169,7 @@ using namespace simd;
// Return nonzero if there are incomplete multibyte characters at the end of the block: // Return nonzero if there are incomplete multibyte characters at the end of the block:
// e.g. if there is a 4-byte character, but it's 3 bytes from the end. // e.g. if there is a 4-byte character, but it's 3 bytes from the end.
// //
really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) { simdjson_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
// If the previous input's last 3 bytes match this, they're too short (they ended at EOF): // If the previous input's last 3 bytes match this, they're too short (they ended at EOF):
// ... 1111____ 111_____ 11______ // ... 1111____ 111_____ 11______
static const uint8_t max_array[32] = { static const uint8_t max_array[32] = {
@ -193,7 +193,7 @@ using namespace simd;
// //
// Check whether the current bytes are valid UTF-8. // Check whether the current bytes are valid UTF-8.
// //
really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) { simdjson_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
// Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes
// (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers)
simd8<uint8_t> prev1 = input.prev<1>(prev_input); simd8<uint8_t> prev1 = input.prev<1>(prev_input);
@ -202,14 +202,14 @@ using namespace simd;
} }
// The only problem that can happen at EOF is that a multibyte character is too short. // The only problem that can happen at EOF is that a multibyte character is too short.
really_inline void check_eof() { simdjson_really_inline void check_eof() {
// If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
// possibly finish them. // possibly finish them.
this->error |= this->prev_incomplete; this->error |= this->prev_incomplete;
} }
really_inline void check_next_input(const simd8x64<uint8_t>& input) { simdjson_really_inline void check_next_input(const simd8x64<uint8_t>& input) {
if(likely(is_ascii(input))) { if(simdjson_likely(is_ascii(input))) {
// If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
// possibly finish them. // possibly finish them.
this->error |= this->prev_incomplete; this->error |= this->prev_incomplete;
@ -231,7 +231,7 @@ using namespace simd;
} }
} }
really_inline error_code errors() { simdjson_really_inline error_code errors() {
return this->error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS; return this->error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
} }

View File

@ -4,7 +4,7 @@ namespace utf8_validation {
using namespace simd; using namespace simd;
really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) { simdjson_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
// Bit 1 = Too Long (ASCII followed by continuation) // Bit 1 = Too Long (ASCII followed by continuation)
// Bit 2 = Overlong 3-byte // Bit 2 = Overlong 3-byte
@ -94,7 +94,7 @@ using namespace simd;
); );
return (byte_1_high & byte_1_low & byte_2_high); return (byte_1_high & byte_1_low & byte_2_high);
} }
really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input, simdjson_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) { const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
simd8<uint8_t> prev2 = input.prev<2>(prev_input); simd8<uint8_t> prev2 = input.prev<2>(prev_input);
simd8<uint8_t> prev3 = input.prev<3>(prev_input); simd8<uint8_t> prev3 = input.prev<3>(prev_input);
@ -107,7 +107,7 @@ using namespace simd;
// Return nonzero if there are incomplete multibyte characters at the end of the block: // Return nonzero if there are incomplete multibyte characters at the end of the block:
// e.g. if there is a 4-byte character, but it's 3 bytes from the end. // e.g. if there is a 4-byte character, but it's 3 bytes from the end.
// //
really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) { simdjson_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
// If the previous input's last 3 bytes match this, they're too short (they ended at EOF): // If the previous input's last 3 bytes match this, they're too short (they ended at EOF):
// ... 1111____ 111_____ 11______ // ... 1111____ 111_____ 11______
static const uint8_t max_array[32] = { static const uint8_t max_array[32] = {
@ -131,7 +131,7 @@ using namespace simd;
// //
// Check whether the current bytes are valid UTF-8. // Check whether the current bytes are valid UTF-8.
// //
really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) { simdjson_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
// Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes
// (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers)
simd8<uint8_t> prev1 = input.prev<1>(prev_input); simd8<uint8_t> prev1 = input.prev<1>(prev_input);
@ -140,14 +140,14 @@ using namespace simd;
} }
// The only problem that can happen at EOF is that a multibyte character is too short. // The only problem that can happen at EOF is that a multibyte character is too short.
really_inline void check_eof() { simdjson_really_inline void check_eof() {
// If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
// possibly finish them. // possibly finish them.
this->error |= this->prev_incomplete; this->error |= this->prev_incomplete;
} }
really_inline void check_next_input(const simd8x64<uint8_t>& input) { simdjson_really_inline void check_next_input(const simd8x64<uint8_t>& input) {
if(likely(is_ascii(input))) { if(simdjson_likely(is_ascii(input))) {
this->error |= this->prev_incomplete; this->error |= this->prev_incomplete;
} else { } else {
// you might think that a for-loop would work, but under Visual Studio, it is not good enough. // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
@ -168,7 +168,7 @@ using namespace simd;
} }
} }
really_inline error_code errors() { simdjson_really_inline error_code errors() {
return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS;
} }

View File

@ -95,7 +95,7 @@ struct utf8_checker {
static const uint8_t LEAD_1111 = 0x80; // [1111]____ ... static const uint8_t LEAD_1111 = 0x80; // [1111]____ ...
// Prepare fast_path_error in case the next block is ASCII // Prepare fast_path_error in case the next block is ASCII
really_inline void set_fast_path_error() { simdjson_really_inline void set_fast_path_error() {
// If any of the last 3 bytes in the input needs a continuation at the start of the next input, // If any of the last 3 bytes in the input needs a continuation at the start of the next input,
// it is an error for the next input to be ASCII. // it is an error for the next input to be ASCII.
// static const uint8_t incomplete_long[32] = { // static const uint8_t incomplete_long[32] = {
@ -119,7 +119,7 @@ struct utf8_checker {
this->prev_incomplete = this->prev_input_block.saturating_sub(max_value); this->prev_incomplete = this->prev_input_block.saturating_sub(max_value);
} }
really_inline simd8<uint8_t> get_lead_flags(const simd8<uint8_t> high_bits, const simd8<uint8_t> prev_high_bits) { simdjson_really_inline simd8<uint8_t> get_lead_flags(const simd8<uint8_t> high_bits, const simd8<uint8_t> prev_high_bits) {
// Total: 2 instructions, 1 constant // Total: 2 instructions, 1 constant
// - 1 byte shift (shuffle) // - 1 byte shift (shuffle)
// - 1 table lookup (shuffle) // - 1 table lookup (shuffle)
@ -138,7 +138,7 @@ struct utf8_checker {
} }
// Find errors in bytes 1 and 2 together (one single multi-nibble &) // Find errors in bytes 1 and 2 together (one single multi-nibble &)
really_inline simd8<uint8_t> get_byte_1_2_errors(const simd8<uint8_t> input, const simd8<uint8_t> prev_input, const simd8<uint8_t> high_bits, const simd8<uint8_t> prev_high_bits) { simdjson_really_inline simd8<uint8_t> get_byte_1_2_errors(const simd8<uint8_t> input, const simd8<uint8_t> prev_input, const simd8<uint8_t> high_bits, const simd8<uint8_t> prev_high_bits) {
// //
// These are the errors we're going to match for bytes 1-2, by looking at the first three // These are the errors we're going to match for bytes 1-2, by looking at the first three
// nibbles of the character: lead_flags & <low bits of byte 1> & <high bits of byte 2> // nibbles of the character: lead_flags & <low bits of byte 1> & <high bits of byte 2>
@ -196,7 +196,7 @@ struct utf8_checker {
return byte_1_flags & byte_2_flags; return byte_1_flags & byte_2_flags;
} }
really_inline simd8<uint8_t> get_byte_3_4_5_errors(const simd8<uint8_t> high_bits, const simd8<uint8_t> prev_high_bits) { simdjson_really_inline simd8<uint8_t> get_byte_3_4_5_errors(const simd8<uint8_t> high_bits, const simd8<uint8_t> prev_high_bits) {
// Total 7 instructions, 3 simd constants: // Total 7 instructions, 3 simd constants:
// - 3 table lookups (shuffles) // - 3 table lookups (shuffles)
// - 2 byte shifts (shuffles) // - 2 byte shifts (shuffles)
@ -224,7 +224,7 @@ struct utf8_checker {
// Check whether the current bytes are valid UTF-8. // Check whether the current bytes are valid UTF-8.
// At the end of the function, previous gets updated // At the end of the function, previous gets updated
// This should come down to 22 instructions if table definitions are in registers--30 if not. // This should come down to 22 instructions if table definitions are in registers--30 if not.
really_inline simd8<uint8_t> check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) { simdjson_really_inline simd8<uint8_t> check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
// When we process bytes M through N, we look for lead characters in M-4 through N-4. This allows // When we process bytes M through N, we look for lead characters in M-4 through N-4. This allows
// us to look for all errors related to any lead character at one time (since UTF-8 characters // us to look for all errors related to any lead character at one time (since UTF-8 characters
// can only be up to 4 bytes, and the next byte after a character finishes must be another lead, // can only be up to 4 bytes, and the next byte after a character finishes must be another lead,
@ -270,15 +270,15 @@ struct utf8_checker {
// TODO special case start of file, too, so that small documents are efficient! No shifting needed ... // TODO special case start of file, too, so that small documents are efficient! No shifting needed ...
// The only problem that can happen at EOF is that a multibyte character is too short. // The only problem that can happen at EOF is that a multibyte character is too short.
really_inline void check_eof() { simdjson_really_inline void check_eof() {
// If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
// possibly finish them. // possibly finish them.
this->error |= this->prev_incomplete; this->error |= this->prev_incomplete;
} }
really_inline void check_next_input(const simd8x64<uint8_t>& input) { simdjson_really_inline void check_next_input(const simd8x64<uint8_t>& input) {
simd8<uint8_t> bits = input.reduce_or(); simd8<uint8_t> bits = input.reduce_or();
if (likely(!bits.any_bits_set_anywhere(0b10000000u))) { if (simdjson_likely(!bits.any_bits_set_anywhere(0b10000000u))) {
// If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
// possibly finish them. // possibly finish them.
this->error |= this->prev_incomplete; this->error |= this->prev_incomplete;
@ -292,7 +292,7 @@ struct utf8_checker {
} }
} }
really_inline error_code errors() { simdjson_really_inline error_code errors() {
return this->error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS; return this->error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
} }

View File

@ -31,7 +31,7 @@ struct utf8_checker {
simd8<bool> has_error; simd8<bool> has_error;
processed_utf_bytes previous; processed_utf_bytes previous;
really_inline void check_carried_continuations() { simdjson_really_inline void check_carried_continuations() {
static const int8_t last_len[32] = { static const int8_t last_len[32] = {
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
@ -43,7 +43,7 @@ struct utf8_checker {
// check whether the current bytes are valid UTF-8 // check whether the current bytes are valid UTF-8
// at the end of the function, previous gets updated // at the end of the function, previous gets updated
really_inline void check_utf8_bytes(const simd8<uint8_t> current_bytes) { simdjson_really_inline void check_utf8_bytes(const simd8<uint8_t> current_bytes) {
/* high_nibbles = input >> 4 */ /* high_nibbles = input >> 4 */
const simd8<uint8_t> high_nibbles = current_bytes.shr<4>(); const simd8<uint8_t> high_nibbles = current_bytes.shr<4>();
@ -156,17 +156,17 @@ struct utf8_checker {
this->previous.first_len = first_len; this->previous.first_len = first_len;
} }
really_inline void check_next_input(const simd8<uint8_t> in) { simdjson_really_inline void check_next_input(const simd8<uint8_t> in) {
if (likely(!in.any_bits_set_anywhere(0x80u))) { if (simdjson_likely(!in.any_bits_set_anywhere(0x80u))) {
this->check_carried_continuations(); this->check_carried_continuations();
} else { } else {
this->check_utf8_bytes(in); this->check_utf8_bytes(in);
} }
} }
really_inline void check_next_input(const simd8x64<uint8_t>& in) { simdjson_really_inline void check_next_input(const simd8x64<uint8_t>& in) {
simd8<uint8_t> bits = in.reduce_or(); simd8<uint8_t> bits = in.reduce_or();
if (likely(!bits.any_bits_set_anywhere(0x80u))) { if (simdjson_likely(!bits.any_bits_set_anywhere(0x80u))) {
// it is ascii, we just check carried continuations. // it is ascii, we just check carried continuations.
this->check_carried_continuations(); this->check_carried_continuations();
} else { } else {
@ -177,7 +177,7 @@ struct utf8_checker {
} }
} }
really_inline error_code errors() { simdjson_really_inline error_code errors() {
return this->has_error.any() ? simdjson::UTF8_ERROR : simdjson::SUCCESS; return this->has_error.any() ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
} }
}; // struct utf8_checker }; // struct utf8_checker

View File

@ -139,7 +139,7 @@ struct utf8_checker {
// boundaries, so we need to keep a "carry" mask of the bits that were shifted // boundaries, so we need to keep a "carry" mask of the bits that were shifted
// past the boundary in the last loop iteration. // past the boundary in the last loop iteration.
// //
really_inline void check_length_errors(const simd8<uint8_t> bytes, const vmask_t bit_7) { simdjson_really_inline void check_length_errors(const simd8<uint8_t> bytes, const vmask_t bit_7) {
// Compute the continuation byte mask by finding bytes that start with // Compute the continuation byte mask by finding bytes that start with
// 11x, 111x, and 1111. For each of these prefixes, we get a bitmask // 11x, 111x, and 1111. For each of these prefixes, we get a bitmask
// and shift it forward by 1, 2, or 3. This loop should be unrolled by // and shift it forward by 1, 2, or 3. This loop should be unrolled by
@ -260,7 +260,7 @@ struct utf8_checker {
// bytes, we AND them together. Only when all three have an error bit in common // bytes, we AND them together. Only when all three have an error bit in common
// do we fail validation. // do we fail validation.
// //
really_inline void check_special_cases(const simd8<uint8_t> bytes) { simdjson_really_inline void check_special_cases(const simd8<uint8_t> bytes) {
const simd8<uint8_t> shifted_bytes = bytes.prev<1>(this->prev_bytes); const simd8<uint8_t> shifted_bytes = bytes.prev<1>(this->prev_bytes);
this->prev_bytes = bytes; this->prev_bytes = bytes;
@ -332,14 +332,14 @@ struct utf8_checker {
// check whether the current bytes are valid UTF-8 // check whether the current bytes are valid UTF-8
// at the end of the function, previous gets updated // at the end of the function, previous gets updated
really_inline void check_utf8_bytes(const simd8<uint8_t> bytes, const vmask_t bit_7) { simdjson_really_inline void check_utf8_bytes(const simd8<uint8_t> bytes, const vmask_t bit_7) {
this->check_length_errors(bytes, bit_7); this->check_length_errors(bytes, bit_7);
this->check_special_cases(bytes); this->check_special_cases(bytes);
} }
really_inline void check_next_input(const simd8<uint8_t> bytes) { simdjson_really_inline void check_next_input(const simd8<uint8_t> bytes) {
vmask_t bit_7 = bytes.get_bit<7>(); vmask_t bit_7 = bytes.get_bit<7>();
if (unlikely(bit_7)) { if (simdjson_unlikely(bit_7)) {
// TODO (@jkeiser): To work with simdjson's caller model, I moved the calculation of // TODO (@jkeiser): To work with simdjson's caller model, I moved the calculation of
// shifted_bytes inside check_utf8_bytes. I believe this adds an extra instruction to the hot // shifted_bytes inside check_utf8_bytes. I believe this adds an extra instruction to the hot
// path (saving prev_bytes), which is undesirable, though 2 register accesses vs. 1 memory // path (saving prev_bytes), which is undesirable, though 2 register accesses vs. 1 memory
@ -350,13 +350,13 @@ struct utf8_checker {
} }
} }
really_inline void check_next_input(const simd8x64<uint8_t>& in) { simdjson_really_inline void check_next_input(const simd8x64<uint8_t>& in) {
for (int i=0; i<simd8x64<uint8_t>::NUM_CHUNKS; i++) { for (int i=0; i<simd8x64<uint8_t>::NUM_CHUNKS; i++) {
this->check_next_input(in.chunks[i]); this->check_next_input(in.chunks[i]);
} }
} }
really_inline error_code errors() { simdjson_really_inline error_code errors() {
return (this->special_case_errors.any_bits_set_anywhere() | this->length_errors) ? simdjson::UTF8_ERROR : simdjson::SUCCESS; return (this->special_case_errors.any_bits_set_anywhere() | this->length_errors) ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
} }
}; // struct utf8_checker }; // struct utf8_checker

View File

@ -6,7 +6,7 @@ namespace allocate {
// //
// Allocates stage 2 internal state and outputs in the parser // Allocates stage 2 internal state and outputs in the parser
// //
really_inline error_code set_max_depth(dom_parser_implementation &parser, size_t max_depth) { simdjson_really_inline error_code set_max_depth(dom_parser_implementation &parser, size_t max_depth) {
parser.containing_scope.reset(new (std::nothrow) scope_descriptor[max_depth]); parser.containing_scope.reset(new (std::nothrow) scope_descriptor[max_depth]);
parser.is_array.reset(new (std::nothrow) bool[max_depth]); parser.is_array.reset(new (std::nothrow) bool[max_depth]);

View File

@ -9,50 +9,50 @@ namespace atomparsing {
// You might think that using memcpy makes this function expensive, but you'd be wrong. // You might think that using memcpy makes this function expensive, but you'd be wrong.
// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); // All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false");
// to the compile-time constant 1936482662. // to the compile-time constant 1936482662.
// Copies the first sizeof(uint32_t) bytes at `str` into a uint32_t and returns it.
// The caller must make sure at least that many bytes are readable at `str`.
simdjson_really_inline uint32_t string_to_uint32(const char* str) {
  uint32_t packed;
  std::memcpy(&packed, str, sizeof(packed));
  return packed;
}
// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive.
// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about.
WARN_UNUSED SIMDJSON_WARN_UNUSED
really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) {
uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++)
static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes");
std::memcpy(&srcval, src, sizeof(uint32_t)); std::memcpy(&srcval, src, sizeof(uint32_t));
return srcval ^ string_to_uint32(atom); return srcval ^ string_to_uint32(atom);
} }
WARN_UNUSED SIMDJSON_WARN_UNUSED
really_inline bool is_valid_true_atom(const uint8_t *src) { simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) {
return (str4ncmp(src, "true") | is_not_structural_or_whitespace(src[4])) == 0; return (str4ncmp(src, "true") | is_not_structural_or_whitespace(src[4])) == 0;
} }
WARN_UNUSED SIMDJSON_WARN_UNUSED
really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) {
if (len > 4) { return is_valid_true_atom(src); } if (len > 4) { return is_valid_true_atom(src); }
else if (len == 4) { return !str4ncmp(src, "true"); } else if (len == 4) { return !str4ncmp(src, "true"); }
else { return false; } else { return false; }
} }
WARN_UNUSED SIMDJSON_WARN_UNUSED
really_inline bool is_valid_false_atom(const uint8_t *src) { simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) {
return (str4ncmp(src+1, "alse") | is_not_structural_or_whitespace(src[5])) == 0; return (str4ncmp(src+1, "alse") | is_not_structural_or_whitespace(src[5])) == 0;
} }
WARN_UNUSED SIMDJSON_WARN_UNUSED
really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) {
if (len > 5) { return is_valid_false_atom(src); } if (len > 5) { return is_valid_false_atom(src); }
else if (len == 5) { return !str4ncmp(src+1, "alse"); } else if (len == 5) { return !str4ncmp(src+1, "alse"); }
else { return false; } else { return false; }
} }
WARN_UNUSED SIMDJSON_WARN_UNUSED
really_inline bool is_valid_null_atom(const uint8_t *src) { simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) {
return (str4ncmp(src, "null") | is_not_structural_or_whitespace(src[4])) == 0; return (str4ncmp(src, "null") | is_not_structural_or_whitespace(src[4])) == 0;
} }
WARN_UNUSED SIMDJSON_WARN_UNUSED
really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) {
if (len > 4) { return is_valid_null_atom(src); } if (len > 4) { return is_valid_null_atom(src); }
else if (len == 4) { return !str4ncmp(src, "null"); } else if (len == 4) { return !str4ncmp(src, "null"); }
else { return false; } else { return false; }

View File

@ -4,11 +4,11 @@ namespace stage2 {
// return non-zero if not a structural or whitespace char // return non-zero if not a structural or whitespace char
// zero otherwise // zero otherwise
really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) {
return structural_or_whitespace_negated[c]; return structural_or_whitespace_negated[c];
} }
really_inline uint32_t is_structural_or_whitespace(uint8_t c) { simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) {
return structural_or_whitespace[c]; return structural_or_whitespace[c];
} }
@ -39,7 +39,7 @@ static inline uint32_t hex_to_u32_nocheck(
// //
// Note: we assume that surrogates are treated separately // Note: we assume that surrogates are treated separately
// //
really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
if (cp <= 0x7F) { if (cp <= 0x7F) {
c[0] = uint8_t(cp); c[0] = uint8_t(cp);
return 1; // ascii return 1; // ascii
@ -71,10 +71,10 @@ really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm #ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm
// this is a slow emulation routine for 32-bit // this is a slow emulation routine for 32-bit
// //
static really_inline uint64_t __emulu(uint32_t x, uint32_t y) { static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) {
return x * (uint64_t)y; return x * (uint64_t)y;
} }
static really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd);
uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd);
uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32));
@ -86,7 +86,7 @@ static really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
} }
#endif #endif
really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) {
value128 answer; value128 answer;
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) #if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS)
#ifdef _M_ARM64 #ifdef _M_ARM64

View File

@ -15,7 +15,7 @@ namespace logger {
static int log_depth; // Not threadsafe. Log only. static int log_depth; // Not threadsafe. Log only.
// Helper to turn unprintable or newline characters into spaces // Helper to turn unprintable or newline characters into spaces
static really_inline char printable_char(char c) { static simdjson_really_inline char printable_char(char c) {
if (c >= 0x20) { if (c >= 0x20) {
return c; return c;
} else { } else {
@ -24,7 +24,7 @@ namespace logger {
} }
// Print the header and set up log_start // Print the header and set up log_start
static really_inline void log_start() { static simdjson_really_inline void log_start() {
if (LOG_ENABLED) { if (LOG_ENABLED) {
log_depth = 0; log_depth = 0;
printf("\n"); printf("\n");
@ -33,7 +33,7 @@ namespace logger {
} }
} }
static really_inline void log_string(const char *message) { static simdjson_really_inline void log_string(const char *message) {
if (LOG_ENABLED) { if (LOG_ENABLED) {
printf("%s\n", message); printf("%s\n", message);
} }
@ -41,7 +41,7 @@ namespace logger {
// Logs a single line of // Logs a single line of
template<typename S> template<typename S>
static really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) {
if (LOG_ENABLED) { if (LOG_ENABLED) {
printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title);
auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1;

View File

@ -24,7 +24,7 @@ namespace numberparsing {
// set to false. This should work *most of the time* (like 99% of the time). // set to false. This should work *most of the time* (like 99% of the time).
// We assume that power is in the [FASTFLOAT_SMALLEST_POWER, // We assume that power is in the [FASTFLOAT_SMALLEST_POWER,
// FASTFLOAT_LARGEST_POWER] interval: the caller is responsible for this check. // FASTFLOAT_LARGEST_POWER] interval: the caller is responsible for this check.
really_inline double compute_float_64(int64_t power, uint64_t i, bool negative, bool *success) { simdjson_really_inline double compute_float_64(int64_t power, uint64_t i, bool negative, bool *success) {
// we start with a fast path // we start with a fast path
// It was described in // It was described in
// Clinger WD. How to read floating point numbers accurately. // Clinger WD. How to read floating point numbers accurately.
@ -117,7 +117,7 @@ really_inline double compute_float_64(int64_t power, uint64_t i, bool negative,
// know that we have an exact computed value for the leading // know that we have an exact computed value for the leading
// 55 bits because any imprecision would play out as a +1, in // 55 bits because any imprecision would play out as a +1, in
// the worst case. // the worst case.
if (unlikely((upper & 0x1FF) == 0x1FF) && (lower + i < lower)) { if (simdjson_unlikely((upper & 0x1FF) == 0x1FF) && (lower + i < lower)) {
uint64_t factor_mantissa_low = uint64_t factor_mantissa_low =
mantissa_128[power - FASTFLOAT_SMALLEST_POWER]; mantissa_128[power - FASTFLOAT_SMALLEST_POWER];
// next, we compute the 64-bit x 128-bit multiplication, getting a 192-bit // next, we compute the 64-bit x 128-bit multiplication, getting a 192-bit
@ -155,7 +155,7 @@ really_inline double compute_float_64(int64_t power, uint64_t i, bool negative,
// which we guard against. // which we guard against.
// If we have lots of trailing zeros, we may fall right between two // If we have lots of trailing zeros, we may fall right between two
// floating-point values. // floating-point values.
if (unlikely((lower == 0) && ((upper & 0x1FF) == 0) && if (simdjson_unlikely((lower == 0) && ((upper & 0x1FF) == 0) &&
((mantissa & 3) == 1))) { ((mantissa & 3) == 1))) {
// if mantissa & 1 == 1 we might need to round up. // if mantissa & 1 == 1 we might need to round up.
// //
@ -192,7 +192,7 @@ really_inline double compute_float_64(int64_t power, uint64_t i, bool negative,
mantissa &= ~(1ULL << 52); mantissa &= ~(1ULL << 52);
uint64_t real_exponent = c.exp - lz; uint64_t real_exponent = c.exp - lz;
// we have to check that real_exponent is in range, otherwise we bail out // we have to check that real_exponent is in range, otherwise we bail out
if (unlikely((real_exponent < 1) || (real_exponent > 2046))) { if (simdjson_unlikely((real_exponent < 1) || (real_exponent > 2046))) {
*success = false; *success = false;
return 0; return 0;
} }
@ -236,7 +236,7 @@ static bool parse_float_strtod(const uint8_t *ptr, double *outDouble) {
// check quickly whether the next 8 chars are made of digits // check quickly whether the next 8 chars are made of digits
// at a glance, it looks better than Mula's // at a glance, it looks better than Mula's
// http://0x80.pl/articles/swar-digits-validate.html // http://0x80.pl/articles/swar-digits-validate.html
really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) {
uint64_t val; uint64_t val;
// this can read up to 7 bytes beyond the buffer size, but we require // this can read up to 7 bytes beyond the buffer size, but we require
// SIMDJSON_PADDING of padding // SIMDJSON_PADDING of padding
@ -252,7 +252,7 @@ really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) {
} }
template<typename W> template<typename W>
bool slow_float_parsing(UNUSED const uint8_t * src, W writer) { bool slow_float_parsing(SIMDJSON_UNUSED const uint8_t * src, W writer) {
double d; double d;
if (parse_float_strtod(src, &d)) { if (parse_float_strtod(src, &d)) {
WRITE_DOUBLE(d, src, writer); WRITE_DOUBLE(d, src, writer);
@ -263,7 +263,7 @@ bool slow_float_parsing(UNUSED const uint8_t * src, W writer) {
template<typename I> template<typename I>
NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later
really_inline bool parse_digit(const uint8_t c, I &i) { simdjson_really_inline bool parse_digit(const uint8_t c, I &i) {
const uint8_t digit = static_cast<uint8_t>(c - '0'); const uint8_t digit = static_cast<uint8_t>(c - '0');
if (digit > 9) { if (digit > 9) {
return false; return false;
@ -273,7 +273,7 @@ really_inline bool parse_digit(const uint8_t c, I &i) {
return true; return true;
} }
really_inline bool parse_decimal(UNUSED const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { simdjson_really_inline bool parse_decimal(SIMDJSON_UNUSED const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) {
// we continue with the fiction that we have an integer. If the // we continue with the fiction that we have an integer. If the
// floating point number is representable as x * 10^z for some integer // floating point number is representable as x * 10^z for some integer
// z that fits in 53 bits, then we will be able to convert back the // z that fits in 53 bits, then we will be able to convert back the
@ -299,7 +299,7 @@ really_inline bool parse_decimal(UNUSED const uint8_t *const src, const uint8_t
return true; return true;
} }
really_inline bool parse_exponent(UNUSED const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { simdjson_really_inline bool parse_exponent(SIMDJSON_UNUSED const uint8_t *const src, const uint8_t *&p, int64_t &exponent) {
// Exp Sign: -123.456e[-]78 // Exp Sign: -123.456e[-]78
bool neg_exp = ('-' == *p); bool neg_exp = ('-' == *p);
if (neg_exp || '+' == *p) { p++; } // Skip + as well if (neg_exp || '+' == *p) { p++; } // Skip + as well
@ -319,7 +319,7 @@ really_inline bool parse_exponent(UNUSED const uint8_t *const src, const uint8_t
// instructions for a likely branch, an unconclusive gain. // instructions for a likely branch, an unconclusive gain.
// If there were no digits, it's an error. // If there were no digits, it's an error.
if (unlikely(p == start_exp)) { if (simdjson_unlikely(p == start_exp)) {
return INVALID_NUMBER(src); return INVALID_NUMBER(src);
} }
// We have a valid positive exponent in exp_number at this point, except that // We have a valid positive exponent in exp_number at this point, except that
@ -327,7 +327,7 @@ really_inline bool parse_exponent(UNUSED const uint8_t *const src, const uint8_t
// If there were more than 18 digits, we may have overflowed the integer. We have to do // If there were more than 18 digits, we may have overflowed the integer. We have to do
// something!!!! // something!!!!
if (unlikely(p > start_exp+18)) { if (simdjson_unlikely(p > start_exp+18)) {
// Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow
while (*start_exp == '0') { start_exp++; } while (*start_exp == '0') { start_exp++; }
// 19 digits could overflow int64_t and is kind of absurd anyway. We don't // 19 digits could overflow int64_t and is kind of absurd anyway. We don't
@ -351,12 +351,12 @@ really_inline bool parse_exponent(UNUSED const uint8_t *const src, const uint8_t
} }
template<typename W> template<typename W>
really_inline bool write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, int digit_count, int64_t exponent, W &writer) { simdjson_really_inline bool write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, int digit_count, int64_t exponent, W &writer) {
// If we frequently had to deal with long strings of digits, // If we frequently had to deal with long strings of digits,
// we could extend our code by using a 128-bit integer instead // we could extend our code by using a 128-bit integer instead
// of a 64-bit integer. However, this is uncommon in practice. // of a 64-bit integer. However, this is uncommon in practice.
// digit count is off by 1 because of the decimal (assuming there was one). // digit count is off by 1 because of the decimal (assuming there was one).
if (unlikely((digit_count-1 >= 19))) { // this is uncommon if (simdjson_unlikely((digit_count-1 >= 19))) { // this is uncommon
// It is possible that the integer had an overflow. // It is possible that the integer had an overflow.
// We have to handle the case where we have 0.0000somenumber. // We have to handle the case where we have 0.0000somenumber.
const uint8_t *start = start_digits; const uint8_t *start = start_digits;
@ -383,7 +383,7 @@ really_inline bool write_float(const uint8_t *const src, bool negative, uint64_t
// NOTE: it's weird that the unlikely() only wraps half the if, but it seems to get slower any other // NOTE: it's weird that the unlikely() only wraps half the if, but it seems to get slower any other
// way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331
// To future reader: we'd love if someone found a better way, or at least could explain this result! // To future reader: we'd love if someone found a better way, or at least could explain this result!
if (unlikely(exponent < FASTFLOAT_SMALLEST_POWER) || (exponent > FASTFLOAT_LARGEST_POWER)) { if (simdjson_unlikely(exponent < FASTFLOAT_SMALLEST_POWER) || (exponent > FASTFLOAT_LARGEST_POWER)) {
// this is almost never going to get called!!! // this is almost never going to get called!!!
// we start anew, going slowly!!! // we start anew, going slowly!!!
bool success = slow_float_parsing(src, writer); bool success = slow_float_parsing(src, writer);
@ -406,7 +406,7 @@ really_inline bool write_float(const uint8_t *const src, bool negative, uint64_t
#ifdef SIMDJSON_SKIPNUMBERPARSING #ifdef SIMDJSON_SKIPNUMBERPARSING
template<typename W> template<typename W>
really_inline bool parse_number(const uint8_t *const, W &writer) { simdjson_really_inline bool parse_number(const uint8_t *const, W &writer) {
writer.append_s64(0); // always write zero writer.append_s64(0); // always write zero
return true; // always succeeds return true; // always succeeds
} }
@ -423,7 +423,7 @@ really_inline bool parse_number(const uint8_t *const, W &writer) {
// //
// Our objective is accurate parsing (ULP of 0) at high speed. // Our objective is accurate parsing (ULP of 0) at high speed.
template<typename W> template<typename W>
really_inline bool parse_number(const uint8_t *const src, W &writer) { simdjson_really_inline bool parse_number(const uint8_t *const src, W &writer) {
// //
// Check for minus sign // Check for minus sign

View File

@ -38,8 +38,8 @@ static const uint8_t escape_map[256] = {
// dest will advance a variable amount (return via pointer) // dest will advance a variable amount (return via pointer)
// return true if the unicode codepoint was valid // return true if the unicode codepoint was valid
// We work in little-endian then swap at write time // We work in little-endian then swap at write time
WARN_UNUSED SIMDJSON_WARN_UNUSED
really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
uint8_t **dst_ptr) { uint8_t **dst_ptr) {
// hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the // hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the
// conversion isn't valid; we defer the check for this to inside the // conversion isn't valid; we defer the check for this to inside the
@ -72,7 +72,7 @@ really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
return offset > 0; return offset > 0;
} }
WARN_UNUSED really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { SIMDJSON_WARN_UNUSED simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) {
src++; src++;
while (1) { while (1) {
// Copy the next n bytes, and find the backslash and quote in them. // Copy the next n bytes, and find the backslash and quote in them.

View File

@ -9,40 +9,40 @@ public:
dom_parser_implementation &dom_parser; dom_parser_implementation &dom_parser;
// Start a structural // Start a structural
really_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) simdjson_really_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index)
: buf{_dom_parser.buf}, : buf{_dom_parser.buf},
next_structural{&_dom_parser.structural_indexes[start_structural_index]}, next_structural{&_dom_parser.structural_indexes[start_structural_index]},
dom_parser{_dom_parser} { dom_parser{_dom_parser} {
} }
// Get the buffer position of the current structural character // Get the buffer position of the current structural character
really_inline const uint8_t* current() { simdjson_really_inline const uint8_t* current() {
return &buf[*(next_structural-1)]; return &buf[*(next_structural-1)];
} }
// Get the current structural character // Get the current structural character
really_inline char current_char() { simdjson_really_inline char current_char() {
return buf[*(next_structural-1)]; return buf[*(next_structural-1)];
} }
// Get the next structural character without advancing // Get the next structural character without advancing
really_inline char peek_next_char() { simdjson_really_inline char peek_next_char() {
return buf[*next_structural]; return buf[*next_structural];
} }
really_inline const uint8_t* peek() { simdjson_really_inline const uint8_t* peek() {
return &buf[*next_structural]; return &buf[*next_structural];
} }
really_inline const uint8_t* advance() { simdjson_really_inline const uint8_t* advance() {
return &buf[*(next_structural++)]; return &buf[*(next_structural++)];
} }
really_inline char advance_char() { simdjson_really_inline char advance_char() {
return buf[*(next_structural++)]; return buf[*(next_structural++)];
} }
really_inline size_t remaining_len() { simdjson_really_inline size_t remaining_len() {
return dom_parser.len - *(next_structural-1); return dom_parser.len - *(next_structural-1);
} }
really_inline bool at_end() { simdjson_really_inline bool at_end() {
return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes];
} }
really_inline bool at_beginning() { simdjson_really_inline bool at_beginning() {
return next_structural == dom_parser.structural_indexes.get(); return next_structural == dom_parser.structural_indexes.get();
} }
}; };

View File

@ -17,24 +17,24 @@ struct structural_parser : structural_iterator {
uint32_t depth{0}; uint32_t depth{0};
template<bool STREAMING, typename T> template<bool STREAMING, typename T>
WARN_UNUSED really_inline error_code parse(T &builder) noexcept; SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse(T &builder) noexcept;
template<bool STREAMING, typename T> template<bool STREAMING, typename T>
WARN_UNUSED static really_inline error_code parse(dom_parser_implementation &dom_parser, T &builder) noexcept { SIMDJSON_WARN_UNUSED static simdjson_really_inline error_code parse(dom_parser_implementation &dom_parser, T &builder) noexcept {
structural_parser parser(dom_parser, STREAMING ? dom_parser.next_structural_index : 0); structural_parser parser(dom_parser, STREAMING ? dom_parser.next_structural_index : 0);
return parser.parse<STREAMING>(builder); return parser.parse<STREAMING>(builder);
} }
// For non-streaming, to pass an explicit 0 as next_structural, which enables optimizations // For non-streaming, to pass an explicit 0 as next_structural, which enables optimizations
really_inline structural_parser(dom_parser_implementation &_dom_parser, uint32_t start_structural_index) simdjson_really_inline structural_parser(dom_parser_implementation &_dom_parser, uint32_t start_structural_index)
: structural_iterator(_dom_parser, start_structural_index) { : structural_iterator(_dom_parser, start_structural_index) {
} }
WARN_UNUSED really_inline error_code start_document() { SIMDJSON_WARN_UNUSED simdjson_really_inline error_code start_document() {
dom_parser.is_array[depth] = false; dom_parser.is_array[depth] = false;
return SUCCESS; return SUCCESS;
} }
template<typename T> template<typename T>
WARN_UNUSED really_inline error_code start_array(T &builder) { SIMDJSON_WARN_UNUSED simdjson_really_inline error_code start_array(T &builder) {
depth++; depth++;
if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; }
builder.start_array(*this); builder.start_array(*this);
@ -43,7 +43,7 @@ struct structural_parser : structural_iterator {
} }
template<typename T> template<typename T>
WARN_UNUSED really_inline bool empty_object(T &builder) { SIMDJSON_WARN_UNUSED simdjson_really_inline bool empty_object(T &builder) {
if (peek_next_char() == '}') { if (peek_next_char() == '}') {
advance_char(); advance_char();
builder.empty_object(*this); builder.empty_object(*this);
@ -52,7 +52,7 @@ struct structural_parser : structural_iterator {
return false; return false;
} }
template<typename T> template<typename T>
WARN_UNUSED really_inline bool empty_array(T &builder) { SIMDJSON_WARN_UNUSED simdjson_really_inline bool empty_array(T &builder) {
if (peek_next_char() == ']') { if (peek_next_char() == ']') {
advance_char(); advance_char();
builder.empty_array(*this); builder.empty_array(*this);
@ -62,7 +62,7 @@ struct structural_parser : structural_iterator {
} }
template<bool STREAMING> template<bool STREAMING>
WARN_UNUSED really_inline error_code finish() { SIMDJSON_WARN_UNUSED simdjson_really_inline error_code finish() {
dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]);
if (depth != 0) { if (depth != 0) {
@ -79,27 +79,31 @@ struct structural_parser : structural_iterator {
return SUCCESS; return SUCCESS;
} }
really_inline void log_value(const char *type) { simdjson_really_inline uint8_t last_structural() {
return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]];
}
simdjson_really_inline void log_value(const char *type) {
logger::log_line(*this, "", type, ""); logger::log_line(*this, "", type, "");
} }
really_inline void log_start_value(const char *type) { simdjson_really_inline void log_start_value(const char *type) {
logger::log_line(*this, "+", type, ""); logger::log_line(*this, "+", type, "");
if (logger::LOG_ENABLED) { logger::log_depth++; } if (logger::LOG_ENABLED) { logger::log_depth++; }
} }
really_inline void log_end_value(const char *type) { simdjson_really_inline void log_end_value(const char *type) {
if (logger::LOG_ENABLED) { logger::log_depth--; } if (logger::LOG_ENABLED) { logger::log_depth--; }
logger::log_line(*this, "-", type, ""); logger::log_line(*this, "-", type, "");
} }
really_inline void log_error(const char *error) { simdjson_really_inline void log_error(const char *error) {
logger::log_line(*this, "", "ERROR", error); logger::log_line(*this, "", "ERROR", error);
} }
}; // struct structural_parser }; // struct structural_parser
template<bool STREAMING, typename T> template<bool STREAMING, typename T>
WARN_UNUSED really_inline error_code structural_parser::parse(T &builder) noexcept { SIMDJSON_WARN_UNUSED simdjson_really_inline error_code structural_parser::parse(T &builder) noexcept {
logger::log_start(); logger::log_start();
// //
@ -114,18 +118,27 @@ WARN_UNUSED really_inline error_code structural_parser::parse(T &builder) noexce
// //
{ {
const uint8_t *value = advance(); const uint8_t *value = advance();
switch (*value) {
case '{': if (!empty_object(builder)) { goto object_begin; }; break; // Make sure the outer hash or array is closed before continuing; otherwise, there are ways we
case '[': { // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906
// Make sure the outer array is closed before continuing; otherwise, there are ways we could get if (!STREAMING) {
// into memory corruption. See https://github.com/simdjson/simdjson/issues/906 switch (*value) {
if (!STREAMING) { case '{':
if (buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]] != ']') { if (last_structural() != '}') {
return TAPE_ERROR; return TAPE_ERROR;
} }
} break;
if (!empty_array(builder)) { goto array_begin; }; break; case '[':
if (last_structural() != ']') {
return TAPE_ERROR;
}
break;
} }
}
switch (*value) {
case '{': if (!empty_object(builder)) { goto object_begin; }; break;
case '[': if (!empty_array(builder)) { goto array_begin; }; break;
default: SIMDJSON_TRY( builder.parse_root_primitive(*this, value) ); default: SIMDJSON_TRY( builder.parse_root_primitive(*this, value) );
} }
goto document_end; goto document_end;
@ -151,7 +164,7 @@ object_begin: {
} // object_begin: } // object_begin:
object_field: { object_field: {
if (unlikely( advance_char() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } if (simdjson_unlikely( advance_char() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; }
const uint8_t *value = advance(); const uint8_t *value = advance();
switch (*value) { switch (*value) {
case '{': if (!empty_object(builder)) { goto object_begin; }; break; case '{': if (!empty_object(builder)) { goto object_begin; }; break;
@ -165,7 +178,7 @@ object_continue: {
case ',': { case ',': {
builder.increment_count(*this); builder.increment_count(*this);
const uint8_t *key = advance(); const uint8_t *key = advance();
if (unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; }
SIMDJSON_TRY( builder.parse_key(*this, key) ); SIMDJSON_TRY( builder.parse_key(*this, key) );
goto object_field; goto object_field;
} }

View File

@ -11,12 +11,12 @@ struct tape_builder {
/** Next write location in the string buf for stage 2 parsing */ /** Next write location in the string buf for stage 2 parsing */
uint8_t *current_string_buf_loc; uint8_t *current_string_buf_loc;
really_inline tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} simdjson_really_inline tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {}
private: private:
friend struct structural_parser; friend struct structural_parser;
really_inline error_code parse_root_primitive(structural_parser &parser, const uint8_t *value) { simdjson_really_inline error_code parse_root_primitive(structural_parser &parser, const uint8_t *value) {
switch (*value) { switch (*value) {
case '"': return parse_string(parser, value); case '"': return parse_string(parser, value);
case 't': return parse_root_true_atom(parser, value); case 't': return parse_root_true_atom(parser, value);
@ -31,7 +31,7 @@ private:
return TAPE_ERROR; return TAPE_ERROR;
} }
} }
really_inline error_code parse_primitive(structural_parser &parser, const uint8_t *value) { simdjson_really_inline error_code parse_primitive(structural_parser &parser, const uint8_t *value) {
switch (*value) { switch (*value) {
case '"': return parse_string(parser, value); case '"': return parse_string(parser, value);
case 't': return parse_true_atom(parser, value); case 't': return parse_true_atom(parser, value);
@ -46,47 +46,47 @@ private:
return TAPE_ERROR; return TAPE_ERROR;
} }
} }
really_inline void empty_object(structural_parser &parser) { simdjson_really_inline void empty_object(structural_parser &parser) {
parser.log_value("empty object"); parser.log_value("empty object");
empty_container(parser, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); empty_container(parser, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT);
} }
really_inline void empty_array(structural_parser &parser) { simdjson_really_inline void empty_array(structural_parser &parser) {
parser.log_value("empty array"); parser.log_value("empty array");
empty_container(parser, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); empty_container(parser, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY);
} }
really_inline void start_document(structural_parser &parser) { simdjson_really_inline void start_document(structural_parser &parser) {
parser.log_start_value("document"); parser.log_start_value("document");
start_container(parser); start_container(parser);
} }
really_inline void start_object(structural_parser &parser) { simdjson_really_inline void start_object(structural_parser &parser) {
parser.log_start_value("object"); parser.log_start_value("object");
start_container(parser); start_container(parser);
} }
really_inline void start_array(structural_parser &parser) { simdjson_really_inline void start_array(structural_parser &parser) {
parser.log_start_value("array"); parser.log_start_value("array");
start_container(parser); start_container(parser);
} }
really_inline void end_object(structural_parser &parser) { simdjson_really_inline void end_object(structural_parser &parser) {
parser.log_end_value("object"); parser.log_end_value("object");
end_container(parser, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); end_container(parser, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT);
} }
really_inline void end_array(structural_parser &parser) { simdjson_really_inline void end_array(structural_parser &parser) {
parser.log_end_value("array"); parser.log_end_value("array");
end_container(parser, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); end_container(parser, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY);
} }
really_inline void end_document(structural_parser &parser) { simdjson_really_inline void end_document(structural_parser &parser) {
parser.log_end_value("document"); parser.log_end_value("document");
constexpr uint32_t start_tape_index = 0; constexpr uint32_t start_tape_index = 0;
tape.append(start_tape_index, internal::tape_type::ROOT); tape.append(start_tape_index, internal::tape_type::ROOT);
tape_writer::write(parser.dom_parser.doc->tape[start_tape_index], next_tape_index(parser), internal::tape_type::ROOT); tape_writer::write(parser.dom_parser.doc->tape[start_tape_index], next_tape_index(parser), internal::tape_type::ROOT);
} }
WARN_UNUSED really_inline error_code parse_key(structural_parser &parser, const uint8_t *value) { SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_key(structural_parser &parser, const uint8_t *value) {
return parse_string(parser, value, true); return parse_string(parser, value, true);
} }
WARN_UNUSED really_inline error_code parse_string(structural_parser &parser, const uint8_t *value, bool key = false) { SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_string(structural_parser &parser, const uint8_t *value, bool key = false) {
parser.log_value(key ? "key" : "string"); parser.log_value(key ? "key" : "string");
uint8_t *dst = on_start_string(parser); uint8_t *dst = on_start_string(parser);
dst = stringparsing::parse_string(value, dst); dst = stringparsing::parse_string(value, dst);
@ -98,13 +98,13 @@ private:
return SUCCESS; return SUCCESS;
} }
// Parses the number starting at `value` and lets numberparsing::parse_number() write it to
// the tape. Returns NUMBER_ERROR (after logging "Invalid number") when that call reports
// failure, SUCCESS otherwise.
WARN_UNUSED really_inline error_code parse_number(structural_parser &parser, const uint8_t *value) { SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_number(structural_parser &parser, const uint8_t *value) {
parser.log_value("number"); parser.log_value("number");
if (!numberparsing::parse_number(value, tape)) { parser.log_error("Invalid number"); return NUMBER_ERROR; } if (!numberparsing::parse_number(value, tape)) { parser.log_error("Invalid number"); return NUMBER_ERROR; }
return SUCCESS; return SUCCESS;
} }
really_inline error_code parse_root_number(structural_parser &parser, const uint8_t *value) { simdjson_really_inline error_code parse_root_number(structural_parser &parser, const uint8_t *value) {
// //
// We need to make a copy to make sure that the string is space terminated. // We need to make a copy to make sure that the string is space terminated.
// This is not about padding the input, which should already padded up // This is not about padding the input, which should already padded up
@ -129,42 +129,42 @@ private:
return error; return error;
} }
// Validates the atom at `value` with atomparsing::is_valid_true_atom(value).
// Returns T_ATOM_ERROR if validation fails; otherwise appends a TRUE_VALUE tape entry
// (payload 0) and returns SUCCESS.
WARN_UNUSED really_inline error_code parse_true_atom(structural_parser &parser, const uint8_t *value) { SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_true_atom(structural_parser &parser, const uint8_t *value) {
parser.log_value("true"); parser.log_value("true");
if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; }
tape.append(0, internal::tape_type::TRUE_VALUE); tape.append(0, internal::tape_type::TRUE_VALUE);
return SUCCESS; return SUCCESS;
} }
// Root-level variant of parse_true_atom(): identical except that the validator is also
// given parser.remaining_len() (presumably so it does not read past the end of the
// input — confirm against atomparsing).
// Returns T_ATOM_ERROR on failure; otherwise appends TRUE_VALUE and returns SUCCESS.
WARN_UNUSED really_inline error_code parse_root_true_atom(structural_parser &parser, const uint8_t *value) { SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_root_true_atom(structural_parser &parser, const uint8_t *value) {
parser.log_value("true"); parser.log_value("true");
if (!atomparsing::is_valid_true_atom(value, parser.remaining_len())) { return T_ATOM_ERROR; } if (!atomparsing::is_valid_true_atom(value, parser.remaining_len())) { return T_ATOM_ERROR; }
tape.append(0, internal::tape_type::TRUE_VALUE); tape.append(0, internal::tape_type::TRUE_VALUE);
return SUCCESS; return SUCCESS;
} }
// Validates the atom at `value` with atomparsing::is_valid_false_atom(value).
// Returns F_ATOM_ERROR if validation fails; otherwise appends a FALSE_VALUE tape entry
// (payload 0) and returns SUCCESS.
WARN_UNUSED really_inline error_code parse_false_atom(structural_parser &parser, const uint8_t *value) { SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_false_atom(structural_parser &parser, const uint8_t *value) {
parser.log_value("false"); parser.log_value("false");
if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; }
tape.append(0, internal::tape_type::FALSE_VALUE); tape.append(0, internal::tape_type::FALSE_VALUE);
return SUCCESS; return SUCCESS;
} }
// Root-level variant of parse_false_atom(): identical except that the validator is also
// given parser.remaining_len() (presumably so it does not read past the end of the
// input — confirm against atomparsing).
// Returns F_ATOM_ERROR on failure; otherwise appends FALSE_VALUE and returns SUCCESS.
WARN_UNUSED really_inline error_code parse_root_false_atom(structural_parser &parser, const uint8_t *value) { SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_root_false_atom(structural_parser &parser, const uint8_t *value) {
parser.log_value("false"); parser.log_value("false");
if (!atomparsing::is_valid_false_atom(value, parser.remaining_len())) { return F_ATOM_ERROR; } if (!atomparsing::is_valid_false_atom(value, parser.remaining_len())) { return F_ATOM_ERROR; }
tape.append(0, internal::tape_type::FALSE_VALUE); tape.append(0, internal::tape_type::FALSE_VALUE);
return SUCCESS; return SUCCESS;
} }
// Validates the atom at `value` with atomparsing::is_valid_null_atom(value).
// Returns N_ATOM_ERROR if validation fails; otherwise appends a NULL_VALUE tape entry
// (payload 0) and returns SUCCESS.
WARN_UNUSED really_inline error_code parse_null_atom(structural_parser &parser, const uint8_t *value) { SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_null_atom(structural_parser &parser, const uint8_t *value) {
parser.log_value("null"); parser.log_value("null");
if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; }
tape.append(0, internal::tape_type::NULL_VALUE); tape.append(0, internal::tape_type::NULL_VALUE);
return SUCCESS; return SUCCESS;
} }
WARN_UNUSED really_inline error_code parse_root_null_atom(structural_parser &parser, const uint8_t *value) { SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_root_null_atom(structural_parser &parser, const uint8_t *value) {
parser.log_value("null"); parser.log_value("null");
if (!atomparsing::is_valid_null_atom(value, parser.remaining_len())) { return N_ATOM_ERROR; } if (!atomparsing::is_valid_null_atom(value, parser.remaining_len())) { return N_ATOM_ERROR; }
tape.append(0, internal::tape_type::NULL_VALUE); tape.append(0, internal::tape_type::NULL_VALUE);
@ -172,29 +172,29 @@ private:
} }
// increment_count increments the count of keys in an object or values in an array. // increment_count increments the count of keys in an object or values in an array.
// The counter lives in parser.dom_parser.containing_scope[parser.depth].count.
// NOTE(review): the trailing comment below refers to "depth - 1" while the index actually
// used is parser.depth — confirm which one is intended.
really_inline void increment_count(structural_parser &parser) { simdjson_really_inline void increment_count(structural_parser &parser) {
parser.dom_parser.containing_scope[parser.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 parser.dom_parser.containing_scope[parser.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1
} }
// private: // private:
// Returns the index of the next tape slot to be written: the distance (in tape entries)
// between tape.next_tape_loc and the start of the document's tape, narrowed to uint32_t.
really_inline uint32_t next_tape_index(structural_parser &parser) { simdjson_really_inline uint32_t next_tape_index(structural_parser &parser) {
return uint32_t(tape.next_tape_loc - parser.dom_parser.doc->tape.get()); return uint32_t(tape.next_tape_loc - parser.dom_parser.doc->tape.get());
} }
// Writes an empty container as two consecutive tape entries:
//   - a `start` entry whose payload is start_index + 2 (the index just past the pair), and
//   - an `end` entry whose payload is start_index (the index of the start entry).
really_inline void empty_container(structural_parser &parser, internal::tape_type start, internal::tape_type end) { simdjson_really_inline void empty_container(structural_parser &parser, internal::tape_type start, internal::tape_type end) {
auto start_index = next_tape_index(parser); auto start_index = next_tape_index(parser);
tape.append(start_index+2, start); tape.append(start_index+2, start);
tape.append(start_index, end); tape.append(start_index, end);
} }
// Opens a container at the current depth: remembers the tape index where its start entry
// will go, resets its element count to 0, and reserves (skips) one tape slot for that
// start entry.
really_inline void start_container(structural_parser &parser) { simdjson_really_inline void start_container(structural_parser &parser) {
parser.dom_parser.containing_scope[parser.depth].tape_index = next_tape_index(parser); parser.dom_parser.containing_scope[parser.depth].tape_index = next_tape_index(parser);
parser.dom_parser.containing_scope[parser.depth].count = 0; parser.dom_parser.containing_scope[parser.depth].count = 0;
tape.skip(); // We don't actually *write* the start element until the end. tape.skip(); // We don't actually *write* the start element until the end.
} }
really_inline void end_container(structural_parser &parser, internal::tape_type start, internal::tape_type end) noexcept { simdjson_really_inline void end_container(structural_parser &parser, internal::tape_type start, internal::tape_type end) noexcept {
// Write the ending tape element, pointing at the start location // Write the ending tape element, pointing at the start location
const uint32_t start_tape_index = parser.dom_parser.containing_scope[parser.depth].tape_index; const uint32_t start_tape_index = parser.dom_parser.containing_scope[parser.depth].tape_index;
tape.append(start_tape_index, end); tape.append(start_tape_index, end);
@ -206,13 +206,13 @@ private:
tape_writer::write(parser.dom_parser.doc->tape[start_tape_index], next_tape_index(parser) | (uint64_t(cntsat) << 32), start); tape_writer::write(parser.dom_parser.doc->tape[start_tape_index], next_tape_index(parser) | (uint64_t(cntsat) << 32), start);
} }
// Begins a string: appends a STRING tape entry whose payload is the offset of
// current_string_buf_loc from the start of the document's string buffer, and returns the
// address sizeof(uint32_t) bytes past current_string_buf_loc as the destination for the
// string bytes (the skipped bytes are presumably reserved for a length prefix — confirm
// against on_end_string).
really_inline uint8_t *on_start_string(structural_parser &parser) noexcept { simdjson_really_inline uint8_t *on_start_string(structural_parser &parser) noexcept {
// we advance the point, accounting for the fact that we have a NULL termination // we advance the point, accounting for the fact that we have a NULL termination
tape.append(current_string_buf_loc - parser.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); tape.append(current_string_buf_loc - parser.dom_parser.doc->string_buf.get(), internal::tape_type::STRING);
return current_string_buf_loc + sizeof(uint32_t); return current_string_buf_loc + sizeof(uint32_t);
} }
really_inline void on_end_string(uint8_t *dst) noexcept { simdjson_really_inline void on_end_string(uint8_t *dst) noexcept {
uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t)));
// TODO check for overflow in case someone has a crazy string (>=4GB?) // TODO check for overflow in case someone has a crazy string (>=4GB?)
// But only add the overflow check when the document itself exceeds 4GB // But only add the overflow check when the document itself exceeds 4GB

View File

@ -7,18 +7,18 @@ struct tape_writer {
uint64_t *next_tape_loc; uint64_t *next_tape_loc;
/** Write a signed 64-bit value to tape. */ /** Write a signed 64-bit value to tape. */
really_inline void append_s64(int64_t value) noexcept; simdjson_really_inline void append_s64(int64_t value) noexcept;
/** Write an unsigned 64-bit value to tape. */ /** Write an unsigned 64-bit value to tape. */
really_inline void append_u64(uint64_t value) noexcept; simdjson_really_inline void append_u64(uint64_t value) noexcept;
/** Write a double value to tape. */ /** Write a double value to tape. */
really_inline void append_double(double value) noexcept; simdjson_really_inline void append_double(double value) noexcept;
/** /**
* Append a tape entry (an 8-bit type,and 56 bits worth of value). * Append a tape entry (an 8-bit type,and 56 bits worth of value).
*/ */
really_inline void append(uint64_t val, internal::tape_type t) noexcept; simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept;
/** /**
* Skip the current tape entry without writing. * Skip the current tape entry without writing.
@ -26,24 +26,24 @@ struct tape_writer {
* Used to skip the start of the container, since we'll come back later to fill it in when the * Used to skip the start of the container, since we'll come back later to fill it in when the
* container ends. * container ends.
*/ */
really_inline void skip() noexcept; simdjson_really_inline void skip() noexcept;
/** /**
* Skip the number of tape entries necessary to write a large u64 or i64. * Skip the number of tape entries necessary to write a large u64 or i64.
*/ */
really_inline void skip_large_integer() noexcept; simdjson_really_inline void skip_large_integer() noexcept;
/** /**
* Skip the number of tape entries necessary to write a double. * Skip the number of tape entries necessary to write a double.
*/ */
really_inline void skip_double() noexcept; simdjson_really_inline void skip_double() noexcept;
/** /**
* Write a value to a known location on tape. * Write a value to a known location on tape.
* *
* Used to go back and write out the start of a container after the container ends. * Used to go back and write out the start of a container after the container ends.
*/ */
really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept;
private: private:
/** /**
@ -51,50 +51,50 @@ private:
* all 64 bits, such as double and uint64_t. * all 64 bits, such as double and uint64_t.
*/ */
template<typename T> template<typename T>
really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept;
}; // struct tape_writer }; // struct tape_writer
// Writes a signed 64-bit value as two tape slots via append2(): an INT64-tagged entry
// with payload 0, followed by the raw 64-bit value.
really_inline void tape_writer::append_s64(int64_t value) noexcept { simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept {
append2(0, value, internal::tape_type::INT64); append2(0, value, internal::tape_type::INT64);
} }
// Writes an unsigned 64-bit value as two tape slots: a UINT64-tagged entry with payload 0,
// then the raw value stored directly in the following slot.
really_inline void tape_writer::append_u64(uint64_t value) noexcept { simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept {
append(0, internal::tape_type::UINT64); append(0, internal::tape_type::UINT64);
*next_tape_loc = value; *next_tape_loc = value;
next_tape_loc++; next_tape_loc++;
} }
/** Write a double value to tape. */ /** Write a double value to tape. */
// Uses two tape slots via append2(): a DOUBLE-tagged entry with payload 0, followed by the
// 64-bit representation of `value`.
really_inline void tape_writer::append_double(double value) noexcept { simdjson_really_inline void tape_writer::append_double(double value) noexcept {
append2(0, value, internal::tape_type::DOUBLE); append2(0, value, internal::tape_type::DOUBLE);
} }
// Advances the write position by one tape slot without writing anything to it.
really_inline void tape_writer::skip() noexcept { simdjson_really_inline void tape_writer::skip() noexcept {
next_tape_loc++; next_tape_loc++;
} }
// Advances the write position by two tape slots (the space append_s64 / append_u64 use)
// without writing anything.
really_inline void tape_writer::skip_large_integer() noexcept { simdjson_really_inline void tape_writer::skip_large_integer() noexcept {
next_tape_loc += 2; next_tape_loc += 2;
} }
// Advances the write position by two tape slots (the space append_double uses) without
// writing anything.
really_inline void tape_writer::skip_double() noexcept { simdjson_really_inline void tape_writer::skip_double() noexcept {
next_tape_loc += 2; next_tape_loc += 2;
} }
// Writes one tape entry at the current position and advances by one slot. The entry is
// `val` OR-ed with the type tag placed in the top 8 bits (bits 56-63); any bits of `val`
// above bit 55 would therefore be merged into the tag.
really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept {
*next_tape_loc = val | ((uint64_t(char(t))) << 56); *next_tape_loc = val | ((uint64_t(char(t))) << 56);
next_tape_loc++; next_tape_loc++;
} }
// Writes a two-slot value: first a tagged entry (val, t) via append(), then the raw bytes
// of `val2` copied into the following slot. T must be exactly 64 bits wide (enforced by
// the static_assert); memcpy is used so any 64-bit type (e.g. double, int64_t) is stored
// bit-for-bit.
template<typename T> template<typename T>
really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept {
append(val, t); append(val, t);
static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!");
memcpy(next_tape_loc, &val2, sizeof(val2)); memcpy(next_tape_loc, &val2, sizeof(val2));
next_tape_loc++; next_tape_loc++;
} }
// Stores a tagged entry (same encoding as append(): `val` with the type tag in the top
// 8 bits) into the given slot `tape_loc`. Static: it does not read or move next_tape_loc.
really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept {
tape_loc = val | ((uint64_t(char(t))) << 56); tape_loc = val | ((uint64_t(char(t))) << 56);
} }

View File

@ -8,7 +8,7 @@ namespace SIMDJSON_IMPLEMENTATION {
// but the algorithms do not end up using the returned value. // but the algorithms do not end up using the returned value.
// Sadly, sanitizers are not smart enough to figure it out. // Sadly, sanitizers are not smart enough to figure it out.
NO_SANITIZE_UNDEFINED NO_SANITIZE_UNDEFINED
really_inline int trailing_zeroes(uint64_t input_num) { simdjson_really_inline int trailing_zeroes(uint64_t input_num) {
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
return (int)_tzcnt_u64(input_num); return (int)_tzcnt_u64(input_num);
#else // SIMDJSON_REGULAR_VISUAL_STUDIO #else // SIMDJSON_REGULAR_VISUAL_STUDIO
@ -22,27 +22,27 @@ really_inline int trailing_zeroes(uint64_t input_num) {
} }
/* result might be undefined when input_num is zero */ /* result might be undefined when input_num is zero */
// Returns the result of _blsr_u64(input_num) (BLSR: reset the lowest set bit).
really_inline uint64_t clear_lowest_bit(uint64_t input_num) { simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) {
return _blsr_u64(input_num); return _blsr_u64(input_num);
} }
/* result might be undefined when input_num is zero */ /* result might be undefined when input_num is zero */
// Returns the result of _lzcnt_u64(input_num), converted to int.
// NOTE(review): the LZCNT instruction is documented to return the operand size for a zero
// input, so the caveat above may be inherited from other implementations — confirm.
really_inline int leading_zeroes(uint64_t input_num) { simdjson_really_inline int leading_zeroes(uint64_t input_num) {
return int(_lzcnt_u64(input_num)); return int(_lzcnt_u64(input_num));
} }
// count_ones: population count of a 64-bit value. Two variants are provided because the
// intrinsic name and the declared return type differ between compilers:
//   - SIMDJSON_REGULAR_VISUAL_STUDIO: __popcnt64, declared to return unsigned __int64
//   - otherwise:                      _popcnt64,  declared to return long long int
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
really_inline unsigned __int64 count_ones(uint64_t input_num) { simdjson_really_inline unsigned __int64 count_ones(uint64_t input_num) {
// note: we do not support legacy 32-bit Windows // note: we do not support legacy 32-bit Windows
return __popcnt64(input_num);// Visual Studio wants two underscores return __popcnt64(input_num);// Visual Studio wants two underscores
} }
#else #else
really_inline long long int count_ones(uint64_t input_num) { simdjson_really_inline long long int count_ones(uint64_t input_num) {
return _popcnt64(input_num); return _popcnt64(input_num);
} }
#endif #endif
really_inline bool add_overflow(uint64_t value1, uint64_t value2, simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2,
uint64_t *result) { uint64_t *result) {
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
return _addcarry_u64(0, value1, value2, return _addcarry_u64(0, value1, value2,

View File

@ -9,7 +9,7 @@ namespace SIMDJSON_IMPLEMENTATION {
// //
// For example, prefix_xor(00100100) == 00011100 // For example, prefix_xor(00100100) == 00011100
// //
really_inline uint64_t prefix_xor(const uint64_t bitmask) { simdjson_really_inline uint64_t prefix_xor(const uint64_t bitmask) {
// There should be no such thing with a processor supporting avx2 // There should be no such thing with a processor supporting avx2
// but not clmul. // but not clmul.
__m128i all_ones = _mm_set1_epi8('\xFF'); __m128i all_ones = _mm_set1_epi8('\xFF');

View File

@ -12,13 +12,13 @@ namespace SIMDJSON_IMPLEMENTATION {
using namespace simd; using namespace simd;
struct json_character_block { struct json_character_block {
static really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in); static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in);
// ASCII white-space ('\r','\n','\t',' ') // ASCII white-space ('\r','\n','\t',' ')
really_inline uint64_t whitespace() const { return _whitespace; } simdjson_really_inline uint64_t whitespace() const { return _whitespace; }
// non-quote structural characters (comma, colon, braces, brackets) // non-quote structural characters (comma, colon, braces, brackets)
really_inline uint64_t op() const { return _op; } simdjson_really_inline uint64_t op() const { return _op; }
// neither a structural character nor a white-space, so letters, numbers and quotes // neither a structural character nor a white-space, so letters, numbers and quotes
really_inline uint64_t scalar() { return ~(op() | whitespace()); } simdjson_really_inline uint64_t scalar() { return ~(op() | whitespace()); }
uint64_t _whitespace; // ASCII white-space ('\r','\n','\t',' ') uint64_t _whitespace; // ASCII white-space ('\r','\n','\t',' ')
uint64_t _op; // structural characters (comma, colon, braces, brackets but not quotes) uint64_t _op; // structural characters (comma, colon, braces, brackets but not quotes)
@ -26,7 +26,7 @@ struct json_character_block {
// This identifies structural characters (comma, colon, braces, brackets), // This identifies structural characters (comma, colon, braces, brackets),
// and ASCII white-space ('\r','\n','\t',' '). // and ASCII white-space ('\r','\n','\t',' ').
really_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t>& in) { simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t>& in) {
// These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why
// we can't use the generic lookup_16. // we can't use the generic lookup_16.
auto whitespace_table = simd8<uint8_t>::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); auto whitespace_table = simd8<uint8_t>::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100);
@ -49,11 +49,11 @@ really_inline json_character_block json_character_block::classify(const simd::si
return { whitespace, op }; return { whitespace, op };
} }
// Reports whether the 64-byte block is ASCII: combines the block's chunks with
// reduce_or() and asks the combined vector is_ascii() (presumably a single byte with its
// high bit set anywhere in the block makes this false — confirm against simd8x64).
really_inline bool is_ascii(const simd8x64<uint8_t>& input) { simdjson_really_inline bool is_ascii(const simd8x64<uint8_t>& input) {
return input.reduce_or().is_ascii(); return input.reduce_or().is_ascii();
} }
UNUSED really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) { SIMDJSON_UNUSED simdjson_really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
simd8<uint8_t> is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0 simd8<uint8_t> is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0
simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
@ -61,7 +61,7 @@ UNUSED really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1
return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0);
} }
really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) { simdjson_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
// Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine.
@ -90,40 +90,40 @@ namespace {
namespace SIMDJSON_IMPLEMENTATION { namespace SIMDJSON_IMPLEMENTATION {
namespace stage1 { namespace stage1 {
// Fast path around find_escaped_branchless(): when the block contains no backslashes
// (backslash == 0) the result is the carried-over prev_escaped, which is then reset
// to 0. Otherwise the work is delegated to find_escaped_branchless(backslash).
really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) {
if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; }
return find_escaped_branchless(backslash); return find_escaped_branchless(backslash);
} }
} // namespace stage1 } // namespace stage1
// Minifies `len` bytes of JSON from `buf` into `dst` by forwarding to the haswell
// json_minifier instantiated with a step size of 128; `dst_len` is passed through by
// reference and the resulting error code is returned unchanged.
WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { SIMDJSON_WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
return haswell::stage1::json_minifier::minify<128>(buf, len, dst, dst_len); return haswell::stage1::json_minifier::minify<128>(buf, len, dst, dst_len);
} }
// Stage 1: records the input buffer and its length on this parser, then runs the haswell
// json_structural_indexer (step size 128) over it, forwarding the `streaming` flag.
// Returns the indexer's error code.
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept { SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
this->buf = _buf; this->buf = _buf;
this->len = _len; this->len = _len;
return haswell::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming); return haswell::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming);
} }
// Validates `len` bytes at `buf` as UTF-8 by forwarding to the haswell
// generic_validate_utf8(); returns its boolean result.
WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { SIMDJSON_WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
return haswell::stage1::generic_validate_utf8(buf,len); return haswell::stage1::generic_validate_utf8(buf,len);
} }
// Stage 2: stores a pointer to the target document on this parser, creates a tape_builder
// for it, and runs structural_parser::parse<false> (the template argument presumably
// selects non-streaming mode — confirm). Returns the parser's error code.
WARN_UNUSED error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept {
doc = &_doc; doc = &_doc;
stage2::tape_builder builder(_doc); stage2::tape_builder builder(_doc);
return stage2::structural_parser::parse<false>(*this, builder); return stage2::structural_parser::parse<false>(*this, builder);
} }
// Same as stage2() but runs structural_parser::parse<true> (the template argument
// presumably selects streaming mode, i.e. parsing the next document of a stream —
// confirm). Returns the parser's error code.
WARN_UNUSED error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept {
doc = &_doc; doc = &_doc;
stage2::tape_builder builder(_doc); stage2::tape_builder builder(_doc);
return stage2::structural_parser::parse<true>(*this, builder); return stage2::structural_parser::parse<true>(*this, builder);
} }
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
auto error = stage1(_buf, _len, false); auto error = stage1(_buf, _len, false);
if (error) { return error; } if (error) { return error; }
return stage2(_doc); return stage2(_doc);

View File

@ -4,7 +4,7 @@
namespace { namespace {
namespace SIMDJSON_IMPLEMENTATION { namespace SIMDJSON_IMPLEMENTATION {
WARN_UNUSED error_code implementation::create_dom_parser_implementation( SIMDJSON_WARN_UNUSED error_code implementation::create_dom_parser_implementation(
size_t capacity, size_t capacity,
size_t max_depth, size_t max_depth,
std::unique_ptr<internal::dom_parser_implementation>& dst std::unique_ptr<internal::dom_parser_implementation>& dst

View File

@ -12,18 +12,18 @@ using namespace simdjson;
// The haswell backend's simdjson::implementation. It registers itself under the name
// "haswell", with the description "Intel/AMD AVX2", and declares that it requires the
// AVX2, PCLMULQDQ, BMI1 and BMI2 instruction sets. The three virtual overrides are only
// declared here.
class implementation final : public simdjson::implementation { class implementation final : public simdjson::implementation {
public: public:
really_inline implementation() : simdjson::implementation( simdjson_really_inline implementation() : simdjson::implementation(
"haswell", "haswell",
"Intel/AMD AVX2", "Intel/AMD AVX2",
instruction_set::AVX2 | instruction_set::PCLMULQDQ | instruction_set::BMI1 | instruction_set::BMI2 instruction_set::AVX2 | instruction_set::PCLMULQDQ | instruction_set::BMI1 | instruction_set::BMI2
) {} ) {}
// Creates a DOM parser implementation for this backend and stores it in `dst`.
// NOTE(review): the second parameter is named max_length here but max_depth in the
// out-of-line definition — confirm which name is intended.
WARN_UNUSED error_code create_dom_parser_implementation( SIMDJSON_WARN_UNUSED error_code create_dom_parser_implementation(
size_t capacity, size_t capacity,
size_t max_length, size_t max_length,
std::unique_ptr<internal::dom_parser_implementation>& dst std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept final; ) const noexcept final;
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; SIMDJSON_WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
WARN_UNUSED bool validate_utf8(const char *buf, size_t len) const noexcept final; SIMDJSON_WARN_UNUSED bool validate_utf8(const char *buf, size_t len) const noexcept final;
}; };
} // namespace haswell } // namespace haswell

View File

@ -42,7 +42,7 @@
#ifndef _blsr_u64 #ifndef _blsr_u64
// we roll our own // we roll our own
SIMDJSON_TARGET_HASWELL SIMDJSON_TARGET_HASWELL
// Fallback used when _blsr_u64 is not already defined as a macro: clears the lowest set
// bit of n. Subtracting 1 flips the lowest set bit and every bit below it, so AND-ing
// with n removes exactly that bit. For n == 0 the result is 0.
static really_inline uint64_t _blsr_u64(uint64_t n) { static simdjson_really_inline uint64_t _blsr_u64(uint64_t n) {
return (n - 1) & n; return (n - 1) & n;
} }
SIMDJSON_UNTARGET_REGION SIMDJSON_UNTARGET_REGION

View File

@ -4,7 +4,7 @@
namespace { namespace {
namespace SIMDJSON_IMPLEMENTATION { namespace SIMDJSON_IMPLEMENTATION {
static really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) {
// this actually computes *16* values so we are being wasteful. // this actually computes *16* values so we are being wasteful.
const __m128i ascii0 = _mm_set1_epi8('0'); const __m128i ascii0 = _mm_set1_epi8('0');
const __m128i mul_1_10 = const __m128i mul_1_10 =

View File

@ -13,23 +13,23 @@ namespace simd {
__m256i value; __m256i value;
// Zero constructor // Zero constructor
really_inline base() : value{__m256i()} {} simdjson_really_inline base() : value{__m256i()} {}
// Conversion from SIMD register // Conversion from SIMD register
really_inline base(const __m256i _value) : value(_value) {} simdjson_really_inline base(const __m256i _value) : value(_value) {}
// Conversion to SIMD register // Conversion to SIMD register
really_inline operator const __m256i&() const { return this->value; } simdjson_really_inline operator const __m256i&() const { return this->value; }
really_inline operator __m256i&() { return this->value; } simdjson_really_inline operator __m256i&() { return this->value; }
// Bit operations // Bit operations
really_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } simdjson_really_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); }
really_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } simdjson_really_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); }
really_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } simdjson_really_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); }
// Returns *this & ~other. _mm256_andnot_si256(a, b) computes (~a) & b, which is why the
// operands are passed as (other, *this).
really_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } simdjson_really_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); }
really_inline Child& operator|=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast | other; return *this_cast; } simdjson_really_inline Child& operator|=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast | other; return *this_cast; }
really_inline Child& operator&=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast & other; return *this_cast; } simdjson_really_inline Child& operator&=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast & other; return *this_cast; }
really_inline Child& operator^=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast ^ other; return *this_cast; } simdjson_really_inline Child& operator^=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast ^ other; return *this_cast; }
}; };
// Forward-declared so they can be used by splat and friends. // Forward-declared so they can be used by splat and friends.
@ -41,15 +41,15 @@ namespace simd {
typedef uint32_t bitmask_t; typedef uint32_t bitmask_t;
typedef uint64_t bitmask2_t; typedef uint64_t bitmask2_t;
really_inline base8() : base<simd8<T>>() {} simdjson_really_inline base8() : base<simd8<T>>() {}
really_inline base8(const __m256i _value) : base<simd8<T>>(_value) {} simdjson_really_inline base8(const __m256i _value) : base<simd8<T>>(_value) {}
really_inline Mask operator==(const simd8<T> other) const { return _mm256_cmpeq_epi8(*this, other); } simdjson_really_inline Mask operator==(const simd8<T> other) const { return _mm256_cmpeq_epi8(*this, other); }
static const int SIZE = sizeof(base<T>::value); static const int SIZE = sizeof(base<T>::value);
// Returns this vector shifted forward by N bytes within the byte stream: each output byte
// is the byte N positions earlier, with the first N bytes taken from the end of
// prev_chunk. The permute (0x21) builds [high half of prev_chunk, low half of *this] so
// that the per-128-bit-lane alignr can pull bytes across the lane boundary.
// NOTE(review): this reading assumes 1 <= N <= 16 — confirm against callers.
template<int N=1> template<int N=1>
really_inline simd8<T> prev(const simd8<T> prev_chunk) const { simdjson_really_inline simd8<T> prev(const simd8<T> prev_chunk) const {
return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N);
} }
}; };
@ -57,27 +57,27 @@ namespace simd {
// SIMD byte mask type (returned by things like eq and gt) // SIMD byte mask type (returned by things like eq and gt)
template<> template<>
struct simd8<bool>: base8<bool> { struct simd8<bool>: base8<bool> {
// Broadcasts a bool to every byte: -(!!_value) narrowed to uint8_t is 0xFF for true and
// 0x00 for false.
static really_inline simd8<bool> splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } static simdjson_really_inline simd8<bool> splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); }
// Default constructor: defers to the base8 default constructor.
really_inline simd8<bool>() : base8() {} simdjson_really_inline simd8<bool>() : base8() {}
// Wraps an existing SIMD register.
really_inline simd8<bool>(const __m256i _value) : base8<bool>(_value) {} simdjson_really_inline simd8<bool>(const __m256i _value) : base8<bool>(_value) {}
// Splat constructor // Splat constructor
really_inline simd8<bool>(bool _value) : base8<bool>(splat(_value)) {} simdjson_really_inline simd8<bool>(bool _value) : base8<bool>(splat(_value)) {}
// Packs the most significant bit of each of the 32 bytes into an int (movemask).
really_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } simdjson_really_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); }
// True when at least one bit of the register is set (testz of the value with itself
// returns 1 only for an all-zero register).
really_inline bool any() const { return !_mm256_testz_si256(*this, *this); } simdjson_really_inline bool any() const { return !_mm256_testz_si256(*this, *this); }
// Bitwise NOT: XOR with `true`, which converts through the splat constructor to an
// all-ones register.
really_inline simd8<bool> operator~() const { return *this ^ true; } simdjson_really_inline simd8<bool> operator~() const { return *this ^ true; }
}; };
template<typename T> template<typename T>
struct base8_numeric: base8<T> { struct base8_numeric: base8<T> {
static really_inline simd8<T> splat(T _value) { return _mm256_set1_epi8(_value); } static simdjson_really_inline simd8<T> splat(T _value) { return _mm256_set1_epi8(_value); }
static really_inline simd8<T> zero() { return _mm256_setzero_si256(); } static simdjson_really_inline simd8<T> zero() { return _mm256_setzero_si256(); }
static really_inline simd8<T> load(const T values[32]) { static simdjson_really_inline simd8<T> load(const T values[32]) {
return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(values)); return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(values));
} }
// Repeat 16 values as many times as necessary (usually for lookup tables) // Repeat 16 values as many times as necessary (usually for lookup tables)
static really_inline simd8<T> repeat_16( static simdjson_really_inline simd8<T> repeat_16(
T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7,
T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15
) { ) {
@ -89,24 +89,24 @@ namespace simd {
); );
} }
really_inline base8_numeric() : base8<T>() {} simdjson_really_inline base8_numeric() : base8<T>() {}
really_inline base8_numeric(const __m256i _value) : base8<T>(_value) {} simdjson_really_inline base8_numeric(const __m256i _value) : base8<T>(_value) {}
// Store to array // Store to array
really_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } simdjson_really_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); }
// Addition/subtraction are the same for signed and unsigned // Addition/subtraction are the same for signed and unsigned
really_inline simd8<T> operator+(const simd8<T> other) const { return _mm256_add_epi8(*this, other); } simdjson_really_inline simd8<T> operator+(const simd8<T> other) const { return _mm256_add_epi8(*this, other); }
really_inline simd8<T> operator-(const simd8<T> other) const { return _mm256_sub_epi8(*this, other); } simdjson_really_inline simd8<T> operator-(const simd8<T> other) const { return _mm256_sub_epi8(*this, other); }
really_inline simd8<T>& operator+=(const simd8<T> other) { *this = *this + other; return *(simd8<T>*)this; } simdjson_really_inline simd8<T>& operator+=(const simd8<T> other) { *this = *this + other; return *(simd8<T>*)this; }
really_inline simd8<T>& operator-=(const simd8<T> other) { *this = *this - other; return *(simd8<T>*)this; } simdjson_really_inline simd8<T>& operator-=(const simd8<T> other) { *this = *this - other; return *(simd8<T>*)this; }
// Override to distinguish from bool version // Override to distinguish from bool version
really_inline simd8<T> operator~() const { return *this ^ 0xFFu; } simdjson_really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
// Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values)
template<typename L> template<typename L>
really_inline simd8<L> lookup_16(simd8<L> lookup_table) const { simdjson_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
return _mm256_shuffle_epi8(lookup_table, *this); return _mm256_shuffle_epi8(lookup_table, *this);
} }
@ -118,7 +118,7 @@ namespace simd {
// signature simd8<L> compress(uint32_t mask) would be // signature simd8<L> compress(uint32_t mask) would be
// sensible, but the AVX ISA makes this kind of approach difficult. // sensible, but the AVX ISA makes this kind of approach difficult.
template<typename L> template<typename L>
really_inline void compress(uint32_t mask, L * output) const { simdjson_really_inline void compress(uint32_t mask, L * output) const {
// this particular implementation was inspired by work done by @animetosho // this particular implementation was inspired by work done by @animetosho
// we do it in four steps, first 8 bytes and then second 8 bytes... // we do it in four steps, first 8 bytes and then second 8 bytes...
uint8_t mask1 = uint8_t(mask); // least significant 8 bits uint8_t mask1 = uint8_t(mask); // least significant 8 bits
@ -161,7 +161,7 @@ namespace simd {
} }
template<typename L> template<typename L>
really_inline simd8<L> lookup_16( simdjson_really_inline simd8<L> lookup_16(
L replace0, L replace1, L replace2, L replace3, L replace0, L replace1, L replace2, L replace3,
L replace4, L replace5, L replace6, L replace7, L replace4, L replace5, L replace6, L replace7,
L replace8, L replace9, L replace10, L replace11, L replace8, L replace9, L replace10, L replace11,
@ -178,14 +178,14 @@ namespace simd {
// Signed bytes // Signed bytes
template<> template<>
struct simd8<int8_t> : base8_numeric<int8_t> { struct simd8<int8_t> : base8_numeric<int8_t> {
really_inline simd8() : base8_numeric<int8_t>() {} simdjson_really_inline simd8() : base8_numeric<int8_t>() {}
really_inline simd8(const __m256i _value) : base8_numeric<int8_t>(_value) {} simdjson_really_inline simd8(const __m256i _value) : base8_numeric<int8_t>(_value) {}
// Splat constructor // Splat constructor
really_inline simd8(int8_t _value) : simd8(splat(_value)) {} simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {}
// Array constructor // Array constructor
really_inline simd8(const int8_t values[32]) : simd8(load(values)) {} simdjson_really_inline simd8(const int8_t values[32]) : simd8(load(values)) {}
// Member-by-member initialization // Member-by-member initialization
really_inline simd8( simdjson_really_inline simd8(
int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15,
int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23,
@ -197,7 +197,7 @@ namespace simd {
v24,v25,v26,v27,v28,v29,v30,v31 v24,v25,v26,v27,v28,v29,v30,v31
)) {} )) {}
// Repeat 16 values as many times as necessary (usually for lookup tables) // Repeat 16 values as many times as necessary (usually for lookup tables)
really_inline static simd8<int8_t> repeat_16( simdjson_really_inline static simd8<int8_t> repeat_16(
int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
) { ) {
@ -210,23 +210,23 @@ namespace simd {
} }
// Order-sensitive comparisons // Order-sensitive comparisons
really_inline simd8<int8_t> max(const simd8<int8_t> other) const { return _mm256_max_epi8(*this, other); } simdjson_really_inline simd8<int8_t> max(const simd8<int8_t> other) const { return _mm256_max_epi8(*this, other); }
really_inline simd8<int8_t> min(const simd8<int8_t> other) const { return _mm256_min_epi8(*this, other); } simdjson_really_inline simd8<int8_t> min(const simd8<int8_t> other) const { return _mm256_min_epi8(*this, other); }
really_inline simd8<bool> operator>(const simd8<int8_t> other) const { return _mm256_cmpgt_epi8(*this, other); } simdjson_really_inline simd8<bool> operator>(const simd8<int8_t> other) const { return _mm256_cmpgt_epi8(*this, other); }
really_inline simd8<bool> operator<(const simd8<int8_t> other) const { return _mm256_cmpgt_epi8(other, *this); } simdjson_really_inline simd8<bool> operator<(const simd8<int8_t> other) const { return _mm256_cmpgt_epi8(other, *this); }
}; };
// Unsigned bytes // Unsigned bytes
template<> template<>
struct simd8<uint8_t>: base8_numeric<uint8_t> { struct simd8<uint8_t>: base8_numeric<uint8_t> {
really_inline simd8() : base8_numeric<uint8_t>() {} simdjson_really_inline simd8() : base8_numeric<uint8_t>() {}
really_inline simd8(const __m256i _value) : base8_numeric<uint8_t>(_value) {} simdjson_really_inline simd8(const __m256i _value) : base8_numeric<uint8_t>(_value) {}
// Splat constructor // Splat constructor
really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
// Array constructor // Array constructor
really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} simdjson_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {}
// Member-by-member initialization // Member-by-member initialization
really_inline simd8( simdjson_really_inline simd8(
uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15,
uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23,
@ -238,7 +238,7 @@ namespace simd {
v24,v25,v26,v27,v28,v29,v30,v31 v24,v25,v26,v27,v28,v29,v30,v31
)) {} )) {}
// Repeat 16 values as many times as necessary (usually for lookup tables) // Repeat 16 values as many times as necessary (usually for lookup tables)
really_inline static simd8<uint8_t> repeat_16( simdjson_really_inline static simd8<uint8_t> repeat_16(
uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
) { ) {
@ -251,39 +251,39 @@ namespace simd {
} }
// Saturated math // Saturated math
really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return _mm256_adds_epu8(*this, other); } simdjson_really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return _mm256_adds_epu8(*this, other); }
really_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return _mm256_subs_epu8(*this, other); } simdjson_really_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return _mm256_subs_epu8(*this, other); }
// Order-specific operations // Order-specific operations
really_inline simd8<uint8_t> max(const simd8<uint8_t> other) const { return _mm256_max_epu8(*this, other); } simdjson_really_inline simd8<uint8_t> max(const simd8<uint8_t> other) const { return _mm256_max_epu8(*this, other); }
really_inline simd8<uint8_t> min(const simd8<uint8_t> other) const { return _mm256_min_epu8(other, *this); } simdjson_really_inline simd8<uint8_t> min(const simd8<uint8_t> other) const { return _mm256_min_epu8(other, *this); }
// Same as >, but only guarantees true is nonzero (< guarantees true = -1) // Same as >, but only guarantees true is nonzero (< guarantees true = -1)
really_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return this->saturating_sub(other); } simdjson_really_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return this->saturating_sub(other); }
// Same as <, but only guarantees true is nonzero (< guarantees true = -1) // Same as <, but only guarantees true is nonzero (< guarantees true = -1)
really_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return other.saturating_sub(*this); } simdjson_really_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return other.saturating_sub(*this); }
really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return other.max(*this) == other; } simdjson_really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return other.max(*this) == other; }
really_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return other.min(*this) == other; } simdjson_really_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return other.min(*this) == other; }
really_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); } simdjson_really_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); }
really_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return this->lt_bits(other).any_bits_set(); } simdjson_really_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return this->lt_bits(other).any_bits_set(); }
// Bit-specific operations // Bit-specific operations
really_inline simd8<bool> bits_not_set() const { return *this == uint8_t(0); } simdjson_really_inline simd8<bool> bits_not_set() const { return *this == uint8_t(0); }
really_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const { return (*this & bits).bits_not_set(); } simdjson_really_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const { return (*this & bits).bits_not_set(); }
really_inline simd8<bool> any_bits_set() const { return ~this->bits_not_set(); } simdjson_really_inline simd8<bool> any_bits_set() const { return ~this->bits_not_set(); }
really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return ~this->bits_not_set(bits); } simdjson_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return ~this->bits_not_set(bits); }
really_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } simdjson_really_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; }
really_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } simdjson_really_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); }
really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } simdjson_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); }
really_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const { return _mm256_testz_si256(*this, bits); } simdjson_really_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const { return _mm256_testz_si256(*this, bits); }
really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !bits_not_set_anywhere(bits); } simdjson_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !bits_not_set_anywhere(bits); }
template<int N> template<int N>
really_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } simdjson_really_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); }
template<int N> template<int N>
really_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } simdjson_really_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); }
// Get one of the bits and make a bitmask out of it. // Get one of the bits and make a bitmask out of it.
// e.g. value.get_bit<7>() gets the high bit // e.g. value.get_bit<7>() gets the high bit
template<int N> template<int N>
really_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } simdjson_really_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); }
}; };
template<typename T> template<typename T>
@ -296,32 +296,32 @@ namespace simd {
simd8x64<T>& operator=(const simd8<T> other) = delete; // no assignment allowed simd8x64<T>& operator=(const simd8<T> other) = delete; // no assignment allowed
simd8x64() = delete; // no default constructor allowed simd8x64() = delete; // no default constructor allowed
really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1) : chunks{chunk0, chunk1} {} simdjson_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1) : chunks{chunk0, chunk1} {}
really_inline simd8x64(const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+32)} {} simdjson_really_inline simd8x64(const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+32)} {}
really_inline void compress(uint64_t mask, T * output) const { simdjson_really_inline void compress(uint64_t mask, T * output) const {
uint32_t mask1 = uint32_t(mask); uint32_t mask1 = uint32_t(mask);
uint32_t mask2 = uint32_t(mask >> 32); uint32_t mask2 = uint32_t(mask >> 32);
this->chunks[0].compress(mask1, output); this->chunks[0].compress(mask1, output);
this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); this->chunks[1].compress(mask2, output + 32 - count_ones(mask1));
} }
really_inline void store(T ptr[64]) const { simdjson_really_inline void store(T ptr[64]) const {
this->chunks[0].store(ptr+sizeof(simd8<T>)*0); this->chunks[0].store(ptr+sizeof(simd8<T>)*0);
this->chunks[1].store(ptr+sizeof(simd8<T>)*1); this->chunks[1].store(ptr+sizeof(simd8<T>)*1);
} }
really_inline uint64_t to_bitmask() const { simdjson_really_inline uint64_t to_bitmask() const {
uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask());
uint64_t r_hi = this->chunks[1].to_bitmask(); uint64_t r_hi = this->chunks[1].to_bitmask();
return r_lo | (r_hi << 32); return r_lo | (r_hi << 32);
} }
really_inline simd8<T> reduce_or() const { simdjson_really_inline simd8<T> reduce_or() const {
return this->chunks[0] | this->chunks[1]; return this->chunks[0] | this->chunks[1];
} }
really_inline simd8x64<T> bit_or(const T m) const { simdjson_really_inline simd8x64<T> bit_or(const T m) const {
const simd8<T> mask = simd8<T>::splat(m); const simd8<T> mask = simd8<T>::splat(m);
return simd8x64<T>( return simd8x64<T>(
this->chunks[0] | mask, this->chunks[0] | mask,
@ -329,7 +329,7 @@ namespace simd {
); );
} }
really_inline uint64_t eq(const T m) const { simdjson_really_inline uint64_t eq(const T m) const {
const simd8<T> mask = simd8<T>::splat(m); const simd8<T> mask = simd8<T>::splat(m);
return simd8x64<bool>( return simd8x64<bool>(
this->chunks[0] == mask, this->chunks[0] == mask,
@ -337,7 +337,7 @@ namespace simd {
).to_bitmask(); ).to_bitmask();
} }
really_inline uint64_t lteq(const T m) const { simdjson_really_inline uint64_t lteq(const T m) const {
const simd8<T> mask = simd8<T>::splat(m); const simd8<T> mask = simd8<T>::splat(m);
return simd8x64<bool>( return simd8x64<bool>(
this->chunks[0] <= mask, this->chunks[0] <= mask,

View File

@ -14,18 +14,18 @@ using namespace simd;
struct backslash_and_quote { struct backslash_and_quote {
public: public:
static constexpr uint32_t BYTES_PROCESSED = 32; static constexpr uint32_t BYTES_PROCESSED = 32;
really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst);
really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; }
really_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } simdjson_really_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; }
really_inline int quote_index() { return trailing_zeroes(quote_bits); } simdjson_really_inline int quote_index() { return trailing_zeroes(quote_bits); }
really_inline int backslash_index() { return trailing_zeroes(bs_bits); } simdjson_really_inline int backslash_index() { return trailing_zeroes(bs_bits); }
uint32_t bs_bits; uint32_t bs_bits;
uint32_t quote_bits; uint32_t quote_bits;
}; // struct backslash_and_quote }; // struct backslash_and_quote
really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) {
// this can read up to 15 bytes beyond the buffer size, but we require // this can read up to 15 bytes beyond the buffer size, but we require
// SIMDJSON_PADDING of padding // SIMDJSON_PADDING of padding
static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes");

View File

@ -38,20 +38,20 @@ public:
const std::string &name() const noexcept final { return set_best()->name(); } const std::string &name() const noexcept final { return set_best()->name(); }
const std::string &description() const noexcept final { return set_best()->description(); } const std::string &description() const noexcept final { return set_best()->description(); }
uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); } uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); }
WARN_UNUSED error_code create_dom_parser_implementation( SIMDJSON_WARN_UNUSED error_code create_dom_parser_implementation(
size_t capacity, size_t capacity,
size_t max_length, size_t max_length,
std::unique_ptr<internal::dom_parser_implementation>& dst std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept final { ) const noexcept final {
return set_best()->create_dom_parser_implementation(capacity, max_length, dst); return set_best()->create_dom_parser_implementation(capacity, max_length, dst);
} }
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final { SIMDJSON_WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final {
return set_best()->minify(buf, len, dst, dst_len); return set_best()->minify(buf, len, dst, dst_len);
} }
WARN_UNUSED bool validate_utf8(const char * buf, size_t len) const noexcept final override { SIMDJSON_WARN_UNUSED bool validate_utf8(const char * buf, size_t len) const noexcept final override {
return set_best()->validate_utf8(buf, len); return set_best()->validate_utf8(buf, len);
} }
really_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {} simdjson_really_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {}
private: private:
const implementation *set_best() const noexcept; const implementation *set_best() const noexcept;
}; };
@ -76,17 +76,17 @@ const std::initializer_list<const implementation *> available_implementation_poi
// So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no support // So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no support
class unsupported_implementation final : public implementation { class unsupported_implementation final : public implementation {
public: public:
WARN_UNUSED error_code create_dom_parser_implementation( SIMDJSON_WARN_UNUSED error_code create_dom_parser_implementation(
size_t, size_t,
size_t, size_t,
std::unique_ptr<internal::dom_parser_implementation>& std::unique_ptr<internal::dom_parser_implementation>&
) const noexcept final { ) const noexcept final {
return UNSUPPORTED_ARCHITECTURE; return UNSUPPORTED_ARCHITECTURE;
} }
WARN_UNUSED error_code minify(const uint8_t *, size_t, uint8_t *, size_t &) const noexcept final override { SIMDJSON_WARN_UNUSED error_code minify(const uint8_t *, size_t, uint8_t *, size_t &) const noexcept final override {
return UNSUPPORTED_ARCHITECTURE; return UNSUPPORTED_ARCHITECTURE;
} }
WARN_UNUSED bool validate_utf8(const char *, size_t) const noexcept final override { SIMDJSON_WARN_UNUSED bool validate_utf8(const char *, size_t) const noexcept final override {
return false; // Just refuse to validate. Given that we have a fallback implementation return false; // Just refuse to validate. Given that we have a fallback implementation
// it seems unlikely that unsupported_implementation will ever be used. If it is used, // it seems unlikely that unsupported_implementation will ever be used. If it is used,
// then it will flag all strings as invalid. The alternative is to return an error_code // then it will flag all strings as invalid. The alternative is to return an error_code
@ -143,10 +143,10 @@ const implementation *detect_best_supported_implementation_on_first_use::set_bes
SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list available_implementations{}; SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list available_implementations{};
SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr<const implementation> active_implementation{&internal::detect_best_supported_implementation_on_first_use_singleton}; SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr<const implementation> active_implementation{&internal::detect_best_supported_implementation_on_first_use_singleton};
WARN_UNUSED error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept { SIMDJSON_WARN_UNUSED error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept {
return active_implementation->minify((const uint8_t *)buf, len, (uint8_t *)dst, dst_len); return active_implementation->minify((const uint8_t *)buf, len, (uint8_t *)dst, dst_len);
} }
WARN_UNUSED bool validate_utf8(const char *buf, size_t len) noexcept { SIMDJSON_WARN_UNUSED bool validate_utf8(const char *buf, size_t len) noexcept {
return active_implementation->validate_utf8(buf, len); return active_implementation->validate_utf8(buf, len);
} }

View File

@ -8,7 +8,7 @@ namespace SIMDJSON_IMPLEMENTATION {
// but the algorithms do not end up using the returned value. // but the algorithms do not end up using the returned value.
// Sadly, sanitizers are not smart enough to figure it out. // Sadly, sanitizers are not smart enough to figure it out.
NO_SANITIZE_UNDEFINED NO_SANITIZE_UNDEFINED
really_inline int trailing_zeroes(uint64_t input_num) { simdjson_really_inline int trailing_zeroes(uint64_t input_num) {
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
unsigned long ret; unsigned long ret;
// Search the mask data from least significant bit (LSB) // Search the mask data from least significant bit (LSB)
@ -21,12 +21,12 @@ really_inline int trailing_zeroes(uint64_t input_num) {
} }
/* result might be undefined when input_num is zero */ /* result might be undefined when input_num is zero */
really_inline uint64_t clear_lowest_bit(uint64_t input_num) { simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) {
return input_num & (input_num-1); return input_num & (input_num-1);
} }
/* result might be undefined when input_num is zero */ /* result might be undefined when input_num is zero */
really_inline int leading_zeroes(uint64_t input_num) { simdjson_really_inline int leading_zeroes(uint64_t input_num) {
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
unsigned long leading_zero = 0; unsigned long leading_zero = 0;
// Search the mask data from most significant bit (MSB) // Search the mask data from most significant bit (MSB)
@ -41,17 +41,17 @@ really_inline int leading_zeroes(uint64_t input_num) {
} }
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
really_inline unsigned __int64 count_ones(uint64_t input_num) { simdjson_really_inline unsigned __int64 count_ones(uint64_t input_num) {
// note: we do not support legacy 32-bit Windows // note: we do not support legacy 32-bit Windows
return __popcnt64(input_num);// Visual Studio wants two underscores return __popcnt64(input_num);// Visual Studio wants two underscores
} }
#else #else
really_inline long long int count_ones(uint64_t input_num) { simdjson_really_inline long long int count_ones(uint64_t input_num) {
return _popcnt64(input_num); return _popcnt64(input_num);
} }
#endif #endif
really_inline bool add_overflow(uint64_t value1, uint64_t value2, simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2,
uint64_t *result) { uint64_t *result) {
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
return _addcarry_u64(0, value1, value2, return _addcarry_u64(0, value1, value2,

View File

@ -9,7 +9,7 @@ namespace SIMDJSON_IMPLEMENTATION {
// //
// For example, prefix_xor(00100100) == 00011100 // For example, prefix_xor(00100100) == 00011100
// //
really_inline uint64_t prefix_xor(const uint64_t bitmask) { simdjson_really_inline uint64_t prefix_xor(const uint64_t bitmask) {
// There should be no such thing with a processing supporting avx2 // There should be no such thing with a processing supporting avx2
// but not clmul. // but not clmul.
__m128i all_ones = _mm_set1_epi8('\xFF'); __m128i all_ones = _mm_set1_epi8('\xFF');

View File

@ -12,17 +12,17 @@ namespace SIMDJSON_IMPLEMENTATION {
using namespace simd; using namespace simd;
struct json_character_block { struct json_character_block {
static really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in); static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in);
really_inline uint64_t whitespace() const { return _whitespace; } simdjson_really_inline uint64_t whitespace() const { return _whitespace; }
really_inline uint64_t op() const { return _op; } simdjson_really_inline uint64_t op() const { return _op; }
really_inline uint64_t scalar() { return ~(op() | whitespace()); } simdjson_really_inline uint64_t scalar() { return ~(op() | whitespace()); }
uint64_t _whitespace; uint64_t _whitespace;
uint64_t _op; uint64_t _op;
}; };
really_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t>& in) { simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t>& in) {
// These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why
// we can't use the generic lookup_16. // we can't use the generic lookup_16.
auto whitespace_table = simd8<uint8_t>::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); auto whitespace_table = simd8<uint8_t>::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100);
@ -50,11 +50,11 @@ really_inline json_character_block json_character_block::classify(const simd::si
return { whitespace, op }; return { whitespace, op };
} }
really_inline bool is_ascii(const simd8x64<uint8_t>& input) { simdjson_really_inline bool is_ascii(const simd8x64<uint8_t>& input) {
return input.reduce_or().is_ascii(); return input.reduce_or().is_ascii();
} }
UNUSED really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) { SIMDJSON_UNUSED simdjson_really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
simd8<uint8_t> is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0 simd8<uint8_t> is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0
simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
@ -62,7 +62,7 @@ UNUSED really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1
return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0);
} }
really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) { simdjson_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
// Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine.
@ -92,40 +92,40 @@ namespace {
namespace SIMDJSON_IMPLEMENTATION { namespace SIMDJSON_IMPLEMENTATION {
namespace stage1 { namespace stage1 {
really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) {
if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; }
return find_escaped_branchless(backslash); return find_escaped_branchless(backslash);
} }
} // namespace stage1 } // namespace stage1
WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { SIMDJSON_WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
return westmere::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); return westmere::stage1::json_minifier::minify<64>(buf, len, dst, dst_len);
} }
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept { SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
this->buf = _buf; this->buf = _buf;
this->len = _len; this->len = _len;
return westmere::stage1::json_structural_indexer::index<64>(_buf, _len, *this, streaming); return westmere::stage1::json_structural_indexer::index<64>(_buf, _len, *this, streaming);
} }
WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { SIMDJSON_WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
return westmere::stage1::generic_validate_utf8(buf,len); return westmere::stage1::generic_validate_utf8(buf,len);
} }
WARN_UNUSED error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept {
doc = &_doc; doc = &_doc;
stage2::tape_builder builder(*doc); stage2::tape_builder builder(*doc);
return stage2::structural_parser::parse<false>(*this, builder); return stage2::structural_parser::parse<false>(*this, builder);
} }
WARN_UNUSED error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept {
doc = &_doc; doc = &_doc;
stage2::tape_builder builder(_doc); stage2::tape_builder builder(_doc);
return stage2::structural_parser::parse<true>(*this, builder); return stage2::structural_parser::parse<true>(*this, builder);
} }
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
auto error = stage1(_buf, _len, false); auto error = stage1(_buf, _len, false);
if (error) { return error; } if (error) { return error; }
return stage2(_doc); return stage2(_doc);

View File

@ -4,7 +4,7 @@
namespace { namespace {
namespace SIMDJSON_IMPLEMENTATION { namespace SIMDJSON_IMPLEMENTATION {
WARN_UNUSED error_code implementation::create_dom_parser_implementation( SIMDJSON_WARN_UNUSED error_code implementation::create_dom_parser_implementation(
size_t capacity, size_t capacity,
size_t max_depth, size_t max_depth,
std::unique_ptr<internal::dom_parser_implementation>& dst std::unique_ptr<internal::dom_parser_implementation>& dst

View File

@ -14,14 +14,14 @@ using namespace simdjson::dom;
class implementation final : public simdjson::implementation { class implementation final : public simdjson::implementation {
public: public:
really_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", instruction_set::SSE42 | instruction_set::PCLMULQDQ) {} simdjson_really_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", instruction_set::SSE42 | instruction_set::PCLMULQDQ) {}
WARN_UNUSED error_code create_dom_parser_implementation( SIMDJSON_WARN_UNUSED error_code create_dom_parser_implementation(
size_t capacity, size_t capacity,
size_t max_length, size_t max_length,
std::unique_ptr<internal::dom_parser_implementation>& dst std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept final; ) const noexcept final;
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; SIMDJSON_WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
WARN_UNUSED bool validate_utf8(const char *buf, size_t len) const noexcept final; SIMDJSON_WARN_UNUSED bool validate_utf8(const char *buf, size_t len) const noexcept final;
}; };
} // namespace westmere } // namespace westmere

View File

@ -4,7 +4,7 @@
namespace { namespace {
namespace SIMDJSON_IMPLEMENTATION { namespace SIMDJSON_IMPLEMENTATION {
static really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) {
// this actually computes *16* values so we are being wasteful. // this actually computes *16* values so we are being wasteful.
const __m128i ascii0 = _mm_set1_epi8('0'); const __m128i ascii0 = _mm_set1_epi8('0');
const __m128i mul_1_10 = const __m128i mul_1_10 =

View File

@ -12,23 +12,23 @@ namespace simd {
__m128i value; __m128i value;
// Zero constructor // Zero constructor
really_inline base() : value{__m128i()} {} simdjson_really_inline base() : value{__m128i()} {}
// Conversion from SIMD register // Conversion from SIMD register
really_inline base(const __m128i _value) : value(_value) {} simdjson_really_inline base(const __m128i _value) : value(_value) {}
// Conversion to SIMD register // Conversion to SIMD register
really_inline operator const __m128i&() const { return this->value; } simdjson_really_inline operator const __m128i&() const { return this->value; }
really_inline operator __m128i&() { return this->value; } simdjson_really_inline operator __m128i&() { return this->value; }
// Bit operations // Bit operations
really_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } simdjson_really_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); }
really_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } simdjson_really_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); }
really_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } simdjson_really_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); }
really_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } simdjson_really_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); }
really_inline Child& operator|=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast | other; return *this_cast; } simdjson_really_inline Child& operator|=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast | other; return *this_cast; }
really_inline Child& operator&=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast & other; return *this_cast; } simdjson_really_inline Child& operator&=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast & other; return *this_cast; }
really_inline Child& operator^=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast ^ other; return *this_cast; } simdjson_really_inline Child& operator^=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast ^ other; return *this_cast; }
}; };
// Forward-declared so they can be used by splat and friends. // Forward-declared so they can be used by splat and friends.
@ -40,15 +40,15 @@ namespace simd {
typedef uint16_t bitmask_t; typedef uint16_t bitmask_t;
typedef uint32_t bitmask2_t; typedef uint32_t bitmask2_t;
really_inline base8() : base<simd8<T>>() {} simdjson_really_inline base8() : base<simd8<T>>() {}
really_inline base8(const __m128i _value) : base<simd8<T>>(_value) {} simdjson_really_inline base8(const __m128i _value) : base<simd8<T>>(_value) {}
really_inline Mask operator==(const simd8<T> other) const { return _mm_cmpeq_epi8(*this, other); } simdjson_really_inline Mask operator==(const simd8<T> other) const { return _mm_cmpeq_epi8(*this, other); }
static const int SIZE = sizeof(base<simd8<T>>::value); static const int SIZE = sizeof(base<simd8<T>>::value);
template<int N=1> template<int N=1>
really_inline simd8<T> prev(const simd8<T> prev_chunk) const { simdjson_really_inline simd8<T> prev(const simd8<T> prev_chunk) const {
return _mm_alignr_epi8(*this, prev_chunk, 16 - N); return _mm_alignr_epi8(*this, prev_chunk, 16 - N);
} }
}; };
@ -56,27 +56,27 @@ namespace simd {
// SIMD byte mask type (returned by things like eq and gt) // SIMD byte mask type (returned by things like eq and gt)
template<> template<>
struct simd8<bool>: base8<bool> { struct simd8<bool>: base8<bool> {
static really_inline simd8<bool> splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } static simdjson_really_inline simd8<bool> splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); }
really_inline simd8<bool>() : base8() {} simdjson_really_inline simd8<bool>() : base8() {}
really_inline simd8<bool>(const __m128i _value) : base8<bool>(_value) {} simdjson_really_inline simd8<bool>(const __m128i _value) : base8<bool>(_value) {}
// Splat constructor // Splat constructor
really_inline simd8<bool>(bool _value) : base8<bool>(splat(_value)) {} simdjson_really_inline simd8<bool>(bool _value) : base8<bool>(splat(_value)) {}
really_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } simdjson_really_inline int to_bitmask() const { return _mm_movemask_epi8(*this); }
really_inline bool any() const { return !_mm_testz_si128(*this, *this); } simdjson_really_inline bool any() const { return !_mm_testz_si128(*this, *this); }
really_inline simd8<bool> operator~() const { return *this ^ true; } simdjson_really_inline simd8<bool> operator~() const { return *this ^ true; }
}; };
template<typename T> template<typename T>
struct base8_numeric: base8<T> { struct base8_numeric: base8<T> {
static really_inline simd8<T> splat(T _value) { return _mm_set1_epi8(_value); } static simdjson_really_inline simd8<T> splat(T _value) { return _mm_set1_epi8(_value); }
static really_inline simd8<T> zero() { return _mm_setzero_si128(); } static simdjson_really_inline simd8<T> zero() { return _mm_setzero_si128(); }
static really_inline simd8<T> load(const T values[16]) { static simdjson_really_inline simd8<T> load(const T values[16]) {
return _mm_loadu_si128(reinterpret_cast<const __m128i *>(values)); return _mm_loadu_si128(reinterpret_cast<const __m128i *>(values));
} }
// Repeat 16 values as many times as necessary (usually for lookup tables) // Repeat 16 values as many times as necessary (usually for lookup tables)
static really_inline simd8<T> repeat_16( static simdjson_really_inline simd8<T> repeat_16(
T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7,
T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15
) { ) {
@ -86,24 +86,24 @@ namespace simd {
); );
} }
really_inline base8_numeric() : base8<T>() {} simdjson_really_inline base8_numeric() : base8<T>() {}
really_inline base8_numeric(const __m128i _value) : base8<T>(_value) {} simdjson_really_inline base8_numeric(const __m128i _value) : base8<T>(_value) {}
// Store to array // Store to array
really_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } simdjson_really_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); }
// Override to distinguish from bool version // Override to distinguish from bool version
really_inline simd8<T> operator~() const { return *this ^ 0xFFu; } simdjson_really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
// Addition/subtraction are the same for signed and unsigned // Addition/subtraction are the same for signed and unsigned
really_inline simd8<T> operator+(const simd8<T> other) const { return _mm_add_epi8(*this, other); } simdjson_really_inline simd8<T> operator+(const simd8<T> other) const { return _mm_add_epi8(*this, other); }
really_inline simd8<T> operator-(const simd8<T> other) const { return _mm_sub_epi8(*this, other); } simdjson_really_inline simd8<T> operator-(const simd8<T> other) const { return _mm_sub_epi8(*this, other); }
really_inline simd8<T>& operator+=(const simd8<T> other) { *this = *this + other; return *(simd8<T>*)this; } simdjson_really_inline simd8<T>& operator+=(const simd8<T> other) { *this = *this + other; return *(simd8<T>*)this; }
really_inline simd8<T>& operator-=(const simd8<T> other) { *this = *this - other; return *(simd8<T>*)this; } simdjson_really_inline simd8<T>& operator-=(const simd8<T> other) { *this = *this - other; return *(simd8<T>*)this; }
// Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values)
template<typename L> template<typename L>
really_inline simd8<L> lookup_16(simd8<L> lookup_table) const { simdjson_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
return _mm_shuffle_epi8(lookup_table, *this); return _mm_shuffle_epi8(lookup_table, *this);
} }
@ -115,7 +115,7 @@ namespace simd {
// signature simd8<L> compress(uint32_t mask) would be // signature simd8<L> compress(uint32_t mask) would be
// sensible, but the AVX ISA makes this kind of approach difficult. // sensible, but the AVX ISA makes this kind of approach difficult.
template<typename L> template<typename L>
really_inline void compress(uint16_t mask, L * output) const { simdjson_really_inline void compress(uint16_t mask, L * output) const {
// this particular implementation was inspired by work done by @animetosho // this particular implementation was inspired by work done by @animetosho
// we do it in two steps, first 8 bytes and then second 8 bytes // we do it in two steps, first 8 bytes and then second 8 bytes
uint8_t mask1 = uint8_t(mask); // least significant 8 bits uint8_t mask1 = uint8_t(mask); // least significant 8 bits
@ -143,7 +143,7 @@ namespace simd {
} }
template<typename L> template<typename L>
really_inline simd8<L> lookup_16( simdjson_really_inline simd8<L> lookup_16(
L replace0, L replace1, L replace2, L replace3, L replace0, L replace1, L replace2, L replace3,
L replace4, L replace5, L replace6, L replace7, L replace4, L replace5, L replace6, L replace7,
L replace8, L replace9, L replace10, L replace11, L replace8, L replace9, L replace10, L replace11,
@ -160,14 +160,14 @@ namespace simd {
// Signed bytes // Signed bytes
template<> template<>
struct simd8<int8_t> : base8_numeric<int8_t> { struct simd8<int8_t> : base8_numeric<int8_t> {
really_inline simd8() : base8_numeric<int8_t>() {} simdjson_really_inline simd8() : base8_numeric<int8_t>() {}
really_inline simd8(const __m128i _value) : base8_numeric<int8_t>(_value) {} simdjson_really_inline simd8(const __m128i _value) : base8_numeric<int8_t>(_value) {}
// Splat constructor // Splat constructor
really_inline simd8(int8_t _value) : simd8(splat(_value)) {} simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {}
// Array constructor // Array constructor
really_inline simd8(const int8_t* values) : simd8(load(values)) {} simdjson_really_inline simd8(const int8_t* values) : simd8(load(values)) {}
// Member-by-member initialization // Member-by-member initialization
really_inline simd8( simdjson_really_inline simd8(
int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
) : simd8(_mm_setr_epi8( ) : simd8(_mm_setr_epi8(
@ -175,7 +175,7 @@ namespace simd {
v8, v9, v10,v11,v12,v13,v14,v15 v8, v9, v10,v11,v12,v13,v14,v15
)) {} )) {}
// Repeat 16 values as many times as necessary (usually for lookup tables) // Repeat 16 values as many times as necessary (usually for lookup tables)
really_inline static simd8<int8_t> repeat_16( simdjson_really_inline static simd8<int8_t> repeat_16(
int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
) { ) {
@ -186,23 +186,23 @@ namespace simd {
} }
// Order-sensitive comparisons // Order-sensitive comparisons
really_inline simd8<int8_t> max(const simd8<int8_t> other) const { return _mm_max_epi8(*this, other); } simdjson_really_inline simd8<int8_t> max(const simd8<int8_t> other) const { return _mm_max_epi8(*this, other); }
really_inline simd8<int8_t> min(const simd8<int8_t> other) const { return _mm_min_epi8(*this, other); } simdjson_really_inline simd8<int8_t> min(const simd8<int8_t> other) const { return _mm_min_epi8(*this, other); }
really_inline simd8<bool> operator>(const simd8<int8_t> other) const { return _mm_cmpgt_epi8(*this, other); } simdjson_really_inline simd8<bool> operator>(const simd8<int8_t> other) const { return _mm_cmpgt_epi8(*this, other); }
really_inline simd8<bool> operator<(const simd8<int8_t> other) const { return _mm_cmpgt_epi8(other, *this); } simdjson_really_inline simd8<bool> operator<(const simd8<int8_t> other) const { return _mm_cmpgt_epi8(other, *this); }
}; };
// Unsigned bytes // Unsigned bytes
template<> template<>
struct simd8<uint8_t>: base8_numeric<uint8_t> { struct simd8<uint8_t>: base8_numeric<uint8_t> {
really_inline simd8() : base8_numeric<uint8_t>() {} simdjson_really_inline simd8() : base8_numeric<uint8_t>() {}
really_inline simd8(const __m128i _value) : base8_numeric<uint8_t>(_value) {} simdjson_really_inline simd8(const __m128i _value) : base8_numeric<uint8_t>(_value) {}
// Splat constructor // Splat constructor
really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
// Array constructor // Array constructor
really_inline simd8(const uint8_t* values) : simd8(load(values)) {} simdjson_really_inline simd8(const uint8_t* values) : simd8(load(values)) {}
// Member-by-member initialization // Member-by-member initialization
really_inline simd8( simdjson_really_inline simd8(
uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
) : simd8(_mm_setr_epi8( ) : simd8(_mm_setr_epi8(
@ -210,7 +210,7 @@ namespace simd {
v8, v9, v10,v11,v12,v13,v14,v15 v8, v9, v10,v11,v12,v13,v14,v15
)) {} )) {}
// Repeat 16 values as many times as necessary (usually for lookup tables) // Repeat 16 values as many times as necessary (usually for lookup tables)
really_inline static simd8<uint8_t> repeat_16( simdjson_really_inline static simd8<uint8_t> repeat_16(
uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
) { ) {
@ -221,39 +221,39 @@ namespace simd {
} }
// Saturated math // Saturated math
really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return _mm_adds_epu8(*this, other); } simdjson_really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return _mm_adds_epu8(*this, other); }
really_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return _mm_subs_epu8(*this, other); } simdjson_really_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return _mm_subs_epu8(*this, other); }
// Order-specific operations // Order-specific operations
really_inline simd8<uint8_t> max(const simd8<uint8_t> other) const { return _mm_max_epu8(*this, other); } simdjson_really_inline simd8<uint8_t> max(const simd8<uint8_t> other) const { return _mm_max_epu8(*this, other); }
really_inline simd8<uint8_t> min(const simd8<uint8_t> other) const { return _mm_min_epu8(*this, other); } simdjson_really_inline simd8<uint8_t> min(const simd8<uint8_t> other) const { return _mm_min_epu8(*this, other); }
// Same as >, but only guarantees true is nonzero (< guarantees true = -1) // Same as >, but only guarantees true is nonzero (< guarantees true = -1)
really_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return this->saturating_sub(other); } simdjson_really_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return this->saturating_sub(other); }
// Same as <, but only guarantees true is nonzero (< guarantees true = -1) // Same as <, but only guarantees true is nonzero (< guarantees true = -1)
really_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return other.saturating_sub(*this); } simdjson_really_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return other.saturating_sub(*this); }
really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return other.max(*this) == other; } simdjson_really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return other.max(*this) == other; }
really_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return other.min(*this) == other; } simdjson_really_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return other.min(*this) == other; }
really_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); } simdjson_really_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); }
really_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); } simdjson_really_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); }
// Bit-specific operations // Bit-specific operations
really_inline simd8<bool> bits_not_set() const { return *this == uint8_t(0); } simdjson_really_inline simd8<bool> bits_not_set() const { return *this == uint8_t(0); }
really_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const { return (*this & bits).bits_not_set(); } simdjson_really_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const { return (*this & bits).bits_not_set(); }
really_inline simd8<bool> any_bits_set() const { return ~this->bits_not_set(); } simdjson_really_inline simd8<bool> any_bits_set() const { return ~this->bits_not_set(); }
really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return ~this->bits_not_set(bits); } simdjson_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return ~this->bits_not_set(bits); }
really_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } simdjson_really_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; }
really_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } simdjson_really_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); }
really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } simdjson_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); }
really_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const { return _mm_testz_si128(*this, bits); } simdjson_really_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const { return _mm_testz_si128(*this, bits); }
really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !bits_not_set_anywhere(bits); } simdjson_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !bits_not_set_anywhere(bits); }
template<int N> template<int N>
really_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } simdjson_really_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); }
template<int N> template<int N>
really_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } simdjson_really_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); }
// Get one of the bits and make a bitmask out of it. // Get one of the bits and make a bitmask out of it.
// e.g. value.get_bit<7>() gets the high bit // e.g. value.get_bit<7>() gets the high bit
template<int N> template<int N>
really_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } simdjson_really_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); }
}; };
template<typename T> template<typename T>
@ -266,28 +266,28 @@ namespace simd {
simd8x64<T>& operator=(const simd8<T> other) = delete; // no assignment allowed simd8x64<T>& operator=(const simd8<T> other) = delete; // no assignment allowed
simd8x64() = delete; // no default constructor allowed simd8x64() = delete; // no default constructor allowed
really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1, const simd8<T> chunk2, const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} simdjson_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1, const simd8<T> chunk2, const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
really_inline simd8x64(const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+16), simd8<T>::load(ptr+32), simd8<T>::load(ptr+48)} {} simdjson_really_inline simd8x64(const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+16), simd8<T>::load(ptr+32), simd8<T>::load(ptr+48)} {}
really_inline void store(T ptr[64]) const { simdjson_really_inline void store(T ptr[64]) const {
this->chunks[0].store(ptr+sizeof(simd8<T>)*0); this->chunks[0].store(ptr+sizeof(simd8<T>)*0);
this->chunks[1].store(ptr+sizeof(simd8<T>)*1); this->chunks[1].store(ptr+sizeof(simd8<T>)*1);
this->chunks[2].store(ptr+sizeof(simd8<T>)*2); this->chunks[2].store(ptr+sizeof(simd8<T>)*2);
this->chunks[3].store(ptr+sizeof(simd8<T>)*3); this->chunks[3].store(ptr+sizeof(simd8<T>)*3);
} }
really_inline simd8<T> reduce_or() const { simdjson_really_inline simd8<T> reduce_or() const {
return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]);
} }
really_inline void compress(uint64_t mask, T * output) const { simdjson_really_inline void compress(uint64_t mask, T * output) const {
this->chunks[0].compress(uint16_t(mask), output); this->chunks[0].compress(uint16_t(mask), output);
this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF));
this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF));
this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF));
} }
really_inline uint64_t to_bitmask() const { simdjson_really_inline uint64_t to_bitmask() const {
uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); uint64_t r0 = uint32_t(this->chunks[0].to_bitmask());
uint64_t r1 = this->chunks[1].to_bitmask(); uint64_t r1 = this->chunks[1].to_bitmask();
uint64_t r2 = this->chunks[2].to_bitmask(); uint64_t r2 = this->chunks[2].to_bitmask();
@ -295,7 +295,7 @@ namespace simd {
return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
} }
really_inline simd8x64<T> bit_or(const T m) const { simdjson_really_inline simd8x64<T> bit_or(const T m) const {
const simd8<T> mask = simd8<T>::splat(m); const simd8<T> mask = simd8<T>::splat(m);
return simd8x64<T>( return simd8x64<T>(
this->chunks[0] | mask, this->chunks[0] | mask,
@ -305,7 +305,7 @@ namespace simd {
); );
} }
really_inline uint64_t eq(const T m) const { simdjson_really_inline uint64_t eq(const T m) const {
const simd8<T> mask = simd8<T>::splat(m); const simd8<T> mask = simd8<T>::splat(m);
return simd8x64<bool>( return simd8x64<bool>(
this->chunks[0] == mask, this->chunks[0] == mask,
@ -315,7 +315,7 @@ namespace simd {
).to_bitmask(); ).to_bitmask();
} }
really_inline uint64_t lteq(const T m) const { simdjson_really_inline uint64_t lteq(const T m) const {
const simd8<T> mask = simd8<T>::splat(m); const simd8<T> mask = simd8<T>::splat(m);
return simd8x64<bool>( return simd8x64<bool>(
this->chunks[0] <= mask, this->chunks[0] <= mask,

View File

@ -10,18 +10,18 @@ using namespace simd;
struct backslash_and_quote { struct backslash_and_quote {
public: public:
static constexpr uint32_t BYTES_PROCESSED = 32; static constexpr uint32_t BYTES_PROCESSED = 32;
really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst);
really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; }
really_inline bool has_backslash() { return bs_bits != 0; } simdjson_really_inline bool has_backslash() { return bs_bits != 0; }
really_inline int quote_index() { return trailing_zeroes(quote_bits); } simdjson_really_inline int quote_index() { return trailing_zeroes(quote_bits); }
really_inline int backslash_index() { return trailing_zeroes(bs_bits); } simdjson_really_inline int backslash_index() { return trailing_zeroes(bs_bits); }
uint32_t bs_bits; uint32_t bs_bits;
uint32_t quote_bits; uint32_t quote_bits;
}; // struct backslash_and_quote }; // struct backslash_and_quote
really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) {
// this can read up to 31 bytes beyond the buffer size, but we require // this can read up to 31 bytes beyond the buffer size, but we require
// SIMDJSON_PADDING of padding // SIMDJSON_PADDING of padding
static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes");

View File

@ -59,7 +59,7 @@ add_cpp_test(integer_tests LABELS acceptance per_implementation)
add_cpp_test(jsoncheck LABELS acceptance per_implementation) add_cpp_test(jsoncheck LABELS acceptance per_implementation)
add_cpp_test(minefieldcheck LABELS acceptance per_implementation) add_cpp_test(minefieldcheck LABELS acceptance per_implementation)
add_cpp_test(parse_many_test LABELS acceptance per_implementation) add_cpp_test(parse_many_test LABELS acceptance per_implementation)
add_cpp_test(pointercheck LABELS acceptance per_implementation) add_cpp_test(pointercheck LABELS acceptance per_implementation) # https://tools.ietf.org/html/rfc6901
add_cpp_test(extracting_values_example LABELS acceptance per_implementation) add_cpp_test(extracting_values_example LABELS acceptance per_implementation)
add_cpp_test(unicode_tests LABELS acceptance per_implementation) add_cpp_test(unicode_tests LABELS acceptance per_implementation)

View File

@ -212,7 +212,7 @@ namespace parse_api_tests {
simdjson::dom::document_stream stream; simdjson::dom::document_stream stream;
ASSERT_SUCCESS( parser.parse_many(BASIC_NDJSON).get(stream) ); ASSERT_SUCCESS( parser.parse_many(BASIC_NDJSON).get(stream) );
for (auto doc : stream) { for (auto doc : stream) {
UNUSED dom::array array; SIMDJSON_UNUSED dom::array array;
ASSERT_SUCCESS( doc.get(array) ); ASSERT_SUCCESS( doc.get(array) );
count++; count++;
} }
@ -227,7 +227,7 @@ namespace parse_api_tests {
dom::parser parser; dom::parser parser;
int count = 0; int count = 0;
for (auto doc : parser.parse_many(BASIC_NDJSON)) { for (auto doc : parser.parse_many(BASIC_NDJSON)) {
UNUSED dom::array array; SIMDJSON_UNUSED dom::array array;
ASSERT_SUCCESS( doc.get(array) ); ASSERT_SUCCESS( doc.get(array) );
count++; count++;
} }
@ -326,14 +326,14 @@ namespace parse_api_tests {
bool parser_parse_exception() { bool parser_parse_exception() {
std::cout << "Running " << __func__ << std::endl; std::cout << "Running " << __func__ << std::endl;
dom::parser parser; dom::parser parser;
UNUSED dom::array array = parser.parse(BASIC_JSON); SIMDJSON_UNUSED dom::array array = parser.parse(BASIC_JSON);
return true; return true;
} }
bool parser_parse_many_exception() { bool parser_parse_many_exception() {
std::cout << "Running " << __func__ << std::endl; std::cout << "Running " << __func__ << std::endl;
dom::parser parser; dom::parser parser;
int count = 0; int count = 0;
for (UNUSED dom::array doc : parser.parse_many(BASIC_NDJSON)) { for (SIMDJSON_UNUSED dom::array doc : parser.parse_many(BASIC_NDJSON)) {
count++; count++;
} }
ASSERT_EQUAL(count, 2); ASSERT_EQUAL(count, 2);
@ -345,7 +345,7 @@ namespace parse_api_tests {
dom::parser parser; dom::parser parser;
size_t count = 0; size_t count = 0;
dom::object object = parser.load(TWITTER_JSON); dom::object object = parser.load(TWITTER_JSON);
for (UNUSED auto field : object) { for (SIMDJSON_UNUSED auto field : object) {
count++; count++;
} }
ASSERT_EQUAL( count, object.size() ); ASSERT_EQUAL( count, object.size() );
@ -355,7 +355,7 @@ namespace parse_api_tests {
std::cout << "Running " << __func__ << std::endl; std::cout << "Running " << __func__ << std::endl;
dom::parser parser; dom::parser parser;
int count = 0; int count = 0;
for (UNUSED dom::array doc : parser.load_many(AMAZON_CELLPHONES_NDJSON)) { for (SIMDJSON_UNUSED dom::array doc : parser.load_many(AMAZON_CELLPHONES_NDJSON)) {
count++; count++;
} }
ASSERT_EQUAL( count, AMAZON_CELLPHONES_NDJSON_DOC_COUNT ); ASSERT_EQUAL( count, AMAZON_CELLPHONES_NDJSON_DOC_COUNT );
@ -538,7 +538,7 @@ namespace dom_api_tests {
dom::parser parser; dom::parser parser;
dom::object object; dom::object object;
ASSERT_SUCCESS( parser.parse(json).get(object) ); ASSERT_SUCCESS( parser.parse(json).get(object) );
for (UNUSED auto field : object) { for (SIMDJSON_UNUSED auto field : object) {
TEST_FAIL("Unexpected field"); TEST_FAIL("Unexpected field");
i++; i++;
} }
@ -554,7 +554,7 @@ namespace dom_api_tests {
dom::parser parser; dom::parser parser;
dom::array array; dom::array array;
ASSERT_SUCCESS( parser.parse(json).get(array) ); ASSERT_SUCCESS( parser.parse(json).get(array) );
for (UNUSED auto value : array) { for (SIMDJSON_UNUSED auto value : array) {
TEST_FAIL("Unexpected value"); TEST_FAIL("Unexpected value");
i++; i++;
} }
@ -645,7 +645,7 @@ namespace dom_api_tests {
ASSERT_EQUAL( object["a"].get<uint64_t>().first, 1 ); ASSERT_EQUAL( object["a"].get<uint64_t>().first, 1 );
simdjson::error_code error; simdjson::error_code error;
UNUSED element val; SIMDJSON_UNUSED element val;
#ifndef _LIBCPP_VERSION // should work everywhere but with libc++, must include the <ciso646> header. #ifndef _LIBCPP_VERSION // should work everywhere but with libc++, must include the <ciso646> header.
std::tie(val,error) = object["d"]; std::tie(val,error) = object["d"];
ASSERT_ERROR( error, NO_SUCH_FIELD ); ASSERT_ERROR( error, NO_SUCH_FIELD );
@ -681,7 +681,7 @@ namespace dom_api_tests {
ASSERT_EQUAL( obj["b"].get<uint64_t>().first, 2 ); ASSERT_EQUAL( obj["b"].get<uint64_t>().first, 2 );
ASSERT_EQUAL( obj["a"].get<uint64_t>().first, 1 ); ASSERT_EQUAL( obj["a"].get<uint64_t>().first, 1 );
UNUSED element val; SIMDJSON_UNUSED element val;
ASSERT_ERROR( doc["d"].get(val), NO_SUCH_FIELD); ASSERT_ERROR( doc["d"].get(val), NO_SUCH_FIELD);
return true; return true;
} }

View File

@ -168,7 +168,7 @@ bool cast_tester<T>::test_implicit_cast(simdjson_result<element> element, T expe
template<typename T> template<typename T>
bool cast_tester<T>::test_implicit_cast_error(element element, error_code expected_error) { bool cast_tester<T>::test_implicit_cast_error(element element, error_code expected_error) {
try { try {
UNUSED T actual; SIMDJSON_UNUSED T actual;
actual = element; actual = element;
return false; return false;
} catch(simdjson_error &e) { } catch(simdjson_error &e) {
@ -180,7 +180,7 @@ bool cast_tester<T>::test_implicit_cast_error(element element, error_code expect
template<typename T> template<typename T>
bool cast_tester<T>::test_implicit_cast_error(simdjson_result<element> element, error_code expected_error) { bool cast_tester<T>::test_implicit_cast_error(simdjson_result<element> element, error_code expected_error) {
try { try {
UNUSED T actual; SIMDJSON_UNUSED T actual;
actual = element; actual = element;
return false; return false;
} catch(simdjson_error &e) { } catch(simdjson_error &e) {

View File

@ -21,11 +21,11 @@ std::string trim(const std::string s) {
namespace document_stream_tests { namespace document_stream_tests {
static simdjson::dom::document_stream parse_many_stream_return(simdjson::dom::parser &parser, simdjson::padded_string &str) { static simdjson::dom::document_stream parse_many_stream_return(simdjson::dom::parser &parser, simdjson::padded_string &str) {
simdjson::dom::document_stream stream; simdjson::dom::document_stream stream;
UNUSED auto error = parser.parse_many(str).get(stream); SIMDJSON_UNUSED auto error = parser.parse_many(str).get(stream);
return stream; return stream;
} }
// this is a compilation test // this is a compilation test
UNUSED static void parse_many_stream_assign() { SIMDJSON_UNUSED static void parse_many_stream_assign() {
simdjson::dom::parser parser; simdjson::dom::parser parser;
simdjson::padded_string str("{}",2); simdjson::padded_string str("{}",2);
simdjson::dom::document_stream s1 = parse_many_stream_return(parser, str); simdjson::dom::document_stream s1 = parse_many_stream_return(parser, str);
@ -69,6 +69,7 @@ namespace document_stream_tests {
} }
return true; return true;
} }
bool small_window() { bool small_window() {
std::cout << "Running " << __func__ << std::endl; std::cout << "Running " << __func__ << std::endl;
auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded; auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
@ -91,6 +92,31 @@ namespace document_stream_tests {
return true; return true;
} }
#ifdef SIMDJSON_THREADS_ENABLED
bool threaded_disabled() {
std::cout << "Running " << __func__ << std::endl;
auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
simdjson::dom::parser parser;
parser.threaded = false;
size_t count = 0;
size_t window_size = 10; // deliberately too small
simdjson::dom::document_stream stream;
ASSERT_SUCCESS( parser.parse_many(json, window_size).get(stream) );
for (auto doc : stream) {
if (!doc.error()) {
std::cerr << "Expected a capacity error " << doc.error() << std::endl;
return false;
}
count++;
}
if(count == 2) {
std::cerr << "Expected a capacity error " << std::endl;
return false;
}
return true;
}
#endif
bool large_window() { bool large_window() {
std::cout << "Running " << __func__ << std::endl; std::cout << "Running " << __func__ << std::endl;
#if SIZE_MAX > 17179869184 #if SIZE_MAX > 17179869184
@ -222,6 +248,9 @@ namespace document_stream_tests {
bool run() { bool run() {
return test_current_index() && return test_current_index() &&
#ifdef SIMDJSON_THREADS_ENABLED
threaded_disabled() &&
#endif
small_window() && small_window() &&
large_window() && large_window() &&
json_issue467() && json_issue467() &&

View File

@ -94,7 +94,7 @@ namespace document_tests {
myStream << parser.parse(json); myStream << parser.parse(json);
#else #else
simdjson::dom::element doc; simdjson::dom::element doc;
UNUSED auto error = parser.parse(json).get(doc); SIMDJSON_UNUSED auto error = parser.parse(json).get(doc);
myStream << doc; myStream << doc;
#endif #endif
std::string newjson = myStream.str(); std::string newjson = myStream.str();

View File

@ -117,14 +117,14 @@ namespace parser_load {
bool parser_load_chain() { bool parser_load_chain() {
TEST_START(); TEST_START();
dom::parser parser; dom::parser parser;
UNUSED uint64_t foo; SIMDJSON_UNUSED uint64_t foo;
ASSERT_ERROR( parser.load(NONEXISTENT_FILE)["foo"].get(foo), IO_ERROR); ASSERT_ERROR( parser.load(NONEXISTENT_FILE)["foo"].get(foo), IO_ERROR);
TEST_SUCCEED(); TEST_SUCCEED();
} }
bool parser_load_many_chain() { bool parser_load_many_chain() {
TEST_START(); TEST_START();
dom::parser parser; dom::parser parser;
UNUSED dom::document_stream stream; SIMDJSON_UNUSED dom::document_stream stream;
ASSERT_ERROR( parser.load_many(NONEXISTENT_FILE).get(stream), IO_ERROR ); ASSERT_ERROR( parser.load_many(NONEXISTENT_FILE).get(stream), IO_ERROR );
TEST_SUCCEED(); TEST_SUCCEED();
} }
@ -145,6 +145,49 @@ namespace parser_load {
} }
} }
// Tests that pass the parser an explicit length shorter than the underlying
// character buffer. In every case the bytes immediately after the declared
// length are further digits (PADDING_FILLED_WITH_NUMBERS), so a number parser
// that read past `len` would see a much longer number.
namespace adversarial {
// Run of '2' digits appended after the declared end of each input. run()
// statically asserts that its length (33, per the comment there) is greater
// than SIMDJSON_PADDING.
#define PADDING_FILLED_WITH_NUMBERS "222222222222222222222222222222222"
// Root-level number: only the leading "1" (len == 1) is handed to the parser.
// Expects parsing to succeed and the value to be exactly 1.
bool number_overrun_at_root() {
TEST_START();
constexpr const char *json = "1" PADDING_FILLED_WITH_NUMBERS ",";
constexpr size_t len = 1; // strlen("1");
dom::parser parser;
uint64_t foo;
ASSERT_SUCCESS( parser.parse(json, len).get(foo) ); // Parse just the first digit
ASSERT_EQUAL( foo, 1 );
TEST_SUCCEED();
}
// Number inside an array: only "[1" (len == 2) is handed to the parser.
// Expects TAPE_ERROR -- presumably because the array is never closed within
// `len` bytes (NOTE(review): confirm against the parser's error semantics).
bool number_overrun_in_array() {
TEST_START();
constexpr const char *json = "[1" PADDING_FILLED_WITH_NUMBERS "]";
constexpr size_t len = 2; // strlen("[1");
dom::parser parser;
uint64_t foo;
ASSERT_ERROR( parser.parse(json, len).get(foo), TAPE_ERROR ); // Only "[1" is within len; an error is expected, not a value
TEST_SUCCEED();
}
// Number as an object value: only "{\"key\":1" (len == 8) is handed to the
// parser. Expects TAPE_ERROR, as in the array case above.
bool number_overrun_in_object() {
TEST_START();
constexpr const char *json = "{\"key\":1" PADDING_FILLED_WITH_NUMBERS "}";
constexpr size_t len = 8; // strlen("{\"key\":1");
dom::parser parser;
uint64_t foo;
ASSERT_ERROR( parser.parse(json, len).get(foo), TAPE_ERROR ); // Only "{\"key\":1" is within len; an error is expected, not a value
TEST_SUCCEED();
}
// Runs the three overrun tests in order, short-circuiting on the first failure.
bool run() {
// Compile-time guard: the digit run must be longer than SIMDJSON_PADDING.
static_assert(33 > SIMDJSON_PADDING, "corruption test doesn't have enough padding"); // 33 = strlen(PADDING_FILLED_WITH_NUMBERS)
return true
&& number_overrun_at_root()
&& number_overrun_in_array()
&& number_overrun_in_object()
;
}
}
int main() { int main() {
// this is put here deliberately to check that the documentation is correct (README), // this is put here deliberately to check that the documentation is correct (README),
// should this fail to compile, you should update the documentation: // should this fail to compile, you should update the documentation:
@ -152,7 +195,10 @@ int main() {
printf("unsupported CPU\n"); printf("unsupported CPU\n");
} }
std::cout << "Running error tests." << std::endl; std::cout << "Running error tests." << std::endl;
if (!parser_load::run()) { if (!(true
&& parser_load::run()
&& adversarial::run()
)) {
return EXIT_FAILURE; return EXIT_FAILURE;
} }
std::cout << "Error tests are ok." << std::endl; std::cout << "Error tests are ok." << std::endl;

View File

@ -13,6 +13,7 @@
#include "simdjson.h" #include "simdjson.h"
/** /**
* Does the file filename ends with the given extension. * Does the file filename ends with the given extension.
*/ */

View File

@ -1,3 +1,9 @@
/***************
* We refer the programmer to
* JavaScript Object Notation (JSON) Pointer
* https://tools.ietf.org/html/rfc6901
*/
#include <iostream> #include <iostream>
#include "simdjson.h" #include "simdjson.h"
@ -15,6 +21,35 @@
using namespace simdjson; using namespace simdjson;
// Walk-through of at_pointer() on a small array of car records: one lookup
// from the document root, then one lookup per array element.
// Returns false when a looked-up tire pressure differs from the expected
// value or when an array element cannot be read as an object. When
// SIMDJSON_EXCEPTIONS is off the body is compiled out and the function simply
// returns true.
bool demo() {
#if SIMDJSON_EXCEPTIONS
  std::cout << "demo test" << std::endl;
  auto cars_json = R"( [
{ "make": "Toyota", "model": "Camry", "year": 2018, "tire_pressure": [ 40.1, 39.9, 37.7, 40.4 ] },
{ "make": "Kia", "model": "Soul", "year": 2012, "tire_pressure": [ 30.1, 31.0, 28.6, 28.7 ] },
{ "make": "Toyota", "model": "Tercel", "year": 1999, "tire_pressure": [ 29.8, 30.0, 30.2, 30.5 ] }
] )"_padded;
  dom::parser parser;
  dom::element cars = parser.parse(cars_json);

  // Pointer from the root: second tire pressure of the first car.
  const double first_car_pressure = cars.at_pointer("/0/tire_pressure/1");
  if (first_car_pressure != 39.9) {
    return false;
  }

  // Iterating through an array of objects
  std::vector<double> measured;
  for (dom::element car_element : cars) {
    dom::object car;
    const simdjson::error_code error = car_element.get(car);
    if (error) {
      std::cerr << error << std::endl;
      return false;
    }
    // Here the pointer is resolved relative to the current car object.
    const double pressure = car.at_pointer("/tire_pressure/1");
    measured.push_back(pressure);
  }

  const std::vector<double> expected = {39.9, 31, 30};
  if (!(measured == expected)) {
    return false;
  }
#endif
  return true;
}
const padded_string TEST_JSON = R"( const padded_string TEST_JSON = R"(
{ {
"/~01abc": [ "/~01abc": [
@ -33,49 +68,126 @@ const padded_string TEST_JSON = R"(
} }
)"_padded; )"_padded;
bool json_pointer_success_test(const char *json_pointer, std::string_view expected_value) { const padded_string TEST_RFC_JSON = R"(
{
"foo": ["bar", "baz"],
"": 0,
"a/b": 1,
"c%d": 2,
"e^f": 3,
"g|h": 4,
"i\\j": 5,
"k\"l": 6,
" ": 7,
"m~n": 8
}
)"_padded;
bool json_pointer_success_test(const padded_string & source, const char *json_pointer, std::string_view expected_value) {
std::cout << "Running successful JSON pointer test '" << json_pointer << "' ..." << std::endl; std::cout << "Running successful JSON pointer test '" << json_pointer << "' ..." << std::endl;
dom::parser parser; dom::parser parser;
std::string_view value; dom::element doc;
ASSERT_SUCCESS( parser.parse(TEST_JSON).at(json_pointer).get(value) ); auto error = parser.parse(source).get(doc);
ASSERT_EQUAL(value, expected_value); if(error) { std::cerr << "cannot parse: " << error << std::endl; return false; }
dom::element answer;
error = doc.at_pointer(json_pointer).get(answer);
if(error) { std::cerr << "cannot access pointer: " << error << std::endl; return false; }
std::string str_answer = simdjson::minify(answer);
if(str_answer != expected_value) {
std::cerr << "They differ!!!" << std::endl;
std::cerr << " found '" << str_answer << "'" << std::endl;
std::cerr << " expected '" << expected_value << "'" << std::endl;
}
ASSERT_EQUAL(str_answer, expected_value);
return true; return true;
} }
bool json_pointer_success_test(const char *json_pointer) { bool json_pointer_failure_test(const padded_string & source, const char *json_pointer, error_code expected_error) {
std::cout << "Running successful JSON pointer test '" << json_pointer << "' ..." << std::endl;
dom::parser parser;
ASSERT_SUCCESS( parser.parse(TEST_JSON).at(json_pointer).error() );
return true;
}
bool json_pointer_failure_test(const char *json_pointer, error_code expected_error) {
std::cout << "Running invalid JSON pointer test '" << json_pointer << "' ..." << std::endl; std::cout << "Running invalid JSON pointer test '" << json_pointer << "' ..." << std::endl;
dom::parser parser; dom::parser parser;
ASSERT_ERROR(parser.parse(TEST_JSON).at(json_pointer).error(), expected_error); ASSERT_ERROR(parser.parse(source).at_pointer(json_pointer).error(), expected_error);
return true;
}
// Warning suppression is scoped to legacy_support() only (pushed here, popped
// right after the function). Presumably needed because at() with a path
// argument is marked deprecated -- NOTE(review): confirm against the header.
SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_DEPRECATED_WARNING
// for pre 0.4 users (not standard compliant)
// Checks the legacy at() path syntax, in which paths carry no leading '/'
// (e.g. "array/0"), unlike RFC 6901 JSON Pointers.
// The body is compiled only when SIMDJSON_EXCEPTIONS is set; otherwise the
// function just returns true.
bool legacy_support() {
#if SIMDJSON_EXCEPTIONS
std::cout << "legacy test" << std::endl;
auto legacy_json = R"({"key": "value", "array": [0, 1, 2]})"_padded;
dom::parser parser;
dom::element legacy = parser.parse(legacy_json);
// Single-segment lookup of an object key.
std::string_view value_str = legacy.at("key");
ASSERT_EQUAL(value_str, "value");
// Two-segment path: object key, then array index.
int64_t array0 = legacy.at("array/0");
ASSERT_EQUAL(array0, 0);
// Same lookup expressed as two chained single-segment calls.
array0 = legacy.at("array").at("0");
ASSERT_EQUAL(array0, 0);
// Error cases: missing key, index past the end, non-numeric index, and an
// empty index segment after the trailing '/'.
ASSERT_ERROR(legacy.at("no_such_key").error(), NO_SUCH_FIELD);
ASSERT_ERROR(legacy.at("array/9").error(), INDEX_OUT_OF_BOUNDS);
ASSERT_ERROR(legacy.at("array/not_a_num").error(), INCORRECT_TYPE);
ASSERT_ERROR(legacy.at("array/").error(), INVALID_JSON_POINTER);
#endif
return true;
}
SIMDJSON_POP_DISABLE_WARNINGS
// for 0.5 version and following (standard compliant)
bool modern_support() {
#if SIMDJSON_EXCEPTIONS
std::cout << "modern test" << std::endl;
auto example_json = R"({"key": "value", "array": [0, 1, 2]})"_padded;
dom::parser parser;
dom::element example = parser.parse(example_json);
std::string_view value_str = example.at_pointer("/key");
ASSERT_EQUAL(value_str, "value");
int64_t array0 = example.at_pointer("/array/0");
ASSERT_EQUAL(array0, 0);
array0 = example.at_pointer("/array").at_pointer("/0");
ASSERT_EQUAL(array0, 0);
ASSERT_ERROR(example.at_pointer("/no_such_key").error(), NO_SUCH_FIELD);
ASSERT_ERROR(example.at_pointer("/array/9").error(), INDEX_OUT_OF_BOUNDS);
ASSERT_ERROR(example.at_pointer("/array/not_a_num").error(), INCORRECT_TYPE);
ASSERT_ERROR(example.at_pointer("/array/").error(), INVALID_JSON_POINTER);
#endif
return true; return true;
} }
int main() { int main() {
if (true if (true
&& json_pointer_success_test("") && demo()
&& json_pointer_success_test("~1~001abc") && legacy_support()
&& json_pointer_success_test("~1~001abc/1") && modern_support()
&& json_pointer_success_test("~1~001abc/1/\\\" 0") && json_pointer_success_test(TEST_RFC_JSON, "", R"({"foo":["bar","baz"],"":0,"a/b":1,"c%d":2,"e^f":3,"g|h":4,"i\\j":5,"k\"l":6," ":7,"m~n":8})")
&& json_pointer_success_test("~1~001abc/1/\\\" 0/0", "value0") && json_pointer_success_test(TEST_RFC_JSON, "/foo", "[\"bar\",\"baz\"]")
&& json_pointer_success_test("~1~001abc/1/\\\" 0/1", "value1") && json_pointer_success_test(TEST_RFC_JSON, "/foo/0", "\"bar\"")
&& json_pointer_failure_test("~1~001abc/1/\\\" 0/2", INDEX_OUT_OF_BOUNDS) // index actually out of bounds && json_pointer_success_test(TEST_RFC_JSON, "/", "0")
&& json_pointer_success_test("arr") // get array && json_pointer_success_test(TEST_RFC_JSON, "/a~1b", "1")
&& json_pointer_failure_test("arr/0", INDEX_OUT_OF_BOUNDS) // array index 0 out of bounds on empty array && json_pointer_success_test(TEST_RFC_JSON, "/c%d", "2")
&& json_pointer_success_test("~1~001abc") // get object && json_pointer_success_test(TEST_RFC_JSON, "/e^f", "3")
&& json_pointer_success_test("0", "0 ok") // object index with integer-ish key && json_pointer_success_test(TEST_RFC_JSON, "/g|h", "4")
&& json_pointer_success_test("01", "01 ok") // object index with key that would be an invalid integer && json_pointer_success_test(TEST_RFC_JSON, "/i\\j", "5")
&& json_pointer_success_test("", "empty ok") // object index with empty key && json_pointer_success_test(TEST_RFC_JSON, "/k\"l", "6")
&& json_pointer_failure_test("~01abc", NO_SUCH_FIELD) // Test that we don't try to compare the literal key && json_pointer_success_test(TEST_RFC_JSON, "/ ", "7")
&& json_pointer_failure_test("~1~001abc/01", INVALID_JSON_POINTER) // Leading 0 in integer index && json_pointer_success_test(TEST_RFC_JSON, "/m~0n", "8")
&& json_pointer_failure_test("~1~001abc/", INVALID_JSON_POINTER) // Empty index to array && json_pointer_success_test(TEST_JSON, "",R"({"/~01abc":[0,{"\\\" 0":["value0","value1"]}],"0":"0 ok","01":"01 ok","":"empty ok","arr":[]})")
&& json_pointer_failure_test("~1~001abc/-", INDEX_OUT_OF_BOUNDS) // End index is always out of bounds && json_pointer_success_test(TEST_JSON, "/~1~001abc",R"([0,{"\\\" 0":["value0","value1"]}])")
&& json_pointer_success_test(TEST_JSON, "/~1~001abc/1",R"({"\\\" 0":["value0","value1"]})")
&& json_pointer_success_test(TEST_JSON, "/~1~001abc/1/\\\" 0",R"(["value0","value1"])")
&& json_pointer_success_test(TEST_JSON, "/~1~001abc/1/\\\" 0/0", "\"value0\"")
&& json_pointer_success_test(TEST_JSON, "/~1~001abc/1/\\\" 0/1", "\"value1\"")
&& json_pointer_failure_test(TEST_JSON, "/~1~001abc/1/\\\" 0/2", INDEX_OUT_OF_BOUNDS) // index actually out of bounds
&& json_pointer_success_test(TEST_JSON, "/arr", R"([])") // get array
&& json_pointer_failure_test(TEST_JSON, "/arr/0", INDEX_OUT_OF_BOUNDS) // array index 0 out of bounds on empty array
&& json_pointer_failure_test(TEST_JSON, "~1~001abc", INVALID_JSON_POINTER)
&& json_pointer_success_test(TEST_JSON, "/0", "\"0 ok\"") // object index with integer-ish key
&& json_pointer_success_test(TEST_JSON, "/01", "\"01 ok\"") // object index with key that would be an invalid integer
&& json_pointer_success_test(TEST_JSON, "", R"({"/~01abc":[0,{"\\\" 0":["value0","value1"]}],"0":"0 ok","01":"01 ok","":"empty ok","arr":[]})") // object index with empty key
&& json_pointer_failure_test(TEST_JSON, "/~01abc", NO_SUCH_FIELD) // Test that we don't try to compare the literal key
&& json_pointer_failure_test(TEST_JSON, "/~1~001abc/01", INVALID_JSON_POINTER) // Leading 0 in integer index
&& json_pointer_failure_test(TEST_JSON, "/~1~001abc/", INVALID_JSON_POINTER) // Empty index to array
&& json_pointer_failure_test(TEST_JSON, "/~1~001abc/-", INDEX_OUT_OF_BOUNDS) // End index is always out of bounds
) { ) {
std::cout << "Success!" << std::endl; std::cout << "Success!" << std::endl;
return 0; return 0;

Some files were not shown because too many files have changed in this diff Show More