diff --git a/benchmark/bench_ondemand.cpp b/benchmark/bench_ondemand.cpp index fd3f3bf3..b12781e3 100644 --- a/benchmark/bench_ondemand.cpp +++ b/benchmark/bench_ondemand.cpp @@ -62,4 +62,11 @@ SIMDJSON_POP_DISABLE_WARNINGS #include "find_tweet/rapidjson.h" #include "find_tweet/nlohmann_json.h" +#include "top_tweet/simdjson_dom.h" +#include "top_tweet/simdjson_ondemand.h" +#include "top_tweet/yyjson.h" +#include "top_tweet/sajson.h" +#include "top_tweet/rapidjson.h" +#include "top_tweet/nlohmann_json.h" + BENCHMARK_MAIN(); diff --git a/benchmark/distinct_user_id/yyjson.h b/benchmark/distinct_user_id/yyjson.h index 8a1614a2..e949ef14 100644 --- a/benchmark/distinct_user_id/yyjson.h +++ b/benchmark/distinct_user_id/yyjson.h @@ -12,7 +12,7 @@ struct yyjson_base { yyjson_val *root = yyjson_doc_get_root(doc); if (!yyjson_is_obj(root)) { return false; } yyjson_val *statuses = yyjson_obj_get(root, "statuses"); - if (!yyjson_is_arr(statuses)) { return "Statuses is not an array!"; } + if (!yyjson_is_arr(statuses)) { return false; } // Walk the document, parsing the tweets as we go size_t tweet_idx, tweets_max; diff --git a/benchmark/find_tweet/yyjson.h b/benchmark/find_tweet/yyjson.h index a1ff827a..3a8a6477 100644 --- a/benchmark/find_tweet/yyjson.h +++ b/benchmark/find_tweet/yyjson.h @@ -14,7 +14,7 @@ struct yyjson_base { yyjson_val *root = yyjson_doc_get_root(doc); if (!yyjson_is_obj(root)) { return false; } yyjson_val *statuses = yyjson_obj_get(root, "statuses"); - if (!yyjson_is_arr(statuses)) { return "Statuses is not an array!"; } + if (!yyjson_is_arr(statuses)) { return false; } // Walk the document, parsing the tweets as we go size_t tweet_idx, tweets_max; diff --git a/benchmark/partial_tweets/yyjson.h b/benchmark/partial_tweets/yyjson.h index 7ca47ded..a1586f2a 100644 --- a/benchmark/partial_tweets/yyjson.h +++ b/benchmark/partial_tweets/yyjson.h @@ -37,7 +37,7 @@ struct yyjson_base { yyjson_val *root = yyjson_doc_get_root(doc); if (!yyjson_is_obj(root)) { return 
false; } yyjson_val *statuses = yyjson_obj_get(root, "statuses"); - if (!yyjson_is_arr(statuses)) { return "Statuses is not an array!"; } + if (!yyjson_is_arr(statuses)) { return false; } // Walk the document, parsing the tweets as we go size_t tweet_idx, tweets_max; diff --git a/benchmark/top_tweet/README.md b/benchmark/top_tweet/README.md new file mode 100644 index 00000000..b8fdc5d5 --- /dev/null +++ b/benchmark/top_tweet/README.md @@ -0,0 +1,49 @@ +# Top Tweet Benchmark + +The top_tweet benchmark finds the most-retweeted tweet in a twitter API response. + +## Purpose + +This scenario tends to measure an implementation's laziness: its ability to avoid parsing unneeded +values, without knowing beforehand which values are needed. + +To find the top tweet, an implementation needs to iterate through all tweets, remembering which one +had the highest retweet count. While it scans, it will find many "candidate" tweets with the highest +retweet count *up to that point.* That is, while the implementation iterates through tweets, it will +have many "candidate" tweets. Essentially, it has to keep track of the "top tweet so far" while it +searches. However, only the text and screen_name of the *final* top tweet need to be parsed. +Therefore, JSON parsers that can only parse values on the first pass (such as DOM or streaming +parsers) will be forced to parse text and screen_name of every candidate (if not every single +tweet). Parsers which can delay parsing of values until later will therefore shine in scenarios like +this. + +## Rules + +The benchmark will be called with `run(padded_string &json, int64_t max_retweet_count, top_tweet_result &result)`. +The benchmark must: +- Find the tweet with the highest retweet_count at the top level of the "statuses" array. +- Find the *last* such tweet: if multiple tweets have the same top retweet_count, the last one + should be returned. +- Exclude tweets with retweet_count above max_retweet_count. 
This restriction is solely here because + the default twitter.json has a rather high retweet count in the third tweet, and to test laziness + the matching tweet needs to be further down in the file. +- Fill in top_tweet_result with the corresponding fields from the matching tweet. + +### Abridged Schema + +The abridged schema (objects contain more fields than listed here): + +```json +{ + "statuses": [ + { + "text": "i like to tweet", // text containing UTF-8 and escape characters + "user": { + "screen_name": "AlexanderHamilton" // string containing UTF-8 (and escape characters?) + }, + "retweet_count": 2, // uint32 + }, + ... + ] +} +``` diff --git a/benchmark/top_tweet/nlohmann_json.h b/benchmark/top_tweet/nlohmann_json.h new file mode 100644 index 00000000..b16b203f --- /dev/null +++ b/benchmark/top_tweet/nlohmann_json.h @@ -0,0 +1,39 @@ +#pragma once + +#if SIMDJSON_COMPETITION_NLOHMANN_JSON + +#include "top_tweet.h" + +namespace top_tweet { + +using namespace simdjson; + +struct nlohmann_json { + using StringType=std::string; + + dom::parser parser{}; + + bool run(simdjson::padded_string &json, int64_t max_retweet_count, top_tweet_result &result) { + result.retweet_count = -1; + nlohmann::json top_tweet{}; + + auto root = nlohmann::json::parse(json.data(), json.data() + json.size()); + for (auto tweet : root["statuses"]) { + int64_t retweet_count = tweet["retweet_count"]; + if (retweet_count <= max_retweet_count && retweet_count >= result.retweet_count) { + result.retweet_count = retweet_count; + top_tweet = tweet; + } + } + + result.text = top_tweet["text"]; + result.screen_name = top_tweet["user"]["screen_name"]; + return result.retweet_count != -1; + } +}; + +BENCHMARK_TEMPLATE(top_tweet, nlohmann_json)->UseManualTime(); + +} // namespace top_tweet + +#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON \ No newline at end of file diff --git a/benchmark/top_tweet/rapidjson.h b/benchmark/top_tweet/rapidjson.h new file mode 100644 index 00000000..4c47bf16 --- 
/dev/null +++ b/benchmark/top_tweet/rapidjson.h @@ -0,0 +1,69 @@ +#pragma once + +#ifdef SIMDJSON_COMPETITION_RAPIDJSON + +#include "top_tweet.h" + +namespace top_tweet { + +using namespace rapidjson; + +struct rapidjson_base { + using StringType=std::string_view; + + Document doc{}; + + bool run(Document &root, int64_t max_retweet_count, top_tweet_result &result) { + result.retweet_count = -1; + + // Loop over the tweets + if (root.HasParseError() || !root.IsObject()) { return false; } + const auto &statuses = root.FindMember("statuses"); + if (statuses == root.MemberEnd() || !statuses->value.IsArray()) { return false; } + for (const Value &tweet : statuses->value.GetArray()) { + if (!tweet.IsObject()) { return false; } + + // Check if this tweet has a higher retweet count than the current top tweet + const auto &retweet_count_json = tweet.FindMember("retweet_count"); + if (retweet_count_json == tweet.MemberEnd() || !retweet_count_json->value.IsInt64()) { return false; } + int64_t retweet_count = retweet_count_json->value.GetInt64(); + if (retweet_count <= max_retweet_count && retweet_count >= result.retweet_count) { + result.retweet_count = retweet_count; + + // TODO I can't figure out if there's a way to keep the Value to use outside the loop ... 
+ + // Get text and screen_name of top tweet + const auto &text = tweet.FindMember("text"); + if (text == tweet.MemberEnd() || !text->value.IsString()) { return false; } + result.text = { text->value.GetString(), text->value.GetStringLength() }; + + const auto &user = tweet.FindMember("user"); + if (user == tweet.MemberEnd() || !user->value.IsObject()) { return false; } + const auto &screen_name = user->value.FindMember("screen_name"); + if (screen_name == user->value.MemberEnd() || !screen_name->value.IsString()) { return false; } + result.screen_name = { screen_name->value.GetString(), screen_name->value.GetStringLength() }; + + } + } + + return result.retweet_count != -1; + } +}; + +struct rapidjson : rapidjson_base { + bool run(simdjson::padded_string &json, int64_t max_retweet_count, top_tweet_result &result) { + return rapidjson_base::run(doc.Parse(json.data()), max_retweet_count, result); + } +}; +BENCHMARK_TEMPLATE(top_tweet, rapidjson)->UseManualTime(); + +struct rapidjson_insitu : rapidjson_base { + bool run(simdjson::padded_string &json, int64_t max_retweet_count, top_tweet_result &result) { + return rapidjson_base::run(doc.ParseInsitu(json.data()), max_retweet_count, result); + } +}; +BENCHMARK_TEMPLATE(top_tweet, rapidjson_insitu)->UseManualTime(); + +} // namespace top_tweet + +#endif // SIMDJSON_COMPETITION_RAPIDJSON diff --git a/benchmark/top_tweet/sajson.h b/benchmark/top_tweet/sajson.h new file mode 100644 index 00000000..5f4cc055 --- /dev/null +++ b/benchmark/top_tweet/sajson.h @@ -0,0 +1,62 @@ +#pragma once + +#ifdef SIMDJSON_COMPETITION_SAJSON + +#include "top_tweet.h" + +namespace top_tweet { + +struct sajson { + using StringType=std::string_view; + + size_t ast_buffer_size{0}; + size_t *ast_buffer{nullptr}; + + bool run(simdjson::padded_string &json, int32_t max_retweet_count, top_tweet_result &result) { + if (!ast_buffer) { + ast_buffer_size = json.size(); + ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t)); + } + 
auto doc = ::sajson::parse( + ::sajson::bounded_allocation(ast_buffer, ast_buffer_size), + ::sajson::mutable_string_view(json.size(), json.data()) + ); + if (!doc.is_valid()) { return false; } + + auto root = doc.get_root(); + if (root.get_type() != ::sajson::TYPE_OBJECT) { return false; } + auto statuses = root.get_value_of_key({ "statuses", strlen("statuses") }); + if (statuses.get_type() != ::sajson::TYPE_ARRAY) { return false; } + + for (size_t i=0; i= result.retweet_count) { + result.retweet_count = retweet_count; + + auto text = tweet.get_value_of_key({ "text", strlen("text") }); + if (text.get_type() != ::sajson::TYPE_STRING) { return false; } + result.text = { text.as_cstring(), text.get_string_length() }; + + auto user = tweet.get_value_of_key({ "user", strlen("user") }); + if (user.get_type() != ::sajson::TYPE_OBJECT) { return false; } + auto screen_name = user.get_value_of_key({ "screen_name", strlen("screen_name") }); + if (screen_name.get_type() != ::sajson::TYPE_STRING) { return false; } + result.screen_name = { screen_name.as_cstring(), screen_name.get_string_length() }; + } + } + + return result.retweet_count != -1; + } +}; + +BENCHMARK_TEMPLATE(top_tweet, sajson)->UseManualTime(); + +} // namespace top_tweet + +#endif // SIMDJSON_COMPETITION_SAJSON \ No newline at end of file diff --git a/benchmark/top_tweet/simdjson_dom.h b/benchmark/top_tweet/simdjson_dom.h new file mode 100644 index 00000000..3a6648f1 --- /dev/null +++ b/benchmark/top_tweet/simdjson_dom.h @@ -0,0 +1,39 @@ +#pragma once + +#if SIMDJSON_EXCEPTIONS + +#include "top_tweet.h" + +namespace top_tweet { + +using namespace simdjson; + +struct simdjson_dom { + using StringType=std::string_view; + + dom::parser parser{}; + + bool run(simdjson::padded_string &json, int64_t max_retweet_count, top_tweet_result &result) { + result.retweet_count = -1; + dom::element top_tweet{}; + + auto doc = parser.parse(json); + for (auto tweet : doc["statuses"]) { + int64_t retweet_count = 
tweet["retweet_count"]; + if (retweet_count <= max_retweet_count && retweet_count >= result.retweet_count) { + result.retweet_count = retweet_count; + top_tweet = tweet; + } + } + + result.text = top_tweet["text"]; + result.screen_name = top_tweet["user"]["screen_name"]; + return result.retweet_count != -1; + } +}; + +BENCHMARK_TEMPLATE(top_tweet, simdjson_dom)->UseManualTime(); + +} // namespace top_tweet + +#endif // SIMDJSON_EXCEPTIONS \ No newline at end of file diff --git a/benchmark/top_tweet/simdjson_ondemand.h b/benchmark/top_tweet/simdjson_ondemand.h new file mode 100644 index 00000000..10943df8 --- /dev/null +++ b/benchmark/top_tweet/simdjson_ondemand.h @@ -0,0 +1,81 @@ +#pragma once + +#if SIMDJSON_EXCEPTIONS + +#include "top_tweet.h" + +namespace top_tweet { + +using namespace simdjson; +using namespace simdjson::builtin; + +struct simdjson_ondemand { + using StringType=std::string_view; + + ondemand::parser parser{}; + + bool run(simdjson::padded_string &json, int64_t max_retweet_count, top_tweet_result &result) { + result.retweet_count = -1; + // We save these DOM values for later so we don't have to parse them + // into string_views until we're sure which ones we want to parse + // NOTE: simdjson does not presently support reuse of objects or arrays--just scalars. This is + // why we have to grab the text and screen_name fields instead of just saving the tweet object. + ondemand::value screen_name, text; + + auto doc = parser.iterate(json); + for (auto tweet : doc["statuses"]) { + // Since text, user.screen_name, and retweet_count generally appear in order, it's nearly free + // for us to retrieve them here (and will cost a bit more if we do it in the if + // statement). 
+ auto tweet_text = tweet["text"]; + auto tweet_screen_name = tweet["user"]["screen_name"]; + int64_t retweet_count = tweet["retweet_count"]; + if (retweet_count <= max_retweet_count && retweet_count >= result.retweet_count) { + result.retweet_count = retweet_count; + // TODO std::move should not be necessary + text = std::move(tweet_text); + screen_name = std::move(tweet_screen_name); + } + } + + // Now that we know which was the most retweeted, parse the values in it + result.screen_name = screen_name; + result.text = text; + return result.retweet_count != -1; + } +}; + +BENCHMARK_TEMPLATE(top_tweet, simdjson_ondemand)->UseManualTime(); + +struct simdjson_ondemand_forward_only { + using StringType=std::string_view; + + ondemand::parser parser{}; + + bool run(simdjson::padded_string &json, int64_t max_retweet_count, top_tweet_result &result) { + result.retweet_count = -1; + + auto doc = parser.iterate(json); + for (auto tweet : doc["statuses"]) { + // Since text, user.screen_name, and retweet_count generally appear in order, it's nearly free + // for us to retrieve them here (and will cost a bit more if we do it in the if + // statement). 
+ auto tweet_text = tweet["text"]; + auto tweet_screen_name = tweet["user"]["screen_name"]; + int64_t retweet_count = tweet["retweet_count"]; + if (retweet_count <= max_retweet_count && retweet_count >= result.retweet_count) { + result.retweet_count = retweet_count; + result.text = tweet_text; + result.screen_name = tweet_screen_name; + } + } + + return result.retweet_count != -1; + } +}; + +BENCHMARK_TEMPLATE(top_tweet, simdjson_ondemand_forward_only)->UseManualTime(); + +} // namespace top_tweet + +#endif // SIMDJSON_EXCEPTIONS diff --git a/benchmark/top_tweet/top_tweet.h b/benchmark/top_tweet/top_tweet.h new file mode 100644 index 00000000..d64b42a1 --- /dev/null +++ b/benchmark/top_tweet/top_tweet.h @@ -0,0 +1,67 @@ + +#pragma once + +#include "json_benchmark/file_runner.h" + +namespace top_tweet { + +using namespace json_benchmark; + +template +struct top_tweet_result { + int64_t retweet_count{}; + StringType screen_name{}; + StringType text{}; + template + simdjson_really_inline bool operator==(const top_tweet_result &other) const { + return retweet_count == other.retweet_count && + screen_name == other.screen_name && + text == other.text; + } + template + simdjson_really_inline bool operator!=(const top_tweet_result &other) const { return !(*this == other); } +}; + +template +simdjson_unused static std::ostream &operator<<(std::ostream &o, const top_tweet_result &t) { + o << "retweet_count: " << t.retweet_count << std::endl; + o << "screen_name: " << t.screen_name << std::endl; + o << "text: " << t.text << std::endl; + return o; +} + +template +struct runner : public file_runner { + top_tweet_result result{}; + + bool setup(benchmark::State &state) { + return this->load_json(state, TWITTER_JSON); + } + + bool before_run(benchmark::State &state) { + if (!file_runner::before_run(state)) { return false; } + result.retweet_count = -1; + return true; + } + + bool run(benchmark::State &) { + return this->implementation.run(this->json, 60, result); + } + + template 
+ bool diff(benchmark::State &state, runner &reference) { + return diff_results(state, result, reference.result, diff_flags::NONE); + } + + size_t items_per_iteration() { + return 1; + } +}; + +struct simdjson_dom; + +template simdjson_really_inline static void top_tweet(benchmark::State &state) { + json_benchmark::run_json_benchmark, runner>(state); +} + +} // namespace top_tweet diff --git a/benchmark/top_tweet/yyjson.h b/benchmark/top_tweet/yyjson.h new file mode 100644 index 00000000..d7b310e5 --- /dev/null +++ b/benchmark/top_tweet/yyjson.h @@ -0,0 +1,68 @@ +#pragma once + +#ifdef SIMDJSON_COMPETITION_YYJSON + +#include "top_tweet.h" + +namespace top_tweet { + +struct yyjson_base { + using StringType=std::string_view; + + bool run(yyjson_doc *doc, int64_t max_retweet_count, top_tweet_result &result) { + result.retweet_count = -1; + + yyjson_val *top_tweet{}; + + if (!doc) { return false; } + yyjson_val *root = yyjson_doc_get_root(doc); + if (!yyjson_is_obj(root)) { return false; } + yyjson_val *statuses = yyjson_obj_get(root, "statuses"); + if (!yyjson_is_arr(statuses)) { return false; } + + // Walk the document, parsing the tweets as we go + size_t tweet_idx, tweets_max; + yyjson_val *tweet; + yyjson_arr_foreach(statuses, tweet_idx, tweets_max, tweet) { + if (!yyjson_is_obj(tweet)) { return false; } + + auto retweet_count_val = yyjson_obj_get(tweet, "retweet_count"); + if (!yyjson_is_uint(retweet_count_val)) { return false; } + int64_t retweet_count = yyjson_get_uint(retweet_count_val); + if (retweet_count <= max_retweet_count && retweet_count >= result.retweet_count) { + result.retweet_count = retweet_count; + top_tweet = tweet; + } + } + + auto text = yyjson_obj_get(top_tweet, "text"); + if (!yyjson_is_str(text)) { return false; } + result.text = { yyjson_get_str(text), yyjson_get_len(text) }; + + auto user = yyjson_obj_get(top_tweet, "user"); + if (!yyjson_is_obj(user)) { return false; } + auto screen_name = yyjson_obj_get(user, "screen_name"); + if 
(!yyjson_is_str(screen_name)) { return false; } + result.screen_name = { yyjson_get_str(screen_name), yyjson_get_len(screen_name) }; + + return result.retweet_count != -1; + } +}; + +struct yyjson : yyjson_base { + bool run(simdjson::padded_string &json, int64_t max_retweet_count, top_tweet_result &result) { + return yyjson_base::run(yyjson_read(json.data(), json.size(), 0), max_retweet_count, result); + } +}; +BENCHMARK_TEMPLATE(top_tweet, yyjson)->UseManualTime(); + +struct yyjson_insitu : yyjson_base { + bool run(simdjson::padded_string &json, int64_t max_retweet_count, top_tweet_result &result) { + return yyjson_base::run(yyjson_read_opts(json.data(), json.size(), YYJSON_READ_INSITU, 0, 0), max_retweet_count, result); + } +}; +BENCHMARK_TEMPLATE(top_tweet, yyjson_insitu)->UseManualTime(); + +} // namespace top_tweet + +#endif // SIMDJSON_COMPETITION_YYJSON diff --git a/include/simdjson/generic/ondemand.h b/include/simdjson/generic/ondemand.h index 0c9dd7ce..0aede0ed 100644 --- a/include/simdjson/generic/ondemand.h +++ b/include/simdjson/generic/ondemand.h @@ -6,12 +6,15 @@ namespace SIMDJSON_IMPLEMENTATION { * Designed for maximum speed and a lower memory profile. */ namespace ondemand { - /** Represents the depth of a JSON value (number of nested arrays/objects). */ - using depth_t = int32_t; + +/** Represents the depth of a JSON value (number of nested arrays/objects). 
*/ +using depth_t = int32_t; + } // namespace ondemand } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson +#include "simdjson/generic/ondemand/token_position.h" #include "simdjson/generic/ondemand/logger.h" #include "simdjson/generic/ondemand/raw_json_string.h" #include "simdjson/generic/ondemand/token_iterator.h" diff --git a/include/simdjson/generic/ondemand/document-inl.h b/include/simdjson/generic/ondemand/document-inl.h index 289e1046..8053e443 100644 --- a/include/simdjson/generic/ondemand/document-inl.h +++ b/include/simdjson/generic/ondemand/document-inl.h @@ -16,7 +16,6 @@ simdjson_really_inline value_iterator document::resume_value_iterator() noexcept return value_iterator(&iter, 1, iter.root_checkpoint()); } simdjson_really_inline value_iterator document::get_root_value_iterator() noexcept { - iter.assert_at_root(); return resume_value_iterator(); } simdjson_really_inline value document::resume_value() noexcept { @@ -33,22 +32,22 @@ simdjson_really_inline simdjson_result document::get_object() & noexcept return get_root_value().get_object(); } simdjson_really_inline simdjson_result document::get_uint64() noexcept { - return get_root_value_iterator().require_root_uint64(); + return get_root_value_iterator().get_root_uint64(); } simdjson_really_inline simdjson_result document::get_int64() noexcept { - return get_root_value_iterator().require_root_int64(); + return get_root_value_iterator().get_root_int64(); } simdjson_really_inline simdjson_result document::get_double() noexcept { - return get_root_value_iterator().require_root_double(); + return get_root_value_iterator().get_root_double(); } -simdjson_really_inline simdjson_result document::get_string() & noexcept { - return get_root_value().get_string(); +simdjson_really_inline simdjson_result document::get_string() noexcept { + return get_root_value_iterator().get_root_string(); } -simdjson_really_inline simdjson_result document::get_raw_json_string() & noexcept { - return 
get_root_value().get_raw_json_string(); +simdjson_really_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(); } simdjson_really_inline simdjson_result document::get_bool() noexcept { - return get_root_value_iterator().require_root_bool(); + return get_root_value_iterator().get_root_bool(); } simdjson_really_inline bool document::is_null() noexcept { return get_root_value_iterator().is_root_null(); @@ -63,6 +62,8 @@ template<> simdjson_really_inline simdjson_result document::get() & no template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_int64(); } template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_really_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_really_inline simdjson_result document::get() && noexcept { return get_string(); } template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } @@ -81,8 +82,8 @@ simdjson_really_inline document::operator object() & noexcept(false) { return ge simdjson_really_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_really_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_really_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_really_inline document::operator std::string_view() & noexcept(false) { return get_string(); } -simdjson_really_inline document::operator raw_json_string() & noexcept(false) { return get_raw_json_string(); } +simdjson_really_inline document::operator 
std::string_view() noexcept(false) { return get_string(); } +simdjson_really_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } simdjson_really_inline document::operator bool() noexcept(false) { return get_bool(); } #endif @@ -186,11 +187,11 @@ simdjson_really_inline simdjson_result simdjson_result simdjson_result::get_string() & noexcept { +simdjson_really_inline simdjson_result simdjson_result::get_string() noexcept { if (error()) { return error(); } return first.get_string(); } -simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() & noexcept { +simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } @@ -257,11 +258,11 @@ simdjson_really_inline simdjson_result::operator std::string_view() & noexcept(false) { +simdjson_really_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() & noexcept(false) { +simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } diff --git a/include/simdjson/generic/ondemand/document.h b/include/simdjson/generic/ondemand/document.h index 01c4ffeb..33766bdd 100644 --- a/include/simdjson/generic/ondemand/document.h +++ b/include/simdjson/generic/ondemand/document.h @@ -77,7 +77,7 @@ public: * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ - simdjson_really_inline simdjson_result get_string() & noexcept; + simdjson_really_inline simdjson_result get_string() noexcept; /** * Cast this JSON value to a raw_json_string. * @@ -86,7 +86,7 @@ public: * @returns A pointer to the raw JSON for the given string. 
* @returns INCORRECT_TYPE if the JSON value is not a string. */ - simdjson_really_inline simdjson_result get_raw_json_string() & noexcept; + simdjson_really_inline simdjson_result get_raw_json_string() noexcept; /** * Cast this JSON value to a bool. * @@ -173,7 +173,7 @@ public: * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_really_inline operator std::string_view() & noexcept(false); + simdjson_really_inline operator std::string_view() noexcept(false); /** * Cast this JSON value to a raw_json_string. * @@ -182,7 +182,7 @@ public: * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_really_inline operator raw_json_string() & noexcept(false); + simdjson_really_inline operator raw_json_string() noexcept(false); /** * Cast this JSON value to a bool. * @@ -300,8 +300,8 @@ public: simdjson_really_inline simdjson_result get_uint64() noexcept; simdjson_really_inline simdjson_result get_int64() noexcept; simdjson_really_inline simdjson_result get_double() noexcept; - simdjson_really_inline simdjson_result get_string() & noexcept; - simdjson_really_inline simdjson_result get_raw_json_string() & noexcept; + simdjson_really_inline simdjson_result get_string() noexcept; + simdjson_really_inline simdjson_result get_raw_json_string() noexcept; simdjson_really_inline simdjson_result get_bool() noexcept; simdjson_really_inline bool is_null() noexcept; @@ -317,8 +317,8 @@ public: simdjson_really_inline operator uint64_t() noexcept(false); simdjson_really_inline operator int64_t() noexcept(false); simdjson_really_inline operator double() noexcept(false); - simdjson_really_inline operator std::string_view() & noexcept(false); - simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() & noexcept(false); + simdjson_really_inline operator std::string_view() 
noexcept(false); + simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); simdjson_really_inline operator bool() noexcept(false); #endif diff --git a/include/simdjson/generic/ondemand/json_iterator-inl.h b/include/simdjson/generic/ondemand/json_iterator-inl.h index b72a6d43..6e5d66ca 100644 --- a/include/simdjson/generic/ondemand/json_iterator-inl.h +++ b/include/simdjson/generic/ondemand/json_iterator-inl.h @@ -98,10 +98,10 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::skip_child SIMDJSON_POP_DISABLE_WARNINGS simdjson_really_inline bool json_iterator::at_root() const noexcept { - return token.checkpoint() == root_checkpoint(); + return token.position() == root_checkpoint(); } -simdjson_really_inline const uint32_t *json_iterator::root_checkpoint() const noexcept { +simdjson_really_inline token_position json_iterator::root_checkpoint() const noexcept { return parser->dom_parser->structural_indexes.get(); } @@ -138,6 +138,14 @@ simdjson_really_inline uint32_t json_iterator::peek_length(int32_t delta) const return token.peek_length(delta); } +simdjson_really_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + return token.peek(position); +} + +simdjson_really_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { + return token.peek_length(position); +} + simdjson_really_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); SIMDJSON_ASSUME(_depth == parent_depth + 1); @@ -165,11 +173,11 @@ simdjson_really_inline error_code json_iterator::report_error(error_code _error, return error; } -simdjson_really_inline const uint32_t *json_iterator::checkpoint() const noexcept { - return token.checkpoint(); +simdjson_really_inline token_position json_iterator::position() const noexcept { + return token.position(); } -simdjson_really_inline void 
json_iterator::restore_checkpoint(const uint32_t *target_checkpoint) noexcept { - token.restore_checkpoint(target_checkpoint); +simdjson_really_inline void json_iterator::set_position(token_position target_checkpoint) noexcept { + token.set_position(target_checkpoint); } @@ -183,7 +191,7 @@ template simdjson_warn_unused simdjson_really_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t (&tmpbuf)[N]) noexcept { // Truncate whitespace to fit the buffer. if (max_len > N-1) { - if (jsoncharutils::is_not_structural_or_whitespace(json[N])) { return false; } + if (jsoncharutils::is_not_structural_or_whitespace(json[N-1])) { return false; } max_len = N-1; } diff --git a/include/simdjson/generic/ondemand/json_iterator.h b/include/simdjson/generic/ondemand/json_iterator.h index 98907ae7..c28c1491 100644 --- a/include/simdjson/generic/ondemand/json_iterator.h +++ b/include/simdjson/generic/ondemand/json_iterator.h @@ -65,7 +65,7 @@ public: /** * Get the root value iterator */ - simdjson_really_inline const uint32_t *root_checkpoint() const noexcept; + simdjson_really_inline token_position root_checkpoint() const noexcept; /** * Assert if the iterator is not at the start @@ -92,10 +92,6 @@ public: */ simdjson_really_inline const uint8_t *advance() noexcept; - /** - * Whether we are at the start of an object. - */ - /** * Get the JSON text for a given token (relative). * @@ -108,13 +104,32 @@ public: */ simdjson_really_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** - * Get the maximum length of the JSON text for a given token. + * Get the maximum length of the JSON text for the current token (or relative). * * The length will include any whitespace at the end of the token. * * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. */ simdjson_really_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get the JSON text for a given token. 
+ * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token to retrieve. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_really_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_really_inline uint32_t peek_length(token_position position) const noexcept; /** * Ascend one level. @@ -163,8 +178,8 @@ public: template simdjson_warn_unused simdjson_really_inline bool peek_to_buffer(uint8_t (&tmpbuf)[N]) noexcept; template simdjson_warn_unused simdjson_really_inline bool advance_to_buffer(uint8_t (&tmpbuf)[N]) noexcept; - simdjson_really_inline const uint32_t *checkpoint() const noexcept; - simdjson_really_inline void restore_checkpoint(const uint32_t *target_checkpoint) noexcept; + simdjson_really_inline token_position position() const noexcept; + simdjson_really_inline void set_position(token_position target_checkpoint) noexcept; protected: simdjson_really_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; @@ -177,6 +192,7 @@ protected: friend class parser; friend class value_iterator; friend simdjson_really_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; + friend simdjson_really_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept; }; // json_iterator } // namespace ondemand diff --git a/include/simdjson/generic/ondemand/logger-inl.h b/include/simdjson/generic/ondemand/logger-inl.h index a8f0b20f..a7107d76 100644 --- 
a/include/simdjson/generic/ondemand/logger-inl.h +++ b/include/simdjson/generic/ondemand/logger-inl.h @@ -24,6 +24,9 @@ simdjson_really_inline void log_event(const json_iterator &iter, const char *typ simdjson_really_inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_line(iter, "", type, detail, delta, depth_delta); } +simdjson_really_inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail); +} simdjson_really_inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { log_line(iter, "+", type, "", delta, depth_delta); log_depth++; @@ -35,6 +38,9 @@ simdjson_really_inline void log_end_value(const json_iterator &iter, const char simdjson_really_inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { log_line(iter, "ERROR: ", error, detail, delta, depth_delta); } +simdjson_really_inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail); +} simdjson_really_inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_event(iter.json_iter(), type, detail, delta, depth_delta); @@ -76,8 +82,12 @@ simdjson_really_inline void log_headers() noexcept { } simdjson_really_inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, iter.token.index+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail); +} +simdjson_really_inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const 
char *title_prefix, const char *title, std::string_view detail) noexcept { if (LOG_ENABLED) { - const int indent = (log_depth+depth_delta)*2; + const int indent = depth*2; + const auto buf = iter.token.buf; printf("| %*s%s%-*s ", indent, "", title_prefix, @@ -86,21 +96,23 @@ simdjson_really_inline void log_line(const json_iterator &iter, const char *titl { // Print the current structural. printf("| "); + auto current_structural = &buf[*index]; for (int i=0;itoken.index == iter._start_index + 1) || (iter._json_iter->_depth < iter._depth) ); + SIMDJSON_ASSUME( (iter._json_iter->token.index == iter._start_position + 1) || (iter._json_iter->_depth < iter._depth) ); return iter; } simdjson_really_inline object_iterator object::end() noexcept { diff --git a/include/simdjson/generic/ondemand/raw_json_string.h b/include/simdjson/generic/ondemand/raw_json_string.h index c553e642..d6066af2 100644 --- a/include/simdjson/generic/ondemand/raw_json_string.h +++ b/include/simdjson/generic/ondemand/raw_json_string.h @@ -6,6 +6,7 @@ namespace ondemand { class object; class parser; +class json_iterator; /** * A string escaped per JSON rules, terminated with quote ("). 
They are used to represent diff --git a/include/simdjson/generic/ondemand/token_iterator-inl.h b/include/simdjson/generic/ondemand/token_iterator-inl.h index 0398e222..f9dce6e5 100644 --- a/include/simdjson/generic/ondemand/token_iterator-inl.h +++ b/include/simdjson/generic/ondemand/token_iterator-inl.h @@ -2,17 +2,28 @@ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { namespace ondemand { -simdjson_really_inline token_iterator::token_iterator(const uint8_t *_buf, uint32_t *_index) noexcept +simdjson_really_inline token_iterator::token_iterator(const uint8_t *_buf, token_position _index) noexcept : buf{_buf}, index{_index} { } -simdjson_really_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { - return &buf[*(index+delta)]; -} simdjson_really_inline const uint8_t *token_iterator::advance() noexcept { return &buf[*(index++)]; } + +simdjson_really_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_really_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_really_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; +} + +simdjson_really_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(index+delta)]; +} simdjson_really_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { return *(index+delta); } @@ -20,6 +31,13 @@ simdjson_really_inline uint32_t token_iterator::peek_length(int32_t delta) const return *(index+delta+1) - *(index+delta); } +simdjson_really_inline token_position token_iterator::position() const noexcept { + return index; +} +simdjson_really_inline void token_iterator::set_position(token_position target_checkpoint) noexcept { + index = target_checkpoint; +} + simdjson_really_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { 
return index == other.index; } @@ -39,14 +57,6 @@ simdjson_really_inline bool token_iterator::operator<=(const token_iterator &oth return index <= other.index; } -simdjson_really_inline const uint32_t *token_iterator::checkpoint() const noexcept { - return index; -} - -simdjson_really_inline void token_iterator::restore_checkpoint(const uint32_t *target_checkpoint) noexcept { - index = target_checkpoint; -} - } // namespace ondemand } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson diff --git a/include/simdjson/generic/ondemand/token_iterator.h b/include/simdjson/generic/ondemand/token_iterator.h index 2db1fa7b..9dc61abd 100644 --- a/include/simdjson/generic/ondemand/token_iterator.h +++ b/include/simdjson/generic/ondemand/token_iterator.h @@ -21,6 +21,13 @@ public: simdjson_really_inline token_iterator(const token_iterator &other) noexcept = default; simdjson_really_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + /** + * Advance to the next token (returning the current one). + * + * Does not check or update depth/expect_value. Caller is responsible for that. + */ + simdjson_really_inline const uint8_t *advance() noexcept; + /** * Get the JSON text for a given token (relative). * @@ -42,22 +49,35 @@ public: * 1 = next token, -1 = prev token. */ simdjson_really_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** - * Advance to the next token (returning the current one). + * Get the JSON text for a given token. * - * Does not check or update depth/expect_value. Caller is responsible for that. + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... 
*/ - simdjson_really_inline const uint8_t *advance() noexcept; + simdjson_really_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token. + */ + simdjson_really_inline uint32_t peek_length(token_position position) const noexcept; /** * Save the current index to be restored later. */ - simdjson_really_inline const uint32_t *checkpoint() const noexcept; - + simdjson_really_inline token_position position() const noexcept; /** * Reset to a previously saved index. */ - simdjson_really_inline void restore_checkpoint(const uint32_t *target_checkpoint) noexcept; + simdjson_really_inline void set_position(token_position target_checkpoint) noexcept; // NOTE: we don't support a full C++ iterator interface, because we expect people to make // different calls to advance the iterator based on *their own* state. @@ -70,7 +90,7 @@ public: simdjson_really_inline bool operator<=(const token_iterator &other) const noexcept; protected: - simdjson_really_inline token_iterator(const uint8_t *buf, uint32_t *index) noexcept; + simdjson_really_inline token_iterator(const uint8_t *buf, token_position index) noexcept; /** * Get the index of the JSON text for a given token (relative). @@ -79,17 +99,26 @@ protected: * * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. - * */ simdjson_really_inline uint32_t peek_index(int32_t delta=0) const noexcept; + /** + * Get the index of the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. 
+ * + */ + simdjson_really_inline uint32_t peek_index(token_position position) const noexcept; const uint8_t *buf{}; - const uint32_t *index{}; + token_position index{}; friend class json_iterator; friend class value_iterator; friend class object; friend simdjson_really_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; + friend simdjson_really_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept; }; } // namespace ondemand diff --git a/include/simdjson/generic/ondemand/token_position.h b/include/simdjson/generic/ondemand/token_position.h new file mode 100644 index 00000000..cfa13300 --- /dev/null +++ b/include/simdjson/generic/ondemand/token_position.h @@ -0,0 +1,10 @@ +namespace simdjson { +namespace SIMDJSON_IMPLEMENTATION { +namespace ondemand { + +/** @private Position in the JSON buffer indexes */ +using token_position = const uint32_t *; + +} // namespace ondemand +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace simdjson diff --git a/include/simdjson/generic/ondemand/value-inl.h b/include/simdjson/generic/ondemand/value-inl.h index a46dcc83..af2cfcc0 100644 --- a/include/simdjson/generic/ondemand/value-inl.h +++ b/include/simdjson/generic/ondemand/value-inl.h @@ -40,46 +40,25 @@ simdjson_really_inline simdjson_result value::start_or_resume_object() & } } -simdjson_really_inline simdjson_result value::get_raw_json_string() && noexcept { - return iter.require_raw_json_string(); +simdjson_really_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); } -simdjson_really_inline simdjson_result value::get_raw_json_string() & noexcept { - return iter.try_get_raw_json_string(); +simdjson_really_inline simdjson_result value::get_string() noexcept { + return iter.get_string(); } 
-simdjson_really_inline simdjson_result value::get_string() && noexcept { - return iter.require_string(); +simdjson_really_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); } -simdjson_really_inline simdjson_result value::get_string() & noexcept { - return iter.try_get_string(); +simdjson_really_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); } -simdjson_really_inline simdjson_result value::get_double() && noexcept { - return iter.require_double(); +simdjson_really_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); } -simdjson_really_inline simdjson_result value::get_double() & noexcept { - return iter.try_get_double(); +simdjson_really_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); } -simdjson_really_inline simdjson_result value::get_uint64() && noexcept { - return iter.require_uint64(); -} -simdjson_really_inline simdjson_result value::get_uint64() & noexcept { - return iter.try_get_uint64(); -} -simdjson_really_inline simdjson_result value::get_int64() && noexcept { - return iter.require_int64(); -} -simdjson_really_inline simdjson_result value::get_int64() & noexcept { - return iter.try_get_int64(); -} -simdjson_really_inline simdjson_result value::get_bool() && noexcept { - return iter.require_bool(); -} -simdjson_really_inline simdjson_result value::get_bool() & noexcept { - return iter.try_get_bool(); -} -simdjson_really_inline bool value::is_null() && noexcept { - return iter.require_null(); -} -simdjson_really_inline bool value::is_null() & noexcept { +simdjson_really_inline bool value::is_null() noexcept { return iter.is_null(); } @@ -122,41 +101,23 @@ simdjson_really_inline value::operator object() && noexcept(false) { simdjson_really_inline value::operator object() & noexcept(false) { return std::forward(*this).get_object(); } -simdjson_really_inline value::operator uint64_t() && noexcept(false) { - return 
std::forward(*this).get_uint64(); +simdjson_really_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); } -simdjson_really_inline value::operator uint64_t() & noexcept(false) { - return std::forward(*this).get_uint64(); +simdjson_really_inline value::operator int64_t() noexcept(false) { + return get_int64(); } -simdjson_really_inline value::operator int64_t() && noexcept(false) { - return std::forward(*this).get_int64(); +simdjson_really_inline value::operator double() noexcept(false) { + return get_double(); } -simdjson_really_inline value::operator int64_t() & noexcept(false) { - return std::forward(*this).get_int64(); +simdjson_really_inline value::operator std::string_view() noexcept(false) { + return get_string(); } -simdjson_really_inline value::operator double() && noexcept(false) { - return std::forward(*this).get_double(); +simdjson_really_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); } -simdjson_really_inline value::operator double() & noexcept(false) { - return std::forward(*this).get_double(); -} -simdjson_really_inline value::operator std::string_view() && noexcept(false) { - return std::forward(*this).get_string(); -} -simdjson_really_inline value::operator std::string_view() & noexcept(false) { - return std::forward(*this).get_string(); -} -simdjson_really_inline value::operator raw_json_string() && noexcept(false) { - return std::forward(*this).get_raw_json_string(); -} -simdjson_really_inline value::operator raw_json_string() & noexcept(false) { - return std::forward(*this).get_raw_json_string(); -} -simdjson_really_inline value::operator bool() && noexcept(false) { - return std::forward(*this).get_bool(); -} -simdjson_really_inline value::operator bool() & noexcept(false) { - return std::forward(*this).get_bool(); +simdjson_really_inline value::operator bool() noexcept(false) { + return get_bool(); } #endif @@ -303,62 +264,34 @@ simdjson_really_inline 
simdjson_result(first).get_object(); } -simdjson_really_inline simdjson_result simdjson_result::get_uint64() & noexcept { +simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_really_inline simdjson_result simdjson_result::get_uint64() && noexcept { - if (error()) { return error(); } - return std::forward(first).get_uint64(); -} -simdjson_really_inline simdjson_result simdjson_result::get_int64() & noexcept { +simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_really_inline simdjson_result simdjson_result::get_int64() && noexcept { - if (error()) { return error(); } - return std::forward(first).get_int64(); -} -simdjson_really_inline simdjson_result simdjson_result::get_double() & noexcept { +simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_really_inline simdjson_result simdjson_result::get_double() && noexcept { - if (error()) { return error(); } - return std::forward(first).get_double(); -} -simdjson_really_inline simdjson_result simdjson_result::get_string() & noexcept { +simdjson_really_inline simdjson_result simdjson_result::get_string() noexcept { if (error()) { return error(); } return first.get_string(); } -simdjson_really_inline simdjson_result simdjson_result::get_string() && noexcept { - if (error()) { return error(); } - return std::forward(first).get_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() & noexcept { +simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() && noexcept { - if (error()) { return error(); } - return 
std::forward(first).get_raw_json_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_bool() & noexcept { +simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_really_inline simdjson_result simdjson_result::get_bool() && noexcept { - if (error()) { return error(); } - return std::forward(first).get_bool(); -} -simdjson_really_inline bool simdjson_result::is_null() & noexcept { +simdjson_really_inline bool simdjson_result::is_null() noexcept { if (error()) { return false; } return first.is_null(); } -simdjson_really_inline bool simdjson_result::is_null() && noexcept { - if (error()) { return false; } - return std::forward(first).is_null(); -} template simdjson_really_inline simdjson_result simdjson_result::get() & noexcept { if (error()) { return error(); } @@ -413,54 +346,30 @@ simdjson_really_inline simdjson_result if (error()) { throw simdjson_error(error()); } return std::forward(first); } -simdjson_really_inline simdjson_result::operator uint64_t() && noexcept(false) { +simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator int64_t() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -simdjson_really_inline simdjson_result::operator uint64_t() & noexcept(false) { +simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator double() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -simdjson_really_inline simdjson_result::operator int64_t() & noexcept(false) { +simdjson_really_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); 
} return first; } -simdjson_really_inline simdjson_result::operator std::string_view() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -simdjson_really_inline simdjson_result::operator double() & noexcept(false) { +simdjson_really_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -simdjson_really_inline simdjson_result::operator std::string_view() & noexcept(false) { +simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator bool() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() & noexcept(false) { +simdjson_really_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator bool() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} #endif } // namespace simdjson diff --git a/include/simdjson/generic/ondemand/value.h b/include/simdjson/generic/ondemand/value.h index 3faad26c..44cdc57f 100644 --- a/include/simdjson/generic/ondemand/value.h +++ b/include/simdjson/generic/ondemand/value.h @@ -67,19 +67,13 @@ public: /** @overload simdjson_really_inline operator object() && noexcept(false); */ simdjson_really_inline simdjson_result get_object() & noexcept; - // PERF NOTE: get_XXX() methods generally have both && and & variants because 
performance is demonstrably better on clang. - // Specifically, in typical cases where you use a temporary value (like doc["x"].get_double()) the && version is faster - // because the & version has to branch to check whether the parse failed or not before deciding whether the value was consumed. - /** * Cast this JSON value to an unsigned integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ - simdjson_really_inline simdjson_result get_uint64() && noexcept; - /** @overload simdjson_really_inline simdjson_result get_uint64() && noexcept */ - simdjson_really_inline simdjson_result get_uint64() & noexcept; + simdjson_really_inline simdjson_result get_uint64() noexcept; /** * Cast this JSON value to a signed integer. @@ -87,9 +81,7 @@ public: * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ - simdjson_really_inline simdjson_result get_int64() && noexcept; - /** @overload simdjson_really_inline simdjson_result get_int64() && noexcept */ - simdjson_really_inline simdjson_result get_int64() & noexcept; + simdjson_really_inline simdjson_result get_int64() noexcept; /** * Cast this JSON value to a double. @@ -97,9 +89,7 @@ public: * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ - simdjson_really_inline simdjson_result get_double() && noexcept; - /** @overload simdjson_really_inline simdjson_result get_double() && noexcept */ - simdjson_really_inline simdjson_result get_double() & noexcept; + simdjson_really_inline simdjson_result get_double() noexcept; /** * Cast this JSON value to a string. @@ -112,9 +102,7 @@ public: * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. 
*/ - simdjson_really_inline simdjson_result get_string() && noexcept; - /** @overload simdjson_really_inline simdjson_result get_string() && noexcept */ - simdjson_really_inline simdjson_result get_string() & noexcept; + simdjson_really_inline simdjson_result get_string() noexcept; /** * Cast this JSON value to a raw_json_string. @@ -124,9 +112,7 @@ public: * @returns A pointer to the raw JSON for the given string. * @returns INCORRECT_TYPE if the JSON value is not a string. */ - simdjson_really_inline simdjson_result get_raw_json_string() && noexcept; - /** @overload simdjson_really_inline simdjson_result get_raw_json_string() && noexcept */ - simdjson_really_inline simdjson_result get_raw_json_string() & noexcept; + simdjson_really_inline simdjson_result get_raw_json_string() noexcept; /** * Cast this JSON value to a bool. @@ -134,18 +120,14 @@ public: * @returns A bool value. * @returns INCORRECT_TYPE if the JSON value is not true or false. */ - simdjson_really_inline simdjson_result get_bool() && noexcept; - /** @overload simdjson_really_inline simdjson_result get_bool() && noexcept */ - simdjson_really_inline simdjson_result get_bool() & noexcept; + simdjson_really_inline simdjson_result get_bool() noexcept; /** * Checks if this JSON value is null. * * @returns Whether the value is null. */ - simdjson_really_inline bool is_null() && noexcept; - /** @overload simdjson_really_inline bool is_null() && noexcept */ - simdjson_really_inline bool is_null() & noexcept; + simdjson_really_inline bool is_null() noexcept; #if SIMDJSON_EXCEPTIONS /** @@ -172,27 +154,21 @@ public: * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. 
*/ - simdjson_really_inline operator uint64_t() && noexcept(false); - /** @overload simdjson_really_inline operator uint64_t() && noexcept(false); */ - simdjson_really_inline operator uint64_t() & noexcept(false); + simdjson_really_inline operator uint64_t() noexcept(false); /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. */ - simdjson_really_inline operator int64_t() && noexcept(false); - /** @overload simdjson_really_inline operator int64_t() && noexcept(false); */ - simdjson_really_inline operator int64_t() & noexcept(false); + simdjson_really_inline operator int64_t() noexcept(false); /** * Cast this JSON value to a double. * * @returns A double. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. */ - simdjson_really_inline operator double() && noexcept(false); - /** @overload simdjson_really_inline operator double() && noexcept(false); */ - simdjson_really_inline operator double() & noexcept(false); + simdjson_really_inline operator double() noexcept(false); /** * Cast this JSON value to a string. * @@ -204,9 +180,7 @@ public: * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_really_inline operator std::string_view() && noexcept(false); - /** @overload simdjson_really_inline operator std::string_view() && noexcept(false); */ - simdjson_really_inline operator std::string_view() & noexcept(false); + simdjson_really_inline operator std::string_view() noexcept(false); /** * Cast this JSON value to a raw_json_string. * @@ -215,18 +189,14 @@ public: * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. 
*/ - simdjson_really_inline operator raw_json_string() && noexcept(false); - /** @overload simdjson_really_inline operator raw_json_string() && noexcept(false); */ - simdjson_really_inline operator raw_json_string() & noexcept(false); + simdjson_really_inline operator raw_json_string() noexcept(false); /** * Cast this JSON value to a bool. * * @returns A bool value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. */ - simdjson_really_inline operator bool() && noexcept(false); - /** @overload simdjson_really_inline operator bool() && noexcept(false); */ - simdjson_really_inline operator bool() & noexcept(false); + simdjson_really_inline operator bool() noexcept(false); #endif /** @@ -370,26 +340,13 @@ public: simdjson_really_inline simdjson_result get_object() && noexcept; simdjson_really_inline simdjson_result get_object() & noexcept; - simdjson_really_inline simdjson_result get_uint64() && noexcept; - simdjson_really_inline simdjson_result get_uint64() & noexcept; - - simdjson_really_inline simdjson_result get_int64() && noexcept; - simdjson_really_inline simdjson_result get_int64() & noexcept; - - simdjson_really_inline simdjson_result get_double() && noexcept; - simdjson_really_inline simdjson_result get_double() & noexcept; - - simdjson_really_inline simdjson_result get_string() && noexcept; - simdjson_really_inline simdjson_result get_string() & noexcept; - - simdjson_really_inline simdjson_result get_raw_json_string() && noexcept; - simdjson_really_inline simdjson_result get_raw_json_string() & noexcept; - - simdjson_really_inline simdjson_result get_bool() && noexcept; - simdjson_really_inline simdjson_result get_bool() & noexcept; - - simdjson_really_inline bool is_null() && noexcept; - simdjson_really_inline bool is_null() & noexcept; + simdjson_really_inline simdjson_result get_uint64() noexcept; + simdjson_really_inline simdjson_result get_int64() noexcept; + simdjson_really_inline simdjson_result get_double() 
noexcept; + simdjson_really_inline simdjson_result get_string() noexcept; + simdjson_really_inline simdjson_result get_raw_json_string() noexcept; + simdjson_really_inline simdjson_result get_bool() noexcept; + simdjson_really_inline bool is_null() noexcept; template simdjson_really_inline simdjson_result get() & noexcept; template simdjson_really_inline simdjson_result get() && noexcept; @@ -402,18 +359,12 @@ public: simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::array() & noexcept(false); simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::object() && noexcept(false); simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::object() & noexcept(false); - simdjson_really_inline operator uint64_t() && noexcept(false); - simdjson_really_inline operator uint64_t() & noexcept(false); - simdjson_really_inline operator int64_t() && noexcept(false); - simdjson_really_inline operator int64_t() & noexcept(false); - simdjson_really_inline operator double() && noexcept(false); - simdjson_really_inline operator double() & noexcept(false); - simdjson_really_inline operator std::string_view() && noexcept(false); - simdjson_really_inline operator std::string_view() & noexcept(false); - simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() && noexcept(false); - simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() & noexcept(false); - simdjson_really_inline operator bool() && noexcept(false); - simdjson_really_inline operator bool() & noexcept(false); + simdjson_really_inline operator uint64_t() noexcept(false); + simdjson_really_inline operator int64_t() noexcept(false); + simdjson_really_inline operator double() noexcept(false); + simdjson_really_inline operator std::string_view() noexcept(false); + simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); + simdjson_really_inline operator bool() noexcept(false); #endif 
simdjson_really_inline simdjson_result begin() & noexcept; diff --git a/include/simdjson/generic/ondemand/value_iterator-inl.h b/include/simdjson/generic/ondemand/value_iterator-inl.h index da8cb138..76219deb 100644 --- a/include/simdjson/generic/ondemand/value_iterator-inl.h +++ b/include/simdjson/generic/ondemand/value_iterator-inl.h @@ -2,10 +2,10 @@ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { namespace ondemand { -simdjson_really_inline value_iterator::value_iterator(json_iterator *json_iter, depth_t depth, const uint32_t *start_index) noexcept +simdjson_really_inline value_iterator::value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept : _json_iter{json_iter}, _depth{depth}, - _start_index{start_index} + _start_position{start_index} { } @@ -184,7 +184,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator // First, we scan from that point to the end. // If we don't find a match, we loop back around, and scan from the beginning to that point. - const uint32_t *search_start = _json_iter->checkpoint(); + token_position search_start = _json_iter->position(); // Next, we find a match starting from the current position. while (has_value) { @@ -211,11 +211,11 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator // beginning of the object. // (We have already run through the object before, so we've already validated its structure. We // don't check errors in this bit.) 
- _json_iter->restore_checkpoint(_start_index + 1); + _json_iter->set_position(_start_position + 1); _json_iter->descend_to(_depth); has_value = started_object(); - while (_json_iter->checkpoint() < search_start) { + while (_json_iter->position() < search_start) { SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); // We must be at the start of a field @@ -298,245 +298,95 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator } } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::try_get_string() noexcept { - return try_get_raw_json_string().unescape(_json_iter->string_buf_loc()); -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::require_string() noexcept { - return require_raw_json_string().unescape(_json_iter->string_buf_loc()); -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::try_get_raw_json_string() noexcept { - assert_at_start(); - - logger::log_value(*_json_iter, "string", "", 0); - auto json = _json_iter->peek(); - if (*json != '"') { logger::log_error(*_json_iter, "Not a string"); return INCORRECT_TYPE; } - _json_iter->advance(); - _json_iter->ascend_to(depth()-1); - return raw_json_string(json+1); -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::require_raw_json_string() noexcept { - assert_at_start(); - - logger::log_value(*_json_iter, "string", "", 0); - auto json = _json_iter->advance(); - if (*json != '"') { logger::log_error(*_json_iter, "Not a string"); return INCORRECT_TYPE; } - _json_iter->ascend_to(depth()-1); - return raw_json_string(json+1); -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::try_get_uint64() noexcept { - assert_at_non_root_start(); - - logger::log_value(*_json_iter, "uint64", "", 0); - uint64_t result; - SIMDJSON_TRY( 
numberparsing::parse_unsigned(_json_iter->peek()).get(result) ); - _json_iter->advance(); - _json_iter->ascend_to(depth()-1); - return result; -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::require_uint64() noexcept { - assert_at_non_root_start(); - - logger::log_value(*_json_iter, "uint64", "", 0); - _json_iter->ascend_to(depth()-1); - return numberparsing::parse_unsigned(_json_iter->advance()); -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::try_get_int64() noexcept { - assert_at_non_root_start(); - - logger::log_value(*_json_iter, "int64", "", 0); - int64_t result; - SIMDJSON_TRY( numberparsing::parse_integer(_json_iter->peek()).get(result) ); - _json_iter->advance(); - _json_iter->ascend_to(depth()-1); - return result; -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::require_int64() noexcept { - assert_at_non_root_start(); - - logger::log_value(*_json_iter, "int64", "", 0); - _json_iter->ascend_to(depth()-1); - return numberparsing::parse_integer(_json_iter->advance()); -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::try_get_double() noexcept { - assert_at_non_root_start(); - - logger::log_value(*_json_iter, "double", "", 0); - double result; - SIMDJSON_TRY( numberparsing::parse_double(_json_iter->peek()).get(result) ); - _json_iter->advance(); - _json_iter->ascend_to(depth()-1); - return result; -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::require_double() noexcept { - assert_at_non_root_start(); - - logger::log_value(*_json_iter, "double", "", 0); - _json_iter->ascend_to(depth()-1); - return numberparsing::parse_double(_json_iter->advance()); -} simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { - logger::log_value(*_json_iter, "bool", ""); auto not_true = atomparsing::str4ncmp(json, "true"); auto not_false = 
atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); - if (error) { logger::log_error(*_json_iter, "Not a boolean"); return INCORRECT_TYPE; } + if (error) { return incorrect_type_error("Not a boolean"); } return simdjson_result(!not_true); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::try_get_bool() noexcept { - assert_at_non_root_start(); - - bool result; - SIMDJSON_TRY( parse_bool(_json_iter->peek()).get(result) ); - _json_iter->advance(); - _json_iter->ascend_to(depth()-1); - return result; +simdjson_really_inline bool value_iterator::parse_null(const uint8_t *json) const noexcept { + return !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::require_bool() noexcept { - assert_at_non_root_start(); - _json_iter->ascend_to(depth()-1); - return parse_bool(_json_iter->advance()); +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_string() noexcept { + return get_raw_json_string().unescape(_json_iter->string_buf_loc()); } -simdjson_really_inline bool value_iterator::is_null(const uint8_t *json) const noexcept { - if (!atomparsing::str4ncmp(json, "null")) { - logger::log_value(*_json_iter, "null", ""); - return true; - } - return false; +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = advance_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_uint64() noexcept { + return numberparsing::parse_unsigned(advance_non_root_scalar("uint64")); +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_int64() noexcept { + return 
numberparsing::parse_integer(advance_non_root_scalar("int64")); +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_double() noexcept { + return numberparsing::parse_double(advance_non_root_scalar("double")); +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_bool() noexcept { + return parse_bool(advance_non_root_scalar("bool")); } simdjson_really_inline bool value_iterator::is_null() noexcept { - assert_at_non_root_start(); - - if (is_null(_json_iter->peek())) { - _json_iter->advance(); - _json_iter->ascend_to(depth()-1); - return true; - } - return false; -} -simdjson_really_inline bool value_iterator::require_null() noexcept { - assert_at_non_root_start(); - - _json_iter->ascend_to(depth()-1); - return is_null(_json_iter->advance()); + return parse_null(advance_non_root_scalar("null")); } constexpr const uint32_t MAX_INT_LENGTH = 1024; -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::parse_root_uint64(const uint8_t *json, uint32_t max_len) const noexcept { +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_string() noexcept { + return get_string(); +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_raw_json_string() noexcept { + return get_raw_json_string(); +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_uint64() noexcept { + auto max_len = peek_scalar_length(); + auto json = advance_root_scalar("uint64"); uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { logger::log_error(*_json_iter, "Root number more than 20 characters"); return NUMBER_ERROR; } - logger::log_value(*_json_iter, "uint64", "", 0); - auto result = numberparsing::parse_unsigned(tmpbuf); - if (result.error()) { logger::log_error(*_json_iter, "Error parsing unsigned integer"); } - return result; + if 
(!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { logger::log_error(*_json_iter, _start_position, depth(), "Root number more than 20 characters"); return NUMBER_ERROR; } + return numberparsing::parse_unsigned(tmpbuf); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::try_get_root_uint64() noexcept { - assert_at_root(); - - uint64_t result; - SIMDJSON_TRY( parse_root_uint64(_json_iter->peek(), _json_iter->peek_length()).get(result) ); - _json_iter->advance(); - return result; -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::require_root_uint64() noexcept { - assert_at_root(); - - auto max_len = _json_iter->peek_length(); - return parse_root_uint64(_json_iter->advance(), max_len); -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::parse_root_int64(const uint8_t *json, uint32_t max_len) const noexcept { +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_int64() noexcept { + auto max_len = peek_scalar_length(); + auto json = advance_root_scalar("int64"); uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { logger::log_error(*_json_iter, "Root number more than 20 characters"); return NUMBER_ERROR; } - logger::log_value(*_json_iter, "int64", "", 0); - auto result = numberparsing::parse_integer(tmpbuf); - if (result.error()) { logger::log_error(*_json_iter, "Error parsing integer"); } - return result; + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { logger::log_error(*_json_iter, _start_position, depth(), "Root number more than 20 characters"); return NUMBER_ERROR; } + return numberparsing::parse_integer(tmpbuf); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::try_get_root_int64() noexcept { - assert_at_root(); - - int64_t result; - SIMDJSON_TRY( parse_root_int64(_json_iter->peek(), _json_iter->peek_length()).get(result) ); - 
_json_iter->advance(); - return result; -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::require_root_int64() noexcept { - assert_at_root(); - - auto max_len = _json_iter->peek_length(); - return parse_root_int64(_json_iter->advance(), max_len); -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::parse_root_double(const uint8_t *json, uint32_t max_len) const noexcept { +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_double() noexcept { + auto max_len = peek_scalar_length(); + auto json = advance_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest number: -0.e-308. uint8_t tmpbuf[1074+8+1]; - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { logger::log_error(*_json_iter, "Root number more than 1082 characters"); return NUMBER_ERROR; } - logger::log_value(*_json_iter, "double", "", 0); - auto result = numberparsing::parse_double(tmpbuf); - if (result.error()) { logger::log_error(*_json_iter, "Error parsing double"); } - return result; + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { logger::log_error(*_json_iter, _start_position, depth(), "Root number more than 1082 characters"); return NUMBER_ERROR; } + return numberparsing::parse_double(tmpbuf); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::try_get_root_double() noexcept { - assert_at_root(); - - double result; - SIMDJSON_TRY( parse_root_double(_json_iter->peek(), _json_iter->peek_length()).get(result) ); - _json_iter->advance(); - return result; -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::require_root_double() noexcept { - assert_at_root(); - - auto max_len = _json_iter->peek_length(); - return parse_root_double(_json_iter->advance(), max_len); -} 
-simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::parse_root_bool(const uint8_t *json, uint32_t max_len) const noexcept { +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_bool() noexcept { + auto max_len = peek_scalar_length(); + auto json = advance_root_scalar("bool"); uint8_t tmpbuf[5+1]; - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { logger::log_error(*_json_iter, "Not a boolean"); return INCORRECT_TYPE; } + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { return incorrect_type_error("Not a boolean"); } return parse_bool(tmpbuf); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::try_get_root_bool() noexcept { - assert_at_root(); - - bool result; - SIMDJSON_TRY( parse_root_bool(_json_iter->peek(), _json_iter->peek_length()).get(result) ); - _json_iter->advance(); - return result; -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::require_root_bool() noexcept { - assert_at_root(); - - auto max_len = _json_iter->peek_length(); - return parse_root_bool(_json_iter->advance(), max_len); -} -simdjson_really_inline bool value_iterator::is_root_null(const uint8_t *json, uint32_t max_len) const noexcept { - uint8_t tmpbuf[4+1]; - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { return false; } - return is_null(tmpbuf); -} simdjson_really_inline bool value_iterator::is_root_null() noexcept { - assert_at_root(); - - if (!is_root_null(_json_iter->peek(), _json_iter->peek_length())) { return false; } - _json_iter->advance(); - return true; -} -simdjson_really_inline bool value_iterator::require_root_null() noexcept { - assert_at_root(); - - auto max_len = _json_iter->peek_length(); - return is_root_null(_json_iter->advance(), max_len); + auto max_len = peek_scalar_length(); + auto json = advance_root_scalar("null"); + return max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || 
jsoncharutils::is_structural_or_whitespace(json[5])); } simdjson_warn_unused simdjson_really_inline error_code value_iterator::skip_child() noexcept { - SIMDJSON_ASSUME( _json_iter->token.index > _start_index ); + SIMDJSON_ASSUME( _json_iter->token.index > _start_position ); SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); return _json_iter->skip_child(depth()); } + simdjson_really_inline value_iterator value_iterator::child() const noexcept { assert_at_child(); - return { _json_iter, depth()+1, _json_iter->token.checkpoint() }; + return { _json_iter, depth()+1, _json_iter->token.position() }; } simdjson_really_inline bool value_iterator::is_open() const noexcept { @@ -548,12 +398,12 @@ simdjson_really_inline bool value_iterator::at_eof() const noexcept { } simdjson_really_inline bool value_iterator::at_start() const noexcept { - return _json_iter->token.index == _start_index; + return _json_iter->token.index == _start_position; } simdjson_really_inline bool value_iterator::at_first_field() const noexcept { - SIMDJSON_ASSUME( _json_iter->token.index > _start_index ); - return _json_iter->token.index == _start_index + 1; + SIMDJSON_ASSUME( _json_iter->token.index > _start_position ); + return _json_iter->token.index == _start_position + 1; } simdjson_really_inline void value_iterator::abandon() noexcept { @@ -577,20 +427,66 @@ simdjson_warn_unused simdjson_really_inline json_iterator &value_iterator::json_ return *_json_iter; } +simdjson_really_inline const uint8_t *value_iterator::peek_scalar() const noexcept { + return _json_iter->peek(_start_position); +} +simdjson_really_inline uint32_t value_iterator::peek_scalar_length() const noexcept { + return _json_iter->peek_length(_start_position); +} + +simdjson_really_inline const uint8_t *value_iterator::advance_scalar(const char *type) const noexcept { + logger::log_value(*_json_iter, _start_position, depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. 
+ if (!is_at_start()) { return peek_scalar(); } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + auto result = _json_iter->advance(); + _json_iter->ascend_to(depth()-1); + return result; +} +simdjson_really_inline const uint8_t *value_iterator::advance_root_scalar(const char *type) const noexcept { + logger::log_value(*_json_iter, _start_position, depth(), type); + if (!is_at_start()) { return peek_scalar(); } + + assert_at_root(); + auto result = _json_iter->advance(); + _json_iter->ascend_to(depth()-1); + return result; +} +simdjson_really_inline const uint8_t *value_iterator::advance_non_root_scalar(const char *type) const noexcept { + logger::log_value(*_json_iter, _start_position, depth(), type); + if (!is_at_start()) { return peek_scalar(); } + + assert_at_non_root_start(); + auto result = _json_iter->advance(); + _json_iter->ascend_to(depth()-1); + return result; +} + +simdjson_really_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, _start_position, depth(), message); + return INCORRECT_TYPE; +} + +simdjson_really_inline bool value_iterator::is_at_start() const noexcept { + return _json_iter->token.index == _start_position; +} + simdjson_really_inline void value_iterator::assert_at_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token.index == _start_index ); + SIMDJSON_ASSUME( _json_iter->token.index == _start_position ); SIMDJSON_ASSUME( _json_iter->_depth == _depth ); SIMDJSON_ASSUME( _depth > 0 ); } simdjson_really_inline void value_iterator::assert_at_next() const noexcept { - SIMDJSON_ASSUME( _json_iter->token.index > _start_index ); + SIMDJSON_ASSUME( _json_iter->token.index > _start_position ); SIMDJSON_ASSUME( _json_iter->_depth == _depth ); SIMDJSON_ASSUME( _depth > 0 ); } simdjson_really_inline void value_iterator::assert_at_child() const noexcept { - SIMDJSON_ASSUME( 
_json_iter->token.index > _start_index ); + SIMDJSON_ASSUME( _json_iter->token.index > _start_position ); SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); SIMDJSON_ASSUME( _depth > 0 ); } diff --git a/include/simdjson/generic/ondemand/value_iterator.h b/include/simdjson/generic/ondemand/value_iterator.h index 2ff0b339..60a2942a 100644 --- a/include/simdjson/generic/ondemand/value_iterator.h +++ b/include/simdjson/generic/ondemand/value_iterator.h @@ -28,7 +28,7 @@ protected: * * PERF NOTE: this is a safety check; we expect this to be elided in release builds. */ - const uint32_t *_start_index{}; + token_position _start_position{}; public: simdjson_really_inline value_iterator() noexcept = default; @@ -249,30 +249,20 @@ public: * @{ */ - simdjson_warn_unused simdjson_really_inline simdjson_result try_get_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result require_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result try_get_raw_json_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result require_raw_json_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result try_get_uint64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result require_uint64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result try_get_int64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result require_int64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result try_get_double() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result require_double() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result try_get_bool() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result require_bool() noexcept; - simdjson_really_inline bool require_null() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_string() noexcept; + simdjson_warn_unused 
simdjson_really_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_bool() noexcept; simdjson_really_inline bool is_null() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result try_get_root_uint64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result require_root_uint64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result try_get_root_int64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result require_root_int64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result try_get_root_double() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result require_root_double() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result try_get_root_bool() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result require_root_bool() noexcept; - simdjson_really_inline bool require_root_null() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_string() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_raw_json_string() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_uint64() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_int64() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_double() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_bool() noexcept; simdjson_really_inline bool is_root_null() noexcept; simdjson_really_inline error_code error() const noexcept; @@ -283,15 +273,20 @@ public: /** @} */ protected: - simdjson_really_inline 
value_iterator(json_iterator *json_iter, depth_t depth, const uint32_t *start_index) noexcept; - simdjson_really_inline bool is_null(const uint8_t *json) const noexcept; - simdjson_really_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; - simdjson_really_inline bool is_root_null(const uint8_t *json, uint32_t max_len) const noexcept; - simdjson_really_inline simdjson_result parse_root_bool(const uint8_t *json, uint32_t max_len) const noexcept; - simdjson_really_inline simdjson_result parse_root_uint64(const uint8_t *json, uint32_t max_len) const noexcept; - simdjson_really_inline simdjson_result parse_root_int64(const uint8_t *json, uint32_t max_len) const noexcept; - simdjson_really_inline simdjson_result parse_root_double(const uint8_t *json, uint32_t max_len) const noexcept; + simdjson_really_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + simdjson_really_inline bool parse_null(const uint8_t *json) const noexcept; + simdjson_really_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + + simdjson_really_inline const uint8_t *peek_scalar() const noexcept; + simdjson_really_inline uint32_t peek_scalar_length() const noexcept; + simdjson_really_inline const uint8_t *advance_scalar(const char *type) const noexcept; + simdjson_really_inline const uint8_t *advance_root_scalar(const char *type) const noexcept; + simdjson_really_inline const uint8_t *advance_non_root_scalar(const char *type) const noexcept; + + simdjson_really_inline error_code incorrect_type_error(const char *message) const noexcept; + + simdjson_really_inline bool is_at_start() const noexcept; simdjson_really_inline void assert_at_start() const noexcept; simdjson_really_inline void assert_at_root() const noexcept; simdjson_really_inline void assert_at_child() const noexcept; diff --git a/tests/ondemand/ondemand_dom_api_tests.cpp b/tests/ondemand/ondemand_dom_api_tests.cpp index b16e4380..b99ff5a1 100644 --- 
a/tests/ondemand/ondemand_dom_api_tests.cpp +++ b/tests/ondemand/ondemand_dom_api_tests.cpp @@ -664,51 +664,146 @@ namespace dom_api_tests { } template - bool test_scalar_value(const padded_string &json, const T &expected) { + bool test_scalar_value(const padded_string &json, const T &expected, bool test_twice=true) { std::cout << "- JSON: " << json << endl; SUBTEST( "simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { T actual; ASSERT_SUCCESS( doc_result.get(actual) ); ASSERT_EQUAL( expected, actual ); + // Test it twice (scalars can be retrieved more than once) + if (test_twice) { + ASSERT_SUCCESS( doc_result.get(actual) ); + ASSERT_EQUAL( expected, actual ); + } return true; })); SUBTEST( "document", test_ondemand_doc(json, [&](auto doc_result) { T actual; ASSERT_SUCCESS( doc_result.get(actual) ); ASSERT_EQUAL( expected, actual ); - return true; - })); - padded_string array_json = std::string("[") + std::string(json) + "]"; - std::cout << "- JSON: " << array_json << endl; - SUBTEST( "simdjson_result", test_ondemand_doc(array_json, [&](auto doc_result) { - int count = 0; - for (simdjson_result val_result : doc_result) { - T actual; - ASSERT_SUCCESS( val_result.get(actual) ); - ASSERT_EQUAL(expected, actual); - count++; + // Test it twice (scalars can be retrieved more than once) + if (test_twice) { + ASSERT_SUCCESS( doc_result.get(actual) ); + ASSERT_EQUAL( expected, actual ); } - ASSERT_EQUAL(count, 1); return true; })); - SUBTEST( "ondemand::value", test_ondemand_doc(array_json, [&](auto doc_result) { - int count = 0; - for (simdjson_result val_result : doc_result) { - ondemand::value val; - ASSERT_SUCCESS( val_result.get(val) ); + + { + padded_string whitespace_json = std::string(json) + " "; + std::cout << "- JSON: " << whitespace_json << endl; + SUBTEST( "simdjson_result", test_ondemand_doc(whitespace_json, [&](auto doc_result) { T actual; - ASSERT_SUCCESS( val.get(actual) ); - ASSERT_EQUAL(expected, actual); - count++; - } - ASSERT_EQUAL(count, 
1); - return true; - })); + ASSERT_SUCCESS( doc_result.get(actual) ); + ASSERT_EQUAL( expected, actual ); + // Test it twice (scalars can be retrieved more than once) + if (test_twice) { + ASSERT_SUCCESS( doc_result.get(actual) ); + ASSERT_EQUAL( expected, actual ); + } + return true; + })); + SUBTEST( "document", test_ondemand_doc(whitespace_json, [&](auto doc_result) { + T actual; + ASSERT_SUCCESS( doc_result.get(actual) ); + ASSERT_EQUAL( expected, actual ); + // Test it twice (scalars can be retrieved more than once) + if (test_twice) { + ASSERT_SUCCESS( doc_result.get(actual) ); + ASSERT_EQUAL( expected, actual ); + } + return true; + })); + } + + { + padded_string array_json = std::string("[") + std::string(json) + "]"; + std::cout << "- JSON: " << array_json << endl; + SUBTEST( "simdjson_result", test_ondemand_doc(array_json, [&](auto doc_result) { + int count = 0; + for (simdjson_result val_result : doc_result) { + T actual; + ASSERT_SUCCESS( val_result.get(actual) ); + ASSERT_EQUAL(expected, actual); + // Test it twice (scalars can be retrieved more than once) + if (test_twice) { + ASSERT_SUCCESS( val_result.get(actual) ); + ASSERT_EQUAL(expected, actual); + } + count++; + } + ASSERT_EQUAL(count, 1); + return true; + })); + SUBTEST( "value", test_ondemand_doc(array_json, [&](auto doc_result) { + int count = 0; + for (simdjson_result val_result : doc_result) { + ondemand::value val; + ASSERT_SUCCESS( val_result.get(val) ); + T actual; + ASSERT_SUCCESS( val.get(actual) ); + ASSERT_EQUAL(expected, actual); + // Test it twice (scalars can be retrieved more than once) + if (test_twice) { + ASSERT_SUCCESS( val.get(actual) ); + ASSERT_EQUAL(expected, actual); + } + count++; + } + ASSERT_EQUAL(count, 1); + return true; + })); + } + + { + padded_string whitespace_array_json = std::string("[") + std::string(json) + " ]"; + std::cout << "- JSON: " << whitespace_array_json << endl; + SUBTEST( "simdjson_result", test_ondemand_doc(whitespace_array_json, [&](auto 
doc_result) { + int count = 0; + for (simdjson_result val_result : doc_result) { + T actual; + ASSERT_SUCCESS( val_result.get(actual) ); + ASSERT_EQUAL(expected, actual); + // Test it twice (scalars can be retrieved more than once) + if (test_twice) { + ASSERT_SUCCESS( val_result.get(actual) ); + ASSERT_EQUAL(expected, actual); + } + count++; + } + ASSERT_EQUAL(count, 1); + return true; + })); + SUBTEST( "value", test_ondemand_doc(whitespace_array_json, [&](auto doc_result) { + int count = 0; + for (simdjson_result val_result : doc_result) { + ondemand::value val; + ASSERT_SUCCESS( val_result.get(val) ); + T actual; + ASSERT_SUCCESS( val.get(actual) ); + ASSERT_EQUAL(expected, actual); + // Test it twice (scalars can be retrieved more than once) + if (test_twice) { + ASSERT_SUCCESS( val.get(actual) ); + ASSERT_EQUAL(expected, actual); + } + count++; + } + ASSERT_EQUAL(count, 1); + return true; + })); + } + TEST_SUCCEED(); } + bool string_value() { TEST_START(); - return test_scalar_value(R"("hi")"_padded, std::string_view("hi")); + // We can't retrieve a small string twice because it will blow out the string buffer + if (!test_scalar_value(R"("hi")"_padded, std::string_view("hi"), false)) { return false; } + // ... 
unless the document is big enough to have a big string buffer :) + if (!test_scalar_value(R"("hi" )"_padded, std::string_view("hi"))) { return false; } + TEST_SUCCEED(); } bool numeric_values() { diff --git a/tests/ondemand/ondemand_error_tests.cpp b/tests/ondemand/ondemand_error_tests.cpp index d5310e0a..1a1656f8 100644 --- a/tests/ondemand/ondemand_error_tests.cpp +++ b/tests/ondemand/ondemand_error_tests.cpp @@ -240,8 +240,8 @@ namespace error_tests { TEST_START(); ONDEMAND_SUBTEST("missing comma", "[1 1]", assert_iterate(doc, { int64_t(1) }, { TAPE_ERROR })); ONDEMAND_SUBTEST("extra comma ", "[1,,1]", assert_iterate(doc, { int64_t(1) }, { NUMBER_ERROR, TAPE_ERROR })); - ONDEMAND_SUBTEST("extra comma ", "[,]", assert_iterate(doc, { NUMBER_ERROR, TAPE_ERROR })); - ONDEMAND_SUBTEST("extra comma ", "[,,]", assert_iterate(doc, { NUMBER_ERROR, TAPE_ERROR })); + ONDEMAND_SUBTEST("extra comma ", "[,]", assert_iterate(doc, { NUMBER_ERROR })); + ONDEMAND_SUBTEST("extra comma ", "[,,]", assert_iterate(doc, { NUMBER_ERROR, NUMBER_ERROR, TAPE_ERROR })); TEST_SUCCEED(); } bool top_level_array_iterate_unclosed_error() { @@ -250,7 +250,7 @@ namespace error_tests { ONDEMAND_SUBTEST("unclosed ", "[1 ", assert_iterate(doc, { int64_t(1) }, { TAPE_ERROR })); // TODO These pass the user values that may run past the end of the buffer if they aren't careful // In particular, if the padding is decorated with the wrong values, we could cause overrun! 
- ONDEMAND_SUBTEST("unclosed extra comma", "[,,", assert_iterate(doc, { NUMBER_ERROR, TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed extra comma", "[,,", assert_iterate(doc, { NUMBER_ERROR, NUMBER_ERROR, TAPE_ERROR })); ONDEMAND_SUBTEST("unclosed ", "[1,", assert_iterate(doc, { int64_t(1) }, { NUMBER_ERROR, TAPE_ERROR })); ONDEMAND_SUBTEST("unclosed ", "[1", assert_iterate(doc, { NUMBER_ERROR, TAPE_ERROR })); ONDEMAND_SUBTEST("unclosed ", "[", assert_iterate(doc, { NUMBER_ERROR, TAPE_ERROR })); @@ -261,21 +261,21 @@ namespace error_tests { TEST_START(); ONDEMAND_SUBTEST("missing comma", R"({ "a": [1 1] })", assert_iterate(doc["a"], { int64_t(1) }, { TAPE_ERROR })); ONDEMAND_SUBTEST("extra comma ", R"({ "a": [1,,1] })", assert_iterate(doc["a"], { int64_t(1) }, { NUMBER_ERROR, TAPE_ERROR })); - ONDEMAND_SUBTEST("extra comma ", R"({ "a": [1,,] })", assert_iterate(doc["a"], { int64_t(1) }, { NUMBER_ERROR, TAPE_ERROR })); - ONDEMAND_SUBTEST("extra comma ", R"({ "a": [,] })", assert_iterate(doc["a"], { NUMBER_ERROR, TAPE_ERROR })); - ONDEMAND_SUBTEST("extra comma ", R"({ "a": [,,] })", assert_iterate(doc["a"], { NUMBER_ERROR, TAPE_ERROR })); + ONDEMAND_SUBTEST("extra comma ", R"({ "a": [1,,] })", assert_iterate(doc["a"], { int64_t(1) }, { NUMBER_ERROR })); + ONDEMAND_SUBTEST("extra comma ", R"({ "a": [,] })", assert_iterate(doc["a"], { NUMBER_ERROR })); + ONDEMAND_SUBTEST("extra comma ", R"({ "a": [,,] })", assert_iterate(doc["a"], { NUMBER_ERROR, NUMBER_ERROR, TAPE_ERROR })); TEST_SUCCEED(); } bool array_iterate_unclosed_error() { TEST_START(); - ONDEMAND_SUBTEST("unclosed extra comma", R"({ "a": [,)", assert_iterate(doc["a"], { NUMBER_ERROR, TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed extra comma", R"({ "a": [,,)", assert_iterate(doc["a"], { NUMBER_ERROR, TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed ", R"({ "a": [1 )", assert_iterate(doc["a"], { int64_t(1) }, { TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed extra comma", R"({ "a": [,)", assert_iterate(doc["a"], { 
NUMBER_ERROR, TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed extra comma", R"({ "a": [,,)", assert_iterate(doc["a"], { NUMBER_ERROR, NUMBER_ERROR, TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed ", R"({ "a": [1 )", assert_iterate(doc["a"], { int64_t(1) }, { TAPE_ERROR })); // TODO These pass the user values that may run past the end of the buffer if they aren't careful // In particular, if the padding is decorated with the wrong values, we could cause overrun! - ONDEMAND_SUBTEST("unclosed ", R"({ "a": [1,)", assert_iterate(doc["a"], { int64_t(1) }, { NUMBER_ERROR, TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed ", R"({ "a": [1)", assert_iterate(doc["a"], { NUMBER_ERROR, TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed ", R"({ "a": [)", assert_iterate(doc["a"], { NUMBER_ERROR, TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed ", R"({ "a": [1,)", assert_iterate(doc["a"], { int64_t(1) }, { NUMBER_ERROR, TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed ", R"({ "a": [1)", assert_iterate(doc["a"], { NUMBER_ERROR, TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed ", R"({ "a": [)", assert_iterate(doc["a"], { NUMBER_ERROR, TAPE_ERROR })); TEST_SUCCEED(); } diff --git a/tests/ondemand/ondemand_ordering_tests.cpp b/tests/ondemand/ondemand_ordering_tests.cpp index b4291f2a..905fffa2 100644 --- a/tests/ondemand/ondemand_ordering_tests.cpp +++ b/tests/ondemand/ondemand_ordering_tests.cpp @@ -108,8 +108,8 @@ namespace ordering_tests { double x{0}; double y{0}; double z{0}; - for (ondemand::object point_object : doc["coordinates"]) { - for (auto field : point_object) { + for (auto point_object : doc["coordinates"]) { + for (auto field : point_object.get_object()) { if (field.key() == "z") { z += double(field.value()); } else if (field.key() == "x") { x += double(field.value()); } else if (field.key() == "y") { y += double(field.value()); } @@ -117,6 +117,31 @@ namespace ordering_tests { } return (x == 1.1) && (y == 2.2) && (z == 3.3); } + + bool use_values_out_of_order_after_array() { + TEST_START(); + 
ondemand::parser parser{}; + auto doc = parser.iterate(json); + simdjson_result x{}, y{}, z{}; + for (auto point_object : doc["coordinates"]) { + x = point_object["x"]; + y = point_object["y"]; + z = point_object["z"]; + } + return (double(x) == 1.1) && (double(z) == 3.3) && (double(y) == 2.2); + } + + bool use_object_multiple_times_out_of_order() { + TEST_START(); + ondemand::parser parser{}; + auto json2 = "{\"coordinates\":{\"x\":1.1,\"y\":2.2,\"z\":3.3}}"_padded; + auto doc = parser.iterate(json2); + auto x = doc["coordinates"]["x"]; + auto y = doc["coordinates"]["y"]; + auto z = doc["coordinates"]["z"]; + return (double(x) == 1.1) && (double(z) == 3.3) && (double(y) == 2.2); + } + #endif // SIMDJSON_EXCEPTIONS bool run() { @@ -129,6 +154,8 @@ namespace ordering_tests { out_of_order_object_find_field_unordered() && out_of_order_object_find_field() && foreach_object_field_lookup() && + use_values_out_of_order_after_array() && + use_object_multiple_times_out_of_order() && #endif // SIMDJSON_EXCEPTIONS true; }