diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 30bb5b58..20ca57d2 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -41,7 +41,7 @@ if (TARGET benchmark::benchmark) if (SIMDJSON_EXCEPTIONS) add_executable(bench_ondemand bench_ondemand.cpp) if (TARGET yyjson) - target_link_libraries(bench_ondemand PRIVATE yyjson) + target_link_libraries(bench_ondemand PRIVATE yyjson rapidjson) endif (TARGET yyjson) endif (SIMDJSON_EXCEPTIONS) endif() diff --git a/benchmark/bench_ondemand.cpp b/benchmark/bench_ondemand.cpp index c09432c7..e368125c 100644 --- a/benchmark/bench_ondemand.cpp +++ b/benchmark/bench_ondemand.cpp @@ -6,6 +6,13 @@ SIMDJSON_PUSH_DISABLE_ALL_WARNINGS #include "yyjson.h" #endif +#ifdef SIMDJSON_COMPETITION_RAPIDJSON +#include "rapidjson/document.h" +#include "rapidjson/reader.h" +#include "rapidjson/stringbuffer.h" +#include "rapidjson/writer.h" +#endif + // This has to be last, for reasons I don't yet understand #include @@ -14,22 +21,27 @@ SIMDJSON_POP_DISABLE_WARNINGS #include "partial_tweets/simdjson_dom.h" #include "partial_tweets/simdjson_ondemand.h" #include "partial_tweets/yyjson.h" +#include "partial_tweets/rapidjson.h" #include "large_random/simdjson_dom.h" #include "large_random/simdjson_ondemand.h" #include "large_random/simdjson_ondemand_unordered.h" #include "large_random/yyjson.h" +#include "large_random/rapidjson.h" #include "kostya/simdjson_dom.h" #include "kostya/simdjson_ondemand.h" #include "kostya/yyjson.h" +#include "kostya/rapidjson.h" #include "distinct_user_id/simdjson_dom.h" #include "distinct_user_id/simdjson_ondemand.h" #include "distinct_user_id/yyjson.h" +#include "distinct_user_id/rapidjson.h" #include "find_tweet/simdjson_dom.h" #include "find_tweet/simdjson_ondemand.h" #include "find_tweet/yyjson.h" +#include "find_tweet/rapidjson.h" BENCHMARK_MAIN(); diff --git a/benchmark/distinct_user_id/rapidjson.h b/benchmark/distinct_user_id/rapidjson.h new file mode 100644 index 
00000000..a8122580 --- /dev/null +++ b/benchmark/distinct_user_id/rapidjson.h @@ -0,0 +1,49 @@ +#pragma once + +#ifdef SIMDJSON_COMPETITION_RAPIDJSON + +#include "distinct_user_id.h" + +namespace distinct_user_id { + +using namespace rapidjson; + +template +class rapidjson_base { + Document doc{}; + +public: + bool run(const padded_string &json, std::vector &ids) { + auto &root = doc.Parse(json.data()); + if (root.HasParseError() || !root.IsObject()) { return false; } + auto statuses = root.FindMember("statuses"); + if (statuses == root.MemberEnd() || !statuses->value.IsArray()) { return false; } + for (auto &tweet : statuses->value.GetArray()) { + if (!tweet.IsObject()) { return false; } + auto user = tweet.FindMember("user"); + if (user == tweet.MemberEnd() || !user->value.IsObject()) { return false; } + auto id = user->value.FindMember("id"); + if (id == user->value.MemberEnd() || !id->value.IsUint64()) { return false; } + ids.push_back(id->value.GetUint64()); + + auto retweet = tweet.FindMember("retweeted_status"); + if (retweet != tweet.MemberEnd()) { + if (!retweet->value.IsObject()) { return false; } + user = retweet->value.FindMember("user"); + if (user == retweet->value.MemberEnd() || !user->value.IsObject()) { return false; } + id = user->value.FindMember("id"); + if (id == user->value.MemberEnd() || !id->value.IsUint64()) { return false; } + ids.push_back(id->value.GetUint64()); + } + } + + return true; + } +}; + +class rapidjson : public rapidjson_base {}; +BENCHMARK_TEMPLATE(distinct_user_id, rapidjson); + +} // namespace distinct_user_id + +#endif // SIMDJSON_COMPETITION_RAPIDJSON diff --git a/benchmark/distinct_user_id/simdjson_dom.h b/benchmark/distinct_user_id/simdjson_dom.h index e05d92da..fb81c9de 100644 --- a/benchmark/distinct_user_id/simdjson_dom.h +++ b/benchmark/distinct_user_id/simdjson_dom.h @@ -22,7 +22,7 @@ public: // Not all tweets have a "retweeted_status", but when they do // we want to go and find the user within. 
auto retweet = tweet["retweeted_status"]; - if(retweet.error() != NO_SUCH_FIELD) { + if (retweet.error() != NO_SUCH_FIELD) { ids.push_back(retweet["user"]["id"]); } } diff --git a/benchmark/distinct_user_id/simdjson_ondemand.h b/benchmark/distinct_user_id/simdjson_ondemand.h index 8a3c5459..3dfe6ef2 100644 --- a/benchmark/distinct_user_id/simdjson_ondemand.h +++ b/benchmark/distinct_user_id/simdjson_ondemand.h @@ -22,7 +22,7 @@ public: // Not all tweets have a "retweeted_status", but when they do // we want to go and find the user within. auto retweet = tweet.find_field("retweeted_status"); - if(!retweet.error()) { + if (!retweet.error()) { ids.push_back(retweet.find_field("user").find_field("id")); } } diff --git a/benchmark/find_tweet/rapidjson.h b/benchmark/find_tweet/rapidjson.h new file mode 100644 index 00000000..ac62c765 --- /dev/null +++ b/benchmark/find_tweet/rapidjson.h @@ -0,0 +1,42 @@ +#pragma once + +#ifdef SIMDJSON_COMPETITION_RAPIDJSON + +#include "find_tweet.h" + +namespace find_tweet { + +using namespace rapidjson; + +template +class rapidjson_base { + Document doc{}; + +public: + bool run(const padded_string &json, uint64_t find_id, std::string_view &text) { + auto &root = doc.Parse(json.data()); + if (root.HasParseError() || !root.IsObject()) { return false; } + auto statuses = root.FindMember("statuses"); + if (statuses == root.MemberEnd() || !statuses->value.IsArray()) { return false; } + for (auto &tweet : statuses->value.GetArray()) { + if (!tweet.IsObject()) { return false; } + auto id = tweet.FindMember("id"); + if (id == tweet.MemberEnd() || !id->value.IsUint64()) { return false; } + if (id->value.GetUint64() == find_id) { + auto _text = tweet.FindMember("text"); + if (_text == tweet.MemberEnd() || !_text->value.IsString()) { return false; } + text = { _text->value.GetString(), _text->value.GetStringLength() }; + return true; + } + } + + return false; + } +}; + +class rapidjson : public rapidjson_base {}; +BENCHMARK_TEMPLATE(find_tweet, 
rapidjson); + +} // namespace find_tweet + +#endif // SIMDJSON_COMPETITION_RAPIDJSON diff --git a/benchmark/find_tweet/yyjson.h b/benchmark/find_tweet/yyjson.h index 4734cd19..a6befd10 100644 --- a/benchmark/find_tweet/yyjson.h +++ b/benchmark/find_tweet/yyjson.h @@ -7,13 +7,6 @@ namespace find_tweet { class yyjson { -public: - simdjson_really_inline std::string_view result() { return text; } - simdjson_really_inline size_t item_count() { return 1; } - -private: - std::string_view text{}; - public: bool run(const simdjson::padded_string &json, uint64_t find_id, std::string_view &text) { // Walk the document, parsing the tweets as we go diff --git a/benchmark/kostya/kostya.h b/benchmark/kostya/kostya.h index 64a7bde2..a47d9b01 100644 --- a/benchmark/kostya/kostya.h +++ b/benchmark/kostya/kostya.h @@ -46,6 +46,10 @@ public: bool diff(benchmark::State &state, runner &reference) { return diff_results(state, points, reference.points); } + + size_t items_per_iteration() { + return points.size(); + } }; static void append_coordinate(std::default_random_engine &e, std::uniform_real_distribution<> &dis, std::stringstream &myss) { diff --git a/benchmark/kostya/rapidjson.h b/benchmark/kostya/rapidjson.h new file mode 100644 index 00000000..aeb2b7dc --- /dev/null +++ b/benchmark/kostya/rapidjson.h @@ -0,0 +1,46 @@ +#pragma once + +#ifdef SIMDJSON_COMPETITION_RAPIDJSON + +#include "kostya.h" + +namespace kostya { + +using namespace rapidjson; + +template +class rapidjson_base { + Document doc; + + simdjson_really_inline double get_double(Value &object, std::string_view key) { + auto field = object.FindMember(key.data()); + if (field == object.MemberEnd()) { throw "Missing double field"; } + if (!field->value.IsNumber()) { throw "Field is not double"; } + return field->value.GetDouble(); + } + +public: + bool run(const simdjson::padded_string &json, std::vector &points) { + auto &root = doc.Parse(json.data()); + if (root.HasParseError()) { return false; } + if 
(!root.IsObject()) { return false; } + auto coords = root.FindMember("coordinates"); + if (coords == root.MemberEnd()) { return false; } + if (!coords->value.IsArray()) { return false; } + for (auto &coord : coords->value.GetArray()) { + if (!coord.IsObject()) { return false; } + points.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")}); + } + + return true; + } +}; + +class rapidjson : public rapidjson_base {}; +class rapidjson_lossless : public rapidjson_base {}; +BENCHMARK_TEMPLATE(kostya, rapidjson); +BENCHMARK_TEMPLATE(kostya, rapidjson_lossless); + +} // namespace kostya + +#endif // SIMDJSON_COMPETITION_RAPIDJSON diff --git a/benchmark/large_random/large_random.h b/benchmark/large_random/large_random.h index 2bab652b..1ec3c6fa 100644 --- a/benchmark/large_random/large_random.h +++ b/benchmark/large_random/large_random.h @@ -43,6 +43,10 @@ public: bool diff(benchmark::State &state, runner &reference) { return diff_results(state, points, reference.points); } + + size_t items_per_iteration() { + return points.size(); + } }; static std::string build_json_array(size_t N) { diff --git a/benchmark/large_random/rapidjson.h b/benchmark/large_random/rapidjson.h new file mode 100644 index 00000000..b19e327b --- /dev/null +++ b/benchmark/large_random/rapidjson.h @@ -0,0 +1,43 @@ +#pragma once + +#ifdef SIMDJSON_COMPETITION_RAPIDJSON + +#include "large_random.h" + +namespace large_random { + +using namespace rapidjson; + +template +class rapidjson_base { + Document doc; + + simdjson_really_inline double get_double(Value &object, std::string_view key) { + auto field = object.FindMember(key.data()); + if (field == object.MemberEnd()) { throw "Missing double field"; } + if (!field->value.IsNumber()) { throw "Field is not double"; } + return field->value.GetDouble(); + } + +public: + bool run(const simdjson::padded_string &json, std::vector &points) { + auto &coords = doc.Parse(json.data()); + if (coords.HasParseError()) { return 
false; } + if (!coords.IsArray()) { return false; } + for (auto &coord : coords.GetArray()) { + if (!coord.IsObject()) { return false; } + points.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")}); + } + + return true; + } +}; + +class rapidjson : public rapidjson_base {}; +class rapidjson_lossless : public rapidjson_base {}; +BENCHMARK_TEMPLATE(large_random, rapidjson); +BENCHMARK_TEMPLATE(large_random, rapidjson_lossless); + +} // namespace large_random + +#endif // SIMDJSON_COMPETITION_RAPIDJSON diff --git a/benchmark/large_random/yyjson.h b/benchmark/large_random/yyjson.h index e5142136..7e23a220 100644 --- a/benchmark/large_random/yyjson.h +++ b/benchmark/large_random/yyjson.h @@ -7,8 +7,6 @@ namespace large_random { class yyjson { - ondemand::parser parser{}; - simdjson_really_inline double get_double(yyjson_val *obj, std::string_view key) { yyjson_val *val = yyjson_obj_getn(obj, key.data(), key.length()); if (!val){ throw "missing point field!"; } diff --git a/benchmark/partial_tweets/rapidjson.h b/benchmark/partial_tweets/rapidjson.h new file mode 100644 index 00000000..babc94d2 --- /dev/null +++ b/benchmark/partial_tweets/rapidjson.h @@ -0,0 +1,70 @@ +#pragma once + +#ifdef SIMDJSON_COMPETITION_RAPIDJSON + +#include "partial_tweets.h" + +namespace partial_tweets { + +using namespace rapidjson; + +template +class rapidjson_base { + Document doc{}; + + simdjson_really_inline std::string_view get_string_view(Value &object, std::string_view key) { + // TODO: key.data() drops the string_view length, so this lookup relies on the key being NUL-terminated (callers here pass string literals); use a length-aware lookup instead? 
+ auto field = object.FindMember(key.data()); + if (field == object.MemberEnd()) { throw "Missing object field"; } + if (!field->value.IsString()) { throw "Field is not a string"; } + return { field->value.GetString(), field->value.GetStringLength() }; + } + simdjson_really_inline uint64_t get_uint64(Value &object, std::string_view key) { + auto field = object.FindMember(key.data()); + if (field == object.MemberEnd()) { throw "Missing object field"; } + if (!field->value.IsUint64()) { throw "Field is not uint64"; } + return field->value.GetUint64(); + } + simdjson_really_inline uint64_t get_nullable_uint64(Value &object, std::string_view key) { + auto field = object.FindMember(key.data()); + if (field == object.MemberEnd()) { throw "Missing nullable uint64 field"; } + if (field->value.IsNull()) { return 0; } + if (!field->value.IsUint64()) { throw "Field is not nullable uint64"; } + return field->value.GetUint64(); + } + simdjson_really_inline partial_tweets::twitter_user get_user(Value &object, std::string_view key) { + auto field = object.FindMember(key.data()); + if (field == object.MemberEnd()) { throw "Missing user field"; } + if (!field->value.IsObject()) { throw "User field is not an object"; } + return { get_uint64(field->value, "id"), get_string_view(field->value, "screen_name") }; + } + +public: + bool run(const padded_string &json, std::vector &tweets) { + auto &root = doc.Parse(json.data()); + if (root.HasParseError() || !root.IsObject()) { return false; } + auto statuses = root.FindMember("statuses"); + if (statuses == root.MemberEnd() || !statuses->value.IsArray()) { return false; } + for (auto &tweet : statuses->value.GetArray()) { + if (!tweet.IsObject()) { return false; } + tweets.emplace_back(partial_tweets::tweet{ + get_string_view(tweet, "created_at"), + get_uint64 (tweet, "id"), + get_string_view(tweet, "text"), + get_nullable_uint64 (tweet, "in_reply_to_status_id"), + get_user (tweet, "user"), + get_uint64 (tweet, "retweet_count"), + 
get_uint64 (tweet, "favorite_count") + }); + } + + return true; + } +}; + +class rapidjson : public rapidjson_base {}; + BENCHMARK_TEMPLATE(partial_tweets, rapidjson); + +} // namespace partial_tweets + +#endif // SIMDJSON_COMPETITION_RAPIDJSON diff --git a/benchmark/partial_tweets/yyjson.h b/benchmark/partial_tweets/yyjson.h index 1dfb0075..6929bf8b 100644 --- a/benchmark/partial_tweets/yyjson.h +++ b/benchmark/partial_tweets/yyjson.h @@ -7,8 +7,6 @@ namespace partial_tweets { class yyjson { - dom::parser parser{}; - simdjson_really_inline std::string_view get_string_view(yyjson_val *obj, std::string_view key) { auto val = yyjson_obj_getn(obj, key.data(), key.length()); if (!yyjson_is_str(val)) { throw "field is not uint64 or null!"; }