Add usage benchmarks for rapidjson

This commit is contained in:
John Keiser 2021-01-04 15:22:35 -08:00
parent 22846f7577
commit 680cd6df34
14 changed files with 273 additions and 14 deletions

View File

@ -41,7 +41,7 @@ if (TARGET benchmark::benchmark)
if (SIMDJSON_EXCEPTIONS)
add_executable(bench_ondemand bench_ondemand.cpp)
if (TARGET yyjson)
target_link_libraries(bench_ondemand PRIVATE yyjson)
target_link_libraries(bench_ondemand PRIVATE yyjson rapidjson)
endif (TARGET yyjson)
endif (SIMDJSON_EXCEPTIONS)
endif()

View File

@ -6,6 +6,13 @@ SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
#include "yyjson.h"
#endif
#ifdef SIMDJSON_COMPETITION_RAPIDJSON
#include "rapidjson/document.h"
#include "rapidjson/reader.h"
#include "rapidjson/stringbuffer.h"
#include "rapidjson/writer.h"
#endif
// This has to be last, for reasons I don't yet understand
#include <benchmark/benchmark.h>
@ -14,22 +21,27 @@ SIMDJSON_POP_DISABLE_WARNINGS
#include "partial_tweets/simdjson_dom.h"
#include "partial_tweets/simdjson_ondemand.h"
#include "partial_tweets/yyjson.h"
#include "partial_tweets/rapidjson.h"
#include "large_random/simdjson_dom.h"
#include "large_random/simdjson_ondemand.h"
#include "large_random/simdjson_ondemand_unordered.h"
#include "large_random/yyjson.h"
#include "large_random/rapidjson.h"
#include "kostya/simdjson_dom.h"
#include "kostya/simdjson_ondemand.h"
#include "kostya/yyjson.h"
#include "kostya/rapidjson.h"
#include "distinct_user_id/simdjson_dom.h"
#include "distinct_user_id/simdjson_ondemand.h"
#include "distinct_user_id/yyjson.h"
#include "distinct_user_id/rapidjson.h"
#include "find_tweet/simdjson_dom.h"
#include "find_tweet/simdjson_ondemand.h"
#include "find_tweet/yyjson.h"
#include "find_tweet/rapidjson.h"
BENCHMARK_MAIN();

View File

@ -0,0 +1,49 @@
#pragma once
#ifdef SIMDJSON_COMPETITION_RAPIDJSON
#include "distinct_user_id.h"
namespace distinct_user_id {
using namespace rapidjson;
/**
 * distinct_user_id benchmark implementation on top of the rapidjson DOM API.
 *
 * F is the set of rapidjson parse flags forwarded to Document::Parse.
 */
template<int F>
class rapidjson_base {
  Document doc{};

public:
  /**
   * Parses json and appends user ids to ids.
   *
   * For every element of the top-level "statuses" array, the id of its "user"
   * is appended, followed by the id of "retweeted_status"."user" when the
   * tweet has a "retweeted_status" member.
   *
   * Returns false on a parse error or as soon as a value does not have the
   * expected type; ids appended before that point are left in place.
   */
  bool run(const padded_string &json, std::vector<uint64_t> &ids) {
    auto &document = doc.Parse<F>(json.data());
    if (document.HasParseError()) { return false; }
    if (!document.IsObject()) { return false; }

    auto statuses_it = document.FindMember("statuses");
    if (statuses_it == document.MemberEnd()) { return false; }
    if (!statuses_it->value.IsArray()) { return false; }

    for (auto &status : statuses_it->value.GetArray()) {
      if (!status.IsObject()) { return false; }

      // The tweet's own author.
      auto author = status.FindMember("user");
      if (author == status.MemberEnd()) { return false; }
      if (!author->value.IsObject()) { return false; }
      auto author_id = author->value.FindMember("id");
      if (author_id == author->value.MemberEnd()) { return false; }
      if (!author_id->value.IsUint64()) { return false; }
      ids.push_back(author_id->value.GetUint64());

      // Not every tweet is a retweet; when it is, record the original author too.
      auto retweeted = status.FindMember("retweeted_status");
      if (retweeted == status.MemberEnd()) { continue; }
      if (!retweeted->value.IsObject()) { return false; }

      auto original_author = retweeted->value.FindMember("user");
      if (original_author == retweeted->value.MemberEnd()) { return false; }
      if (!original_author->value.IsObject()) { return false; }
      auto original_author_id = original_author->value.FindMember("id");
      if (original_author_id == original_author->value.MemberEnd()) { return false; }
      if (!original_author_id->value.IsUint64()) { return false; }
      ids.push_back(original_author_id->value.GetUint64());
    }

    return true;
  }
};
// rapidjson DOM implementation parsed with kParseValidateEncodingFlag.
class rapidjson : public rapidjson_base<kParseValidateEncodingFlag> {};
// Registers the implementation via BENCHMARK_TEMPLATE (presumably instantiating the
// distinct_user_id benchmark template from distinct_user_id.h -- confirm).
BENCHMARK_TEMPLATE(distinct_user_id, rapidjson);
} // namespace distinct_user_id
#endif // SIMDJSON_COMPETITION_RAPIDJSON

View File

@ -22,7 +22,7 @@ public:
// Not all tweets have a "retweeted_status", but when they do
// we want to go and find the user within.
auto retweet = tweet["retweeted_status"];
if(retweet.error() != NO_SUCH_FIELD) {
if (retweet.error() != NO_SUCH_FIELD) {
ids.push_back(retweet["user"]["id"]);
}
}

View File

@ -22,7 +22,7 @@ public:
// Not all tweets have a "retweeted_status", but when they do
// we want to go and find the user within.
auto retweet = tweet.find_field("retweeted_status");
if(!retweet.error()) {
if (!retweet.error()) {
ids.push_back(retweet.find_field("user").find_field("id"));
}
}

View File

@ -0,0 +1,42 @@
#pragma once
#ifdef SIMDJSON_COMPETITION_RAPIDJSON
#include "find_tweet.h"
namespace find_tweet {
using namespace rapidjson;
/**
 * find_tweet benchmark implementation on top of the rapidjson DOM API.
 *
 * F is the set of rapidjson parse flags forwarded to Document::Parse.
 */
template<int F>
class rapidjson_base {
  Document doc{};

public:
  /**
   * Parses json and looks through the top-level "statuses" array for the
   * first tweet whose "id" equals find_id.
   *
   * On a match, text is set to that tweet's "text" string and true is
   * returned. Returns false on a parse error, on a value of unexpected type,
   * or when no tweet matches.
   *
   * NOTE(review): text views the buffer handed back by rapidjson's
   * GetString(); presumably only valid until doc is parsed again -- confirm.
   */
  bool run(const padded_string &json, uint64_t find_id, std::string_view &text) {
    auto &document = doc.Parse<F>(json.data());
    if (document.HasParseError()) { return false; }
    if (!document.IsObject()) { return false; }

    auto statuses_it = document.FindMember("statuses");
    if (statuses_it == document.MemberEnd()) { return false; }
    if (!statuses_it->value.IsArray()) { return false; }

    for (auto &status : statuses_it->value.GetArray()) {
      if (!status.IsObject()) { return false; }

      auto id_it = status.FindMember("id");
      if (id_it == status.MemberEnd()) { return false; }
      if (!id_it->value.IsUint64()) { return false; }

      // Keep scanning until the requested tweet shows up.
      if (id_it->value.GetUint64() != find_id) { continue; }

      auto text_it = status.FindMember("text");
      if (text_it == status.MemberEnd()) { return false; }
      if (!text_it->value.IsString()) { return false; }
      text = std::string_view(text_it->value.GetString(), text_it->value.GetStringLength());
      return true;
    }

    return false;
  }
};
// rapidjson DOM implementation parsed with kParseValidateEncodingFlag.
class rapidjson : public rapidjson_base<kParseValidateEncodingFlag> {};
// Registers the implementation via BENCHMARK_TEMPLATE (presumably instantiating the
// find_tweet benchmark template from find_tweet.h -- confirm).
BENCHMARK_TEMPLATE(find_tweet, rapidjson);
} // namespace find_tweet
#endif // SIMDJSON_COMPETITION_RAPIDJSON

View File

@ -7,13 +7,6 @@
namespace find_tweet {
class yyjson {
public:
simdjson_really_inline std::string_view result() { return text; }
simdjson_really_inline size_t item_count() { return 1; }
private:
std::string_view text{};
public:
bool run(const simdjson::padded_string &json, uint64_t find_id, std::string_view &text) {
// Walk the document, parsing the tweets as we go

View File

@ -46,6 +46,10 @@ public:
bool diff(benchmark::State &state, runner<R> &reference) {
return diff_results(state, points, reference.points);
}
// Reports how many entries `points` currently holds.
size_t items_per_iteration() {
return points.size();
}
};
static void append_coordinate(std::default_random_engine &e, std::uniform_real_distribution<> &dis, std::stringstream &myss) {

View File

@ -0,0 +1,46 @@
#pragma once
#ifdef SIMDJSON_COMPETITION_RAPIDJSON
#include "kostya.h"
namespace kostya {
using namespace rapidjson;
/**
 * kostya benchmark implementation on top of the rapidjson DOM API.
 *
 * F is the set of rapidjson parse flags forwarded to Document::Parse.
 */
template<int F>
class rapidjson_base {
  Document doc;

  /**
   * Returns the numeric member `key` of `object` as a double.
   *
   * Throws a string literal (const char *) when the member is missing or is
   * not a number.
   */
  simdjson_really_inline double get_double(Value &object, std::string_view key) {
    // Look the key up by pointer + length: a std::string_view is not
    // guaranteed to be NUL-terminated, so handing key.data() to the
    // `const Ch*` overload could read past the end of the view.
    auto field = object.FindMember(Value(StringRef(key.data(), key.size())));
    if (field == object.MemberEnd()) { throw "Missing double field"; }
    if (!field->value.IsNumber()) { throw "Field is not double"; }
    return field->value.GetDouble();
  }

public:
  /**
   * Parses json and appends one point (x, y, z) per element of the top-level
   * "coordinates" array.
   *
   * Returns false on a parse error or when the document, "coordinates", or an
   * element does not have the expected type. A missing or non-numeric
   * coordinate propagates the exception thrown by get_double.
   */
  bool run(const simdjson::padded_string &json, std::vector<point> &points) {
    auto &root = doc.Parse<F>(json.data());
    if (root.HasParseError()) { return false; }
    if (!root.IsObject()) { return false; }
    auto coords = root.FindMember("coordinates");
    if (coords == root.MemberEnd()) { return false; }
    if (!coords->value.IsArray()) { return false; }
    for (auto &coord : coords->value.GetArray()) {
      if (!coord.IsObject()) { return false; }
      // Braced initialization evaluates x, y, z left to right.
      points.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
    }
    return true;
  }
};
// rapidjson DOM implementation parsed with kParseValidateEncodingFlag.
class rapidjson : public rapidjson_base<kParseValidateEncodingFlag> {};
// Same implementation with kParseFullPrecisionFlag added to the parse flags.
class rapidjson_lossless : public rapidjson_base<kParseValidateEncodingFlag | kParseFullPrecisionFlag> {};
// Register both variants via BENCHMARK_TEMPLATE (presumably instantiating the
// kostya benchmark template from kostya.h -- confirm).
BENCHMARK_TEMPLATE(kostya, rapidjson);
BENCHMARK_TEMPLATE(kostya, rapidjson_lossless);
} // namespace kostya
#endif // SIMDJSON_COMPETITION_RAPIDJSON

View File

@ -43,6 +43,10 @@ public:
bool diff(benchmark::State &state, runner<R> &reference) {
return diff_results(state, points, reference.points);
}
// Reports how many entries `points` currently holds.
size_t items_per_iteration() {
return points.size();
}
};
static std::string build_json_array(size_t N) {

View File

@ -0,0 +1,43 @@
#pragma once
#ifdef SIMDJSON_COMPETITION_RAPIDJSON
#include "large_random.h"
namespace large_random {
using namespace rapidjson;
/**
 * large_random benchmark implementation on top of the rapidjson DOM API.
 *
 * F is the set of rapidjson parse flags forwarded to Document::Parse.
 */
template<int F>
class rapidjson_base {
  Document doc;

  /**
   * Returns the numeric member `key` of `object` as a double.
   *
   * Throws a string literal (const char *) when the member is missing or is
   * not a number.
   */
  simdjson_really_inline double get_double(Value &object, std::string_view key) {
    // Look the key up by pointer + length: a std::string_view is not
    // guaranteed to be NUL-terminated, so handing key.data() to the
    // `const Ch*` overload could read past the end of the view.
    auto field = object.FindMember(Value(StringRef(key.data(), key.size())));
    if (field == object.MemberEnd()) { throw "Missing double field"; }
    if (!field->value.IsNumber()) { throw "Field is not double"; }
    return field->value.GetDouble();
  }

public:
  /**
   * Parses json, whose root is expected to be an array of objects, and
   * appends one point (x, y, z) per element.
   *
   * Returns false on a parse error or when the root or an element does not
   * have the expected type. A missing or non-numeric coordinate propagates
   * the exception thrown by get_double.
   */
  bool run(const simdjson::padded_string &json, std::vector<point> &points) {
    auto &coords = doc.Parse<F>(json.data());
    if (coords.HasParseError()) { return false; }
    if (!coords.IsArray()) { return false; }
    for (auto &coord : coords.GetArray()) {
      if (!coord.IsObject()) { return false; }
      // Braced initialization evaluates x, y, z left to right.
      points.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
    }
    return true;
  }
};
// rapidjson DOM implementation parsed with kParseValidateEncodingFlag.
class rapidjson : public rapidjson_base<kParseValidateEncodingFlag> {};
// Same implementation with kParseFullPrecisionFlag added to the parse flags.
class rapidjson_lossless : public rapidjson_base<kParseValidateEncodingFlag | kParseFullPrecisionFlag> {};
// Register both variants via BENCHMARK_TEMPLATE (presumably instantiating the
// large_random benchmark template from large_random.h -- confirm).
BENCHMARK_TEMPLATE(large_random, rapidjson);
BENCHMARK_TEMPLATE(large_random, rapidjson_lossless);
} // namespace large_random
#endif // SIMDJSON_COMPETITION_RAPIDJSON

View File

@ -7,8 +7,6 @@
namespace large_random {
class yyjson {
ondemand::parser parser{};
simdjson_really_inline double get_double(yyjson_val *obj, std::string_view key) {
yyjson_val *val = yyjson_obj_getn(obj, key.data(), key.length());
if (!val){ throw "missing point field!"; }

View File

@ -0,0 +1,70 @@
#pragma once
#ifdef SIMDJSON_COMPETITION_RAPIDJSON
#include "partial_tweets.h"
namespace partial_tweets {
using namespace rapidjson;
/**
 * partial_tweets benchmark implementation on top of the rapidjson DOM API.
 *
 * F is the set of rapidjson parse flags forwarded to Document::Parse.
 */
template<int F>
class rapidjson_base {
  Document doc{};

  /**
   * Length-aware member lookup shared by the typed getters below.
   *
   * A std::string_view is not guaranteed to be NUL-terminated, so the key is
   * handed to rapidjson as pointer + length instead of through the
   * `const Ch*` overload of FindMember.
   */
  simdjson_really_inline Value::MemberIterator find_member(Value &object, std::string_view key) {
    return object.FindMember(Value(StringRef(key.data(), key.size())));
  }

  /**
   * Returns the string member `key` of `object`.
   * Throws a string literal (const char *) when it is missing or not a string.
   */
  simdjson_really_inline std::string_view get_string_view(Value &object, std::string_view key) {
    auto field = find_member(object, key);
    if (field == object.MemberEnd()) { throw "Missing object field"; }
    if (!field->value.IsString()) { throw "Field is not a string"; }
    return { field->value.GetString(), field->value.GetStringLength() };
  }

  /**
   * Returns the member `key` of `object` as a uint64_t.
   * Throws a string literal (const char *) when it is missing or not a uint64.
   */
  simdjson_really_inline uint64_t get_uint64(Value &object, std::string_view key) {
    auto field = find_member(object, key);
    if (field == object.MemberEnd()) { throw "Missing object field"; }
    if (!field->value.IsUint64()) { throw "Field is not uint64"; }
    return field->value.GetUint64();
  }

  /**
   * Like get_uint64, except that a JSON null is accepted and reported as 0.
   * The member itself must still be present.
   */
  simdjson_really_inline uint64_t get_nullable_uint64(Value &object, std::string_view key) {
    auto field = find_member(object, key);
    if (field == object.MemberEnd()) { throw "Missing nullable uint64 field"; }
    if (field->value.IsNull()) { return 0; }
    if (!field->value.IsUint64()) { throw "Field is not nullable uint64"; }
    return field->value.GetUint64();
  }

  /**
   * Reads the object member `key` of `object` as a twitter_user built from
   * its "id" and "screen_name".
   * Throws a string literal (const char *) when it is missing or not an object.
   */
  simdjson_really_inline partial_tweets::twitter_user get_user(Value &object, std::string_view key) {
    auto field = find_member(object, key);
    if (field == object.MemberEnd()) { throw "Missing user field"; }
    if (!field->value.IsObject()) { throw "User field is not an object"; }
    return { get_uint64(field->value, "id"), get_string_view(field->value, "screen_name") };
  }

public:
  /**
   * Parses json and appends one tweet per element of the top-level
   * "statuses" array.
   *
   * Returns false on a parse error or when the document, "statuses", or an
   * element does not have the expected type. Missing or mistyped fields
   * inside a tweet propagate the exception thrown by the getters above.
   */
  bool run(const padded_string &json, std::vector<tweet> &tweets) {
    auto &root = doc.Parse<F>(json.data());
    if (root.HasParseError() || !root.IsObject()) { return false; }
    auto statuses = root.FindMember("statuses");
    if (statuses == root.MemberEnd() || !statuses->value.IsArray()) { return false; }
    for (auto &tweet : statuses->value.GetArray()) {
      if (!tweet.IsObject()) { return false; }
      // The loop variable shadows the tweet type, hence the qualified name.
      tweets.emplace_back(partial_tweets::tweet{
        get_string_view(tweet, "created_at"),
        get_uint64(tweet, "id"),
        get_string_view(tweet, "text"),
        get_nullable_uint64(tweet, "in_reply_to_status_id"),
        get_user(tweet, "user"),
        get_uint64(tweet, "retweet_count"),
        get_uint64(tweet, "favorite_count")
      });
    }
    return true;
  }
};
// rapidjson DOM implementation parsed with kParseValidateEncodingFlag.
class rapidjson : public rapidjson_base<kParseValidateEncodingFlag> {};
// Registers the implementation via BENCHMARK_TEMPLATE (presumably instantiating the
// partial_tweets benchmark template from partial_tweets.h -- confirm).
BENCHMARK_TEMPLATE(partial_tweets, rapidjson);
} // namespace partial_tweets
#endif // SIMDJSON_COMPETITION_RAPIDJSON

View File

@ -7,8 +7,6 @@
namespace partial_tweets {
class yyjson {
dom::parser parser{};
simdjson_really_inline std::string_view get_string_view(yyjson_val *obj, std::string_view key) {
auto val = yyjson_obj_getn(obj, key.data(), key.length());
if (!yyjson_is_str(val)) { throw "field is not uint64 or null!"; }