Add nlohmann_json benchmarks

This commit is contained in:
John Keiser 2021-01-06 14:38:12 -08:00
parent 6367e55a5f
commit ab859f7952
24 changed files with 220 additions and 41 deletions

View File

@ -49,6 +49,9 @@ if(TARGET benchmark::benchmark)
if(TARGET sajson)
target_link_libraries(bench_ondemand PRIVATE sajson)
endif()
if(TARGET nlohmann_json)
target_link_libraries(bench_ondemand PRIVATE nlohmann_json)
endif()
endif()
endif()

View File

@ -17,6 +17,10 @@ SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
#include "sajson.h"
#endif
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
#include <nlohmann/json.hpp>
#endif
// This has to be last, for reasons I don't yet understand
#include <benchmark/benchmark.h>
@ -27,6 +31,7 @@ SIMDJSON_POP_DISABLE_WARNINGS
#include "partial_tweets/yyjson.h"
#include "partial_tweets/sajson.h"
#include "partial_tweets/rapidjson.h"
#include "partial_tweets/nlohmann_json.h"
#include "large_random/simdjson_dom.h"
#include "large_random/simdjson_ondemand.h"
@ -34,23 +39,27 @@ SIMDJSON_POP_DISABLE_WARNINGS
#include "large_random/yyjson.h"
#include "large_random/sajson.h"
#include "large_random/rapidjson.h"
#include "large_random/nlohmann_json.h"
#include "kostya/simdjson_dom.h"
#include "kostya/simdjson_ondemand.h"
#include "kostya/yyjson.h"
#include "kostya/sajson.h"
#include "kostya/rapidjson.h"
#include "kostya/nlohmann_json.h"
#include "distinct_user_id/simdjson_dom.h"
#include "distinct_user_id/simdjson_ondemand.h"
#include "distinct_user_id/yyjson.h"
#include "distinct_user_id/sajson.h"
#include "distinct_user_id/rapidjson.h"
#include "distinct_user_id/nlohmann_json.h"
#include "find_tweet/simdjson_dom.h"
#include "find_tweet/simdjson_ondemand.h"
#include "find_tweet/yyjson.h"
#include "find_tweet/sajson.h"
#include "find_tweet/rapidjson.h"
#include "find_tweet/nlohmann_json.h"
BENCHMARK_MAIN();

View File

@ -0,0 +1,27 @@
#pragma once
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
#include "distinct_user_id.h"
namespace distinct_user_id {
// nlohmann::json implementation of the distinct_user_id benchmark.
//
// Parses the whole document into a DOM, then collects the "id" of every
// status's "user" plus, when the status has a "retweeted_status" member, the
// id of that retweet's user.
struct nlohmann_json {
  // json:   the JSON text to parse.
  // result: user ids are appended in document order; this function neither
  //         clears nor de-duplicates it.
  // Always returns true. Parse or type errors are expected to surface as
  // nlohmann exceptions rather than as a false return.
  bool run(simdjson::padded_string &json, std::vector<uint64_t> &result) {
    auto root = nlohmann::json::parse(json.data(), json.data() + json.size());
    // Iterate by reference: iterating by value deep-copies each tweet subtree,
    // which adds work to the timed region that has nothing to do with parsing.
    for (auto &tweet : root["statuses"]) {
      result.push_back(tweet["user"]["id"]);
      if (tweet.contains("retweeted_status")) {
        result.push_back(tweet["retweeted_status"]["user"]["id"]);
      }
    }
    return true;
  }
};
BENCHMARK_TEMPLATE(distinct_user_id, nlohmann_json)->UseManualTime();
} // namespace distinct_user_id
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON

View File

@ -7,7 +7,7 @@ namespace find_tweet {
template<typename I>
struct runner : public json_benchmark::file_runner<I> {
std::string_view result;
typename I::StringType result;
bool setup(benchmark::State &state) {
return this->load_json(state, json_benchmark::TWITTER_JSON);

View File

@ -0,0 +1,29 @@
#pragma once
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
#include "find_tweet.h"
namespace find_tweet {
// nlohmann::json implementation of the find_tweet benchmark.
//
// Parses the whole document into a DOM and scans "statuses" for the tweet
// whose "id" equals find_id.
struct nlohmann_json {
  // The result is an owning std::string: the parsed DOM (`root`) is local to
  // run(), so the text has to be copied out before run() returns.
  using StringType=std::string;

  // json:    the JSON text to parse.
  // find_id: the tweet id to look for.
  // result:  receives the "text" of the first matching tweet; left untouched
  //          when no tweet matches.
  // Returns true when a matching tweet was found, false otherwise.
  bool run(simdjson::padded_string &json, uint64_t find_id, std::string &result) {
    auto root = nlohmann::json::parse(json.data(), json.data() + json.size());
    // Iterate by reference: iterating by value deep-copies each tweet subtree
    // just to compare one integer field.
    for (auto &tweet : root["statuses"]) {
      if (tweet["id"] == find_id) {
        result = tweet["text"];
        return true;
      }
    }
    return false;
  }
};
BENCHMARK_TEMPLATE(find_tweet, nlohmann_json)->UseManualTime();
} // namespace find_tweet
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON

View File

@ -9,6 +9,8 @@ namespace find_tweet {
using namespace rapidjson;
struct rapidjson_base {
using StringType=std::string_view;
Document doc{};
bool run(Document &root, uint64_t find_id, std::string_view &result) {

View File

@ -7,6 +7,8 @@
namespace find_tweet {
struct sajson {
using StringType=std::string_view;
size_t ast_buffer_size{0};
size_t *ast_buffer{nullptr};
simdjson_really_inline std::string_view get_string_view(const ::sajson::value &obj, std::string_view key) {

View File

@ -9,6 +9,8 @@ namespace find_tweet {
using namespace simdjson;
struct simdjson_dom {
using StringType=std::string_view;
dom::parser parser{};
bool run(simdjson::padded_string &json, uint64_t find_id, std::string_view &result) {

View File

@ -10,6 +10,8 @@ using namespace simdjson;
using namespace simdjson::builtin;
struct simdjson_ondemand {
using StringType=std::string_view;
ondemand::parser parser{};
bool run(simdjson::padded_string &json, uint64_t find_id, std::string_view &result) {

View File

@ -7,6 +7,8 @@
namespace find_tweet {
struct yyjson_base {
using StringType=std::string_view;
bool run(yyjson_doc *doc, uint64_t find_id, std::string_view &result) {
if (!doc) { return false; }
yyjson_val *root = yyjson_doc_get_root(doc);

View File

@ -6,12 +6,12 @@
namespace json_benchmark {
template<typename T>
static bool diff_results(benchmark::State &state, const T &result, const T &reference);
template<typename T, typename U>
static bool diff_results(benchmark::State &state, const T &result, const U &reference);
template<typename T>
template<typename T, typename U>
struct result_differ {
static bool diff(benchmark::State &state, const T &result, const T &reference) {
static bool diff(benchmark::State &state, const T &result, const U &reference) {
if (result != reference) {
std::stringstream str;
str << "result incorrect: " << result << " ... reference: " << reference;
@ -23,7 +23,7 @@ struct result_differ {
};
template<>
bool result_differ<double>::diff(benchmark::State &state, const double &result, const double &reference) {
bool result_differ<double, double>::diff(benchmark::State &state, const double &result, const double &reference) {
if (result != reference) {
std::stringstream str;
// We print it out using full precision.
@ -39,9 +39,9 @@ bool result_differ<double>::diff(benchmark::State &state, const double &result,
}
template<typename T>
struct result_differ<std::vector<T>> {
static bool diff(benchmark::State &state, const std::vector<T> &result, const std::vector<T> &reference) {
template<typename T, typename U>
struct result_differ<std::vector<T>, std::vector<U>> {
static bool diff(benchmark::State &state, const std::vector<T> &result, const std::vector<U> &reference) {
auto result_iter = result.begin();
auto reference_iter = reference.begin();
while (result_iter != result.end() && reference_iter != reference.end()) {
@ -64,9 +64,9 @@ struct result_differ<std::vector<T>> {
}
};
template<typename T>
static bool diff_results(benchmark::State &state, const T &result, const T &reference) {
return result_differ<T>::diff(state, result, reference);
template<typename T, typename U>
static bool diff_results(benchmark::State &state, const T &result, const U &reference) {
return result_differ<T, U>::diff(state, result, reference);
}
} // namespace json_benchmark

View File

@ -94,7 +94,7 @@ template<typename I> simdjson_really_inline static void kostya(benchmark::State
namespace json_benchmark {
template<>
bool result_differ<kostya::point>::diff(benchmark::State &state, const kostya::point &result, const kostya::point &reference) {
bool result_differ<kostya::point, kostya::point>::diff(benchmark::State &state, const kostya::point &result, const kostya::point &reference) {
return diff_results(state, result.x, reference.x)
&& diff_results(state, result.y, reference.y)
&& diff_results(state, result.z, reference.z);

View File

@ -0,0 +1,23 @@
#pragma once
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
#include "kostya.h"
namespace kostya {
// nlohmann::json implementation of the kostya benchmark.
//
// Parses the whole document into a DOM and converts every entry of the
// top-level "coordinates" array into a kostya::point built from its "x", "y"
// and "z" members.
struct nlohmann_json {
  // json:   the JSON text to parse.
  // result: one point per coordinate is appended, in document order.
  // Always returns true. Parse or type errors are expected to surface as
  // nlohmann exceptions rather than as a false return.
  bool run(simdjson::padded_string &json, std::vector<point> &result) {
    auto root = nlohmann::json::parse(json.data(), json.data() + json.size());
    // Iterate by reference to avoid deep-copying each coordinate object. The
    // loop variable is named `coord` so it does not shadow the `point` type.
    for (auto &coord : root["coordinates"]) {
      result.emplace_back(kostya::point{coord["x"], coord["y"], coord["z"]});
    }
    return true;
  }
};
BENCHMARK_TEMPLATE(kostya, nlohmann_json)->UseManualTime();
} // namespace kostya
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON

View File

@ -77,7 +77,7 @@ template<typename T> static void large_random(benchmark::State &state) {
namespace json_benchmark {
template<>
bool result_differ<large_random::point>::diff(benchmark::State &state, const large_random::point &result, const large_random::point &reference) {
bool result_differ<large_random::point, large_random::point>::diff(benchmark::State &state, const large_random::point &result, const large_random::point &reference) {
return diff_results(state, result.x, reference.x)
&& diff_results(state, result.y, reference.y)
&& diff_results(state, result.z, reference.z);

View File

@ -0,0 +1,22 @@
#pragma once
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
#include "large_random.h"
namespace large_random {
// nlohmann::json implementation of the large_random benchmark.
//
// Parses the whole document into a DOM and converts every element of the
// top-level value into a large_random::point built from its "x", "y" and "z"
// members.
struct nlohmann_json {
  // json:   the JSON text to parse.
  // result: one point per element is appended, in document order.
  // Always returns true. Parse or type errors are expected to surface as
  // nlohmann exceptions rather than as a false return.
  bool run(simdjson::padded_string &json, std::vector<point> &result) {
    // The parsed temporary stays alive for the whole range-for statement.
    // Iterate by reference to avoid deep-copying each element; the loop
    // variable is named `coord` so it does not shadow the `point` type.
    for (auto &coord : nlohmann::json::parse(json.data(), json.data() + json.size())) {
      result.emplace_back(large_random::point{coord["x"], coord["y"], coord["z"]});
    }
    return true;
  }
};
BENCHMARK_TEMPLATE(large_random, nlohmann_json)->UseManualTime();
} // namespace large_random
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON

View File

@ -0,0 +1,40 @@
#pragma once
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
#include "partial_tweets.h"
namespace partial_tweets {
// nlohmann::json implementation of the partial_tweets benchmark.
//
// Parses the whole document into a DOM and extracts a handful of fields from
// every entry of "statuses" into a partial_tweets::tweet.
struct nlohmann_json {
  // Tweets own their strings: the parsed DOM (`root`) is local to run(), so
  // string data has to be copied out before run() returns.
  using StringType=std::string;

  // Converts a JSON value to uint64_t, mapping JSON null to 0.
  // Taken by const reference so the value is not copied just to be inspected.
  simdjson_really_inline uint64_t nullable_int(const nlohmann::json &value) {
    if (value.is_null()) { return 0; }
    return value;
  }

  // json:   the JSON text to parse.
  // result: one tweet per status is appended, in document order.
  // Always returns true. Parse or type errors are expected to surface as
  // nlohmann exceptions rather than as a false return.
  bool run(simdjson::padded_string &json, std::vector<tweet<std::string>> &result) {
    auto root = nlohmann::json::parse(json.data(), json.data() + json.size());
    // Iterate and look up "user" by reference: copies here would deep-clone
    // every tweet and user subtree inside the timed region.
    for (auto &tweet : root["statuses"]) {
      auto &user = tweet["user"];
      // Aggregate-initialised in member order; the tweet's "text" lands in
      // the field named `result`.
      result.emplace_back(partial_tweets::tweet<std::string>{
        tweet["created_at"],
        tweet["id"],
        tweet["text"],
        nullable_int(tweet["in_reply_to_status_id"]),
        { user["id"], user["screen_name"] },
        tweet["retweet_count"],
        tweet["favorite_count"]
      });
    }
    return true;
  }
};
BENCHMARK_TEMPLATE(partial_tweets, nlohmann_json)->UseManualTime();
} // namespace partial_tweets
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON

View File

@ -9,7 +9,7 @@ namespace partial_tweets {
template<typename I>
struct runner : public json_benchmark::file_runner<I> {
std::vector<tweet> result{};
std::vector<tweet<typename I::StringType>> result{};
bool setup(benchmark::State &state) {
return this->load_json(state, json_benchmark::TWITTER_JSON);

View File

@ -9,6 +9,8 @@ namespace partial_tweets {
using namespace rapidjson;
struct rapidjson_base {
using StringType=std::string_view;
Document doc{};
simdjson_really_inline std::string_view get_string_view(Value &object, std::string_view key) {
@ -31,20 +33,20 @@ struct rapidjson_base {
if (!field->value.IsUint64()) { throw "Field is not nullable uint64"; }
return field->value.GetUint64();
}
simdjson_really_inline partial_tweets::twitter_user get_user(Value &object, std::string_view key) {
simdjson_really_inline partial_tweets::twitter_user<std::string_view> get_user(Value &object, std::string_view key) {
auto field = object.FindMember(key.data());
if (field == object.MemberEnd()) { throw "Missing user field"; }
if (!field->value.IsObject()) { throw "User field is not an object"; }
return { get_uint64(field->value, "id"), get_string_view(field->value, "screen_name") };
}
bool run(Document &root, std::vector<tweet> &result) {
bool run(Document &root, std::vector<tweet<std::string_view>> &result) {
if (root.HasParseError() || !root.IsObject()) { return false; }
auto statuses = root.FindMember("statuses");
if (statuses == root.MemberEnd() || !statuses->value.IsArray()) { return false; }
for (auto &tweet : statuses->value.GetArray()) {
if (!tweet.IsObject()) { return false; }
result.emplace_back(partial_tweets::tweet{
result.emplace_back(partial_tweets::tweet<std::string_view>{
get_string_view(tweet, "created_at"),
get_uint64 (tweet, "id"),
get_string_view(tweet, "text"),
@ -60,14 +62,14 @@ struct rapidjson_base {
};
struct rapidjson : rapidjson_base {
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
return rapidjson_base::run(doc.Parse<kParseValidateEncodingFlag>(json.data()), result);
}
};
BENCHMARK_TEMPLATE(partial_tweets, rapidjson)->UseManualTime();
struct rapidjson_insitu : rapidjson_base {
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
return rapidjson_base::run(doc.ParseInsitu<kParseValidateEncodingFlag>(json.data()), result);
}
};

View File

@ -7,6 +7,8 @@
namespace partial_tweets {
struct sajson {
using StringType=std::string_view;
size_t ast_buffer_size{0};
size_t *ast_buffer{nullptr};
simdjson_really_inline std::string_view get_string_view(const ::sajson::value &obj, std::string_view key) {
@ -46,13 +48,13 @@ struct sajson {
if (endptr != &str[val.get_string_length()]) { throw "field is a string, but not an integer string"; }
return result;
}
simdjson_really_inline partial_tweets::twitter_user get_user(const ::sajson::value &obj, std::string_view key) {
simdjson_really_inline partial_tweets::twitter_user<std::string_view> get_user(const ::sajson::value &obj, std::string_view key) {
auto user = obj.get_value_of_key({key.data(), key.length()});
if (user.get_type() != ::sajson::TYPE_OBJECT) { throw "user is not an object"; }
return { get_str_uint64(user, "id_str"), get_string_view(user, "screen_name") };
}
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
if (!ast_buffer) {
ast_buffer_size = json.size();
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t));
@ -71,7 +73,7 @@ struct sajson {
for (size_t i=0; i<statuses.get_length(); i++) {
auto tweet = statuses.get_array_element(i);
if (tweet.get_type() != ::sajson::TYPE_OBJECT) { return false; }
result.emplace_back(partial_tweets::tweet{
result.emplace_back(partial_tweets::tweet<std::string_view>{
get_string_view(tweet, "created_at"),
get_str_uint64 (tweet, "id_str"),
get_string_view(tweet, "text"),

View File

@ -9,6 +9,8 @@ namespace partial_tweets {
using namespace simdjson;
struct simdjson_dom {
using StringType=std::string_view;
dom::parser parser{};
simdjson_really_inline uint64_t nullable_int(dom::element element) {
@ -16,10 +18,10 @@ struct simdjson_dom {
return element;
}
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
for (dom::element tweet : parser.parse(json)["statuses"]) {
auto user = tweet["user"];
result.emplace_back(partial_tweets::tweet{
result.emplace_back(partial_tweets::tweet<std::string_view>{
tweet["created_at"],
tweet["id"],
tweet["text"],

View File

@ -10,6 +10,8 @@ using namespace simdjson;
using namespace simdjson::builtin;
struct simdjson_ondemand {
using StringType=std::string_view;
ondemand::parser parser{};
simdjson_really_inline uint64_t nullable_int(ondemand::value value) {
@ -17,15 +19,15 @@ struct simdjson_ondemand {
return value;
}
simdjson_really_inline twitter_user read_user(ondemand::object user) {
simdjson_really_inline twitter_user<std::string_view> read_user(ondemand::object user) {
return { user.find_field("id"), user.find_field("screen_name") };
}
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
// Walk the document, parsing the tweets as we go
auto doc = parser.iterate(json);
for (ondemand::object tweet : doc.find_field("statuses")) {
result.emplace_back(partial_tweets::tweet{
result.emplace_back(partial_tweets::tweet<std::string_view>{
tweet.find_field("created_at"),
tweet.find_field("id"),
tweet.find_field("text"),

View File

@ -22,15 +22,17 @@ namespace partial_tweets {
// ]
// }
template<typename StringType=std::string_view>
struct tweet {
std::string_view created_at{};
StringType created_at{};
uint64_t id{};
std::string_view result{};
StringType result{};
uint64_t in_reply_to_status_id{};
twitter_user user{};
twitter_user<StringType> user{};
uint64_t retweet_count{};
uint64_t favorite_count{};
simdjson_really_inline bool operator==(const tweet &other) const {
template<typename OtherStringType>
simdjson_really_inline bool operator==(const tweet<OtherStringType> &other) const {
return created_at == other.created_at &&
id == other.id &&
result == other.result &&
@ -39,10 +41,12 @@ struct tweet {
retweet_count == other.retweet_count &&
favorite_count == other.favorite_count;
}
simdjson_really_inline bool operator!=(const tweet &other) const { return !(*this == other); }
template<typename OtherStringType>
simdjson_really_inline bool operator!=(const tweet<OtherStringType> &other) const { return !(*this == other); }
};
simdjson_unused static std::ostream &operator<<(std::ostream &o, const tweet &t) {
template<typename StringType>
simdjson_unused static std::ostream &operator<<(std::ostream &o, const tweet<StringType> &t) {
o << "created_at: " << t.created_at << std::endl;
o << "id: " << t.id << std::endl;
o << "result: " << t.result << std::endl;

View File

@ -3,11 +3,13 @@
namespace partial_tweets {
template<typename StringType=std::string_view>
struct twitter_user {
uint64_t id{};
std::string_view screen_name{};
StringType screen_name{};
bool operator==(const twitter_user &other) const {
template<typename OtherStringType>
bool operator==(const twitter_user<OtherStringType> &other) const {
return id == other.id &&
screen_name == other.screen_name;
}

View File

@ -7,6 +7,8 @@
namespace partial_tweets {
struct yyjson_base {
using StringType=std::string_view;
simdjson_really_inline std::string_view get_string_view(yyjson_val *obj, std::string_view key) {
auto val = yyjson_obj_getn(obj, key.data(), key.length());
if (!yyjson_is_str(val)) { throw "field is not uint64 or null!"; }
@ -24,13 +26,13 @@ struct yyjson_base {
if (type != YYJSON_TYPE_NUM && type != YYJSON_TYPE_NULL ) { throw "field is not uint64 or null!"; }
return yyjson_get_uint(val);
}
simdjson_really_inline partial_tweets::twitter_user get_user(yyjson_val *obj, std::string_view key) {
simdjson_really_inline partial_tweets::twitter_user<std::string_view> get_user(yyjson_val *obj, std::string_view key) {
auto user = yyjson_obj_getn(obj, key.data(), key.length());
if (!yyjson_is_obj(user)) { throw "missing twitter user field!"; }
return { get_uint64(user, "id"), get_string_view(user, "screen_name") };
}
bool run(yyjson_doc *doc, std::vector<tweet> &result) {
bool run(yyjson_doc *doc, std::vector<tweet<std::string_view>> &result) {
if (!doc) { return false; }
yyjson_val *root = yyjson_doc_get_root(doc);
if (!yyjson_is_obj(root)) { return false; }
@ -43,7 +45,7 @@ struct yyjson_base {
yyjson_arr_foreach(statuses, tweet_idx, tweets_max, tweet) {
if (!yyjson_is_obj(tweet)) { return false; }
// TODO these can't actually handle errors
result.emplace_back(partial_tweets::tweet{
result.emplace_back(partial_tweets::tweet<std::string_view>{
get_string_view(tweet, "created_at"),
get_uint64 (tweet, "id"),
get_string_view(tweet, "text"),
@ -59,14 +61,14 @@ struct yyjson_base {
};
struct yyjson : yyjson_base {
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
return yyjson_base::run(yyjson_read(json.data(), json.size(), 0), result);
}
};
BENCHMARK_TEMPLATE(partial_tweets, yyjson)->UseManualTime();
struct yyjson_insitu : yyjson_base {
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
return yyjson_base::run(yyjson_read_opts(json.data(), json.size(), YYJSON_READ_INSITU, 0, 0), result);
}
};