Add nlohmann_json benchmarks
This commit is contained in:
parent
6367e55a5f
commit
ab859f7952
|
@ -49,6 +49,9 @@ if(TARGET benchmark::benchmark)
|
||||||
if(TARGET sajson)
|
if(TARGET sajson)
|
||||||
target_link_libraries(bench_ondemand PRIVATE sajson)
|
target_link_libraries(bench_ondemand PRIVATE sajson)
|
||||||
endif()
|
endif()
|
||||||
|
if(TARGET nlohmann_json)
|
||||||
|
target_link_libraries(bench_ondemand PRIVATE nlohmann_json)
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,10 @@ SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
|
||||||
#include "sajson.h"
|
#include "sajson.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||||
|
#include <nlohmann/json.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
// This has to be last, for reasons I don't yet understand
|
// This has to be last, for reasons I don't yet understand
|
||||||
#include <benchmark/benchmark.h>
|
#include <benchmark/benchmark.h>
|
||||||
|
|
||||||
|
@ -27,6 +31,7 @@ SIMDJSON_POP_DISABLE_WARNINGS
|
||||||
#include "partial_tweets/yyjson.h"
|
#include "partial_tweets/yyjson.h"
|
||||||
#include "partial_tweets/sajson.h"
|
#include "partial_tweets/sajson.h"
|
||||||
#include "partial_tweets/rapidjson.h"
|
#include "partial_tweets/rapidjson.h"
|
||||||
|
#include "partial_tweets/nlohmann_json.h"
|
||||||
|
|
||||||
#include "large_random/simdjson_dom.h"
|
#include "large_random/simdjson_dom.h"
|
||||||
#include "large_random/simdjson_ondemand.h"
|
#include "large_random/simdjson_ondemand.h"
|
||||||
|
@ -34,23 +39,27 @@ SIMDJSON_POP_DISABLE_WARNINGS
|
||||||
#include "large_random/yyjson.h"
|
#include "large_random/yyjson.h"
|
||||||
#include "large_random/sajson.h"
|
#include "large_random/sajson.h"
|
||||||
#include "large_random/rapidjson.h"
|
#include "large_random/rapidjson.h"
|
||||||
|
#include "large_random/nlohmann_json.h"
|
||||||
|
|
||||||
#include "kostya/simdjson_dom.h"
|
#include "kostya/simdjson_dom.h"
|
||||||
#include "kostya/simdjson_ondemand.h"
|
#include "kostya/simdjson_ondemand.h"
|
||||||
#include "kostya/yyjson.h"
|
#include "kostya/yyjson.h"
|
||||||
#include "kostya/sajson.h"
|
#include "kostya/sajson.h"
|
||||||
#include "kostya/rapidjson.h"
|
#include "kostya/rapidjson.h"
|
||||||
|
#include "kostya/nlohmann_json.h"
|
||||||
|
|
||||||
#include "distinct_user_id/simdjson_dom.h"
|
#include "distinct_user_id/simdjson_dom.h"
|
||||||
#include "distinct_user_id/simdjson_ondemand.h"
|
#include "distinct_user_id/simdjson_ondemand.h"
|
||||||
#include "distinct_user_id/yyjson.h"
|
#include "distinct_user_id/yyjson.h"
|
||||||
#include "distinct_user_id/sajson.h"
|
#include "distinct_user_id/sajson.h"
|
||||||
#include "distinct_user_id/rapidjson.h"
|
#include "distinct_user_id/rapidjson.h"
|
||||||
|
#include "distinct_user_id/nlohmann_json.h"
|
||||||
|
|
||||||
#include "find_tweet/simdjson_dom.h"
|
#include "find_tweet/simdjson_dom.h"
|
||||||
#include "find_tweet/simdjson_ondemand.h"
|
#include "find_tweet/simdjson_ondemand.h"
|
||||||
#include "find_tweet/yyjson.h"
|
#include "find_tweet/yyjson.h"
|
||||||
#include "find_tweet/sajson.h"
|
#include "find_tweet/sajson.h"
|
||||||
#include "find_tweet/rapidjson.h"
|
#include "find_tweet/rapidjson.h"
|
||||||
|
#include "find_tweet/nlohmann_json.h"
|
||||||
|
|
||||||
BENCHMARK_MAIN();
|
BENCHMARK_MAIN();
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||||
|
|
||||||
|
#include "distinct_user_id.h"
|
||||||
|
|
||||||
|
namespace distinct_user_id {
|
||||||
|
|
||||||
|
struct nlohmann_json {
|
||||||
|
bool run(simdjson::padded_string &json, std::vector<uint64_t> &result) {
|
||||||
|
auto root = nlohmann::json::parse(json.data(), json.data() + json.size());
|
||||||
|
for (auto tweet : root["statuses"]) {
|
||||||
|
result.push_back(tweet["user"]["id"]);
|
||||||
|
if (tweet.contains("retweeted_status")) {
|
||||||
|
result.push_back(tweet["retweeted_status"]["user"]["id"]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(distinct_user_id, nlohmann_json)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace distinct_user_id
|
||||||
|
|
||||||
|
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON
|
|
@ -7,7 +7,7 @@ namespace find_tweet {
|
||||||
|
|
||||||
template<typename I>
|
template<typename I>
|
||||||
struct runner : public json_benchmark::file_runner<I> {
|
struct runner : public json_benchmark::file_runner<I> {
|
||||||
std::string_view result;
|
typename I::StringType result;
|
||||||
|
|
||||||
bool setup(benchmark::State &state) {
|
bool setup(benchmark::State &state) {
|
||||||
return this->load_json(state, json_benchmark::TWITTER_JSON);
|
return this->load_json(state, json_benchmark::TWITTER_JSON);
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||||
|
|
||||||
|
#include "find_tweet.h"
|
||||||
|
|
||||||
|
namespace find_tweet {
|
||||||
|
|
||||||
|
struct nlohmann_json {
|
||||||
|
using StringType=std::string;
|
||||||
|
|
||||||
|
bool run(simdjson::padded_string &json, uint64_t find_id, std::string &result) {
|
||||||
|
auto root = nlohmann::json::parse(json.data(), json.data() + json.size());
|
||||||
|
for (auto tweet : root["statuses"]) {
|
||||||
|
if (tweet["id"] == find_id) {
|
||||||
|
result = tweet["text"];
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(find_tweet, nlohmann_json)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace find_tweet
|
||||||
|
|
||||||
|
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON
|
|
@ -9,6 +9,8 @@ namespace find_tweet {
|
||||||
using namespace rapidjson;
|
using namespace rapidjson;
|
||||||
|
|
||||||
struct rapidjson_base {
|
struct rapidjson_base {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
Document doc{};
|
Document doc{};
|
||||||
|
|
||||||
bool run(Document &root, uint64_t find_id, std::string_view &result) {
|
bool run(Document &root, uint64_t find_id, std::string_view &result) {
|
||||||
|
|
|
@ -7,6 +7,8 @@
|
||||||
namespace find_tweet {
|
namespace find_tweet {
|
||||||
|
|
||||||
struct sajson {
|
struct sajson {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
size_t ast_buffer_size{0};
|
size_t ast_buffer_size{0};
|
||||||
size_t *ast_buffer{nullptr};
|
size_t *ast_buffer{nullptr};
|
||||||
simdjson_really_inline std::string_view get_string_view(const ::sajson::value &obj, std::string_view key) {
|
simdjson_really_inline std::string_view get_string_view(const ::sajson::value &obj, std::string_view key) {
|
||||||
|
|
|
@ -9,6 +9,8 @@ namespace find_tweet {
|
||||||
using namespace simdjson;
|
using namespace simdjson;
|
||||||
|
|
||||||
struct simdjson_dom {
|
struct simdjson_dom {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
dom::parser parser{};
|
dom::parser parser{};
|
||||||
|
|
||||||
bool run(simdjson::padded_string &json, uint64_t find_id, std::string_view &result) {
|
bool run(simdjson::padded_string &json, uint64_t find_id, std::string_view &result) {
|
||||||
|
|
|
@ -10,6 +10,8 @@ using namespace simdjson;
|
||||||
using namespace simdjson::builtin;
|
using namespace simdjson::builtin;
|
||||||
|
|
||||||
struct simdjson_ondemand {
|
struct simdjson_ondemand {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
ondemand::parser parser{};
|
ondemand::parser parser{};
|
||||||
|
|
||||||
bool run(simdjson::padded_string &json, uint64_t find_id, std::string_view &result) {
|
bool run(simdjson::padded_string &json, uint64_t find_id, std::string_view &result) {
|
||||||
|
|
|
@ -7,6 +7,8 @@
|
||||||
namespace find_tweet {
|
namespace find_tweet {
|
||||||
|
|
||||||
struct yyjson_base {
|
struct yyjson_base {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
bool run(yyjson_doc *doc, uint64_t find_id, std::string_view &result) {
|
bool run(yyjson_doc *doc, uint64_t find_id, std::string_view &result) {
|
||||||
if (!doc) { return false; }
|
if (!doc) { return false; }
|
||||||
yyjson_val *root = yyjson_doc_get_root(doc);
|
yyjson_val *root = yyjson_doc_get_root(doc);
|
||||||
|
|
|
@ -6,12 +6,12 @@
|
||||||
|
|
||||||
namespace json_benchmark {
|
namespace json_benchmark {
|
||||||
|
|
||||||
template<typename T>
|
template<typename T, typename U>
|
||||||
static bool diff_results(benchmark::State &state, const T &result, const T &reference);
|
static bool diff_results(benchmark::State &state, const T &result, const U &reference);
|
||||||
|
|
||||||
template<typename T>
|
template<typename T, typename U>
|
||||||
struct result_differ {
|
struct result_differ {
|
||||||
static bool diff(benchmark::State &state, const T &result, const T &reference) {
|
static bool diff(benchmark::State &state, const T &result, const U &reference) {
|
||||||
if (result != reference) {
|
if (result != reference) {
|
||||||
std::stringstream str;
|
std::stringstream str;
|
||||||
str << "result incorrect: " << result << " ... reference: " << reference;
|
str << "result incorrect: " << result << " ... reference: " << reference;
|
||||||
|
@ -23,7 +23,7 @@ struct result_differ {
|
||||||
};
|
};
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
bool result_differ<double>::diff(benchmark::State &state, const double &result, const double &reference) {
|
bool result_differ<double, double>::diff(benchmark::State &state, const double &result, const double &reference) {
|
||||||
if (result != reference) {
|
if (result != reference) {
|
||||||
std::stringstream str;
|
std::stringstream str;
|
||||||
// We print it out using full precision.
|
// We print it out using full precision.
|
||||||
|
@ -39,9 +39,9 @@ bool result_differ<double>::diff(benchmark::State &state, const double &result,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T, typename U>
|
||||||
struct result_differ<std::vector<T>> {
|
struct result_differ<std::vector<T>, std::vector<U>> {
|
||||||
static bool diff(benchmark::State &state, const std::vector<T> &result, const std::vector<T> &reference) {
|
static bool diff(benchmark::State &state, const std::vector<T> &result, const std::vector<U> &reference) {
|
||||||
auto result_iter = result.begin();
|
auto result_iter = result.begin();
|
||||||
auto reference_iter = reference.begin();
|
auto reference_iter = reference.begin();
|
||||||
while (result_iter != result.end() && reference_iter != reference.end()) {
|
while (result_iter != result.end() && reference_iter != reference.end()) {
|
||||||
|
@ -64,9 +64,9 @@ struct result_differ<std::vector<T>> {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename T>
|
template<typename T, typename U>
|
||||||
static bool diff_results(benchmark::State &state, const T &result, const T &reference) {
|
static bool diff_results(benchmark::State &state, const T &result, const U &reference) {
|
||||||
return result_differ<T>::diff(state, result, reference);
|
return result_differ<T, U>::diff(state, result, reference);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace json_benchmark
|
} // namespace json_benchmark
|
||||||
|
|
|
@ -94,7 +94,7 @@ template<typename I> simdjson_really_inline static void kostya(benchmark::State
|
||||||
|
|
||||||
namespace json_benchmark {
|
namespace json_benchmark {
|
||||||
template<>
|
template<>
|
||||||
bool result_differ<kostya::point>::diff(benchmark::State &state, const kostya::point &result, const kostya::point &reference) {
|
bool result_differ<kostya::point, kostya::point>::diff(benchmark::State &state, const kostya::point &result, const kostya::point &reference) {
|
||||||
return diff_results(state, result.x, reference.x)
|
return diff_results(state, result.x, reference.x)
|
||||||
&& diff_results(state, result.y, reference.y)
|
&& diff_results(state, result.y, reference.y)
|
||||||
&& diff_results(state, result.z, reference.z);
|
&& diff_results(state, result.z, reference.z);
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||||
|
|
||||||
|
#include "kostya.h"
|
||||||
|
|
||||||
|
namespace kostya {
|
||||||
|
|
||||||
|
struct nlohmann_json {
|
||||||
|
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||||
|
auto root = nlohmann::json::parse(json.data(), json.data() + json.size());
|
||||||
|
for (auto point : root["coordinates"]) {
|
||||||
|
result.emplace_back(kostya::point{point["x"], point["y"], point["z"]});
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(kostya, nlohmann_json)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace kostya
|
||||||
|
|
||||||
|
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON
|
|
@ -77,7 +77,7 @@ template<typename T> static void large_random(benchmark::State &state) {
|
||||||
|
|
||||||
namespace json_benchmark {
|
namespace json_benchmark {
|
||||||
template<>
|
template<>
|
||||||
bool result_differ<large_random::point>::diff(benchmark::State &state, const large_random::point &result, const large_random::point &reference) {
|
bool result_differ<large_random::point, large_random::point>::diff(benchmark::State &state, const large_random::point &result, const large_random::point &reference) {
|
||||||
return diff_results(state, result.x, reference.x)
|
return diff_results(state, result.x, reference.x)
|
||||||
&& diff_results(state, result.y, reference.y)
|
&& diff_results(state, result.y, reference.y)
|
||||||
&& diff_results(state, result.z, reference.z);
|
&& diff_results(state, result.z, reference.z);
|
||||||
|
|
|
@ -0,0 +1,22 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||||
|
|
||||||
|
#include "large_random.h"
|
||||||
|
|
||||||
|
namespace large_random {
|
||||||
|
|
||||||
|
struct nlohmann_json {
|
||||||
|
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||||
|
for (auto point : nlohmann::json::parse(json.data(), json.data() + json.size())) {
|
||||||
|
result.emplace_back(large_random::point{point["x"], point["y"], point["z"]});
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(large_random, nlohmann_json)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace large_random
|
||||||
|
|
||||||
|
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON
|
|
@ -0,0 +1,40 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||||
|
|
||||||
|
#include "partial_tweets.h"
|
||||||
|
|
||||||
|
namespace partial_tweets {
|
||||||
|
|
||||||
|
struct nlohmann_json {
|
||||||
|
using StringType=std::string;
|
||||||
|
|
||||||
|
simdjson_really_inline uint64_t nullable_int(nlohmann::json value) {
|
||||||
|
if (value.is_null()) { return 0; }
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool run(simdjson::padded_string &json, std::vector<tweet<std::string>> &result) {
|
||||||
|
auto root = nlohmann::json::parse(json.data(), json.data() + json.size());
|
||||||
|
for (auto tweet : root["statuses"]) {
|
||||||
|
auto user = tweet["user"];
|
||||||
|
result.emplace_back(partial_tweets::tweet<std::string>{
|
||||||
|
tweet["created_at"],
|
||||||
|
tweet["id"],
|
||||||
|
tweet["text"],
|
||||||
|
nullable_int(tweet["in_reply_to_status_id"]),
|
||||||
|
{ user["id"], user["screen_name"] },
|
||||||
|
tweet["retweet_count"],
|
||||||
|
tweet["favorite_count"]
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(partial_tweets, nlohmann_json)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace partial_tweets
|
||||||
|
|
||||||
|
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON
|
|
@ -9,7 +9,7 @@ namespace partial_tweets {
|
||||||
|
|
||||||
template<typename I>
|
template<typename I>
|
||||||
struct runner : public json_benchmark::file_runner<I> {
|
struct runner : public json_benchmark::file_runner<I> {
|
||||||
std::vector<tweet> result{};
|
std::vector<tweet<typename I::StringType>> result{};
|
||||||
|
|
||||||
bool setup(benchmark::State &state) {
|
bool setup(benchmark::State &state) {
|
||||||
return this->load_json(state, json_benchmark::TWITTER_JSON);
|
return this->load_json(state, json_benchmark::TWITTER_JSON);
|
||||||
|
|
|
@ -9,6 +9,8 @@ namespace partial_tweets {
|
||||||
using namespace rapidjson;
|
using namespace rapidjson;
|
||||||
|
|
||||||
struct rapidjson_base {
|
struct rapidjson_base {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
Document doc{};
|
Document doc{};
|
||||||
|
|
||||||
simdjson_really_inline std::string_view get_string_view(Value &object, std::string_view key) {
|
simdjson_really_inline std::string_view get_string_view(Value &object, std::string_view key) {
|
||||||
|
@ -31,20 +33,20 @@ struct rapidjson_base {
|
||||||
if (!field->value.IsUint64()) { throw "Field is not nullable uint64"; }
|
if (!field->value.IsUint64()) { throw "Field is not nullable uint64"; }
|
||||||
return field->value.GetUint64();
|
return field->value.GetUint64();
|
||||||
}
|
}
|
||||||
simdjson_really_inline partial_tweets::twitter_user get_user(Value &object, std::string_view key) {
|
simdjson_really_inline partial_tweets::twitter_user<std::string_view> get_user(Value &object, std::string_view key) {
|
||||||
auto field = object.FindMember(key.data());
|
auto field = object.FindMember(key.data());
|
||||||
if (field == object.MemberEnd()) { throw "Missing user field"; }
|
if (field == object.MemberEnd()) { throw "Missing user field"; }
|
||||||
if (!field->value.IsObject()) { throw "User field is not an object"; }
|
if (!field->value.IsObject()) { throw "User field is not an object"; }
|
||||||
return { get_uint64(field->value, "id"), get_string_view(field->value, "screen_name") };
|
return { get_uint64(field->value, "id"), get_string_view(field->value, "screen_name") };
|
||||||
}
|
}
|
||||||
|
|
||||||
bool run(Document &root, std::vector<tweet> &result) {
|
bool run(Document &root, std::vector<tweet<std::string_view>> &result) {
|
||||||
if (root.HasParseError() || !root.IsObject()) { return false; }
|
if (root.HasParseError() || !root.IsObject()) { return false; }
|
||||||
auto statuses = root.FindMember("statuses");
|
auto statuses = root.FindMember("statuses");
|
||||||
if (statuses == root.MemberEnd() || !statuses->value.IsArray()) { return false; }
|
if (statuses == root.MemberEnd() || !statuses->value.IsArray()) { return false; }
|
||||||
for (auto &tweet : statuses->value.GetArray()) {
|
for (auto &tweet : statuses->value.GetArray()) {
|
||||||
if (!tweet.IsObject()) { return false; }
|
if (!tweet.IsObject()) { return false; }
|
||||||
result.emplace_back(partial_tweets::tweet{
|
result.emplace_back(partial_tweets::tweet<std::string_view>{
|
||||||
get_string_view(tweet, "created_at"),
|
get_string_view(tweet, "created_at"),
|
||||||
get_uint64 (tweet, "id"),
|
get_uint64 (tweet, "id"),
|
||||||
get_string_view(tweet, "text"),
|
get_string_view(tweet, "text"),
|
||||||
|
@ -60,14 +62,14 @@ struct rapidjson_base {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct rapidjson : rapidjson_base {
|
struct rapidjson : rapidjson_base {
|
||||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||||
return rapidjson_base::run(doc.Parse<kParseValidateEncodingFlag>(json.data()), result);
|
return rapidjson_base::run(doc.Parse<kParseValidateEncodingFlag>(json.data()), result);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
BENCHMARK_TEMPLATE(partial_tweets, rapidjson)->UseManualTime();
|
BENCHMARK_TEMPLATE(partial_tweets, rapidjson)->UseManualTime();
|
||||||
|
|
||||||
struct rapidjson_insitu : rapidjson_base {
|
struct rapidjson_insitu : rapidjson_base {
|
||||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||||
return rapidjson_base::run(doc.ParseInsitu<kParseValidateEncodingFlag>(json.data()), result);
|
return rapidjson_base::run(doc.ParseInsitu<kParseValidateEncodingFlag>(json.data()), result);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -7,6 +7,8 @@
|
||||||
namespace partial_tweets {
|
namespace partial_tweets {
|
||||||
|
|
||||||
struct sajson {
|
struct sajson {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
size_t ast_buffer_size{0};
|
size_t ast_buffer_size{0};
|
||||||
size_t *ast_buffer{nullptr};
|
size_t *ast_buffer{nullptr};
|
||||||
simdjson_really_inline std::string_view get_string_view(const ::sajson::value &obj, std::string_view key) {
|
simdjson_really_inline std::string_view get_string_view(const ::sajson::value &obj, std::string_view key) {
|
||||||
|
@ -46,13 +48,13 @@ struct sajson {
|
||||||
if (endptr != &str[val.get_string_length()]) { throw "field is a string, but not an integer string"; }
|
if (endptr != &str[val.get_string_length()]) { throw "field is a string, but not an integer string"; }
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
simdjson_really_inline partial_tweets::twitter_user get_user(const ::sajson::value &obj, std::string_view key) {
|
simdjson_really_inline partial_tweets::twitter_user<std::string_view> get_user(const ::sajson::value &obj, std::string_view key) {
|
||||||
auto user = obj.get_value_of_key({key.data(), key.length()});
|
auto user = obj.get_value_of_key({key.data(), key.length()});
|
||||||
if (user.get_type() != ::sajson::TYPE_OBJECT) { throw "user is not an object"; }
|
if (user.get_type() != ::sajson::TYPE_OBJECT) { throw "user is not an object"; }
|
||||||
return { get_str_uint64(user, "id_str"), get_string_view(user, "screen_name") };
|
return { get_str_uint64(user, "id_str"), get_string_view(user, "screen_name") };
|
||||||
}
|
}
|
||||||
|
|
||||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||||
if (!ast_buffer) {
|
if (!ast_buffer) {
|
||||||
ast_buffer_size = json.size();
|
ast_buffer_size = json.size();
|
||||||
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t));
|
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t));
|
||||||
|
@ -71,7 +73,7 @@ struct sajson {
|
||||||
for (size_t i=0; i<statuses.get_length(); i++) {
|
for (size_t i=0; i<statuses.get_length(); i++) {
|
||||||
auto tweet = statuses.get_array_element(i);
|
auto tweet = statuses.get_array_element(i);
|
||||||
if (tweet.get_type() != ::sajson::TYPE_OBJECT) { return false; }
|
if (tweet.get_type() != ::sajson::TYPE_OBJECT) { return false; }
|
||||||
result.emplace_back(partial_tweets::tweet{
|
result.emplace_back(partial_tweets::tweet<std::string_view>{
|
||||||
get_string_view(tweet, "created_at"),
|
get_string_view(tweet, "created_at"),
|
||||||
get_str_uint64 (tweet, "id_str"),
|
get_str_uint64 (tweet, "id_str"),
|
||||||
get_string_view(tweet, "text"),
|
get_string_view(tweet, "text"),
|
||||||
|
|
|
@ -9,6 +9,8 @@ namespace partial_tweets {
|
||||||
using namespace simdjson;
|
using namespace simdjson;
|
||||||
|
|
||||||
struct simdjson_dom {
|
struct simdjson_dom {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
dom::parser parser{};
|
dom::parser parser{};
|
||||||
|
|
||||||
simdjson_really_inline uint64_t nullable_int(dom::element element) {
|
simdjson_really_inline uint64_t nullable_int(dom::element element) {
|
||||||
|
@ -16,10 +18,10 @@ struct simdjson_dom {
|
||||||
return element;
|
return element;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||||
for (dom::element tweet : parser.parse(json)["statuses"]) {
|
for (dom::element tweet : parser.parse(json)["statuses"]) {
|
||||||
auto user = tweet["user"];
|
auto user = tweet["user"];
|
||||||
result.emplace_back(partial_tweets::tweet{
|
result.emplace_back(partial_tweets::tweet<std::string_view>{
|
||||||
tweet["created_at"],
|
tweet["created_at"],
|
||||||
tweet["id"],
|
tweet["id"],
|
||||||
tweet["text"],
|
tweet["text"],
|
||||||
|
|
|
@ -10,6 +10,8 @@ using namespace simdjson;
|
||||||
using namespace simdjson::builtin;
|
using namespace simdjson::builtin;
|
||||||
|
|
||||||
struct simdjson_ondemand {
|
struct simdjson_ondemand {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
ondemand::parser parser{};
|
ondemand::parser parser{};
|
||||||
|
|
||||||
simdjson_really_inline uint64_t nullable_int(ondemand::value value) {
|
simdjson_really_inline uint64_t nullable_int(ondemand::value value) {
|
||||||
|
@ -17,15 +19,15 @@ struct simdjson_ondemand {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_really_inline twitter_user read_user(ondemand::object user) {
|
simdjson_really_inline twitter_user<std::string_view> read_user(ondemand::object user) {
|
||||||
return { user.find_field("id"), user.find_field("screen_name") };
|
return { user.find_field("id"), user.find_field("screen_name") };
|
||||||
}
|
}
|
||||||
|
|
||||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||||
// Walk the document, parsing the tweets as we go
|
// Walk the document, parsing the tweets as we go
|
||||||
auto doc = parser.iterate(json);
|
auto doc = parser.iterate(json);
|
||||||
for (ondemand::object tweet : doc.find_field("statuses")) {
|
for (ondemand::object tweet : doc.find_field("statuses")) {
|
||||||
result.emplace_back(partial_tweets::tweet{
|
result.emplace_back(partial_tweets::tweet<std::string_view>{
|
||||||
tweet.find_field("created_at"),
|
tweet.find_field("created_at"),
|
||||||
tweet.find_field("id"),
|
tweet.find_field("id"),
|
||||||
tweet.find_field("text"),
|
tweet.find_field("text"),
|
||||||
|
|
|
@ -22,15 +22,17 @@ namespace partial_tweets {
|
||||||
// ]
|
// ]
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
template<typename StringType=std::string_view>
|
||||||
struct tweet {
|
struct tweet {
|
||||||
std::string_view created_at{};
|
StringType created_at{};
|
||||||
uint64_t id{};
|
uint64_t id{};
|
||||||
std::string_view result{};
|
StringType result{};
|
||||||
uint64_t in_reply_to_status_id{};
|
uint64_t in_reply_to_status_id{};
|
||||||
twitter_user user{};
|
twitter_user<StringType> user{};
|
||||||
uint64_t retweet_count{};
|
uint64_t retweet_count{};
|
||||||
uint64_t favorite_count{};
|
uint64_t favorite_count{};
|
||||||
simdjson_really_inline bool operator==(const tweet &other) const {
|
template<typename OtherStringType>
|
||||||
|
simdjson_really_inline bool operator==(const tweet<OtherStringType> &other) const {
|
||||||
return created_at == other.created_at &&
|
return created_at == other.created_at &&
|
||||||
id == other.id &&
|
id == other.id &&
|
||||||
result == other.result &&
|
result == other.result &&
|
||||||
|
@ -39,10 +41,12 @@ struct tweet {
|
||||||
retweet_count == other.retweet_count &&
|
retweet_count == other.retweet_count &&
|
||||||
favorite_count == other.favorite_count;
|
favorite_count == other.favorite_count;
|
||||||
}
|
}
|
||||||
simdjson_really_inline bool operator!=(const tweet &other) const { return !(*this == other); }
|
template<typename OtherStringType>
|
||||||
|
simdjson_really_inline bool operator!=(const tweet<OtherStringType> &other) const { return !(*this == other); }
|
||||||
};
|
};
|
||||||
|
|
||||||
simdjson_unused static std::ostream &operator<<(std::ostream &o, const tweet &t) {
|
template<typename StringType>
|
||||||
|
simdjson_unused static std::ostream &operator<<(std::ostream &o, const tweet<StringType> &t) {
|
||||||
o << "created_at: " << t.created_at << std::endl;
|
o << "created_at: " << t.created_at << std::endl;
|
||||||
o << "id: " << t.id << std::endl;
|
o << "id: " << t.id << std::endl;
|
||||||
o << "result: " << t.result << std::endl;
|
o << "result: " << t.result << std::endl;
|
||||||
|
|
|
@ -3,11 +3,13 @@
|
||||||
|
|
||||||
namespace partial_tweets {
|
namespace partial_tweets {
|
||||||
|
|
||||||
|
template<typename StringType=std::string_view>
|
||||||
struct twitter_user {
|
struct twitter_user {
|
||||||
uint64_t id{};
|
uint64_t id{};
|
||||||
std::string_view screen_name{};
|
StringType screen_name{};
|
||||||
|
|
||||||
bool operator==(const twitter_user &other) const {
|
template<typename OtherStringType>
|
||||||
|
bool operator==(const twitter_user<OtherStringType> &other) const {
|
||||||
return id == other.id &&
|
return id == other.id &&
|
||||||
screen_name == other.screen_name;
|
screen_name == other.screen_name;
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,6 +7,8 @@
|
||||||
namespace partial_tweets {
|
namespace partial_tweets {
|
||||||
|
|
||||||
struct yyjson_base {
|
struct yyjson_base {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
simdjson_really_inline std::string_view get_string_view(yyjson_val *obj, std::string_view key) {
|
simdjson_really_inline std::string_view get_string_view(yyjson_val *obj, std::string_view key) {
|
||||||
auto val = yyjson_obj_getn(obj, key.data(), key.length());
|
auto val = yyjson_obj_getn(obj, key.data(), key.length());
|
||||||
if (!yyjson_is_str(val)) { throw "field is not uint64 or null!"; }
|
if (!yyjson_is_str(val)) { throw "field is not uint64 or null!"; }
|
||||||
|
@ -24,13 +26,13 @@ struct yyjson_base {
|
||||||
if (type != YYJSON_TYPE_NUM && type != YYJSON_TYPE_NULL ) { throw "field is not uint64 or null!"; }
|
if (type != YYJSON_TYPE_NUM && type != YYJSON_TYPE_NULL ) { throw "field is not uint64 or null!"; }
|
||||||
return yyjson_get_uint(val);
|
return yyjson_get_uint(val);
|
||||||
}
|
}
|
||||||
simdjson_really_inline partial_tweets::twitter_user get_user(yyjson_val *obj, std::string_view key) {
|
simdjson_really_inline partial_tweets::twitter_user<std::string_view> get_user(yyjson_val *obj, std::string_view key) {
|
||||||
auto user = yyjson_obj_getn(obj, key.data(), key.length());
|
auto user = yyjson_obj_getn(obj, key.data(), key.length());
|
||||||
if (!yyjson_is_obj(user)) { throw "missing twitter user field!"; }
|
if (!yyjson_is_obj(user)) { throw "missing twitter user field!"; }
|
||||||
return { get_uint64(user, "id"), get_string_view(user, "screen_name") };
|
return { get_uint64(user, "id"), get_string_view(user, "screen_name") };
|
||||||
}
|
}
|
||||||
|
|
||||||
bool run(yyjson_doc *doc, std::vector<tweet> &result) {
|
bool run(yyjson_doc *doc, std::vector<tweet<std::string_view>> &result) {
|
||||||
if (!doc) { return false; }
|
if (!doc) { return false; }
|
||||||
yyjson_val *root = yyjson_doc_get_root(doc);
|
yyjson_val *root = yyjson_doc_get_root(doc);
|
||||||
if (!yyjson_is_obj(root)) { return false; }
|
if (!yyjson_is_obj(root)) { return false; }
|
||||||
|
@ -43,7 +45,7 @@ struct yyjson_base {
|
||||||
yyjson_arr_foreach(statuses, tweet_idx, tweets_max, tweet) {
|
yyjson_arr_foreach(statuses, tweet_idx, tweets_max, tweet) {
|
||||||
if (!yyjson_is_obj(tweet)) { return false; }
|
if (!yyjson_is_obj(tweet)) { return false; }
|
||||||
// TODO these can't actually handle errors
|
// TODO these can't actually handle errors
|
||||||
result.emplace_back(partial_tweets::tweet{
|
result.emplace_back(partial_tweets::tweet<std::string_view>{
|
||||||
get_string_view(tweet, "created_at"),
|
get_string_view(tweet, "created_at"),
|
||||||
get_uint64 (tweet, "id"),
|
get_uint64 (tweet, "id"),
|
||||||
get_string_view(tweet, "text"),
|
get_string_view(tweet, "text"),
|
||||||
|
@ -59,14 +61,14 @@ struct yyjson_base {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct yyjson : yyjson_base {
|
struct yyjson : yyjson_base {
|
||||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||||
return yyjson_base::run(yyjson_read(json.data(), json.size(), 0), result);
|
return yyjson_base::run(yyjson_read(json.data(), json.size(), 0), result);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
BENCHMARK_TEMPLATE(partial_tweets, yyjson)->UseManualTime();
|
BENCHMARK_TEMPLATE(partial_tweets, yyjson)->UseManualTime();
|
||||||
|
|
||||||
struct yyjson_insitu : yyjson_base {
|
struct yyjson_insitu : yyjson_base {
|
||||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||||
return yyjson_base::run(yyjson_read_opts(json.data(), json.size(), YYJSON_READ_INSITU, 0, 0), result);
|
return yyjson_base::run(yyjson_read_opts(json.data(), json.size(), YYJSON_READ_INSITU, 0, 0), result);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue