Merge pull request #1372 from simdjson/jkeiser/ondemand-sajson
Add sajson and nlohmann_json benchmarks
This commit is contained in:
commit
3849cc400e
|
@ -34,16 +34,25 @@ if (TARGET competition-all)
|
|||
target_compile_definitions(allparsingcompetition PRIVATE ALLPARSER)
|
||||
endif()
|
||||
|
||||
if (TARGET benchmark::benchmark)
|
||||
if(TARGET benchmark::benchmark)
|
||||
link_libraries(benchmark::benchmark)
|
||||
add_executable(bench_parse_call bench_parse_call.cpp)
|
||||
add_executable(bench_dom_api bench_dom_api.cpp)
|
||||
if (SIMDJSON_EXCEPTIONS)
|
||||
if(SIMDJSON_EXCEPTIONS)
|
||||
add_executable(bench_ondemand bench_ondemand.cpp)
|
||||
if (TARGET yyjson)
|
||||
target_link_libraries(bench_ondemand PRIVATE yyjson rapidjson)
|
||||
endif (TARGET yyjson)
|
||||
endif (SIMDJSON_EXCEPTIONS)
|
||||
if(TARGET yyjson)
|
||||
target_link_libraries(bench_ondemand PRIVATE yyjson)
|
||||
endif()
|
||||
if(TARGET rapidjson)
|
||||
target_link_libraries(bench_ondemand PRIVATE rapidjson)
|
||||
endif()
|
||||
if(TARGET sajson)
|
||||
target_link_libraries(bench_ondemand PRIVATE sajson)
|
||||
endif()
|
||||
if(TARGET nlohmann_json)
|
||||
target_link_libraries(bench_ondemand PRIVATE nlohmann_json)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
include(checkperf.cmake)
|
||||
|
|
|
@ -13,6 +13,14 @@ SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
|
|||
#include "rapidjson/writer.h"
|
||||
#endif
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_SAJSON
|
||||
#include "sajson.h"
|
||||
#endif
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||
#include <nlohmann/json.hpp>
|
||||
#endif
|
||||
|
||||
// This has to be last, for reasons I don't yet understand
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
|
@ -21,27 +29,37 @@ SIMDJSON_POP_DISABLE_WARNINGS
|
|||
#include "partial_tweets/simdjson_dom.h"
|
||||
#include "partial_tweets/simdjson_ondemand.h"
|
||||
#include "partial_tweets/yyjson.h"
|
||||
#include "partial_tweets/sajson.h"
|
||||
#include "partial_tweets/rapidjson.h"
|
||||
#include "partial_tweets/nlohmann_json.h"
|
||||
|
||||
#include "large_random/simdjson_dom.h"
|
||||
#include "large_random/simdjson_ondemand.h"
|
||||
#include "large_random/simdjson_ondemand_unordered.h"
|
||||
#include "large_random/yyjson.h"
|
||||
#include "large_random/sajson.h"
|
||||
#include "large_random/rapidjson.h"
|
||||
#include "large_random/nlohmann_json.h"
|
||||
|
||||
#include "kostya/simdjson_dom.h"
|
||||
#include "kostya/simdjson_ondemand.h"
|
||||
#include "kostya/yyjson.h"
|
||||
#include "kostya/sajson.h"
|
||||
#include "kostya/rapidjson.h"
|
||||
#include "kostya/nlohmann_json.h"
|
||||
|
||||
#include "distinct_user_id/simdjson_dom.h"
|
||||
#include "distinct_user_id/simdjson_ondemand.h"
|
||||
#include "distinct_user_id/yyjson.h"
|
||||
#include "distinct_user_id/sajson.h"
|
||||
#include "distinct_user_id/rapidjson.h"
|
||||
#include "distinct_user_id/nlohmann_json.h"
|
||||
|
||||
#include "find_tweet/simdjson_dom.h"
|
||||
#include "find_tweet/simdjson_ondemand.h"
|
||||
#include "find_tweet/yyjson.h"
|
||||
#include "find_tweet/sajson.h"
|
||||
#include "find_tweet/rapidjson.h"
|
||||
#include "find_tweet/nlohmann_json.h"
|
||||
|
||||
BENCHMARK_MAIN();
|
||||
|
|
|
@ -6,16 +6,18 @@
|
|||
|
||||
namespace distinct_user_id {
|
||||
|
||||
using namespace json_benchmark;
|
||||
|
||||
template<typename I>
|
||||
struct runner : public json_benchmark::file_runner<I> {
|
||||
struct runner : public file_runner<I> {
|
||||
std::vector<uint64_t> result{};
|
||||
|
||||
bool setup(benchmark::State &state) {
|
||||
return this->load_json(state, json_benchmark::TWITTER_JSON);
|
||||
return this->load_json(state, TWITTER_JSON);
|
||||
}
|
||||
|
||||
bool before_run(benchmark::State &state) {
|
||||
if (!json_benchmark::file_runner<I>::before_run(state)) { return false; }
|
||||
if (!file_runner<I>::before_run(state)) { return false; }
|
||||
result.clear();
|
||||
return true;
|
||||
}
|
||||
|
@ -25,7 +27,7 @@ struct runner : public json_benchmark::file_runner<I> {
|
|||
}
|
||||
|
||||
bool after_run(benchmark::State &state) {
|
||||
if (!json_benchmark::file_runner<I>::after_run(state)) { return false; }
|
||||
if (!file_runner<I>::after_run(state)) { return false; }
|
||||
std::sort(result.begin(), result.end());
|
||||
auto last = std::unique(result.begin(), result.end());
|
||||
result.erase(last, result.end());
|
||||
|
@ -34,7 +36,7 @@ struct runner : public json_benchmark::file_runner<I> {
|
|||
|
||||
template<typename R>
|
||||
bool diff(benchmark::State &state, runner<R> &reference) {
|
||||
return diff_results(state, result, reference.result);
|
||||
return diff_results(state, result, reference.result, diff_flags::NONE);
|
||||
}
|
||||
|
||||
size_t items_per_iteration() {
|
||||
|
@ -45,7 +47,7 @@ struct runner : public json_benchmark::file_runner<I> {
|
|||
struct simdjson_dom;
|
||||
|
||||
template<typename I> simdjson_really_inline static void distinct_user_id(benchmark::State &state) {
|
||||
json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
|
||||
run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
|
||||
}
|
||||
|
||||
} // namespace distinct_user_id
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||
|
||||
#include "distinct_user_id.h"
|
||||
|
||||
namespace distinct_user_id {
|
||||
|
||||
struct nlohmann_json {
|
||||
bool run(simdjson::padded_string &json, std::vector<uint64_t> &result) {
|
||||
auto root = nlohmann::json::parse(json.data(), json.data() + json.size());
|
||||
for (auto tweet : root["statuses"]) {
|
||||
result.push_back(tweet["user"]["id"]);
|
||||
if (tweet.contains("retweeted_status")) {
|
||||
result.push_back(tweet["retweeted_status"]["user"]["id"]);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(distinct_user_id, nlohmann_json)->UseManualTime();
|
||||
|
||||
} // namespace distinct_user_id
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON
|
|
@ -0,0 +1,80 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_SAJSON
|
||||
|
||||
#include "distinct_user_id.h"
|
||||
|
||||
namespace distinct_user_id {
|
||||
|
||||
struct sajson {
|
||||
size_t ast_buffer_size{0};
|
||||
size_t *ast_buffer{nullptr};
|
||||
simdjson_really_inline std::string_view get_string_view(const ::sajson::value &obj, std::string_view key) {
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field is not a string"; }
|
||||
return { val.as_cstring(), val.get_string_length() };
|
||||
}
|
||||
simdjson_really_inline uint64_t get_str_uint64(const ::sajson::value &obj, std::string_view key) {
|
||||
// Since sajson only supports 53-bit numbers, and IDs in twitter.json can be > 53 bits, we read the corresponding id_str and parse that.
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field not a string"; }
|
||||
auto str = val.as_cstring();
|
||||
char *endptr;
|
||||
uint64_t result = strtoull(str, &endptr, 10);
|
||||
if (endptr != &str[val.get_string_length()]) { throw "field is a string, but not an integer string"; }
|
||||
return result;
|
||||
}
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<uint64_t> &result) {
|
||||
using namespace sajson;
|
||||
if (!ast_buffer) {
|
||||
ast_buffer_size = json.size();
|
||||
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t));
|
||||
}
|
||||
auto doc = parse(
|
||||
bounded_allocation(ast_buffer, ast_buffer_size),
|
||||
mutable_string_view(json.size(), json.data())
|
||||
);
|
||||
if (!doc.is_valid()) { return false; }
|
||||
|
||||
auto root = doc.get_root();
|
||||
if (root.get_type() != TYPE_OBJECT) { return false; }
|
||||
auto statuses = root.get_value_of_key({"statuses", strlen("statuses")});
|
||||
if (statuses.get_type() != TYPE_ARRAY) { return false; }
|
||||
|
||||
for (size_t i=0; i<statuses.get_length(); i++) {
|
||||
auto tweet = statuses.get_array_element(i);
|
||||
|
||||
// get tweet.user.id
|
||||
if (tweet.get_type() != TYPE_OBJECT) { return false; }
|
||||
auto user = tweet.get_value_of_key({"user", strlen("user")});
|
||||
if (user.get_type() != TYPE_OBJECT) { return false; }
|
||||
result.push_back(get_str_uint64(user, "id_str"));
|
||||
|
||||
// get tweet.retweeted_status.user.id
|
||||
auto retweet = tweet.get_value_of_key({"retweeted_status", strlen("retweeted_status")});
|
||||
switch (retweet.get_type()) {
|
||||
case TYPE_OBJECT: {
|
||||
auto retweet_user = retweet.get_value_of_key({"user", strlen("user")});
|
||||
if (retweet_user.get_type() != TYPE_OBJECT) { return false; }
|
||||
result.push_back(get_str_uint64(retweet_user, "id_str"));
|
||||
break;
|
||||
}
|
||||
// TODO distinguish null and missing. null is bad. missing is fine.
|
||||
case TYPE_NULL:
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(distinct_user_id, sajson)->UseManualTime();
|
||||
|
||||
} // namespace distinct_user_id
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_SAJSON
|
||||
|
|
@ -5,34 +5,36 @@
|
|||
|
||||
namespace find_tweet {
|
||||
|
||||
using namespace json_benchmark;
|
||||
|
||||
template<typename I>
|
||||
struct runner : public json_benchmark::file_runner<I> {
|
||||
std::string_view result;
|
||||
struct runner : public file_runner<I> {
|
||||
typename I::StringType result;
|
||||
|
||||
bool setup(benchmark::State &state) {
|
||||
return this->load_json(state, json_benchmark::TWITTER_JSON);
|
||||
return this->load_json(state, TWITTER_JSON);
|
||||
}
|
||||
|
||||
bool before_run(benchmark::State &state) {
|
||||
if (!json_benchmark::file_runner<I>::before_run(state)) { return false; }
|
||||
if (!file_runner<I>::before_run(state)) { return false; }
|
||||
result = "";
|
||||
return true;
|
||||
}
|
||||
|
||||
bool run(benchmark::State &) {
|
||||
return this->implementation.run(this->json, 505874901689851900ULL, result);
|
||||
return this->implementation.run(this->json, 505874901689851904ULL, result);
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
bool diff(benchmark::State &state, runner<R> &reference) {
|
||||
return diff_results(state, result, reference.result);
|
||||
return diff_results(state, result, reference.result, diff_flags::NONE);
|
||||
}
|
||||
};
|
||||
|
||||
struct simdjson_dom;
|
||||
|
||||
template<typename I> simdjson_really_inline static void find_tweet(benchmark::State &state) {
|
||||
json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
|
||||
run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
|
||||
}
|
||||
|
||||
} // namespace find_tweet
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||
|
||||
#include "find_tweet.h"
|
||||
|
||||
namespace find_tweet {
|
||||
|
||||
struct nlohmann_json {
|
||||
using StringType=std::string;
|
||||
|
||||
bool run(simdjson::padded_string &json, uint64_t find_id, std::string &result) {
|
||||
auto root = nlohmann::json::parse(json.data(), json.data() + json.size());
|
||||
for (auto tweet : root["statuses"]) {
|
||||
if (tweet["id"] == find_id) {
|
||||
result = tweet["text"];
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(find_tweet, nlohmann_json)->UseManualTime();
|
||||
|
||||
} // namespace find_tweet
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON
|
|
@ -9,6 +9,8 @@ namespace find_tweet {
|
|||
using namespace rapidjson;
|
||||
|
||||
struct rapidjson_base {
|
||||
using StringType=std::string_view;
|
||||
|
||||
Document doc{};
|
||||
|
||||
bool run(Document &root, uint64_t find_id, std::string_view &result) {
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_SAJSON
|
||||
|
||||
#include "find_tweet.h"
|
||||
|
||||
namespace find_tweet {
|
||||
|
||||
struct sajson {
|
||||
using StringType=std::string_view;
|
||||
|
||||
size_t ast_buffer_size{0};
|
||||
size_t *ast_buffer{nullptr};
|
||||
simdjson_really_inline std::string_view get_string_view(const ::sajson::value &obj, std::string_view key) {
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field is not a string"; }
|
||||
return { val.as_cstring(), val.get_string_length() };
|
||||
}
|
||||
simdjson_really_inline uint64_t get_str_uint64(const ::sajson::value &obj, std::string_view key) {
|
||||
// Since sajson only supports 53-bit numbers, and IDs in twitter.json can be > 53 bits, we read the corresponding id_str and parse that.
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field not a string"; }
|
||||
auto str = val.as_cstring();
|
||||
char *endptr;
|
||||
uint64_t result = strtoull(str, &endptr, 10);
|
||||
if (endptr != &str[val.get_string_length()]) { throw "field is a string, but not an integer string"; }
|
||||
return result;
|
||||
}
|
||||
|
||||
bool run(simdjson::padded_string &json, uint64_t find_id, std::string_view &result) {
|
||||
if (!ast_buffer) {
|
||||
ast_buffer_size = json.size();
|
||||
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t));
|
||||
}
|
||||
auto doc = ::sajson::parse(
|
||||
::sajson::bounded_allocation(ast_buffer, ast_buffer_size),
|
||||
::sajson::mutable_string_view(json.size(), json.data())
|
||||
);
|
||||
if (!doc.is_valid()) { return false; }
|
||||
|
||||
auto root = doc.get_root();
|
||||
if (root.get_type() != ::sajson::TYPE_OBJECT) { printf("a\n"); return false; }
|
||||
auto statuses = root.get_value_of_key({"statuses", strlen("statuses")});
|
||||
if (statuses.get_type() != ::sajson::TYPE_ARRAY) { return false; }
|
||||
|
||||
for (size_t i=0; i<statuses.get_length(); i++) {
|
||||
auto tweet = statuses.get_array_element(i);
|
||||
if (tweet.get_type() != ::sajson::TYPE_OBJECT) { printf("b\n"); return false; }
|
||||
// TODO if there is a way to get the raw string, it might be faster to iota find_id and then
|
||||
// compare it to each id_str, instead of parsing each int and comparing to find_id.
|
||||
if (get_str_uint64(tweet, "id_str") == find_id) {
|
||||
result = get_string_view(tweet, "text");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(find_tweet, sajson)->UseManualTime();
|
||||
|
||||
} // namespace find_tweet
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_SAJSON
|
||||
|
|
@ -9,6 +9,8 @@ namespace find_tweet {
|
|||
using namespace simdjson;
|
||||
|
||||
struct simdjson_dom {
|
||||
using StringType=std::string_view;
|
||||
|
||||
dom::parser parser{};
|
||||
|
||||
bool run(simdjson::padded_string &json, uint64_t find_id, std::string_view &result) {
|
||||
|
|
|
@ -10,6 +10,8 @@ using namespace simdjson;
|
|||
using namespace simdjson::builtin;
|
||||
|
||||
struct simdjson_ondemand {
|
||||
using StringType=std::string_view;
|
||||
|
||||
ondemand::parser parser{};
|
||||
|
||||
bool run(simdjson::padded_string &json, uint64_t find_id, std::string_view &result) {
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
namespace find_tweet {
|
||||
|
||||
struct yyjson_base {
|
||||
using StringType=std::string_view;
|
||||
|
||||
bool run(yyjson_doc *doc, uint64_t find_id, std::string_view &result) {
|
||||
if (!doc) { return false; }
|
||||
yyjson_val *root = yyjson_doc_get_root(doc);
|
||||
|
|
|
@ -2,13 +2,21 @@
|
|||
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include <limits>
|
||||
|
||||
template<typename T>
|
||||
static bool diff_results(benchmark::State &state, const T &result, const T &reference);
|
||||
namespace json_benchmark {
|
||||
|
||||
template<typename T>
|
||||
/**
 * Options that control how benchmark results are compared with the reference
 * implementation's results. Tested as a bit mask by the double comparison.
 */
enum class diff_flags {
  /** Compare values exactly. */
  NONE = 0,
  /** Accept doubles that are at most one ULP apart. */
  IMPRECISE_FLOATS = 1
};
|
||||
|
||||
template<typename T, typename U>
|
||||
static bool diff_results(benchmark::State &state, const T &result, const U &reference, diff_flags flags);
|
||||
|
||||
template<typename T, typename U>
|
||||
struct result_differ {
|
||||
static bool diff(benchmark::State &state, const T &result, const T &reference) {
|
||||
static bool diff(benchmark::State &state, const T &result, const U &reference, diff_flags flags) {
|
||||
if (result != reference) {
|
||||
std::stringstream str;
|
||||
str << "result incorrect: " << result << " ... reference: " << reference;
|
||||
|
@ -19,13 +27,13 @@ struct result_differ {
|
|||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct result_differ<std::vector<T>> {
|
||||
static bool diff(benchmark::State &state, const std::vector<T> &result, const std::vector<T> &reference) {
|
||||
template<typename T, typename U>
|
||||
struct result_differ<std::vector<T>, std::vector<U>> {
|
||||
static bool diff(benchmark::State &state, const std::vector<T> &result, const std::vector<U> &reference, diff_flags flags) {
|
||||
auto result_iter = result.begin();
|
||||
auto reference_iter = reference.begin();
|
||||
while (result_iter != result.end() && reference_iter != reference.end()) {
|
||||
if (!diff_results(state, *result_iter, *reference_iter)) { return false; }
|
||||
if (!diff_results(state, *result_iter, *reference_iter, flags)) { return false; }
|
||||
result_iter++;
|
||||
reference_iter++;
|
||||
}
|
||||
|
@ -44,8 +52,41 @@ struct result_differ<std::vector<T>> {
|
|||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
static bool diff_results(benchmark::State &state, const T &result, const T &reference) {
|
||||
return result_differ<T>::diff(state, result, reference);
|
||||
template<>
|
||||
struct result_differ<double, double> {
|
||||
/**
 * Compares a double result with the reference value.
 *
 * With diff_flags::IMPRECISE_FLOATS set, values up to one ULP apart are
 * accepted; otherwise the comparison is exact. On a mismatch both values are
 * written at full precision (decimal and hexfloat) into the benchmark's error
 * message.
 *
 * @return true if the values match under `flags`, false if a mismatch was
 *         reported via state.SkipWithError().
 */
static bool diff(benchmark::State &state, const double &result, const double &reference, diff_flags flags) {
  bool different;
  if (static_cast<int>(flags) & static_cast<int>(diff_flags::IMPRECISE_FLOATS)) {
    different = f64_ulp_dist(result, reference) > 1;
  } else {
    different = result != reference;
  }
  if (!different) { return true; }

  std::stringstream str;
  // We print it out using full precision.
  constexpr auto precision = std::numeric_limits<double>::max_digits10;
  str << std::setprecision(precision);
  str << "incorrect double result: " << std::endl;
  str << " result: " << std::left << std::setw(precision+2) << result << " (hexfloat " << std::hexfloat << result << ")" << std::defaultfloat << std::endl;
  str << "reference: " << std::left << std::setw(precision+2) << reference << " (hexfloat " << std::hexfloat << reference << ")" << std::defaultfloat << std::endl;
  state.SkipWithError(str.str().data());
  // Report the mismatch to the caller so that container comparisons stop at
  // the first bad element instead of carrying on as if it had matched.
  return false;
}
|
||||
|
||||
/**
 * Distance between two doubles measured in units in the last place, i.e. the
 * number of representable doubles you step over going from `a` to `b`.
 *
 * Works on the raw IEEE-754 bit patterns: for values of the same sign the
 * distance is the difference of the patterns; for values of opposite sign it
 * is the sum of the two magnitudes (so +0.0 and -0.0 are 0 apart).
 */
static uint64_t f64_ulp_dist(double a, double b) {
  constexpr uint64_t sign_bit = 0x8000000000000000ULL;
  uint64_t ua, ub;
  std::memcpy(&ua, &a, sizeof(ua));
  std::memcpy(&ub, &b, sizeof(ub));
  if ((ua ^ ub) & sign_bit) {
    // Opposite signs: walk from one value down to zero and up to the other.
    return (ua & ~sign_bit) + (ub & ~sign_bit);
  }
  return ua > ub ? ua - ub : ub - ua;
}
|
||||
};
|
||||
|
||||
template<typename T, typename U>
|
||||
static bool diff_results(benchmark::State &state, const T &result, const U &reference, diff_flags flags) {
|
||||
return result_differ<T, U>::diff(state, result, reference, flags);
|
||||
}
|
||||
|
||||
} // namespace json_benchmark
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
#pragma once
|
||||
|
||||
#include "diff_results.h"
|
||||
|
||||
namespace json_benchmark {
|
||||
|
||||
/**
 * A 3D coordinate; the element type of the result vectors produced by the
 * kostya and large_random benchmarks.
 */
struct point {
  double x; // first coordinate
  double y; // second coordinate
  double z; // third coordinate
};
|
||||
|
||||
template<>
|
||||
struct result_differ<point, point> {
|
||||
static bool diff(benchmark::State &state, const point &result, const point &reference, diff_flags flags) {
|
||||
return diff_results(state, result.x, reference.x, flags)
|
||||
&& diff_results(state, result.y, reference.y, flags)
|
||||
&& diff_results(state, result.z, reference.z, flags);
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Writes a point as "x,y,z" followed by a newline.
 *
 * Uses '\n' rather than std::endl: the characters written are the same, but
 * the inserter no longer forces a flush of the caller's stream.
 */
static simdjson_unused std::ostream &operator<<(std::ostream &o, const point &p) {
  return o << p.x << "," << p.y << "," << p.z << '\n';
}
|
||||
|
||||
} // namespace json_benchmark
|
|
@ -1,39 +1,24 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "json_benchmark/string_runner.h"
|
||||
#include "json_benchmark/point.h"
|
||||
#include <vector>
|
||||
#include <random>
|
||||
|
||||
namespace kostya {
|
||||
|
||||
using namespace json_benchmark;
|
||||
|
||||
static const simdjson::padded_string &get_built_json_array();
|
||||
|
||||
struct point {
|
||||
double x;
|
||||
double y;
|
||||
double z;
|
||||
simdjson_really_inline bool operator==(const point &other) const {
|
||||
return x == other.x && y == other.y && z == other.z;
|
||||
}
|
||||
simdjson_really_inline bool operator!=(const point &other) const {
|
||||
return !(*this == other);
|
||||
}
|
||||
};
|
||||
|
||||
simdjson_unused static std::ostream &operator<<(std::ostream &o, const point &p) {
|
||||
return o << p.x << "," << p.y << "," << p.z << std::endl;
|
||||
}
|
||||
|
||||
template<typename I>
|
||||
struct runner : public json_benchmark::string_runner<I> {
|
||||
struct runner : public string_runner<I> {
|
||||
std::vector<point> result;
|
||||
|
||||
runner() : json_benchmark::string_runner<I>(get_built_json_array()) {}
|
||||
runner() : string_runner<I>(get_built_json_array()) {}
|
||||
|
||||
bool before_run(benchmark::State &state) {
|
||||
if (!json_benchmark::string_runner<I>::before_run(state)) { return false; }
|
||||
if (!string_runner<I>::before_run(state)) { return false; }
|
||||
result.clear();
|
||||
return true;
|
||||
}
|
||||
|
@ -44,7 +29,7 @@ struct runner : public json_benchmark::string_runner<I> {
|
|||
|
||||
template<typename R>
|
||||
bool diff(benchmark::State &state, runner<R> &reference) {
|
||||
return diff_results(state, result, reference.result);
|
||||
return diff_results(state, result, reference.result, I::DiffFlags);
|
||||
}
|
||||
|
||||
size_t items_per_iteration() {
|
||||
|
@ -95,9 +80,7 @@ static const simdjson::padded_string &get_built_json_array() {
|
|||
struct simdjson_dom;
|
||||
|
||||
template<typename I> simdjson_really_inline static void kostya(benchmark::State &state) {
|
||||
json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
|
||||
run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
|
||||
}
|
||||
|
||||
} // namespace kostya
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||
|
||||
#include "kostya.h"
|
||||
|
||||
namespace kostya {
|
||||
|
||||
struct nlohmann_json {
|
||||
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||
auto root = nlohmann::json::parse(json.data(), json.data() + json.size());
|
||||
for (auto point : root["coordinates"]) {
|
||||
result.emplace_back(json_benchmark::point{point["x"], point["y"], point["z"]});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(kostya, nlohmann_json)->UseManualTime();
|
||||
|
||||
} // namespace kostya
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON
|
|
@ -9,6 +9,8 @@ namespace kostya {
|
|||
using namespace rapidjson;
|
||||
|
||||
struct rapidjson_base {
|
||||
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||
|
||||
Document doc;
|
||||
|
||||
simdjson_really_inline double get_double(Value &object, std::string_view key) {
|
||||
|
@ -26,7 +28,7 @@ struct rapidjson_base {
|
|||
if (!coords->value.IsArray()) { return false; }
|
||||
for (auto &coord : coords->value.GetArray()) {
|
||||
if (!coord.IsObject()) { return false; }
|
||||
result.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
||||
result.emplace_back(json_benchmark::point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_SAJSON
|
||||
|
||||
#include "kostya.h"
|
||||
|
||||
namespace kostya {
|
||||
|
||||
struct sajson {
|
||||
static constexpr diff_flags DiffFlags = diff_flags::IMPRECISE_FLOATS;
|
||||
|
||||
size_t ast_buffer_size{0};
|
||||
size_t *ast_buffer{nullptr};
|
||||
|
||||
simdjson_really_inline double get_double(const ::sajson::value &obj, std::string_view key) {
|
||||
using namespace sajson;
|
||||
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
switch (val.get_type()) {
|
||||
case TYPE_INTEGER:
|
||||
case TYPE_DOUBLE:
|
||||
return val.get_number_value();
|
||||
default:
|
||||
throw "field not double";
|
||||
}
|
||||
}
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||
using namespace sajson;
|
||||
|
||||
if (!ast_buffer) {
|
||||
ast_buffer_size = json.size();
|
||||
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t));
|
||||
}
|
||||
auto doc = parse(
|
||||
bounded_allocation(ast_buffer, ast_buffer_size),
|
||||
mutable_string_view(json.size(), json.data())
|
||||
);
|
||||
if (!doc.is_valid()) { return false; }
|
||||
|
||||
auto root = doc.get_root();
|
||||
if (root.get_type() != TYPE_OBJECT) { return false; }
|
||||
auto points = root.get_value_of_key({"coordinates", strlen("coordinates")});
|
||||
if (points.get_type() != TYPE_ARRAY) { return false; }
|
||||
|
||||
for (size_t i=0; i<points.get_length(); i++) {
|
||||
auto point = points.get_array_element(i);
|
||||
if (point.get_type() != TYPE_OBJECT) { return false; }
|
||||
result.emplace_back(json_benchmark::point{
|
||||
get_double(point, "x"),
|
||||
get_double(point, "y"),
|
||||
get_double(point, "z")
|
||||
});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(kostya, sajson)->UseManualTime();
|
||||
|
||||
} // namespace kostya
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_SAJSON
|
||||
|
|
@ -9,11 +9,13 @@ namespace kostya {
|
|||
using namespace simdjson;
|
||||
|
||||
struct simdjson_dom {
|
||||
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||
|
||||
dom::parser parser{};
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||
for (auto point : parser.parse(json)["coordinates"]) {
|
||||
result.emplace_back(kostya::point{point["x"], point["y"], point["z"]});
|
||||
result.emplace_back(json_benchmark::point{point["x"], point["y"], point["z"]});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -10,12 +10,14 @@ using namespace simdjson;
|
|||
using namespace simdjson::builtin;
|
||||
|
||||
struct simdjson_ondemand {
|
||||
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||
|
||||
ondemand::parser parser{};
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||
auto doc = parser.iterate(json);
|
||||
for (ondemand::object point : doc.find_field("coordinates")) {
|
||||
result.emplace_back(kostya::point{point.find_field("x"), point.find_field("y"), point.find_field("z")});
|
||||
result.emplace_back(json_benchmark::point{point.find_field("x"), point.find_field("y"), point.find_field("z")});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
namespace kostya {
|
||||
|
||||
struct yyjson_base {
|
||||
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||
|
||||
simdjson_really_inline double get_double(yyjson_val *obj, std::string_view key) {
|
||||
yyjson_val *val = yyjson_obj_getn(obj, key.data(), key.length());
|
||||
if (!val) { throw "missing point field!"; }
|
||||
|
@ -35,7 +37,7 @@ struct yyjson_base {
|
|||
yyjson_val *coord;
|
||||
yyjson_arr_foreach(coords, idx, max, coord) {
|
||||
if (!yyjson_is_obj(coord)) { return false; }
|
||||
result.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
||||
result.emplace_back(json_benchmark::point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
@ -1,36 +1,27 @@
|
|||
#pragma once
|
||||
|
||||
#include "json_benchmark/string_runner.h"
|
||||
#include "json_benchmark/point.h"
|
||||
#include <random>
|
||||
|
||||
namespace large_random {
|
||||
|
||||
static const simdjson::padded_string &get_built_json_array();
|
||||
|
||||
struct point {
|
||||
double x;
|
||||
double y;
|
||||
double z;
|
||||
simdjson_really_inline bool operator==(const point &other) const {
|
||||
return x == other.x && y == other.y && z == other.z;
|
||||
}
|
||||
simdjson_really_inline bool operator!=(const point &other) const {
|
||||
return !(*this == other);
|
||||
}
|
||||
};
|
||||
using namespace json_benchmark;
|
||||
|
||||
simdjson_unused static std::ostream &operator<<(std::ostream &o, const point &p) {
|
||||
return o << p.x << "," << p.y << "," << p.z << std::endl;
|
||||
}
|
||||
|
||||
template<typename I>
|
||||
struct runner : public json_benchmark::string_runner<I> {
|
||||
struct runner : public string_runner<I> {
|
||||
std::vector<point> result;
|
||||
|
||||
runner() : json_benchmark::string_runner<I>(get_built_json_array()) {}
|
||||
runner() : string_runner<I>(get_built_json_array()) {}
|
||||
|
||||
bool before_run(benchmark::State &state) {
|
||||
if (!json_benchmark::string_runner<I>::before_run(state)) { return false; }
|
||||
if (!string_runner<I>::before_run(state)) { return false; }
|
||||
result.clear();
|
||||
return true;
|
||||
}
|
||||
|
@ -41,7 +32,7 @@ struct runner : public json_benchmark::string_runner<I> {
|
|||
|
||||
template<typename R>
|
||||
bool diff(benchmark::State &state, runner<R> &reference) {
|
||||
return diff_results(state, result, reference.result);
|
||||
return diff_results(state, result, reference.result, I::DiffFlags);
|
||||
}
|
||||
|
||||
size_t items_per_iteration() {
|
||||
|
@ -76,7 +67,7 @@ static const simdjson::padded_string &get_built_json_array() {
|
|||
struct simdjson_dom;
|
||||
|
||||
template<typename T> static void large_random(benchmark::State &state) {
|
||||
json_benchmark::run_json_benchmark<runner<T>, runner<simdjson_dom>>(state);
|
||||
run_json_benchmark<runner<T>, runner<simdjson_dom>>(state);
|
||||
}
|
||||
|
||||
} // namespace large_random
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||
|
||||
#include "large_random.h"
|
||||
|
||||
namespace large_random {
|
||||
|
||||
struct nlohmann_json {
|
||||
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||
for (auto point : nlohmann::json::parse(json.data(), json.data() + json.size())) {
|
||||
result.emplace_back(json_benchmark::point{point["x"], point["y"], point["z"]});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(large_random, nlohmann_json)->UseManualTime();
|
||||
|
||||
} // namespace large_random
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON
|
|
@ -9,6 +9,8 @@ namespace large_random {
|
|||
using namespace rapidjson;
|
||||
|
||||
struct rapidjson_base {
|
||||
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||
|
||||
Document doc;
|
||||
|
||||
simdjson_really_inline double get_double(Value &object, std::string_view key) {
|
||||
|
@ -23,7 +25,7 @@ struct rapidjson_base {
|
|||
if (!coords.IsArray()) { return false; }
|
||||
for (auto &coord : coords.GetArray()) {
|
||||
if (!coord.IsObject()) { return false; }
|
||||
result.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
||||
result.emplace_back(json_benchmark::point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_SAJSON
|
||||
|
||||
#include "large_random.h"
|
||||
|
||||
namespace large_random {
|
||||
|
||||
struct sajson {
|
||||
static constexpr diff_flags DiffFlags = diff_flags::IMPRECISE_FLOATS;
|
||||
|
||||
size_t ast_buffer_size{0};
|
||||
size_t *ast_buffer{nullptr};
|
||||
|
||||
simdjson_really_inline double get_double(const ::sajson::value &obj, std::string_view key) {
|
||||
using namespace sajson;
|
||||
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
switch (val.get_type()) {
|
||||
case TYPE_INTEGER:
|
||||
case TYPE_DOUBLE:
|
||||
return val.get_number_value();
|
||||
default:
|
||||
throw "field not double";
|
||||
}
|
||||
}
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||
using namespace sajson;
|
||||
|
||||
if (!ast_buffer) {
|
||||
ast_buffer_size = json.size();
|
||||
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t));
|
||||
}
|
||||
auto doc = parse(
|
||||
bounded_allocation(ast_buffer, ast_buffer_size),
|
||||
mutable_string_view(json.size(), json.data())
|
||||
);
|
||||
if (!doc.is_valid()) { return false; }
|
||||
|
||||
auto points = doc.get_root();
|
||||
if (points.get_type() != TYPE_ARRAY) { return false; }
|
||||
|
||||
for (size_t i=0; i<points.get_length(); i++) {
|
||||
auto point = points.get_array_element(i);
|
||||
if (point.get_type() != TYPE_OBJECT) { return false; }
|
||||
result.emplace_back(json_benchmark::point{
|
||||
get_double(point, "x"),
|
||||
get_double(point, "y"),
|
||||
get_double(point, "z")
|
||||
});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(large_random, sajson)->UseManualTime();
|
||||
|
||||
} // namespace large_random
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_SAJSON
|
||||
|
|
@ -9,11 +9,13 @@ namespace large_random {
|
|||
using namespace simdjson;
|
||||
|
||||
struct simdjson_dom {
|
||||
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||
|
||||
dom::parser parser{};
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||
for (auto point : parser.parse(json)) {
|
||||
result.emplace_back(large_random::point{point["x"], point["y"], point["z"]});
|
||||
result.emplace_back(json_benchmark::point{point["x"], point["y"], point["z"]});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -10,12 +10,14 @@ using namespace simdjson;
|
|||
using namespace simdjson::builtin;
|
||||
|
||||
struct simdjson_ondemand {
|
||||
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||
|
||||
ondemand::parser parser{};
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||
auto doc = parser.iterate(json);
|
||||
for (ondemand::object coord : doc) {
|
||||
result.emplace_back(point{coord.find_field("x"), coord.find_field("y"), coord.find_field("z")});
|
||||
result.emplace_back(json_benchmark::point{coord.find_field("x"), coord.find_field("y"), coord.find_field("z")});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -10,12 +10,14 @@ using namespace simdjson;
|
|||
using namespace simdjson::builtin;
|
||||
|
||||
struct simdjson_ondemand_unordered {
|
||||
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||
|
||||
ondemand::parser parser{};
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||
auto doc = parser.iterate(json);
|
||||
for (ondemand::object coord : doc) {
|
||||
result.emplace_back(large_random::point{coord["x"], coord["y"], coord["z"]});
|
||||
result.emplace_back(json_benchmark::point{coord["x"], coord["y"], coord["z"]});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
namespace large_random {
|
||||
|
||||
struct yyjson_base {
|
||||
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||
|
||||
simdjson_really_inline double get_double(yyjson_val *obj, std::string_view key) {
|
||||
yyjson_val *val = yyjson_obj_getn(obj, key.data(), key.length());
|
||||
if (!val) { throw "missing point field!"; }
|
||||
|
@ -34,7 +36,7 @@ struct yyjson_base {
|
|||
yyjson_val *coord;
|
||||
yyjson_arr_foreach(coords, idx, max, coord) {
|
||||
if (!yyjson_is_obj(coord)) { return false; }
|
||||
result.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
||||
result.emplace_back(json_benchmark::point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||
|
||||
#include "partial_tweets.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
struct nlohmann_json {
|
||||
using StringType=std::string;
|
||||
|
||||
simdjson_really_inline uint64_t nullable_int(nlohmann::json value) {
|
||||
if (value.is_null()) { return 0; }
|
||||
return value;
|
||||
}
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<tweet<std::string>> &result) {
|
||||
auto root = nlohmann::json::parse(json.data(), json.data() + json.size());
|
||||
for (auto tweet : root["statuses"]) {
|
||||
auto user = tweet["user"];
|
||||
result.emplace_back(partial_tweets::tweet<std::string>{
|
||||
tweet["created_at"],
|
||||
tweet["id"],
|
||||
tweet["text"],
|
||||
nullable_int(tweet["in_reply_to_status_id"]),
|
||||
{ user["id"], user["screen_name"] },
|
||||
tweet["retweet_count"],
|
||||
tweet["favorite_count"]
|
||||
});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(partial_tweets, nlohmann_json)->UseManualTime();
|
||||
|
||||
} // namespace partial_tweets
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON
|
|
@ -7,16 +7,18 @@
|
|||
|
||||
namespace partial_tweets {
|
||||
|
||||
using namespace json_benchmark;
|
||||
|
||||
template<typename I>
|
||||
struct runner : public json_benchmark::file_runner<I> {
|
||||
std::vector<tweet> result{};
|
||||
struct runner : public file_runner<I> {
|
||||
std::vector<tweet<typename I::StringType>> result{};
|
||||
|
||||
bool setup(benchmark::State &state) {
|
||||
return this->load_json(state, json_benchmark::TWITTER_JSON);
|
||||
return this->load_json(state, TWITTER_JSON);
|
||||
}
|
||||
|
||||
bool before_run(benchmark::State &state) {
|
||||
if (!json_benchmark::file_runner<I>::before_run(state)) { return false; }
|
||||
if (!file_runner<I>::before_run(state)) { return false; }
|
||||
result.clear();
|
||||
return true;
|
||||
}
|
||||
|
@ -27,7 +29,7 @@ struct runner : public json_benchmark::file_runner<I> {
|
|||
|
||||
template<typename R>
|
||||
bool diff(benchmark::State &state, runner<R> &reference) {
|
||||
return diff_results(state, result, reference.result);
|
||||
return diff_results(state, result, reference.result, diff_flags::NONE);
|
||||
}
|
||||
|
||||
size_t items_per_iteration() {
|
||||
|
@ -38,7 +40,7 @@ struct runner : public json_benchmark::file_runner<I> {
|
|||
struct simdjson_dom;
|
||||
|
||||
template<typename I> simdjson_really_inline static void partial_tweets(benchmark::State &state) {
|
||||
json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
|
||||
run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
|
||||
}
|
||||
|
||||
} // namespace partial_tweets
|
||||
|
|
|
@ -9,6 +9,8 @@ namespace partial_tweets {
|
|||
using namespace rapidjson;
|
||||
|
||||
struct rapidjson_base {
|
||||
using StringType=std::string_view;
|
||||
|
||||
Document doc{};
|
||||
|
||||
simdjson_really_inline std::string_view get_string_view(Value &object, std::string_view key) {
|
||||
|
@ -31,20 +33,20 @@ struct rapidjson_base {
|
|||
if (!field->value.IsUint64()) { throw "Field is not nullable uint64"; }
|
||||
return field->value.GetUint64();
|
||||
}
|
||||
simdjson_really_inline partial_tweets::twitter_user get_user(Value &object, std::string_view key) {
|
||||
simdjson_really_inline partial_tweets::twitter_user<std::string_view> get_user(Value &object, std::string_view key) {
|
||||
auto field = object.FindMember(key.data());
|
||||
if (field == object.MemberEnd()) { throw "Missing user field"; }
|
||||
if (!field->value.IsObject()) { throw "User field is not an object"; }
|
||||
return { get_uint64(field->value, "id"), get_string_view(field->value, "screen_name") };
|
||||
}
|
||||
|
||||
bool run(Document &root, std::vector<tweet> &result) {
|
||||
bool run(Document &root, std::vector<tweet<std::string_view>> &result) {
|
||||
if (root.HasParseError() || !root.IsObject()) { return false; }
|
||||
auto statuses = root.FindMember("statuses");
|
||||
if (statuses == root.MemberEnd() || !statuses->value.IsArray()) { return false; }
|
||||
for (auto &tweet : statuses->value.GetArray()) {
|
||||
if (!tweet.IsObject()) { return false; }
|
||||
result.emplace_back(partial_tweets::tweet{
|
||||
result.emplace_back(partial_tweets::tweet<std::string_view>{
|
||||
get_string_view(tweet, "created_at"),
|
||||
get_uint64 (tweet, "id"),
|
||||
get_string_view(tweet, "text"),
|
||||
|
@ -60,14 +62,14 @@ struct rapidjson_base {
|
|||
};
|
||||
|
||||
struct rapidjson : rapidjson_base {
|
||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
||||
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||
return rapidjson_base::run(doc.Parse<kParseValidateEncodingFlag>(json.data()), result);
|
||||
}
|
||||
};
|
||||
BENCHMARK_TEMPLATE(partial_tweets, rapidjson)->UseManualTime();
|
||||
|
||||
struct rapidjson_insitu : rapidjson_base {
|
||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
||||
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||
return rapidjson_base::run(doc.ParseInsitu<kParseValidateEncodingFlag>(json.data()), result);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_SAJSON
|
||||
|
||||
#include "partial_tweets.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
struct sajson {
|
||||
using StringType=std::string_view;
|
||||
|
||||
size_t ast_buffer_size{0};
|
||||
size_t *ast_buffer{nullptr};
|
||||
simdjson_really_inline std::string_view get_string_view(const ::sajson::value &obj, std::string_view key) {
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field is not a string"; }
|
||||
return { val.as_cstring(), val.get_string_length() };
|
||||
}
|
||||
simdjson_really_inline uint64_t get_uint52(const ::sajson::value &obj, std::string_view key) {
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
switch (val.get_type()) {
|
||||
case ::sajson::TYPE_INTEGER: {
|
||||
int64_t result;
|
||||
if (!val.get_int53_value(&result) || result < 0) { throw "field is not uint52"; }
|
||||
return uint64_t(result);
|
||||
}
|
||||
default:
|
||||
throw "field not integer";
|
||||
}
|
||||
}
|
||||
simdjson_really_inline uint64_t get_str_uint64(const ::sajson::value &obj, std::string_view key) {
|
||||
// Since sajson only supports 53-bit numbers, and IDs in twitter.json can be > 53 bits, we read the corresponding id_str and parse that.
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field not a string"; }
|
||||
auto str = val.as_cstring();
|
||||
char *endptr;
|
||||
uint64_t result = strtoull(str, &endptr, 10);
|
||||
if (endptr != &str[val.get_string_length()]) { throw "field is a string, but not an integer string"; }
|
||||
return result;
|
||||
}
|
||||
simdjson_really_inline uint64_t get_nullable_str_uint64(const ::sajson::value &obj, std::string_view key) {
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
if (val.get_type() == ::sajson::TYPE_NULL) { return 0; }
|
||||
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field not a string"; }
|
||||
auto str = val.as_cstring();
|
||||
char *endptr;
|
||||
uint64_t result = strtoull(str, &endptr, 10);
|
||||
if (endptr != &str[val.get_string_length()]) { throw "field is a string, but not an integer string"; }
|
||||
return result;
|
||||
}
|
||||
simdjson_really_inline partial_tweets::twitter_user<std::string_view> get_user(const ::sajson::value &obj, std::string_view key) {
|
||||
auto user = obj.get_value_of_key({key.data(), key.length()});
|
||||
if (user.get_type() != ::sajson::TYPE_OBJECT) { throw "user is not an object"; }
|
||||
return { get_str_uint64(user, "id_str"), get_string_view(user, "screen_name") };
|
||||
}
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||
if (!ast_buffer) {
|
||||
ast_buffer_size = json.size();
|
||||
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t));
|
||||
}
|
||||
auto doc = ::sajson::parse(
|
||||
::sajson::bounded_allocation(ast_buffer, ast_buffer_size),
|
||||
::sajson::mutable_string_view(json.size(), json.data())
|
||||
);
|
||||
if (!doc.is_valid()) { return false; }
|
||||
|
||||
auto root = doc.get_root();
|
||||
if (root.get_type() != ::sajson::TYPE_OBJECT) { return false; }
|
||||
auto statuses = root.get_value_of_key({"statuses", strlen("statuses")});
|
||||
if (statuses.get_type() != ::sajson::TYPE_ARRAY) { return false; }
|
||||
|
||||
for (size_t i=0; i<statuses.get_length(); i++) {
|
||||
auto tweet = statuses.get_array_element(i);
|
||||
if (tweet.get_type() != ::sajson::TYPE_OBJECT) { return false; }
|
||||
result.emplace_back(partial_tweets::tweet<std::string_view>{
|
||||
get_string_view(tweet, "created_at"),
|
||||
get_str_uint64 (tweet, "id_str"),
|
||||
get_string_view(tweet, "text"),
|
||||
get_nullable_str_uint64(tweet, "in_reply_to_status_id_str"),
|
||||
get_user (tweet, "user"),
|
||||
get_uint52 (tweet, "retweet_count"),
|
||||
get_uint52 (tweet, "favorite_count")
|
||||
});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(partial_tweets, sajson)->UseManualTime();
|
||||
|
||||
} // namespace partial_tweets
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_SAJSON
|
||||
|
|
@ -9,6 +9,8 @@ namespace partial_tweets {
|
|||
using namespace simdjson;
|
||||
|
||||
struct simdjson_dom {
|
||||
using StringType=std::string_view;
|
||||
|
||||
dom::parser parser{};
|
||||
|
||||
simdjson_really_inline uint64_t nullable_int(dom::element element) {
|
||||
|
@ -16,10 +18,10 @@ struct simdjson_dom {
|
|||
return element;
|
||||
}
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
||||
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||
for (dom::element tweet : parser.parse(json)["statuses"]) {
|
||||
auto user = tweet["user"];
|
||||
result.emplace_back(partial_tweets::tweet{
|
||||
result.emplace_back(partial_tweets::tweet<std::string_view>{
|
||||
tweet["created_at"],
|
||||
tweet["id"],
|
||||
tweet["text"],
|
||||
|
|
|
@ -10,6 +10,8 @@ using namespace simdjson;
|
|||
using namespace simdjson::builtin;
|
||||
|
||||
struct simdjson_ondemand {
|
||||
using StringType=std::string_view;
|
||||
|
||||
ondemand::parser parser{};
|
||||
|
||||
simdjson_really_inline uint64_t nullable_int(ondemand::value value) {
|
||||
|
@ -17,15 +19,15 @@ struct simdjson_ondemand {
|
|||
return value;
|
||||
}
|
||||
|
||||
simdjson_really_inline twitter_user read_user(ondemand::object user) {
|
||||
simdjson_really_inline twitter_user<std::string_view> read_user(ondemand::object user) {
|
||||
return { user.find_field("id"), user.find_field("screen_name") };
|
||||
}
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
||||
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||
// Walk the document, parsing the tweets as we go
|
||||
auto doc = parser.iterate(json);
|
||||
for (ondemand::object tweet : doc.find_field("statuses")) {
|
||||
result.emplace_back(partial_tweets::tweet{
|
||||
result.emplace_back(partial_tweets::tweet<std::string_view>{
|
||||
tweet.find_field("created_at"),
|
||||
tweet.find_field("id"),
|
||||
tweet.find_field("text"),
|
||||
|
|
|
@ -22,15 +22,17 @@ namespace partial_tweets {
|
|||
// ]
|
||||
// }
|
||||
|
||||
template<typename StringType=std::string_view>
|
||||
struct tweet {
|
||||
std::string_view created_at{};
|
||||
StringType created_at{};
|
||||
uint64_t id{};
|
||||
std::string_view result{};
|
||||
StringType result{};
|
||||
uint64_t in_reply_to_status_id{};
|
||||
twitter_user user{};
|
||||
twitter_user<StringType> user{};
|
||||
uint64_t retweet_count{};
|
||||
uint64_t favorite_count{};
|
||||
simdjson_really_inline bool operator==(const tweet &other) const {
|
||||
template<typename OtherStringType>
|
||||
simdjson_really_inline bool operator==(const tweet<OtherStringType> &other) const {
|
||||
return created_at == other.created_at &&
|
||||
id == other.id &&
|
||||
result == other.result &&
|
||||
|
@ -39,10 +41,12 @@ struct tweet {
|
|||
retweet_count == other.retweet_count &&
|
||||
favorite_count == other.favorite_count;
|
||||
}
|
||||
simdjson_really_inline bool operator!=(const tweet &other) const { return !(*this == other); }
|
||||
template<typename OtherStringType>
|
||||
simdjson_really_inline bool operator!=(const tweet<OtherStringType> &other) const { return !(*this == other); }
|
||||
};
|
||||
|
||||
simdjson_unused static std::ostream &operator<<(std::ostream &o, const tweet &t) {
|
||||
template<typename StringType>
|
||||
simdjson_unused static std::ostream &operator<<(std::ostream &o, const tweet<StringType> &t) {
|
||||
o << "created_at: " << t.created_at << std::endl;
|
||||
o << "id: " << t.id << std::endl;
|
||||
o << "result: " << t.result << std::endl;
|
||||
|
|
|
@ -3,11 +3,13 @@
|
|||
|
||||
namespace partial_tweets {
|
||||
|
||||
template<typename StringType=std::string_view>
|
||||
struct twitter_user {
|
||||
uint64_t id{};
|
||||
std::string_view screen_name{};
|
||||
StringType screen_name{};
|
||||
|
||||
bool operator==(const twitter_user &other) const {
|
||||
template<typename OtherStringType>
|
||||
bool operator==(const twitter_user<OtherStringType> &other) const {
|
||||
return id == other.id &&
|
||||
screen_name == other.screen_name;
|
||||
}
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
namespace partial_tweets {
|
||||
|
||||
struct yyjson_base {
|
||||
using StringType=std::string_view;
|
||||
|
||||
simdjson_really_inline std::string_view get_string_view(yyjson_val *obj, std::string_view key) {
|
||||
auto val = yyjson_obj_getn(obj, key.data(), key.length());
|
||||
if (!yyjson_is_str(val)) { throw "field is not uint64 or null!"; }
|
||||
|
@ -24,13 +26,13 @@ struct yyjson_base {
|
|||
if (type != YYJSON_TYPE_NUM && type != YYJSON_TYPE_NULL ) { throw "field is not uint64 or null!"; }
|
||||
return yyjson_get_uint(val);
|
||||
}
|
||||
simdjson_really_inline partial_tweets::twitter_user get_user(yyjson_val *obj, std::string_view key) {
|
||||
simdjson_really_inline partial_tweets::twitter_user<std::string_view> get_user(yyjson_val *obj, std::string_view key) {
|
||||
auto user = yyjson_obj_getn(obj, key.data(), key.length());
|
||||
if (!yyjson_is_obj(user)) { throw "missing twitter user field!"; }
|
||||
return { get_uint64(user, "id"), get_string_view(user, "screen_name") };
|
||||
}
|
||||
|
||||
bool run(yyjson_doc *doc, std::vector<tweet> &result) {
|
||||
bool run(yyjson_doc *doc, std::vector<tweet<std::string_view>> &result) {
|
||||
if (!doc) { return false; }
|
||||
yyjson_val *root = yyjson_doc_get_root(doc);
|
||||
if (!yyjson_is_obj(root)) { return false; }
|
||||
|
@ -43,7 +45,7 @@ struct yyjson_base {
|
|||
yyjson_arr_foreach(statuses, tweet_idx, tweets_max, tweet) {
|
||||
if (!yyjson_is_obj(tweet)) { return false; }
|
||||
// TODO these can't actually handle errors
|
||||
result.emplace_back(partial_tweets::tweet{
|
||||
result.emplace_back(partial_tweets::tweet<std::string_view>{
|
||||
get_string_view(tweet, "created_at"),
|
||||
get_uint64 (tweet, "id"),
|
||||
get_string_view(tweet, "text"),
|
||||
|
@ -59,14 +61,14 @@ struct yyjson_base {
|
|||
};
|
||||
|
||||
struct yyjson : yyjson_base {
|
||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
||||
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||
return yyjson_base::run(yyjson_read(json.data(), json.size(), 0), result);
|
||||
}
|
||||
};
|
||||
BENCHMARK_TEMPLATE(partial_tweets, yyjson)->UseManualTime();
|
||||
|
||||
struct yyjson_insitu : yyjson_base {
|
||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
||||
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||
return yyjson_base::run(yyjson_read_opts(json.data(), json.size(), YYJSON_READ_INSITU, 0, 0), result);
|
||||
}
|
||||
};
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue