Merge pull request #1372 from simdjson/jkeiser/ondemand-sajson
Add sajson and nlohmann_json benchmarks
This commit is contained in:
commit
3849cc400e
|
@ -34,16 +34,25 @@ if (TARGET competition-all)
|
||||||
target_compile_definitions(allparsingcompetition PRIVATE ALLPARSER)
|
target_compile_definitions(allparsingcompetition PRIVATE ALLPARSER)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (TARGET benchmark::benchmark)
|
if(TARGET benchmark::benchmark)
|
||||||
link_libraries(benchmark::benchmark)
|
link_libraries(benchmark::benchmark)
|
||||||
add_executable(bench_parse_call bench_parse_call.cpp)
|
add_executable(bench_parse_call bench_parse_call.cpp)
|
||||||
add_executable(bench_dom_api bench_dom_api.cpp)
|
add_executable(bench_dom_api bench_dom_api.cpp)
|
||||||
if (SIMDJSON_EXCEPTIONS)
|
if(SIMDJSON_EXCEPTIONS)
|
||||||
add_executable(bench_ondemand bench_ondemand.cpp)
|
add_executable(bench_ondemand bench_ondemand.cpp)
|
||||||
if (TARGET yyjson)
|
if(TARGET yyjson)
|
||||||
target_link_libraries(bench_ondemand PRIVATE yyjson rapidjson)
|
target_link_libraries(bench_ondemand PRIVATE yyjson)
|
||||||
endif (TARGET yyjson)
|
endif()
|
||||||
endif (SIMDJSON_EXCEPTIONS)
|
if(TARGET rapidjson)
|
||||||
|
target_link_libraries(bench_ondemand PRIVATE rapidjson)
|
||||||
|
endif()
|
||||||
|
if(TARGET sajson)
|
||||||
|
target_link_libraries(bench_ondemand PRIVATE sajson)
|
||||||
|
endif()
|
||||||
|
if(TARGET nlohmann_json)
|
||||||
|
target_link_libraries(bench_ondemand PRIVATE nlohmann_json)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
include(checkperf.cmake)
|
include(checkperf.cmake)
|
||||||
|
|
|
@ -13,6 +13,14 @@ SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
|
||||||
#include "rapidjson/writer.h"
|
#include "rapidjson/writer.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_SAJSON
|
||||||
|
#include "sajson.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||||
|
#include <nlohmann/json.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
// This has to be last, for reasons I don't yet understand
|
// This has to be last, for reasons I don't yet understand
|
||||||
#include <benchmark/benchmark.h>
|
#include <benchmark/benchmark.h>
|
||||||
|
|
||||||
|
@ -21,27 +29,37 @@ SIMDJSON_POP_DISABLE_WARNINGS
|
||||||
#include "partial_tweets/simdjson_dom.h"
|
#include "partial_tweets/simdjson_dom.h"
|
||||||
#include "partial_tweets/simdjson_ondemand.h"
|
#include "partial_tweets/simdjson_ondemand.h"
|
||||||
#include "partial_tweets/yyjson.h"
|
#include "partial_tweets/yyjson.h"
|
||||||
|
#include "partial_tweets/sajson.h"
|
||||||
#include "partial_tweets/rapidjson.h"
|
#include "partial_tweets/rapidjson.h"
|
||||||
|
#include "partial_tweets/nlohmann_json.h"
|
||||||
|
|
||||||
#include "large_random/simdjson_dom.h"
|
#include "large_random/simdjson_dom.h"
|
||||||
#include "large_random/simdjson_ondemand.h"
|
#include "large_random/simdjson_ondemand.h"
|
||||||
#include "large_random/simdjson_ondemand_unordered.h"
|
#include "large_random/simdjson_ondemand_unordered.h"
|
||||||
#include "large_random/yyjson.h"
|
#include "large_random/yyjson.h"
|
||||||
|
#include "large_random/sajson.h"
|
||||||
#include "large_random/rapidjson.h"
|
#include "large_random/rapidjson.h"
|
||||||
|
#include "large_random/nlohmann_json.h"
|
||||||
|
|
||||||
#include "kostya/simdjson_dom.h"
|
#include "kostya/simdjson_dom.h"
|
||||||
#include "kostya/simdjson_ondemand.h"
|
#include "kostya/simdjson_ondemand.h"
|
||||||
#include "kostya/yyjson.h"
|
#include "kostya/yyjson.h"
|
||||||
|
#include "kostya/sajson.h"
|
||||||
#include "kostya/rapidjson.h"
|
#include "kostya/rapidjson.h"
|
||||||
|
#include "kostya/nlohmann_json.h"
|
||||||
|
|
||||||
#include "distinct_user_id/simdjson_dom.h"
|
#include "distinct_user_id/simdjson_dom.h"
|
||||||
#include "distinct_user_id/simdjson_ondemand.h"
|
#include "distinct_user_id/simdjson_ondemand.h"
|
||||||
#include "distinct_user_id/yyjson.h"
|
#include "distinct_user_id/yyjson.h"
|
||||||
|
#include "distinct_user_id/sajson.h"
|
||||||
#include "distinct_user_id/rapidjson.h"
|
#include "distinct_user_id/rapidjson.h"
|
||||||
|
#include "distinct_user_id/nlohmann_json.h"
|
||||||
|
|
||||||
#include "find_tweet/simdjson_dom.h"
|
#include "find_tweet/simdjson_dom.h"
|
||||||
#include "find_tweet/simdjson_ondemand.h"
|
#include "find_tweet/simdjson_ondemand.h"
|
||||||
#include "find_tweet/yyjson.h"
|
#include "find_tweet/yyjson.h"
|
||||||
|
#include "find_tweet/sajson.h"
|
||||||
#include "find_tweet/rapidjson.h"
|
#include "find_tweet/rapidjson.h"
|
||||||
|
#include "find_tweet/nlohmann_json.h"
|
||||||
|
|
||||||
BENCHMARK_MAIN();
|
BENCHMARK_MAIN();
|
||||||
|
|
|
@ -6,16 +6,18 @@
|
||||||
|
|
||||||
namespace distinct_user_id {
|
namespace distinct_user_id {
|
||||||
|
|
||||||
|
using namespace json_benchmark;
|
||||||
|
|
||||||
template<typename I>
|
template<typename I>
|
||||||
struct runner : public json_benchmark::file_runner<I> {
|
struct runner : public file_runner<I> {
|
||||||
std::vector<uint64_t> result{};
|
std::vector<uint64_t> result{};
|
||||||
|
|
||||||
bool setup(benchmark::State &state) {
|
bool setup(benchmark::State &state) {
|
||||||
return this->load_json(state, json_benchmark::TWITTER_JSON);
|
return this->load_json(state, TWITTER_JSON);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool before_run(benchmark::State &state) {
|
bool before_run(benchmark::State &state) {
|
||||||
if (!json_benchmark::file_runner<I>::before_run(state)) { return false; }
|
if (!file_runner<I>::before_run(state)) { return false; }
|
||||||
result.clear();
|
result.clear();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -25,7 +27,7 @@ struct runner : public json_benchmark::file_runner<I> {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool after_run(benchmark::State &state) {
|
bool after_run(benchmark::State &state) {
|
||||||
if (!json_benchmark::file_runner<I>::after_run(state)) { return false; }
|
if (!file_runner<I>::after_run(state)) { return false; }
|
||||||
std::sort(result.begin(), result.end());
|
std::sort(result.begin(), result.end());
|
||||||
auto last = std::unique(result.begin(), result.end());
|
auto last = std::unique(result.begin(), result.end());
|
||||||
result.erase(last, result.end());
|
result.erase(last, result.end());
|
||||||
|
@ -34,7 +36,7 @@ struct runner : public json_benchmark::file_runner<I> {
|
||||||
|
|
||||||
template<typename R>
|
template<typename R>
|
||||||
bool diff(benchmark::State &state, runner<R> &reference) {
|
bool diff(benchmark::State &state, runner<R> &reference) {
|
||||||
return diff_results(state, result, reference.result);
|
return diff_results(state, result, reference.result, diff_flags::NONE);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t items_per_iteration() {
|
size_t items_per_iteration() {
|
||||||
|
@ -45,7 +47,7 @@ struct runner : public json_benchmark::file_runner<I> {
|
||||||
struct simdjson_dom;
|
struct simdjson_dom;
|
||||||
|
|
||||||
template<typename I> simdjson_really_inline static void distinct_user_id(benchmark::State &state) {
|
template<typename I> simdjson_really_inline static void distinct_user_id(benchmark::State &state) {
|
||||||
json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
|
run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace distinct_user_id
|
} // namespace distinct_user_id
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||||
|
|
||||||
|
#include "distinct_user_id.h"
|
||||||
|
|
||||||
|
namespace distinct_user_id {
|
||||||
|
|
||||||
|
struct nlohmann_json {
|
||||||
|
bool run(simdjson::padded_string &json, std::vector<uint64_t> &result) {
|
||||||
|
auto root = nlohmann::json::parse(json.data(), json.data() + json.size());
|
||||||
|
for (auto tweet : root["statuses"]) {
|
||||||
|
result.push_back(tweet["user"]["id"]);
|
||||||
|
if (tweet.contains("retweeted_status")) {
|
||||||
|
result.push_back(tweet["retweeted_status"]["user"]["id"]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(distinct_user_id, nlohmann_json)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace distinct_user_id
|
||||||
|
|
||||||
|
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON
|
|
@ -0,0 +1,80 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_SAJSON
|
||||||
|
|
||||||
|
#include "distinct_user_id.h"
|
||||||
|
|
||||||
|
namespace distinct_user_id {
|
||||||
|
|
||||||
|
struct sajson {
|
||||||
|
size_t ast_buffer_size{0};
|
||||||
|
size_t *ast_buffer{nullptr};
|
||||||
|
simdjson_really_inline std::string_view get_string_view(const ::sajson::value &obj, std::string_view key) {
|
||||||
|
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||||
|
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field is not a string"; }
|
||||||
|
return { val.as_cstring(), val.get_string_length() };
|
||||||
|
}
|
||||||
|
simdjson_really_inline uint64_t get_str_uint64(const ::sajson::value &obj, std::string_view key) {
|
||||||
|
// Since sajson only supports 53-bit numbers, and IDs in twitter.json can be > 53 bits, we read the corresponding id_str and parse that.
|
||||||
|
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||||
|
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field not a string"; }
|
||||||
|
auto str = val.as_cstring();
|
||||||
|
char *endptr;
|
||||||
|
uint64_t result = strtoull(str, &endptr, 10);
|
||||||
|
if (endptr != &str[val.get_string_length()]) { throw "field is a string, but not an integer string"; }
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool run(simdjson::padded_string &json, std::vector<uint64_t> &result) {
|
||||||
|
using namespace sajson;
|
||||||
|
if (!ast_buffer) {
|
||||||
|
ast_buffer_size = json.size();
|
||||||
|
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t));
|
||||||
|
}
|
||||||
|
auto doc = parse(
|
||||||
|
bounded_allocation(ast_buffer, ast_buffer_size),
|
||||||
|
mutable_string_view(json.size(), json.data())
|
||||||
|
);
|
||||||
|
if (!doc.is_valid()) { return false; }
|
||||||
|
|
||||||
|
auto root = doc.get_root();
|
||||||
|
if (root.get_type() != TYPE_OBJECT) { return false; }
|
||||||
|
auto statuses = root.get_value_of_key({"statuses", strlen("statuses")});
|
||||||
|
if (statuses.get_type() != TYPE_ARRAY) { return false; }
|
||||||
|
|
||||||
|
for (size_t i=0; i<statuses.get_length(); i++) {
|
||||||
|
auto tweet = statuses.get_array_element(i);
|
||||||
|
|
||||||
|
// get tweet.user.id
|
||||||
|
if (tweet.get_type() != TYPE_OBJECT) { return false; }
|
||||||
|
auto user = tweet.get_value_of_key({"user", strlen("user")});
|
||||||
|
if (user.get_type() != TYPE_OBJECT) { return false; }
|
||||||
|
result.push_back(get_str_uint64(user, "id_str"));
|
||||||
|
|
||||||
|
// get tweet.retweeted_status.user.id
|
||||||
|
auto retweet = tweet.get_value_of_key({"retweeted_status", strlen("retweeted_status")});
|
||||||
|
switch (retweet.get_type()) {
|
||||||
|
case TYPE_OBJECT: {
|
||||||
|
auto retweet_user = retweet.get_value_of_key({"user", strlen("user")});
|
||||||
|
if (retweet_user.get_type() != TYPE_OBJECT) { return false; }
|
||||||
|
result.push_back(get_str_uint64(retweet_user, "id_str"));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// TODO distinguish null and missing. null is bad. missing is fine.
|
||||||
|
case TYPE_NULL:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(distinct_user_id, sajson)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace distinct_user_id
|
||||||
|
|
||||||
|
#endif // SIMDJSON_COMPETITION_SAJSON
|
||||||
|
|
|
@ -5,34 +5,36 @@
|
||||||
|
|
||||||
namespace find_tweet {
|
namespace find_tweet {
|
||||||
|
|
||||||
|
using namespace json_benchmark;
|
||||||
|
|
||||||
template<typename I>
|
template<typename I>
|
||||||
struct runner : public json_benchmark::file_runner<I> {
|
struct runner : public file_runner<I> {
|
||||||
std::string_view result;
|
typename I::StringType result;
|
||||||
|
|
||||||
bool setup(benchmark::State &state) {
|
bool setup(benchmark::State &state) {
|
||||||
return this->load_json(state, json_benchmark::TWITTER_JSON);
|
return this->load_json(state, TWITTER_JSON);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool before_run(benchmark::State &state) {
|
bool before_run(benchmark::State &state) {
|
||||||
if (!json_benchmark::file_runner<I>::before_run(state)) { return false; }
|
if (!file_runner<I>::before_run(state)) { return false; }
|
||||||
result = "";
|
result = "";
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool run(benchmark::State &) {
|
bool run(benchmark::State &) {
|
||||||
return this->implementation.run(this->json, 505874901689851900ULL, result);
|
return this->implementation.run(this->json, 505874901689851904ULL, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename R>
|
template<typename R>
|
||||||
bool diff(benchmark::State &state, runner<R> &reference) {
|
bool diff(benchmark::State &state, runner<R> &reference) {
|
||||||
return diff_results(state, result, reference.result);
|
return diff_results(state, result, reference.result, diff_flags::NONE);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct simdjson_dom;
|
struct simdjson_dom;
|
||||||
|
|
||||||
template<typename I> simdjson_really_inline static void find_tweet(benchmark::State &state) {
|
template<typename I> simdjson_really_inline static void find_tweet(benchmark::State &state) {
|
||||||
json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
|
run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace find_tweet
|
} // namespace find_tweet
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||||
|
|
||||||
|
#include "find_tweet.h"
|
||||||
|
|
||||||
|
namespace find_tweet {
|
||||||
|
|
||||||
|
struct nlohmann_json {
|
||||||
|
using StringType=std::string;
|
||||||
|
|
||||||
|
bool run(simdjson::padded_string &json, uint64_t find_id, std::string &result) {
|
||||||
|
auto root = nlohmann::json::parse(json.data(), json.data() + json.size());
|
||||||
|
for (auto tweet : root["statuses"]) {
|
||||||
|
if (tweet["id"] == find_id) {
|
||||||
|
result = tweet["text"];
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(find_tweet, nlohmann_json)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace find_tweet
|
||||||
|
|
||||||
|
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON
|
|
@ -9,6 +9,8 @@ namespace find_tweet {
|
||||||
using namespace rapidjson;
|
using namespace rapidjson;
|
||||||
|
|
||||||
struct rapidjson_base {
|
struct rapidjson_base {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
Document doc{};
|
Document doc{};
|
||||||
|
|
||||||
bool run(Document &root, uint64_t find_id, std::string_view &result) {
|
bool run(Document &root, uint64_t find_id, std::string_view &result) {
|
||||||
|
|
|
@ -0,0 +1,66 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_SAJSON
|
||||||
|
|
||||||
|
#include "find_tweet.h"
|
||||||
|
|
||||||
|
namespace find_tweet {
|
||||||
|
|
||||||
|
struct sajson {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
|
size_t ast_buffer_size{0};
|
||||||
|
size_t *ast_buffer{nullptr};
|
||||||
|
simdjson_really_inline std::string_view get_string_view(const ::sajson::value &obj, std::string_view key) {
|
||||||
|
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||||
|
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field is not a string"; }
|
||||||
|
return { val.as_cstring(), val.get_string_length() };
|
||||||
|
}
|
||||||
|
simdjson_really_inline uint64_t get_str_uint64(const ::sajson::value &obj, std::string_view key) {
|
||||||
|
// Since sajson only supports 53-bit numbers, and IDs in twitter.json can be > 53 bits, we read the corresponding id_str and parse that.
|
||||||
|
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||||
|
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field not a string"; }
|
||||||
|
auto str = val.as_cstring();
|
||||||
|
char *endptr;
|
||||||
|
uint64_t result = strtoull(str, &endptr, 10);
|
||||||
|
if (endptr != &str[val.get_string_length()]) { throw "field is a string, but not an integer string"; }
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool run(simdjson::padded_string &json, uint64_t find_id, std::string_view &result) {
|
||||||
|
if (!ast_buffer) {
|
||||||
|
ast_buffer_size = json.size();
|
||||||
|
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t));
|
||||||
|
}
|
||||||
|
auto doc = ::sajson::parse(
|
||||||
|
::sajson::bounded_allocation(ast_buffer, ast_buffer_size),
|
||||||
|
::sajson::mutable_string_view(json.size(), json.data())
|
||||||
|
);
|
||||||
|
if (!doc.is_valid()) { return false; }
|
||||||
|
|
||||||
|
auto root = doc.get_root();
|
||||||
|
if (root.get_type() != ::sajson::TYPE_OBJECT) { printf("a\n"); return false; }
|
||||||
|
auto statuses = root.get_value_of_key({"statuses", strlen("statuses")});
|
||||||
|
if (statuses.get_type() != ::sajson::TYPE_ARRAY) { return false; }
|
||||||
|
|
||||||
|
for (size_t i=0; i<statuses.get_length(); i++) {
|
||||||
|
auto tweet = statuses.get_array_element(i);
|
||||||
|
if (tweet.get_type() != ::sajson::TYPE_OBJECT) { printf("b\n"); return false; }
|
||||||
|
// TODO if there is a way to get the raw string, it might be faster to iota find_id and then
|
||||||
|
// compare it to each id_str, instead of parsing each int and comparing to find_id.
|
||||||
|
if (get_str_uint64(tweet, "id_str") == find_id) {
|
||||||
|
result = get_string_view(tweet, "text");
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(find_tweet, sajson)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace find_tweet
|
||||||
|
|
||||||
|
#endif // SIMDJSON_COMPETITION_SAJSON
|
||||||
|
|
|
@ -9,6 +9,8 @@ namespace find_tweet {
|
||||||
using namespace simdjson;
|
using namespace simdjson;
|
||||||
|
|
||||||
struct simdjson_dom {
|
struct simdjson_dom {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
dom::parser parser{};
|
dom::parser parser{};
|
||||||
|
|
||||||
bool run(simdjson::padded_string &json, uint64_t find_id, std::string_view &result) {
|
bool run(simdjson::padded_string &json, uint64_t find_id, std::string_view &result) {
|
||||||
|
|
|
@ -10,6 +10,8 @@ using namespace simdjson;
|
||||||
using namespace simdjson::builtin;
|
using namespace simdjson::builtin;
|
||||||
|
|
||||||
struct simdjson_ondemand {
|
struct simdjson_ondemand {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
ondemand::parser parser{};
|
ondemand::parser parser{};
|
||||||
|
|
||||||
bool run(simdjson::padded_string &json, uint64_t find_id, std::string_view &result) {
|
bool run(simdjson::padded_string &json, uint64_t find_id, std::string_view &result) {
|
||||||
|
|
|
@ -7,6 +7,8 @@
|
||||||
namespace find_tweet {
|
namespace find_tweet {
|
||||||
|
|
||||||
struct yyjson_base {
|
struct yyjson_base {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
bool run(yyjson_doc *doc, uint64_t find_id, std::string_view &result) {
|
bool run(yyjson_doc *doc, uint64_t find_id, std::string_view &result) {
|
||||||
if (!doc) { return false; }
|
if (!doc) { return false; }
|
||||||
yyjson_val *root = yyjson_doc_get_root(doc);
|
yyjson_val *root = yyjson_doc_get_root(doc);
|
||||||
|
|
|
@ -2,13 +2,21 @@
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
#include <limits>
|
||||||
|
|
||||||
template<typename T>
|
namespace json_benchmark {
|
||||||
static bool diff_results(benchmark::State &state, const T &result, const T &reference);
|
|
||||||
|
|
||||||
template<typename T>
|
enum class diff_flags {
|
||||||
|
NONE = 0,
|
||||||
|
IMPRECISE_FLOATS = 1
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename T, typename U>
|
||||||
|
static bool diff_results(benchmark::State &state, const T &result, const U &reference, diff_flags flags);
|
||||||
|
|
||||||
|
template<typename T, typename U>
|
||||||
struct result_differ {
|
struct result_differ {
|
||||||
static bool diff(benchmark::State &state, const T &result, const T &reference) {
|
static bool diff(benchmark::State &state, const T &result, const U &reference, diff_flags flags) {
|
||||||
if (result != reference) {
|
if (result != reference) {
|
||||||
std::stringstream str;
|
std::stringstream str;
|
||||||
str << "result incorrect: " << result << " ... reference: " << reference;
|
str << "result incorrect: " << result << " ... reference: " << reference;
|
||||||
|
@ -19,13 +27,13 @@ struct result_differ {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename T>
|
template<typename T, typename U>
|
||||||
struct result_differ<std::vector<T>> {
|
struct result_differ<std::vector<T>, std::vector<U>> {
|
||||||
static bool diff(benchmark::State &state, const std::vector<T> &result, const std::vector<T> &reference) {
|
static bool diff(benchmark::State &state, const std::vector<T> &result, const std::vector<U> &reference, diff_flags flags) {
|
||||||
auto result_iter = result.begin();
|
auto result_iter = result.begin();
|
||||||
auto reference_iter = reference.begin();
|
auto reference_iter = reference.begin();
|
||||||
while (result_iter != result.end() && reference_iter != reference.end()) {
|
while (result_iter != result.end() && reference_iter != reference.end()) {
|
||||||
if (!diff_results(state, *result_iter, *reference_iter)) { return false; }
|
if (!diff_results(state, *result_iter, *reference_iter, flags)) { return false; }
|
||||||
result_iter++;
|
result_iter++;
|
||||||
reference_iter++;
|
reference_iter++;
|
||||||
}
|
}
|
||||||
|
@ -44,8 +52,41 @@ struct result_differ<std::vector<T>> {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename T>
|
template<>
|
||||||
static bool diff_results(benchmark::State &state, const T &result, const T &reference) {
|
struct result_differ<double, double> {
|
||||||
return result_differ<T>::diff(state, result, reference);
|
static bool diff(benchmark::State &state, const double &result, const double &reference, diff_flags flags) {
|
||||||
|
bool different;
|
||||||
|
if (int(flags) & int(diff_flags::IMPRECISE_FLOATS)) {
|
||||||
|
different = f64_ulp_dist(result, reference) > 1;
|
||||||
|
} else {
|
||||||
|
different = result != reference;
|
||||||
|
}
|
||||||
|
if (different) {
|
||||||
|
std::stringstream str;
|
||||||
|
// We print it out using full precision.
|
||||||
|
constexpr auto precision = std::numeric_limits<double>::max_digits10;
|
||||||
|
str << std::setprecision(precision);
|
||||||
|
str << "incorrect double result: " << std::endl;
|
||||||
|
str << " result: " << std::left << std::setw(precision+2) << result << " (hexfloat " << std::hexfloat << result << ")" << std::defaultfloat << std::endl;
|
||||||
|
str << "reference: " << std::left << std::setw(precision+2) << reference << " (hexfloat " << std::hexfloat << reference << ")" << std::defaultfloat << std::endl;
|
||||||
|
state.SkipWithError(str.str().data());
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint64_t f64_ulp_dist(double a, double b) {
|
||||||
|
uint64_t ua, ub;
|
||||||
|
std::memcpy(&ua, &a, sizeof(ua));
|
||||||
|
std::memcpy(&ub, &b, sizeof(ub));
|
||||||
|
if ((int64_t)(ub ^ ua) >= 0)
|
||||||
|
return (int64_t)(ua - ub) >= 0 ? (ua - ub) : (ub - ua);
|
||||||
|
return ua + ub + 0x80000000;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename T, typename U>
|
||||||
|
static bool diff_results(benchmark::State &state, const T &result, const U &reference, diff_flags flags) {
|
||||||
|
return result_differ<T, U>::diff(state, result, reference, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace json_benchmark
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "diff_results.h"
|
||||||
|
|
||||||
|
namespace json_benchmark {
|
||||||
|
|
||||||
|
struct point {
|
||||||
|
double x;
|
||||||
|
double y;
|
||||||
|
double z;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct result_differ<point, point> {
|
||||||
|
static bool diff(benchmark::State &state, const point &result, const point &reference, diff_flags flags) {
|
||||||
|
return diff_results(state, result.x, reference.x, flags)
|
||||||
|
&& diff_results(state, result.y, reference.y, flags)
|
||||||
|
&& diff_results(state, result.z, reference.z, flags);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static simdjson_unused std::ostream &operator<<(std::ostream &o, const point &p) {
|
||||||
|
return o << p.x << "," << p.y << "," << p.z << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace json_benchmark
|
|
@ -1,39 +1,24 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#if SIMDJSON_EXCEPTIONS
|
|
||||||
|
|
||||||
#include "json_benchmark/string_runner.h"
|
#include "json_benchmark/string_runner.h"
|
||||||
|
#include "json_benchmark/point.h"
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <random>
|
#include <random>
|
||||||
|
|
||||||
namespace kostya {
|
namespace kostya {
|
||||||
|
|
||||||
|
using namespace json_benchmark;
|
||||||
|
|
||||||
static const simdjson::padded_string &get_built_json_array();
|
static const simdjson::padded_string &get_built_json_array();
|
||||||
|
|
||||||
struct point {
|
|
||||||
double x;
|
|
||||||
double y;
|
|
||||||
double z;
|
|
||||||
simdjson_really_inline bool operator==(const point &other) const {
|
|
||||||
return x == other.x && y == other.y && z == other.z;
|
|
||||||
}
|
|
||||||
simdjson_really_inline bool operator!=(const point &other) const {
|
|
||||||
return !(*this == other);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
simdjson_unused static std::ostream &operator<<(std::ostream &o, const point &p) {
|
|
||||||
return o << p.x << "," << p.y << "," << p.z << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename I>
|
template<typename I>
|
||||||
struct runner : public json_benchmark::string_runner<I> {
|
struct runner : public string_runner<I> {
|
||||||
std::vector<point> result;
|
std::vector<point> result;
|
||||||
|
|
||||||
runner() : json_benchmark::string_runner<I>(get_built_json_array()) {}
|
runner() : string_runner<I>(get_built_json_array()) {}
|
||||||
|
|
||||||
bool before_run(benchmark::State &state) {
|
bool before_run(benchmark::State &state) {
|
||||||
if (!json_benchmark::string_runner<I>::before_run(state)) { return false; }
|
if (!string_runner<I>::before_run(state)) { return false; }
|
||||||
result.clear();
|
result.clear();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -44,7 +29,7 @@ struct runner : public json_benchmark::string_runner<I> {
|
||||||
|
|
||||||
template<typename R>
|
template<typename R>
|
||||||
bool diff(benchmark::State &state, runner<R> &reference) {
|
bool diff(benchmark::State &state, runner<R> &reference) {
|
||||||
return diff_results(state, result, reference.result);
|
return diff_results(state, result, reference.result, I::DiffFlags);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t items_per_iteration() {
|
size_t items_per_iteration() {
|
||||||
|
@ -95,9 +80,7 @@ static const simdjson::padded_string &get_built_json_array() {
|
||||||
struct simdjson_dom;
|
struct simdjson_dom;
|
||||||
|
|
||||||
template<typename I> simdjson_really_inline static void kostya(benchmark::State &state) {
|
template<typename I> simdjson_really_inline static void kostya(benchmark::State &state) {
|
||||||
json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
|
run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace kostya
|
} // namespace kostya
|
||||||
|
|
||||||
#endif // SIMDJSON_EXCEPTIONS
|
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||||
|
|
||||||
|
#include "kostya.h"
|
||||||
|
|
||||||
|
namespace kostya {
|
||||||
|
|
||||||
|
struct nlohmann_json {
|
||||||
|
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||||
|
|
||||||
|
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||||
|
auto root = nlohmann::json::parse(json.data(), json.data() + json.size());
|
||||||
|
for (auto point : root["coordinates"]) {
|
||||||
|
result.emplace_back(json_benchmark::point{point["x"], point["y"], point["z"]});
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(kostya, nlohmann_json)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace kostya
|
||||||
|
|
||||||
|
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON
|
|
@ -9,6 +9,8 @@ namespace kostya {
|
||||||
using namespace rapidjson;
|
using namespace rapidjson;
|
||||||
|
|
||||||
struct rapidjson_base {
|
struct rapidjson_base {
|
||||||
|
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||||
|
|
||||||
Document doc;
|
Document doc;
|
||||||
|
|
||||||
simdjson_really_inline double get_double(Value &object, std::string_view key) {
|
simdjson_really_inline double get_double(Value &object, std::string_view key) {
|
||||||
|
@ -26,7 +28,7 @@ struct rapidjson_base {
|
||||||
if (!coords->value.IsArray()) { return false; }
|
if (!coords->value.IsArray()) { return false; }
|
||||||
for (auto &coord : coords->value.GetArray()) {
|
for (auto &coord : coords->value.GetArray()) {
|
||||||
if (!coord.IsObject()) { return false; }
|
if (!coord.IsObject()) { return false; }
|
||||||
result.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
result.emplace_back(json_benchmark::point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -0,0 +1,65 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_SAJSON
|
||||||
|
|
||||||
|
#include "kostya.h"
|
||||||
|
|
||||||
|
namespace kostya {
|
||||||
|
|
||||||
|
struct sajson {
|
||||||
|
static constexpr diff_flags DiffFlags = diff_flags::IMPRECISE_FLOATS;
|
||||||
|
|
||||||
|
size_t ast_buffer_size{0};
|
||||||
|
size_t *ast_buffer{nullptr};
|
||||||
|
|
||||||
|
simdjson_really_inline double get_double(const ::sajson::value &obj, std::string_view key) {
|
||||||
|
using namespace sajson;
|
||||||
|
|
||||||
|
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||||
|
switch (val.get_type()) {
|
||||||
|
case TYPE_INTEGER:
|
||||||
|
case TYPE_DOUBLE:
|
||||||
|
return val.get_number_value();
|
||||||
|
default:
|
||||||
|
throw "field not double";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||||
|
using namespace sajson;
|
||||||
|
|
||||||
|
if (!ast_buffer) {
|
||||||
|
ast_buffer_size = json.size();
|
||||||
|
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t));
|
||||||
|
}
|
||||||
|
auto doc = parse(
|
||||||
|
bounded_allocation(ast_buffer, ast_buffer_size),
|
||||||
|
mutable_string_view(json.size(), json.data())
|
||||||
|
);
|
||||||
|
if (!doc.is_valid()) { return false; }
|
||||||
|
|
||||||
|
auto root = doc.get_root();
|
||||||
|
if (root.get_type() != TYPE_OBJECT) { return false; }
|
||||||
|
auto points = root.get_value_of_key({"coordinates", strlen("coordinates")});
|
||||||
|
if (points.get_type() != TYPE_ARRAY) { return false; }
|
||||||
|
|
||||||
|
for (size_t i=0; i<points.get_length(); i++) {
|
||||||
|
auto point = points.get_array_element(i);
|
||||||
|
if (point.get_type() != TYPE_OBJECT) { return false; }
|
||||||
|
result.emplace_back(json_benchmark::point{
|
||||||
|
get_double(point, "x"),
|
||||||
|
get_double(point, "y"),
|
||||||
|
get_double(point, "z")
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(kostya, sajson)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace kostya
|
||||||
|
|
||||||
|
#endif // SIMDJSON_COMPETITION_SAJSON
|
||||||
|
|
|
@ -9,11 +9,13 @@ namespace kostya {
|
||||||
using namespace simdjson;
|
using namespace simdjson;
|
||||||
|
|
||||||
struct simdjson_dom {
|
struct simdjson_dom {
|
||||||
|
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||||
|
|
||||||
dom::parser parser{};
|
dom::parser parser{};
|
||||||
|
|
||||||
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||||
for (auto point : parser.parse(json)["coordinates"]) {
|
for (auto point : parser.parse(json)["coordinates"]) {
|
||||||
result.emplace_back(kostya::point{point["x"], point["y"], point["z"]});
|
result.emplace_back(json_benchmark::point{point["x"], point["y"], point["z"]});
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,12 +10,14 @@ using namespace simdjson;
|
||||||
using namespace simdjson::builtin;
|
using namespace simdjson::builtin;
|
||||||
|
|
||||||
struct simdjson_ondemand {
|
struct simdjson_ondemand {
|
||||||
|
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||||
|
|
||||||
ondemand::parser parser{};
|
ondemand::parser parser{};
|
||||||
|
|
||||||
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||||
auto doc = parser.iterate(json);
|
auto doc = parser.iterate(json);
|
||||||
for (ondemand::object point : doc.find_field("coordinates")) {
|
for (ondemand::object point : doc.find_field("coordinates")) {
|
||||||
result.emplace_back(kostya::point{point.find_field("x"), point.find_field("y"), point.find_field("z")});
|
result.emplace_back(json_benchmark::point{point.find_field("x"), point.find_field("y"), point.find_field("z")});
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,6 +7,8 @@
|
||||||
namespace kostya {
|
namespace kostya {
|
||||||
|
|
||||||
struct yyjson_base {
|
struct yyjson_base {
|
||||||
|
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||||
|
|
||||||
simdjson_really_inline double get_double(yyjson_val *obj, std::string_view key) {
|
simdjson_really_inline double get_double(yyjson_val *obj, std::string_view key) {
|
||||||
yyjson_val *val = yyjson_obj_getn(obj, key.data(), key.length());
|
yyjson_val *val = yyjson_obj_getn(obj, key.data(), key.length());
|
||||||
if (!val) { throw "missing point field!"; }
|
if (!val) { throw "missing point field!"; }
|
||||||
|
@ -35,7 +37,7 @@ struct yyjson_base {
|
||||||
yyjson_val *coord;
|
yyjson_val *coord;
|
||||||
yyjson_arr_foreach(coords, idx, max, coord) {
|
yyjson_arr_foreach(coords, idx, max, coord) {
|
||||||
if (!yyjson_is_obj(coord)) { return false; }
|
if (!yyjson_is_obj(coord)) { return false; }
|
||||||
result.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
result.emplace_back(json_benchmark::point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -1,36 +1,27 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "json_benchmark/string_runner.h"
|
#include "json_benchmark/string_runner.h"
|
||||||
|
#include "json_benchmark/point.h"
|
||||||
#include <random>
|
#include <random>
|
||||||
|
|
||||||
namespace large_random {
|
namespace large_random {
|
||||||
|
|
||||||
static const simdjson::padded_string &get_built_json_array();
|
static const simdjson::padded_string &get_built_json_array();
|
||||||
|
|
||||||
struct point {
|
using namespace json_benchmark;
|
||||||
double x;
|
|
||||||
double y;
|
|
||||||
double z;
|
|
||||||
simdjson_really_inline bool operator==(const point &other) const {
|
|
||||||
return x == other.x && y == other.y && z == other.z;
|
|
||||||
}
|
|
||||||
simdjson_really_inline bool operator!=(const point &other) const {
|
|
||||||
return !(*this == other);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
simdjson_unused static std::ostream &operator<<(std::ostream &o, const point &p) {
|
simdjson_unused static std::ostream &operator<<(std::ostream &o, const point &p) {
|
||||||
return o << p.x << "," << p.y << "," << p.z << std::endl;
|
return o << p.x << "," << p.y << "," << p.z << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename I>
|
template<typename I>
|
||||||
struct runner : public json_benchmark::string_runner<I> {
|
struct runner : public string_runner<I> {
|
||||||
std::vector<point> result;
|
std::vector<point> result;
|
||||||
|
|
||||||
runner() : json_benchmark::string_runner<I>(get_built_json_array()) {}
|
runner() : string_runner<I>(get_built_json_array()) {}
|
||||||
|
|
||||||
bool before_run(benchmark::State &state) {
|
bool before_run(benchmark::State &state) {
|
||||||
if (!json_benchmark::string_runner<I>::before_run(state)) { return false; }
|
if (!string_runner<I>::before_run(state)) { return false; }
|
||||||
result.clear();
|
result.clear();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -41,7 +32,7 @@ struct runner : public json_benchmark::string_runner<I> {
|
||||||
|
|
||||||
template<typename R>
|
template<typename R>
|
||||||
bool diff(benchmark::State &state, runner<R> &reference) {
|
bool diff(benchmark::State &state, runner<R> &reference) {
|
||||||
return diff_results(state, result, reference.result);
|
return diff_results(state, result, reference.result, I::DiffFlags);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t items_per_iteration() {
|
size_t items_per_iteration() {
|
||||||
|
@ -76,7 +67,7 @@ static const simdjson::padded_string &get_built_json_array() {
|
||||||
struct simdjson_dom;
|
struct simdjson_dom;
|
||||||
|
|
||||||
template<typename T> static void large_random(benchmark::State &state) {
|
template<typename T> static void large_random(benchmark::State &state) {
|
||||||
json_benchmark::run_json_benchmark<runner<T>, runner<simdjson_dom>>(state);
|
run_json_benchmark<runner<T>, runner<simdjson_dom>>(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace large_random
|
} // namespace large_random
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||||
|
|
||||||
|
#include "large_random.h"
|
||||||
|
|
||||||
|
namespace large_random {
|
||||||
|
|
||||||
|
struct nlohmann_json {
|
||||||
|
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||||
|
|
||||||
|
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||||
|
for (auto point : nlohmann::json::parse(json.data(), json.data() + json.size())) {
|
||||||
|
result.emplace_back(json_benchmark::point{point["x"], point["y"], point["z"]});
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(large_random, nlohmann_json)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace large_random
|
||||||
|
|
||||||
|
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON
|
|
@ -9,6 +9,8 @@ namespace large_random {
|
||||||
using namespace rapidjson;
|
using namespace rapidjson;
|
||||||
|
|
||||||
struct rapidjson_base {
|
struct rapidjson_base {
|
||||||
|
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||||
|
|
||||||
Document doc;
|
Document doc;
|
||||||
|
|
||||||
simdjson_really_inline double get_double(Value &object, std::string_view key) {
|
simdjson_really_inline double get_double(Value &object, std::string_view key) {
|
||||||
|
@ -23,7 +25,7 @@ struct rapidjson_base {
|
||||||
if (!coords.IsArray()) { return false; }
|
if (!coords.IsArray()) { return false; }
|
||||||
for (auto &coord : coords.GetArray()) {
|
for (auto &coord : coords.GetArray()) {
|
||||||
if (!coord.IsObject()) { return false; }
|
if (!coord.IsObject()) { return false; }
|
||||||
result.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
result.emplace_back(json_benchmark::point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -0,0 +1,63 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_SAJSON
|
||||||
|
|
||||||
|
#include "large_random.h"
|
||||||
|
|
||||||
|
namespace large_random {
|
||||||
|
|
||||||
|
struct sajson {
|
||||||
|
static constexpr diff_flags DiffFlags = diff_flags::IMPRECISE_FLOATS;
|
||||||
|
|
||||||
|
size_t ast_buffer_size{0};
|
||||||
|
size_t *ast_buffer{nullptr};
|
||||||
|
|
||||||
|
simdjson_really_inline double get_double(const ::sajson::value &obj, std::string_view key) {
|
||||||
|
using namespace sajson;
|
||||||
|
|
||||||
|
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||||
|
switch (val.get_type()) {
|
||||||
|
case TYPE_INTEGER:
|
||||||
|
case TYPE_DOUBLE:
|
||||||
|
return val.get_number_value();
|
||||||
|
default:
|
||||||
|
throw "field not double";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||||
|
using namespace sajson;
|
||||||
|
|
||||||
|
if (!ast_buffer) {
|
||||||
|
ast_buffer_size = json.size();
|
||||||
|
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t));
|
||||||
|
}
|
||||||
|
auto doc = parse(
|
||||||
|
bounded_allocation(ast_buffer, ast_buffer_size),
|
||||||
|
mutable_string_view(json.size(), json.data())
|
||||||
|
);
|
||||||
|
if (!doc.is_valid()) { return false; }
|
||||||
|
|
||||||
|
auto points = doc.get_root();
|
||||||
|
if (points.get_type() != TYPE_ARRAY) { return false; }
|
||||||
|
|
||||||
|
for (size_t i=0; i<points.get_length(); i++) {
|
||||||
|
auto point = points.get_array_element(i);
|
||||||
|
if (point.get_type() != TYPE_OBJECT) { return false; }
|
||||||
|
result.emplace_back(json_benchmark::point{
|
||||||
|
get_double(point, "x"),
|
||||||
|
get_double(point, "y"),
|
||||||
|
get_double(point, "z")
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(large_random, sajson)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace large_random
|
||||||
|
|
||||||
|
#endif // SIMDJSON_COMPETITION_SAJSON
|
||||||
|
|
|
@ -9,11 +9,13 @@ namespace large_random {
|
||||||
using namespace simdjson;
|
using namespace simdjson;
|
||||||
|
|
||||||
struct simdjson_dom {
|
struct simdjson_dom {
|
||||||
|
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||||
|
|
||||||
dom::parser parser{};
|
dom::parser parser{};
|
||||||
|
|
||||||
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||||
for (auto point : parser.parse(json)) {
|
for (auto point : parser.parse(json)) {
|
||||||
result.emplace_back(large_random::point{point["x"], point["y"], point["z"]});
|
result.emplace_back(json_benchmark::point{point["x"], point["y"], point["z"]});
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,12 +10,14 @@ using namespace simdjson;
|
||||||
using namespace simdjson::builtin;
|
using namespace simdjson::builtin;
|
||||||
|
|
||||||
struct simdjson_ondemand {
|
struct simdjson_ondemand {
|
||||||
|
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||||
|
|
||||||
ondemand::parser parser{};
|
ondemand::parser parser{};
|
||||||
|
|
||||||
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||||
auto doc = parser.iterate(json);
|
auto doc = parser.iterate(json);
|
||||||
for (ondemand::object coord : doc) {
|
for (ondemand::object coord : doc) {
|
||||||
result.emplace_back(point{coord.find_field("x"), coord.find_field("y"), coord.find_field("z")});
|
result.emplace_back(json_benchmark::point{coord.find_field("x"), coord.find_field("y"), coord.find_field("z")});
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,12 +10,14 @@ using namespace simdjson;
|
||||||
using namespace simdjson::builtin;
|
using namespace simdjson::builtin;
|
||||||
|
|
||||||
struct simdjson_ondemand_unordered {
|
struct simdjson_ondemand_unordered {
|
||||||
|
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||||
|
|
||||||
ondemand::parser parser{};
|
ondemand::parser parser{};
|
||||||
|
|
||||||
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||||
auto doc = parser.iterate(json);
|
auto doc = parser.iterate(json);
|
||||||
for (ondemand::object coord : doc) {
|
for (ondemand::object coord : doc) {
|
||||||
result.emplace_back(large_random::point{coord["x"], coord["y"], coord["z"]});
|
result.emplace_back(json_benchmark::point{coord["x"], coord["y"], coord["z"]});
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,6 +7,8 @@
|
||||||
namespace large_random {
|
namespace large_random {
|
||||||
|
|
||||||
struct yyjson_base {
|
struct yyjson_base {
|
||||||
|
static constexpr diff_flags DiffFlags = diff_flags::NONE;
|
||||||
|
|
||||||
simdjson_really_inline double get_double(yyjson_val *obj, std::string_view key) {
|
simdjson_really_inline double get_double(yyjson_val *obj, std::string_view key) {
|
||||||
yyjson_val *val = yyjson_obj_getn(obj, key.data(), key.length());
|
yyjson_val *val = yyjson_obj_getn(obj, key.data(), key.length());
|
||||||
if (!val) { throw "missing point field!"; }
|
if (!val) { throw "missing point field!"; }
|
||||||
|
@ -34,7 +36,7 @@ struct yyjson_base {
|
||||||
yyjson_val *coord;
|
yyjson_val *coord;
|
||||||
yyjson_arr_foreach(coords, idx, max, coord) {
|
yyjson_arr_foreach(coords, idx, max, coord) {
|
||||||
if (!yyjson_is_obj(coord)) { return false; }
|
if (!yyjson_is_obj(coord)) { return false; }
|
||||||
result.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
result.emplace_back(json_benchmark::point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||||
|
|
||||||
|
#include "partial_tweets.h"
|
||||||
|
|
||||||
|
namespace partial_tweets {
|
||||||
|
|
||||||
|
struct nlohmann_json {
|
||||||
|
using StringType=std::string;
|
||||||
|
|
||||||
|
simdjson_really_inline uint64_t nullable_int(nlohmann::json value) {
|
||||||
|
if (value.is_null()) { return 0; }
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool run(simdjson::padded_string &json, std::vector<tweet<std::string>> &result) {
|
||||||
|
auto root = nlohmann::json::parse(json.data(), json.data() + json.size());
|
||||||
|
for (auto tweet : root["statuses"]) {
|
||||||
|
auto user = tweet["user"];
|
||||||
|
result.emplace_back(partial_tweets::tweet<std::string>{
|
||||||
|
tweet["created_at"],
|
||||||
|
tweet["id"],
|
||||||
|
tweet["text"],
|
||||||
|
nullable_int(tweet["in_reply_to_status_id"]),
|
||||||
|
{ user["id"], user["screen_name"] },
|
||||||
|
tweet["retweet_count"],
|
||||||
|
tweet["favorite_count"]
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(partial_tweets, nlohmann_json)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace partial_tweets
|
||||||
|
|
||||||
|
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON
|
|
@ -7,16 +7,18 @@
|
||||||
|
|
||||||
namespace partial_tweets {
|
namespace partial_tweets {
|
||||||
|
|
||||||
|
using namespace json_benchmark;
|
||||||
|
|
||||||
template<typename I>
|
template<typename I>
|
||||||
struct runner : public json_benchmark::file_runner<I> {
|
struct runner : public file_runner<I> {
|
||||||
std::vector<tweet> result{};
|
std::vector<tweet<typename I::StringType>> result{};
|
||||||
|
|
||||||
bool setup(benchmark::State &state) {
|
bool setup(benchmark::State &state) {
|
||||||
return this->load_json(state, json_benchmark::TWITTER_JSON);
|
return this->load_json(state, TWITTER_JSON);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool before_run(benchmark::State &state) {
|
bool before_run(benchmark::State &state) {
|
||||||
if (!json_benchmark::file_runner<I>::before_run(state)) { return false; }
|
if (!file_runner<I>::before_run(state)) { return false; }
|
||||||
result.clear();
|
result.clear();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -27,7 +29,7 @@ struct runner : public json_benchmark::file_runner<I> {
|
||||||
|
|
||||||
template<typename R>
|
template<typename R>
|
||||||
bool diff(benchmark::State &state, runner<R> &reference) {
|
bool diff(benchmark::State &state, runner<R> &reference) {
|
||||||
return diff_results(state, result, reference.result);
|
return diff_results(state, result, reference.result, diff_flags::NONE);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t items_per_iteration() {
|
size_t items_per_iteration() {
|
||||||
|
@ -38,7 +40,7 @@ struct runner : public json_benchmark::file_runner<I> {
|
||||||
struct simdjson_dom;
|
struct simdjson_dom;
|
||||||
|
|
||||||
template<typename I> simdjson_really_inline static void partial_tweets(benchmark::State &state) {
|
template<typename I> simdjson_really_inline static void partial_tweets(benchmark::State &state) {
|
||||||
json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
|
run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace partial_tweets
|
} // namespace partial_tweets
|
||||||
|
|
|
@ -9,6 +9,8 @@ namespace partial_tweets {
|
||||||
using namespace rapidjson;
|
using namespace rapidjson;
|
||||||
|
|
||||||
struct rapidjson_base {
|
struct rapidjson_base {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
Document doc{};
|
Document doc{};
|
||||||
|
|
||||||
simdjson_really_inline std::string_view get_string_view(Value &object, std::string_view key) {
|
simdjson_really_inline std::string_view get_string_view(Value &object, std::string_view key) {
|
||||||
|
@ -31,20 +33,20 @@ struct rapidjson_base {
|
||||||
if (!field->value.IsUint64()) { throw "Field is not nullable uint64"; }
|
if (!field->value.IsUint64()) { throw "Field is not nullable uint64"; }
|
||||||
return field->value.GetUint64();
|
return field->value.GetUint64();
|
||||||
}
|
}
|
||||||
simdjson_really_inline partial_tweets::twitter_user get_user(Value &object, std::string_view key) {
|
simdjson_really_inline partial_tweets::twitter_user<std::string_view> get_user(Value &object, std::string_view key) {
|
||||||
auto field = object.FindMember(key.data());
|
auto field = object.FindMember(key.data());
|
||||||
if (field == object.MemberEnd()) { throw "Missing user field"; }
|
if (field == object.MemberEnd()) { throw "Missing user field"; }
|
||||||
if (!field->value.IsObject()) { throw "User field is not an object"; }
|
if (!field->value.IsObject()) { throw "User field is not an object"; }
|
||||||
return { get_uint64(field->value, "id"), get_string_view(field->value, "screen_name") };
|
return { get_uint64(field->value, "id"), get_string_view(field->value, "screen_name") };
|
||||||
}
|
}
|
||||||
|
|
||||||
bool run(Document &root, std::vector<tweet> &result) {
|
bool run(Document &root, std::vector<tweet<std::string_view>> &result) {
|
||||||
if (root.HasParseError() || !root.IsObject()) { return false; }
|
if (root.HasParseError() || !root.IsObject()) { return false; }
|
||||||
auto statuses = root.FindMember("statuses");
|
auto statuses = root.FindMember("statuses");
|
||||||
if (statuses == root.MemberEnd() || !statuses->value.IsArray()) { return false; }
|
if (statuses == root.MemberEnd() || !statuses->value.IsArray()) { return false; }
|
||||||
for (auto &tweet : statuses->value.GetArray()) {
|
for (auto &tweet : statuses->value.GetArray()) {
|
||||||
if (!tweet.IsObject()) { return false; }
|
if (!tweet.IsObject()) { return false; }
|
||||||
result.emplace_back(partial_tweets::tweet{
|
result.emplace_back(partial_tweets::tweet<std::string_view>{
|
||||||
get_string_view(tweet, "created_at"),
|
get_string_view(tweet, "created_at"),
|
||||||
get_uint64 (tweet, "id"),
|
get_uint64 (tweet, "id"),
|
||||||
get_string_view(tweet, "text"),
|
get_string_view(tweet, "text"),
|
||||||
|
@ -60,14 +62,14 @@ struct rapidjson_base {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct rapidjson : rapidjson_base {
|
struct rapidjson : rapidjson_base {
|
||||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||||
return rapidjson_base::run(doc.Parse<kParseValidateEncodingFlag>(json.data()), result);
|
return rapidjson_base::run(doc.Parse<kParseValidateEncodingFlag>(json.data()), result);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
BENCHMARK_TEMPLATE(partial_tweets, rapidjson)->UseManualTime();
|
BENCHMARK_TEMPLATE(partial_tweets, rapidjson)->UseManualTime();
|
||||||
|
|
||||||
struct rapidjson_insitu : rapidjson_base {
|
struct rapidjson_insitu : rapidjson_base {
|
||||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||||
return rapidjson_base::run(doc.ParseInsitu<kParseValidateEncodingFlag>(json.data()), result);
|
return rapidjson_base::run(doc.ParseInsitu<kParseValidateEncodingFlag>(json.data()), result);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -0,0 +1,96 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_SAJSON
|
||||||
|
|
||||||
|
#include "partial_tweets.h"
|
||||||
|
|
||||||
|
namespace partial_tweets {
|
||||||
|
|
||||||
|
struct sajson {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
|
size_t ast_buffer_size{0};
|
||||||
|
size_t *ast_buffer{nullptr};
|
||||||
|
simdjson_really_inline std::string_view get_string_view(const ::sajson::value &obj, std::string_view key) {
|
||||||
|
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||||
|
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field is not a string"; }
|
||||||
|
return { val.as_cstring(), val.get_string_length() };
|
||||||
|
}
|
||||||
|
simdjson_really_inline uint64_t get_uint52(const ::sajson::value &obj, std::string_view key) {
|
||||||
|
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||||
|
switch (val.get_type()) {
|
||||||
|
case ::sajson::TYPE_INTEGER: {
|
||||||
|
int64_t result;
|
||||||
|
if (!val.get_int53_value(&result) || result < 0) { throw "field is not uint52"; }
|
||||||
|
return uint64_t(result);
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
throw "field not integer";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
simdjson_really_inline uint64_t get_str_uint64(const ::sajson::value &obj, std::string_view key) {
|
||||||
|
// Since sajson only supports 53-bit numbers, and IDs in twitter.json can be > 53 bits, we read the corresponding id_str and parse that.
|
||||||
|
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||||
|
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field not a string"; }
|
||||||
|
auto str = val.as_cstring();
|
||||||
|
char *endptr;
|
||||||
|
uint64_t result = strtoull(str, &endptr, 10);
|
||||||
|
if (endptr != &str[val.get_string_length()]) { throw "field is a string, but not an integer string"; }
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
simdjson_really_inline uint64_t get_nullable_str_uint64(const ::sajson::value &obj, std::string_view key) {
|
||||||
|
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||||
|
if (val.get_type() == ::sajson::TYPE_NULL) { return 0; }
|
||||||
|
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field not a string"; }
|
||||||
|
auto str = val.as_cstring();
|
||||||
|
char *endptr;
|
||||||
|
uint64_t result = strtoull(str, &endptr, 10);
|
||||||
|
if (endptr != &str[val.get_string_length()]) { throw "field is a string, but not an integer string"; }
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
simdjson_really_inline partial_tweets::twitter_user<std::string_view> get_user(const ::sajson::value &obj, std::string_view key) {
|
||||||
|
auto user = obj.get_value_of_key({key.data(), key.length()});
|
||||||
|
if (user.get_type() != ::sajson::TYPE_OBJECT) { throw "user is not an object"; }
|
||||||
|
return { get_str_uint64(user, "id_str"), get_string_view(user, "screen_name") };
|
||||||
|
}
|
||||||
|
|
||||||
|
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||||
|
if (!ast_buffer) {
|
||||||
|
ast_buffer_size = json.size();
|
||||||
|
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t));
|
||||||
|
}
|
||||||
|
auto doc = ::sajson::parse(
|
||||||
|
::sajson::bounded_allocation(ast_buffer, ast_buffer_size),
|
||||||
|
::sajson::mutable_string_view(json.size(), json.data())
|
||||||
|
);
|
||||||
|
if (!doc.is_valid()) { return false; }
|
||||||
|
|
||||||
|
auto root = doc.get_root();
|
||||||
|
if (root.get_type() != ::sajson::TYPE_OBJECT) { return false; }
|
||||||
|
auto statuses = root.get_value_of_key({"statuses", strlen("statuses")});
|
||||||
|
if (statuses.get_type() != ::sajson::TYPE_ARRAY) { return false; }
|
||||||
|
|
||||||
|
for (size_t i=0; i<statuses.get_length(); i++) {
|
||||||
|
auto tweet = statuses.get_array_element(i);
|
||||||
|
if (tweet.get_type() != ::sajson::TYPE_OBJECT) { return false; }
|
||||||
|
result.emplace_back(partial_tweets::tweet<std::string_view>{
|
||||||
|
get_string_view(tweet, "created_at"),
|
||||||
|
get_str_uint64 (tweet, "id_str"),
|
||||||
|
get_string_view(tweet, "text"),
|
||||||
|
get_nullable_str_uint64(tweet, "in_reply_to_status_id_str"),
|
||||||
|
get_user (tweet, "user"),
|
||||||
|
get_uint52 (tweet, "retweet_count"),
|
||||||
|
get_uint52 (tweet, "favorite_count")
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(partial_tweets, sajson)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace partial_tweets
|
||||||
|
|
||||||
|
#endif // SIMDJSON_COMPETITION_SAJSON
|
||||||
|
|
|
@ -9,6 +9,8 @@ namespace partial_tweets {
|
||||||
using namespace simdjson;
|
using namespace simdjson;
|
||||||
|
|
||||||
struct simdjson_dom {
|
struct simdjson_dom {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
dom::parser parser{};
|
dom::parser parser{};
|
||||||
|
|
||||||
simdjson_really_inline uint64_t nullable_int(dom::element element) {
|
simdjson_really_inline uint64_t nullable_int(dom::element element) {
|
||||||
|
@ -16,10 +18,10 @@ struct simdjson_dom {
|
||||||
return element;
|
return element;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||||
for (dom::element tweet : parser.parse(json)["statuses"]) {
|
for (dom::element tweet : parser.parse(json)["statuses"]) {
|
||||||
auto user = tweet["user"];
|
auto user = tweet["user"];
|
||||||
result.emplace_back(partial_tweets::tweet{
|
result.emplace_back(partial_tweets::tweet<std::string_view>{
|
||||||
tweet["created_at"],
|
tweet["created_at"],
|
||||||
tweet["id"],
|
tweet["id"],
|
||||||
tweet["text"],
|
tweet["text"],
|
||||||
|
|
|
@ -10,6 +10,8 @@ using namespace simdjson;
|
||||||
using namespace simdjson::builtin;
|
using namespace simdjson::builtin;
|
||||||
|
|
||||||
struct simdjson_ondemand {
|
struct simdjson_ondemand {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
ondemand::parser parser{};
|
ondemand::parser parser{};
|
||||||
|
|
||||||
simdjson_really_inline uint64_t nullable_int(ondemand::value value) {
|
simdjson_really_inline uint64_t nullable_int(ondemand::value value) {
|
||||||
|
@ -17,15 +19,15 @@ struct simdjson_ondemand {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_really_inline twitter_user read_user(ondemand::object user) {
|
simdjson_really_inline twitter_user<std::string_view> read_user(ondemand::object user) {
|
||||||
return { user.find_field("id"), user.find_field("screen_name") };
|
return { user.find_field("id"), user.find_field("screen_name") };
|
||||||
}
|
}
|
||||||
|
|
||||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||||
// Walk the document, parsing the tweets as we go
|
// Walk the document, parsing the tweets as we go
|
||||||
auto doc = parser.iterate(json);
|
auto doc = parser.iterate(json);
|
||||||
for (ondemand::object tweet : doc.find_field("statuses")) {
|
for (ondemand::object tweet : doc.find_field("statuses")) {
|
||||||
result.emplace_back(partial_tweets::tweet{
|
result.emplace_back(partial_tweets::tweet<std::string_view>{
|
||||||
tweet.find_field("created_at"),
|
tweet.find_field("created_at"),
|
||||||
tweet.find_field("id"),
|
tweet.find_field("id"),
|
||||||
tweet.find_field("text"),
|
tweet.find_field("text"),
|
||||||
|
|
|
@ -22,15 +22,17 @@ namespace partial_tweets {
|
||||||
// ]
|
// ]
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
template<typename StringType=std::string_view>
|
||||||
struct tweet {
|
struct tweet {
|
||||||
std::string_view created_at{};
|
StringType created_at{};
|
||||||
uint64_t id{};
|
uint64_t id{};
|
||||||
std::string_view result{};
|
StringType result{};
|
||||||
uint64_t in_reply_to_status_id{};
|
uint64_t in_reply_to_status_id{};
|
||||||
twitter_user user{};
|
twitter_user<StringType> user{};
|
||||||
uint64_t retweet_count{};
|
uint64_t retweet_count{};
|
||||||
uint64_t favorite_count{};
|
uint64_t favorite_count{};
|
||||||
simdjson_really_inline bool operator==(const tweet &other) const {
|
template<typename OtherStringType>
|
||||||
|
simdjson_really_inline bool operator==(const tweet<OtherStringType> &other) const {
|
||||||
return created_at == other.created_at &&
|
return created_at == other.created_at &&
|
||||||
id == other.id &&
|
id == other.id &&
|
||||||
result == other.result &&
|
result == other.result &&
|
||||||
|
@ -39,10 +41,12 @@ struct tweet {
|
||||||
retweet_count == other.retweet_count &&
|
retweet_count == other.retweet_count &&
|
||||||
favorite_count == other.favorite_count;
|
favorite_count == other.favorite_count;
|
||||||
}
|
}
|
||||||
simdjson_really_inline bool operator!=(const tweet &other) const { return !(*this == other); }
|
template<typename OtherStringType>
|
||||||
|
simdjson_really_inline bool operator!=(const tweet<OtherStringType> &other) const { return !(*this == other); }
|
||||||
};
|
};
|
||||||
|
|
||||||
simdjson_unused static std::ostream &operator<<(std::ostream &o, const tweet &t) {
|
template<typename StringType>
|
||||||
|
simdjson_unused static std::ostream &operator<<(std::ostream &o, const tweet<StringType> &t) {
|
||||||
o << "created_at: " << t.created_at << std::endl;
|
o << "created_at: " << t.created_at << std::endl;
|
||||||
o << "id: " << t.id << std::endl;
|
o << "id: " << t.id << std::endl;
|
||||||
o << "result: " << t.result << std::endl;
|
o << "result: " << t.result << std::endl;
|
||||||
|
|
|
@ -3,11 +3,13 @@
|
||||||
|
|
||||||
namespace partial_tweets {
|
namespace partial_tweets {
|
||||||
|
|
||||||
|
template<typename StringType=std::string_view>
|
||||||
struct twitter_user {
|
struct twitter_user {
|
||||||
uint64_t id{};
|
uint64_t id{};
|
||||||
std::string_view screen_name{};
|
StringType screen_name{};
|
||||||
|
|
||||||
bool operator==(const twitter_user &other) const {
|
template<typename OtherStringType>
|
||||||
|
bool operator==(const twitter_user<OtherStringType> &other) const {
|
||||||
return id == other.id &&
|
return id == other.id &&
|
||||||
screen_name == other.screen_name;
|
screen_name == other.screen_name;
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,6 +7,8 @@
|
||||||
namespace partial_tweets {
|
namespace partial_tweets {
|
||||||
|
|
||||||
struct yyjson_base {
|
struct yyjson_base {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
simdjson_really_inline std::string_view get_string_view(yyjson_val *obj, std::string_view key) {
|
simdjson_really_inline std::string_view get_string_view(yyjson_val *obj, std::string_view key) {
|
||||||
auto val = yyjson_obj_getn(obj, key.data(), key.length());
|
auto val = yyjson_obj_getn(obj, key.data(), key.length());
|
||||||
if (!yyjson_is_str(val)) { throw "field is not uint64 or null!"; }
|
if (!yyjson_is_str(val)) { throw "field is not uint64 or null!"; }
|
||||||
|
@ -24,13 +26,13 @@ struct yyjson_base {
|
||||||
if (type != YYJSON_TYPE_NUM && type != YYJSON_TYPE_NULL ) { throw "field is not uint64 or null!"; }
|
if (type != YYJSON_TYPE_NUM && type != YYJSON_TYPE_NULL ) { throw "field is not uint64 or null!"; }
|
||||||
return yyjson_get_uint(val);
|
return yyjson_get_uint(val);
|
||||||
}
|
}
|
||||||
simdjson_really_inline partial_tweets::twitter_user get_user(yyjson_val *obj, std::string_view key) {
|
simdjson_really_inline partial_tweets::twitter_user<std::string_view> get_user(yyjson_val *obj, std::string_view key) {
|
||||||
auto user = yyjson_obj_getn(obj, key.data(), key.length());
|
auto user = yyjson_obj_getn(obj, key.data(), key.length());
|
||||||
if (!yyjson_is_obj(user)) { throw "missing twitter user field!"; }
|
if (!yyjson_is_obj(user)) { throw "missing twitter user field!"; }
|
||||||
return { get_uint64(user, "id"), get_string_view(user, "screen_name") };
|
return { get_uint64(user, "id"), get_string_view(user, "screen_name") };
|
||||||
}
|
}
|
||||||
|
|
||||||
bool run(yyjson_doc *doc, std::vector<tweet> &result) {
|
bool run(yyjson_doc *doc, std::vector<tweet<std::string_view>> &result) {
|
||||||
if (!doc) { return false; }
|
if (!doc) { return false; }
|
||||||
yyjson_val *root = yyjson_doc_get_root(doc);
|
yyjson_val *root = yyjson_doc_get_root(doc);
|
||||||
if (!yyjson_is_obj(root)) { return false; }
|
if (!yyjson_is_obj(root)) { return false; }
|
||||||
|
@ -43,7 +45,7 @@ struct yyjson_base {
|
||||||
yyjson_arr_foreach(statuses, tweet_idx, tweets_max, tweet) {
|
yyjson_arr_foreach(statuses, tweet_idx, tweets_max, tweet) {
|
||||||
if (!yyjson_is_obj(tweet)) { return false; }
|
if (!yyjson_is_obj(tweet)) { return false; }
|
||||||
// TODO these can't actually handle errors
|
// TODO these can't actually handle errors
|
||||||
result.emplace_back(partial_tweets::tweet{
|
result.emplace_back(partial_tweets::tweet<std::string_view>{
|
||||||
get_string_view(tweet, "created_at"),
|
get_string_view(tweet, "created_at"),
|
||||||
get_uint64 (tweet, "id"),
|
get_uint64 (tweet, "id"),
|
||||||
get_string_view(tweet, "text"),
|
get_string_view(tweet, "text"),
|
||||||
|
@ -59,14 +61,14 @@ struct yyjson_base {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct yyjson : yyjson_base {
|
struct yyjson : yyjson_base {
|
||||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||||
return yyjson_base::run(yyjson_read(json.data(), json.size(), 0), result);
|
return yyjson_base::run(yyjson_read(json.data(), json.size(), 0), result);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
BENCHMARK_TEMPLATE(partial_tweets, yyjson)->UseManualTime();
|
BENCHMARK_TEMPLATE(partial_tweets, yyjson)->UseManualTime();
|
||||||
|
|
||||||
struct yyjson_insitu : yyjson_base {
|
struct yyjson_insitu : yyjson_base {
|
||||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
bool run(simdjson::padded_string &json, std::vector<tweet<std::string_view>> &result) {
|
||||||
return yyjson_base::run(yyjson_read_opts(json.data(), json.size(), YYJSON_READ_INSITU, 0, 0), result);
|
return yyjson_base::run(yyjson_read_opts(json.data(), json.size(), YYJSON_READ_INSITU, 0, 0), result);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue