From 03f7396d504ac62794c1614a533334f59b6101aa Mon Sep 17 00:00:00 2001
From: Nicolas Boyer <83141563+NicolasJiaxin@users.noreply.github.com>
Date: Thu, 17 Jun 2021 18:31:40 -0400
Subject: [PATCH] Fix branches. (#1619)

---
Notes (outside the commit message):

  * Adds two variants of the distinct_user_id benchmark that read the ids
    through at_pointer() with the JSON pointers "/user/id" and
    "/retweeted_status/user/id": simdjson_dom_json_pointer uses dom::parser,
    simdjson_ondemand_json_pointer uses ondemand::parser. Both headers are
    included from benchmark/bench_ondemand.cpp.
  * Both new headers are compiled only when SIMDJSON_EXCEPTIONS is set; the
    "/user/id" lookup is pushed straight into the result and, per the code
    comments, is allowed to throw when a status has no user.
  * The "/retweeted_status/user/id" lookup is skipped only when its error is
    NO_SUCH_FIELD; any other outcome is pushed into the result.
  * NOTE(review): the result parameter is spelled std::vector<uint64_t>.
    The element type is assumed to match the result vector of the other
    distinct_user_id benchmarks -- confirm against distinct_user_id.h.

 benchmark/bench_ondemand.cpp                  |  2 +
 .../simdjson_dom_json_pointer.h               | 36 ++++++++++++++++++
 .../simdjson_ondemand_json_pointer.h          | 37 +++++++++++++++++++
 3 files changed, 75 insertions(+)
 create mode 100644 benchmark/distinct_user_id/simdjson_dom_json_pointer.h
 create mode 100644 benchmark/distinct_user_id/simdjson_ondemand_json_pointer.h

diff --git a/benchmark/bench_ondemand.cpp b/benchmark/bench_ondemand.cpp
index c0173606..209d3a5b 100644
--- a/benchmark/bench_ondemand.cpp
+++ b/benchmark/bench_ondemand.cpp
@@ -55,7 +55,9 @@ SIMDJSON_POP_DISABLE_WARNINGS
 #include "kostya/nlohmann_json_sax.h"
 
 #include "distinct_user_id/simdjson_dom.h"
+#include "distinct_user_id/simdjson_dom_json_pointer.h"
 #include "distinct_user_id/simdjson_ondemand.h"
+#include "distinct_user_id/simdjson_ondemand_json_pointer.h"
 #include "distinct_user_id/yyjson.h"
 #include "distinct_user_id/sajson.h"
 #include "distinct_user_id/rapidjson.h"
diff --git a/benchmark/distinct_user_id/simdjson_dom_json_pointer.h b/benchmark/distinct_user_id/simdjson_dom_json_pointer.h
new file mode 100644
index 00000000..c852e018
--- /dev/null
+++ b/benchmark/distinct_user_id/simdjson_dom_json_pointer.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#if SIMDJSON_EXCEPTIONS
+
+#include "distinct_user_id.h"
+
+namespace distinct_user_id {
+
+using namespace simdjson;
+
+struct simdjson_dom_json_pointer {
+  dom::parser parser{};
+
+  bool run(simdjson::padded_string &json, std::vector<uint64_t> &result) {
+    // Walk the document, parsing as we go
+    auto doc = parser.parse(json);
+    for (dom::object tweet : doc["statuses"]) {
+      // We believe that all statuses have a matching
+      // user, and we are willing to throw when they do not.
+      result.push_back(tweet.at_pointer("/user/id"));
+      // Not all tweets have a "retweeted_status", but when they do
+      // we want to go and find the user within.
+      auto retweet_id = tweet.at_pointer("/retweeted_status/user/id");
+      if (retweet_id.error() != NO_SUCH_FIELD) {
+        result.push_back(retweet_id);
+      }
+    }
+    return true;
+  }
+};
+
+BENCHMARK_TEMPLATE(distinct_user_id, simdjson_dom_json_pointer)->UseManualTime();
+
+} // namespace distinct_user_id
+
+#endif // SIMDJSON_EXCEPTIONS
\ No newline at end of file
diff --git a/benchmark/distinct_user_id/simdjson_ondemand_json_pointer.h b/benchmark/distinct_user_id/simdjson_ondemand_json_pointer.h
new file mode 100644
index 00000000..df102c0b
--- /dev/null
+++ b/benchmark/distinct_user_id/simdjson_ondemand_json_pointer.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#if SIMDJSON_EXCEPTIONS
+
+#include "distinct_user_id.h"
+
+namespace distinct_user_id {
+
+using namespace simdjson;
+
+struct simdjson_ondemand_json_pointer {
+  ondemand::parser parser{};
+
+  bool run(simdjson::padded_string &json, std::vector<uint64_t> &result) {
+    // Walk the document, parsing as we go
+    auto doc = parser.iterate(json);
+    for (ondemand::object tweet : doc.find_field("statuses")) {
+      // We believe that all statuses have a matching
+      // user, and we are willing to throw when they do not.
+      result.push_back(tweet.at_pointer("/user/id"));
+      // Not all tweets have a "retweeted_status", but when they do
+      // we want to go and find the user within.
+      auto retweet_id = tweet.at_pointer("/retweeted_status/user/id");
+      if (retweet_id.error() != NO_SUCH_FIELD) {
+        result.push_back(retweet_id);
+      }
+    }
+
+    return true;
+  }
+};
+
+BENCHMARK_TEMPLATE(distinct_user_id, simdjson_ondemand_json_pointer)->UseManualTime();
+
+} // namespace distinct_user_id
+
+#endif // SIMDJSON_EXCEPTIONS