Rearrange benchmarks to be easier to create

John Keiser 2021-01-04 12:21:50 -08:00
parent 3af54a9978
commit 5add8ac255
54 changed files with 945 additions and 2069 deletions

View File

@ -1,13 +1,5 @@
include_directories( . linux )
link_libraries(simdjson-windows-headers test-data)
# bench_sax links against the source
if (TARGET benchmark::benchmark)
add_executable(bench_sax bench_sax.cpp)
target_link_libraries(bench_sax PRIVATE simdjson-internal-flags simdjson-include-source benchmark::benchmark)
endif (TARGET benchmark::benchmark)
# Everything else links against simdjson proper
link_libraries(simdjson simdjson-flags)
add_executable(benchfeatures benchfeatures.cpp)
@ -44,7 +36,6 @@ endif()
if (TARGET benchmark::benchmark)
link_libraries(benchmark::benchmark)
add_subdirectory(largerandom)
add_executable(bench_parse_call bench_parse_call.cpp)
add_executable(bench_dom_api bench_dom_api.cpp)
if (SIMDJSON_EXCEPTIONS)

View File

@ -1,40 +1,35 @@
#include "simdjson.h"
#include <iostream>
#include <sstream>
#include <random>
#include <vector>
SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
#ifdef SIMDJSON_COMPETITION_YYJSON
#include "yyjson.h"
#endif
// This has to be last, for reasons I don't yet understand
#include <benchmark/benchmark.h>
SIMDJSON_POP_DISABLE_WARNINGS
// simdjson ondemand
#include "partial_tweets/ondemand.h"
#include "largerandom/ondemand.h"
#include "largerandom/ondemand_unordered.h"
#include "kostya/ondemand.h"
#include "distinctuserid/ondemand.h"
#include "find_tweet/ondemand.h"
// simdjson dom
#include "partial_tweets/dom.h"
#include "largerandom/dom.h"
#include "kostya/dom.h"
#include "distinctuserid/dom.h"
#include "find_tweet/dom.h"
// // yyjson
#ifdef SIMDJSON_COMPETITION_YYJSON
#include "partial_tweets/simdjson_dom.h"
#include "partial_tweets/simdjson_ondemand.h"
#include "partial_tweets/yyjson.h"
#include "largerandom/yyjson.h"
#include "kostya/yyjson.h"
#include "distinctuserid/yyjson.h"
#include "find_tweet/yyjson.h"
#endif
#include "large_random/simdjson_dom.h"
#include "large_random/simdjson_ondemand.h"
#include "large_random/simdjson_ondemand_unordered.h"
#include "large_random/yyjson.h"
#include "kostya/simdjson_dom.h"
#include "kostya/simdjson_ondemand.h"
#include "kostya/yyjson.h"
#include "distinct_user_id/simdjson_dom.h"
#include "distinct_user_id/simdjson_ondemand.h"
#include "distinct_user_id/yyjson.h"
#include "find_tweet/simdjson_dom.h"
#include "find_tweet/simdjson_ondemand.h"
#include "find_tweet/yyjson.h"
BENCHMARK_MAIN();

View File

@ -1,14 +0,0 @@
#include "simdjson.h"
#include "simdjson.cpp"
#include <iostream>
#include <sstream>
#include <random>
#include <vector>
SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
#include <benchmark/benchmark.h>
SIMDJSON_POP_DISABLE_WARNINGS
#include "partial_tweets/sax.h"
#include "largerandom/sax.h"
BENCHMARK_MAIN();

View File

@ -0,0 +1,50 @@
#pragma once
#include "json_benchmark/file_runner.h"
#include <vector>
namespace distinct_user_id {
template<typename I>
struct runner : public json_benchmark::file_runner<I> {
std::vector<uint64_t> ids{};
public:
bool setup(benchmark::State &state) {
return this->load_json(state, json_benchmark::TWITTER_JSON);
}
bool before_run(benchmark::State &state) {
ids.clear();
return true;
}
bool run(benchmark::State &) {
return this->implementation.run(this->json, ids);
}
bool after_run(benchmark::State &state) {
std::sort(ids.begin(), ids.end());
auto last = std::unique(ids.begin(), ids.end());
ids.erase(last, ids.end());
return true;
}
template<typename R>
bool diff(benchmark::State &state, runner<R> &reference) {
return diff_results(state, ids, reference.ids);
}
size_t items_per_iteration() {
return ids.size();
}
};
struct simdjson_dom;
template<typename I> simdjson_really_inline static void distinct_user_id(benchmark::State &state) {
json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
}
} // namespace distinct_user_id
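For orientation: the runner above asks nothing of an implementation beyond a single run() method that appends the ids it finds, so wrapping another parser is a matter of a few lines. A minimal skeleton (my_library is a hypothetical name; the real implementations follow in simdjson_dom.h, simdjson_ondemand.h and yyjson.h):
namespace distinct_user_id {
// Hypothetical wrapper around some other JSON parser. All it must do is parse
// `json` and append every user id (duplicates included) to `ids`, returning
// false on a parse error; the runner's after_run() handles sort/unique.
class my_library {
public:
  bool run(const simdjson::padded_string &json, std::vector<uint64_t> &ids);
};
BENCHMARK_TEMPLATE(distinct_user_id, my_library);
} // namespace distinct_user_id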

View File

@ -0,0 +1,37 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "distinct_user_id.h"
namespace distinct_user_id {
using namespace simdjson;
class simdjson_dom {
dom::parser parser{};
public:
bool run(const simdjson::padded_string &json, std::vector<uint64_t> &ids) {
// Walk the document, parsing as we go
auto doc = parser.parse(json);
for (dom::object tweet : doc["statuses"]) {
// We believe that all statuses have a matching
// user, and we are willing to throw when they do not.
ids.push_back(tweet["user"]["id"]);
// Not all tweets have a "retweeted_status", but when they do
// we want to go and find the user within.
auto retweet = tweet["retweeted_status"];
if(retweet.error() != NO_SUCH_FIELD) {
ids.push_back(retweet["user"]["id"]);
}
}
return true;
}
};
BENCHMARK_TEMPLATE(distinct_user_id, simdjson_dom);
} // namespace distinct_user_id
#endif // SIMDJSON_EXCEPTIONS

View File

@ -0,0 +1,38 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "distinct_user_id.h"
namespace distinct_user_id {
using namespace simdjson;
using namespace simdjson::builtin;
class simdjson_ondemand {
ondemand::parser parser{};
public:
bool run(const simdjson::padded_string &json, std::vector<uint64_t> &ids) {
// Walk the document, parsing as we go
auto doc = parser.iterate(json);
for (ondemand::object tweet : doc.find_field("statuses")) {
// We believe that all statuses have a matching
// user, and we are willing to throw when they do not.
ids.push_back(tweet.find_field("user").find_field("id"));
// Not all tweets have a "retweeted_status", but when they do
// we want to go and find the user within.
auto retweet = tweet.find_field("retweeted_status");
if(!retweet.error()) {
ids.push_back(retweet.find_field("user").find_field("id"));
}
}
return true;
}
};
BENCHMARK_TEMPLATE(distinct_user_id, simdjson_ondemand);
} // namespace distinct_user_id
#endif // SIMDJSON_EXCEPTIONS

View File

@ -1,21 +1,14 @@
#pragma once
#include "distinctuserid.h"
#ifdef SIMDJSON_COMPETITION_YYJSON
#include "distinct_user_id.h"
namespace distinct_user_id {
class Yyjson {
class yyjson {
public:
simdjson_really_inline const std::vector<int64_t> &Result() { return ids; }
simdjson_really_inline size_t ItemCount() { return ids.size(); }
private:
std::vector<int64_t> ids{};
public:
simdjson_really_inline bool Run(const padded_string &json) {
ids.clear();
bool run(const simdjson::padded_string &json, std::vector<uint64_t> &ids) {
// Walk the document, parsing the tweets as we go
yyjson_doc *doc = yyjson_read(json.data(), json.size(), 0);
if (!doc) { return false; }
@ -38,12 +31,14 @@ public:
ids.push_back(yyjson_get_sint(id));
}
}
remove_duplicates(ids);
return true;
}
};
BENCHMARK_TEMPLATE(DistinctUserID, Yyjson);
BENCHMARK_TEMPLATE(distinct_user_id, yyjson);
} // namespace partial_tweets
} // namespace distinct_user_id
#endif // SIMDJSON_COMPETITION_YYJSON

View File

@ -1,52 +0,0 @@
#pragma once
#include <vector>
#include <cstdint>
#include "event_counter.h"
#include "json_benchmark.h"
//
// Interface
//
namespace distinct_user_id {
template<typename T> static void DistinctUserID(benchmark::State &state);
bool equals(const char *s1, const char *s2) { return strcmp(s1, s2) == 0; }
void remove_duplicates(std::vector<int64_t> &v) {
std::sort(v.begin(), v.end());
auto last = std::unique(v.begin(), v.end());
v.erase(last, v.end());
}
} // namespace
//
// Implementation
//
#include "dom.h"
namespace distinct_user_id {
using namespace simdjson;
template<typename T> static void DistinctUserID(benchmark::State &state) {
//
// Load the JSON file
//
constexpr const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
error_code error;
padded_string json;
if ((error = padded_string::load(TWITTER_JSON).get(json))) {
std::cerr << error << std::endl;
state.SkipWithError("error loading");
return;
}
JsonBenchmark<T, Dom>(state, json);
}
} // namespace distinct_user_id

View File

@ -1,45 +0,0 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "distinctuserid.h"
namespace distinct_user_id {
using namespace simdjson;
class Dom {
public:
simdjson_really_inline bool Run(const padded_string &json);
simdjson_really_inline const std::vector<int64_t> &Result() { return ids; }
simdjson_really_inline size_t ItemCount() { return ids.size(); }
private:
dom::parser parser{};
std::vector<int64_t> ids{};
};
simdjson_really_inline bool Dom::Run(const padded_string &json) {
ids.clear();
// Walk the document, parsing as we go
auto doc = parser.parse(json);
for (dom::object tweet : doc["statuses"]) {
// We believe that all statuses have a matching
// user, and we are willing to throw when they do not.
ids.push_back(tweet["user"]["id"]);
// Not all tweets have a "retweeted_status", but when they do
// we want to go and find the user within.
auto retweet = tweet["retweeted_status"];
if(retweet.error() != NO_SUCH_FIELD) {
ids.push_back(retweet["user"]["id"]);
}
}
remove_duplicates(ids);
return true;
}
BENCHMARK_TEMPLATE(DistinctUserID, Dom);
} // namespace distinct_user_id
#endif // SIMDJSON_EXCEPTIONS

View File

@ -1,55 +0,0 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "distinctuserid.h"
namespace distinct_user_id {
using namespace simdjson;
using namespace simdjson::builtin;
class OnDemand {
public:
OnDemand() {
if(!displayed_implementation) {
std::cout << "On Demand implementation: " << builtin_implementation()->name() << std::endl;
displayed_implementation = true;
}
}
simdjson_really_inline bool Run(const padded_string &json);
simdjson_really_inline const std::vector<int64_t> &Result() { return ids; }
simdjson_really_inline size_t ItemCount() { return ids.size(); }
private:
ondemand::parser parser{};
std::vector<int64_t> ids{};
static inline bool displayed_implementation = false;
};
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
ids.clear();
// Walk the document, parsing as we go
auto doc = parser.iterate(json);
for (ondemand::object tweet : doc.find_field("statuses")) {
// We believe that all statuses have a matching
// user, and we are willing to throw when they do not.
ids.push_back(tweet.find_field("user").find_field("id"));
// Not all tweets have a "retweeted_status", but when they do
// we want to go and find the user within.
auto retweet = tweet.find_field("retweeted_status");
if(!retweet.error()) {
ids.push_back(retweet.find_field("user").find_field("id"));
}
}
remove_duplicates(ids);
return true;
}
BENCHMARK_TEMPLATE(DistinctUserID, OnDemand);
} // namespace distinct_user_id
#endif // SIMDJSON_EXCEPTIONS

View File

@ -1,38 +0,0 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "find_tweet.h"
namespace find_tweet {
using namespace simdjson;
class Dom {
public:
simdjson_really_inline bool Run(const padded_string &json);
simdjson_really_inline std::string_view Result() { return text; }
simdjson_really_inline size_t ItemCount() { return 1; }
private:
dom::parser parser{};
std::string_view text{};
};
simdjson_really_inline bool Dom::Run(const padded_string &json) {
text = "";
auto doc = parser.parse(json);
for (dom::object tweet : doc["statuses"]) {
if (uint64_t(tweet["id"]) == TWEET_ID) {
text = tweet["text"];
return true;
}
}
return false;
}
BENCHMARK_TEMPLATE(FindTweet, Dom);
} // namespace find_tweet
#endif // SIMDJSON_EXCEPTIONS

View File

@ -1,45 +1,38 @@
#pragma once
#include <vector>
#include <cstdint>
#include "event_counter.h"
#include "json_benchmark.h"
//
// Interface
//
namespace find_tweet {
template<typename T> static void FindTweet(benchmark::State &state);
const uint64_t TWEET_ID = 505874901689851900;
} // namespace
//
// Implementation
//
#include "dom.h"
#include "json_benchmark/file_runner.h"
namespace find_tweet {
using namespace simdjson;
template<typename I>
struct runner : public json_benchmark::file_runner<I> {
std::string_view text;
template<typename T> static void FindTweet(benchmark::State &state) {
//
// Load the JSON file
//
constexpr const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
error_code error;
padded_string json;
if ((error = padded_string::load(TWITTER_JSON).get(json))) {
std::cerr << error << std::endl;
state.SkipWithError("error loading");
return;
public:
bool setup(benchmark::State &state) {
return this->load_json(state, json_benchmark::TWITTER_JSON);
}
JsonBenchmark<T, Dom>(state, json);
bool before_run(benchmark::State &state) {
text = "";
return true;
}
bool run(benchmark::State &) {
return this->implementation.run(this->json, 505874901689851900ULL, text);
}
template<typename R>
bool diff(benchmark::State &state, runner<R> &reference) {
return diff_results(state, text, reference.text);
}
};
struct simdjson_dom;
template<typename I> simdjson_really_inline static void find_tweet(benchmark::State &state) {
json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
}
} // namespace find_tweet

View File

@ -1,49 +0,0 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "find_tweet.h"
namespace find_tweet {
using namespace simdjson;
using namespace simdjson::builtin;
class OnDemand {
public:
OnDemand() {
if(!displayed_implementation) {
std::cout << "On Demand implementation: " << builtin_implementation()->name() << std::endl;
displayed_implementation = true;
}
}
simdjson_really_inline bool Run(const padded_string &json);
simdjson_really_inline std::string_view Result() { return text; }
simdjson_really_inline size_t ItemCount() { return 1; }
private:
ondemand::parser parser{};
std::string_view text{};
static inline bool displayed_implementation = false;
};
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
text = "";
// Walk the document, parsing as we go
auto doc = parser.iterate(json);
for (ondemand::object tweet : doc.find_field("statuses")) {
if (uint64_t(tweet.find_field("id")) == TWEET_ID) {
text = tweet.find_field("text");
return true;
}
}
return false;
}
BENCHMARK_TEMPLATE(FindTweet, OnDemand);
} // namespace find_tweet
#endif // SIMDJSON_EXCEPTIONS

View File

@ -0,0 +1,31 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "find_tweet.h"
namespace find_tweet {
using namespace simdjson;
class simdjson_dom {
dom::parser parser{};
public:
bool run(const simdjson::padded_string &json, uint64_t find_id, std::string_view &text) {
text = "";
auto doc = parser.parse(json);
for (auto tweet : doc["statuses"]) {
if (uint64_t(tweet["id"]) == find_id) {
text = tweet["text"];
return true;
}
}
return false;
}
};
BENCHMARK_TEMPLATE(find_tweet, simdjson_dom);
} // namespace find_tweet
#endif // SIMDJSON_EXCEPTIONS

View File

@ -0,0 +1,32 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "find_tweet.h"
namespace find_tweet {
using namespace simdjson;
using namespace simdjson::builtin;
class simdjson_ondemand {
ondemand::parser parser{};
public:
bool run(const simdjson::padded_string &json, uint64_t find_id, std::string_view &text) {
// Walk the document, parsing as we go
auto doc = parser.iterate(json);
for (auto tweet : doc.find_field("statuses")) {
if (uint64_t(tweet.find_field("id")) == find_id) {
text = tweet.find_field("text");
return true;
}
}
return false;
}
};
BENCHMARK_TEMPLATE(find_tweet, simdjson_ondemand);
} // namespace find_tweet
#endif // SIMDJSON_EXCEPTIONS

View File

@ -1,21 +1,21 @@
#pragma once
#ifdef SIMDJSON_COMPETITION_YYJSON
#include "find_tweet.h"
namespace find_tweet {
class Yyjson {
class yyjson {
public:
simdjson_really_inline std::string_view Result() { return text; }
simdjson_really_inline size_t ItemCount() { return 1; }
simdjson_really_inline std::string_view result() { return text; }
simdjson_really_inline size_t item_count() { return 1; }
private:
std::string_view text{};
public:
simdjson_really_inline bool Run(const padded_string &json) {
text = "";
bool run(const simdjson::padded_string &json, uint64_t find_id, std::string_view &text) {
// Walk the document, parsing the tweets as we go
yyjson_doc *doc = yyjson_read(json.data(), json.size(), 0);
if (!doc) { return false; }
@ -26,7 +26,7 @@ public:
yyjson_val *tweet;
yyjson_arr_foreach(statuses, tweet_idx, tweets_max, tweet) {
auto id = yyjson_obj_get(tweet, "id");
if (yyjson_get_uint(id) == TWEET_ID) {
if (yyjson_get_uint(id) == find_id) {
auto _text = yyjson_obj_get(tweet, "text");
text = yyjson_get_str(_text);
return true;
@ -36,6 +36,8 @@ public:
}
};
BENCHMARK_TEMPLATE(FindTweet, Yyjson);
BENCHMARK_TEMPLATE(find_tweet, yyjson);
} // namespace find_tweet
#endif // SIMDJSON_COMPETITION_YYJSON

View File

@ -1,126 +0,0 @@
#pragma once
template<typename T>
static bool DiffResults(benchmark::State &state, const T &result, const T &reference);
template<typename T>
struct ResultDiffer {
static bool Diff(benchmark::State &state, const T &result, const T &reference) {
if (result != reference) {
std::stringstream str;
str << "result incorrect: " << result << " ... reference: " << reference;
state.SkipWithError(str.str().data());
return false;
}
return true;
}
};
template<typename T>
struct ResultDiffer<std::vector<T>> {
static bool Diff(benchmark::State &state, const std::vector<T> &result, const std::vector<T> &reference) {
auto result_iter = result.begin();
auto reference_iter = reference.begin();
while (result_iter != result.end() && reference_iter != reference.end()) {
if (!DiffResults(state, *result_iter, *reference_iter)) { return false; }
result_iter++;
reference_iter++;
}
if (result_iter != result.end()) {
std::stringstream str;
str << "extra results (got " << result.size() << ", expected " << reference.size() << "): first extra element: " << *result_iter;
state.SkipWithError(str.str().data());
return false;
} else if (reference_iter != reference.end()) {
std::stringstream str;
str << "missing results (got " << result.size() << ", expected " << reference.size() << "): first missing element: " << *reference_iter;
state.SkipWithError(str.str().data());
return false;
}
return true;
}
};
template<typename T>
static bool DiffResults(benchmark::State &state, const T &result, const T &reference) {
return ResultDiffer<T>::Diff(state, result, reference);
}
template<typename B, typename R> static void JsonBenchmark(benchmark::State &state, const simdjson::padded_string &json) {
event_collector collector(true);
event_aggregate events;
// Warmup and equality check (make sure the data is right!)
B bench;
if (!bench.Run(json)) { state.SkipWithError("warmup document reading failed"); return; }
{
R reference;
if (!reference.Run(json)) { state.SkipWithError("reference document reading failed"); return; }
if (!DiffResults(state, bench.Result(), reference.Result())) { return; }
}
// Run the benchmark
for (simdjson_unused auto _ : state) {
collector.start();
if (!bench.Run(json)) { state.SkipWithError("document reading failed"); return; }
events << collector.end();
}
state.SetBytesProcessed(json.size() * state.iterations());
state.SetItemsProcessed(bench.ItemCount() * state.iterations());
state.counters["best_bytes_per_sec"] = benchmark::Counter(double(json.size()) / events.best.elapsed_sec());
state.counters["best_items_per_sec"] = benchmark::Counter(double(bench.ItemCount()) / events.best.elapsed_sec());
state.counters["docs_per_sec"] = benchmark::Counter(1.0, benchmark::Counter::kIsIterationInvariantRate);
state.counters["best_docs_per_sec"] = benchmark::Counter(1.0 / events.best.elapsed_sec());
if (collector.has_events()) {
state.counters["instructions"] = events.instructions();
state.counters["cycles"] = events.cycles();
state.counters["branch_miss"] = events.branch_misses();
state.counters["cache_miss"] = events.cache_misses();
state.counters["cache_ref"] = events.cache_references();
state.counters["instructions_per_byte"] = events.instructions() / double(json.size());
state.counters["instructions_per_cycle"] = events.instructions() / events.cycles();
state.counters["cycles_per_byte"] = events.cycles() / double(json.size());
state.counters["frequency"] = benchmark::Counter(events.cycles(), benchmark::Counter::kIsIterationInvariantRate);
state.counters["best_instructions"] = events.best.instructions();
state.counters["best_cycles"] = events.best.cycles();
state.counters["best_branch_miss"] = events.best.branch_misses();
state.counters["best_cache_miss"] = events.best.cache_misses();
state.counters["best_cache_ref"] = events.best.cache_references();
state.counters["best_instructions_per_byte"] = events.best.instructions() / double(json.size());
state.counters["best_instructions_per_cycle"] = events.best.instructions() / events.best.cycles();
state.counters["best_cycles_per_byte"] = events.best.cycles() / double(json.size());
state.counters["best_frequency"] = events.best.cycles() / events.best.elapsed_sec();
}
state.counters["bytes"] = benchmark::Counter(double(json.size()));
state.counters["items"] = benchmark::Counter(double(bench.ItemCount()));
// Build the label
using namespace std;
stringstream label;
label << fixed << setprecision(2);
label << "[best:";
label << " throughput=" << setw(6) << (double(json.size()) / 1000000000.0 / events.best.elapsed_sec()) << " GB/s";
label << " doc_throughput=" << setw(6) << uint64_t(1.0 / events.best.elapsed_sec()) << " docs/s";
if (collector.has_events()) {
label << " instructions=" << setw(12) << uint64_t(events.best.instructions()) << setw(0);
label << " cycles=" << setw(12) << uint64_t(events.best.cycles()) << setw(0);
label << " branch_miss=" << setw(8) << uint64_t(events.best.branch_misses()) << setw(0);
label << " cache_miss=" << setw(8) << uint64_t(events.best.cache_misses()) << setw(0);
label << " cache_ref=" << setw(10) << uint64_t(events.best.cache_references()) << setw(0);
}
label << " items=" << setw(10) << bench.ItemCount() << setw(0);
label << " avg_time=" << setw(10) << uint64_t(events.elapsed_ns()) << setw(0) << " ns";
label << "]";
state.SetLabel(label.str());
}

View File

@ -0,0 +1,31 @@
#pragma once
#include "runner_base.h"
#include "simdjson.h"
namespace json_benchmark {
template<typename I>
struct const_json_runner : public runner_base<I> {
protected:
const simdjson::padded_string &json;
const_json_runner(const simdjson::padded_string &_json) : json{_json} {}
public:
/** Get the total number of bytes processed in each iteration. Used for metrics like bytes/second. */
size_t bytes_per_iteration() {
return json.size();
}
/** Get the total number of documents processed in each iteration. Used for metrics like documents/second. */
size_t documents_per_iteration() {
return 1;
}
/** Get the total number of items processed in each iteration. Used for metrics like items/second. */
size_t items_per_iteration() {
return 1;
}
};
} // namespace json_benchmark
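A derived runner hands its (typically generated, in-memory) JSON to the protected constructor once; kostya and large_random below follow exactly this shape. A sketch, assuming a get_built_json_array() helper like theirs:
template<typename I>
struct runner : public json_benchmark::const_json_runner<I> {
  // The JSON buffer is built once and shared by every iteration and implementation.
  runner() : json_benchmark::const_json_runner<I>(get_built_json_array()) {}
};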

View File

@ -0,0 +1,8 @@
#pragma once
namespace json_benchmark {
static constexpr const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
static constexpr const char *NUMBERS_JSON = SIMDJSON_BENCHMARK_DATA_DIR "numbers.json";
}

View File

@ -0,0 +1,51 @@
#pragma once
#include <vector>
#include <sstream>
template<typename T>
static bool diff_results(benchmark::State &state, const T &result, const T &reference);
template<typename T>
struct result_differ {
static bool diff(benchmark::State &state, const T &result, const T &reference) {
if (result != reference) {
std::stringstream str;
str << "result incorrect: " << result << " ... reference: " << reference;
state.SkipWithError(str.str().data());
return false;
}
return true;
}
};
template<typename T>
struct result_differ<std::vector<T>> {
static bool diff(benchmark::State &state, const std::vector<T> &result, const std::vector<T> &reference) {
auto result_iter = result.begin();
auto reference_iter = reference.begin();
while (result_iter != result.end() && reference_iter != reference.end()) {
if (!diff_results(state, *result_iter, *reference_iter)) { return false; }
result_iter++;
reference_iter++;
}
if (result_iter != result.end()) {
std::stringstream str;
str << "extra results (got " << result.size() << ", expected " << reference.size() << "): first extra element: " << *result_iter;
state.SkipWithError(str.str().data());
return false;
} else if (reference_iter != reference.end()) {
std::stringstream str;
str << "missing results (got " << result.size() << ", expected " << reference.size() << "): first missing element: " << *reference_iter;
state.SkipWithError(str.str().data());
return false;
}
return true;
}
};
template<typename T>
static bool diff_results(benchmark::State &state, const T &result, const T &reference) {
return result_differ<T>::diff(state, result, reference);
}
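To make the failure mode concrete: inside a benchmark function (with a benchmark::State &state in scope), comparing a result vector against a reference either returns true silently or marks the state as errored with a message naming the first mismatch. A small sketch:
std::vector<uint64_t> result    = {1, 2, 3};
std::vector<uint64_t> reference = {1, 2, 4};
// Element-wise comparison stops at the first difference; this returns false and
// calls state.SkipWithError("result incorrect: 3 ... reference: 4").
bool ok = diff_results(state, result, reference);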

View File

@ -0,0 +1,41 @@
#pragma once
#include "json_benchmark/runner_base.h"
#include "simdjson.h"
namespace json_benchmark {
template<typename I>
struct file_runner : public runner_base<I> {
protected:
simdjson::padded_string json{};
bool load_json(benchmark::State &state, const char *file) {
simdjson::error_code error;
if ((error = simdjson::padded_string::load(file).get(json))) {
std::stringstream err;
err << "error loading " << file << ": " << error;
state.SkipWithError(err.str().data());
return false;
}
return true;
}
public:
/** Get the total number of bytes processed in each iteration. Used for metrics like bytes/second. */
size_t bytes_per_iteration() {
return json.size();
}
/** Get the total number of documents processed in each iteration. Used for metrics like documents/second. */
size_t documents_per_iteration() {
return 1;
}
/** Get the total number of items processed in each iteration. Used for metrics like items/second. */
size_t items_per_iteration() {
return 1;
}
};
} // namespace json_benchmark
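A file-backed benchmark only needs to pick which file to load in setup(); size reporting and error handling come from file_runner. A sketch, using the NUMBERS_JSON constant from constants.h:
template<typename I>
struct runner : public json_benchmark::file_runner<I> {
  bool setup(benchmark::State &state) {
    // load_json reads the file into this->json and skips the benchmark on failure.
    return this->load_json(state, json_benchmark::NUMBERS_JSON);
  }
};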

View File

@ -0,0 +1,100 @@
#pragma once
#include "simdjson.h"
#include "event_counter.h"
#include <iostream>
namespace json_benchmark {
void maybe_display_implementation() {
static bool displayed_implementation = false;
if(!displayed_implementation) {
displayed_implementation = true;
std::cout << "simdjson::dom implementation: " << simdjson::active_implementation->name() << std::endl;
std::cout << "simdjson::ondemand implementation: " << simdjson::builtin_implementation()->name() << std::endl;
}
}
template<typename B, typename R> static void run_json_benchmark(benchmark::State &state) {
maybe_display_implementation();
event_collector collector(true);
event_aggregate events;
// Warmup and equality check (make sure the data is right!)
B bench;
if (!bench.setup(state)) { return; }
if (!bench.run(state)) { state.SkipWithError("warmup document reading failed"); return; }
{
R reference;
if (!reference.setup(state)) { return; }
if (!reference.run(state)) { state.SkipWithError("reference document reading failed"); return; }
if (!bench.diff(state, reference)) { return; }
}
// Run the benchmark
for (simdjson_unused auto _ : state) {
if (!bench.before_run(state)) { state.SkipWithError("before_run failed"); return; }
collector.start();
if (!bench.run(state)) { state.SkipWithError("run failed"); return; }
events << collector.end();
if (!bench.after_run(state)) { state.SkipWithError("after_run failed"); return; }
}
state.SetBytesProcessed(bench.bytes_per_iteration() * state.iterations());
state.SetItemsProcessed(bench.items_per_iteration() * state.iterations());
state.counters["best_docs_per_sec"] = benchmark::Counter(double(bench.documents_per_iteration()) / events.best.elapsed_sec());
state.counters["best_bytes_per_sec"] = benchmark::Counter(double(bench.bytes_per_iteration()) / events.best.elapsed_sec());
state.counters["best_items_per_sec"] = benchmark::Counter(double(bench.items_per_iteration()) / events.best.elapsed_sec());
state.counters["docs_per_sec"] = benchmark::Counter(double(bench.documents_per_iteration()), benchmark::Counter::kIsIterationInvariantRate);
if (collector.has_events()) {
state.counters["instructions"] = events.instructions();
state.counters["cycles"] = events.cycles();
state.counters["branch_miss"] = events.branch_misses();
state.counters["cache_miss"] = events.cache_misses();
state.counters["cache_ref"] = events.cache_references();
state.counters["instructions_per_byte"] = events.instructions() / double(bench.bytes_per_iteration());
state.counters["instructions_per_cycle"] = events.instructions() / events.cycles();
state.counters["cycles_per_byte"] = events.cycles() / double(bench.bytes_per_iteration());
state.counters["frequency"] = benchmark::Counter(events.cycles(), benchmark::Counter::kIsIterationInvariantRate);
state.counters["best_instructions"] = events.best.instructions();
state.counters["best_cycles"] = events.best.cycles();
state.counters["best_branch_miss"] = events.best.branch_misses();
state.counters["best_cache_miss"] = events.best.cache_misses();
state.counters["best_cache_ref"] = events.best.cache_references();
state.counters["best_instructions_per_byte"] = events.best.instructions() / double(bench.bytes_per_iteration());
state.counters["best_instructions_per_cycle"] = events.best.instructions() / events.best.cycles();
state.counters["best_cycles_per_byte"] = events.best.cycles() / double(bench.bytes_per_iteration());
state.counters["best_frequency"] = events.best.cycles() / events.best.elapsed_sec();
}
state.counters["bytes"] = benchmark::Counter(double(bench.bytes_per_iteration()));
state.counters["items"] = benchmark::Counter(double(bench.items_per_iteration()));
// Build the label
using namespace std;
stringstream label;
label << fixed << setprecision(2);
label << "[BEST:";
label << " throughput=" << setw(6) << (double(bench.bytes_per_iteration()) / 1000000000.0 / events.best.elapsed_sec()) << " GB/s";
label << " doc_throughput=" << setw(6) << uint64_t(bench.documents_per_iteration() / events.best.elapsed_sec()) << " docs/s";
if (collector.has_events()) {
label << " instructions=" << setw(12) << uint64_t(events.best.instructions()) << setw(0);
label << " cycles=" << setw(12) << uint64_t(events.best.cycles()) << setw(0);
label << " branch_miss=" << setw(8) << uint64_t(events.best.branch_misses()) << setw(0);
label << " cache_miss=" << setw(8) << uint64_t(events.best.cache_misses()) << setw(0);
label << " cache_ref=" << setw(10) << uint64_t(events.best.cache_references()) << setw(0);
}
label << " items=" << setw(10) << bench.items_per_iteration() << setw(0);
label << " avg_time=" << setw(10) << uint64_t(events.elapsed_ns()) << setw(0) << " ns";
label << "]";
state.SetLabel(label.str());
}
} // namespace json_benchmark
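run_json_benchmark is not called directly; each benchmark wraps it in a templated function (kostya, find_tweet, distinct_user_id, ...) and registers one instantiation per implementation with BENCHMARK_TEMPLATE. Spelled out without the wrapper, a single registration amounts to the following sketch (using the distinct_user_id types defined earlier; simdjson_dom doubles as its own reference):
static void distinct_user_id_simdjson_dom(benchmark::State &state) {
  json_benchmark::run_json_benchmark<
      distinct_user_id::runner<distinct_user_id::simdjson_dom>,
      distinct_user_id::runner<distinct_user_id::simdjson_dom>>(state);
}
BENCHMARK(distinct_user_id_simdjson_dom);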

View File

@ -0,0 +1,42 @@
#pragma once
#include "constants.h"
#include "run_json_benchmark.h"
#include "diff_results.h"
namespace json_benchmark {
//
// Extend this to create a new type of test (e.g. partial_tweets).
//
template<typename I>
struct runner_base {
public:
/** Run once, before all iterations. */
simdjson_warn_unused bool setup(benchmark::State &) { return true; }
/** Run on each iteration. This is what gets benchmarked. */
simdjson_warn_unused bool run(benchmark::State &state) {
return implementation.run(state);
}
/** Called before each iteration, to clear / set up state. */
simdjson_warn_unused bool before_run(benchmark::State &state) { return true; }
/** Called after each iteration, to tear down / massage state. */
simdjson_warn_unused bool after_run(benchmark::State &) { return true; }
/** Get the total number of bytes processed in each iteration. Used for metrics like bytes/second. */
size_t bytes_per_iteration();
/** Get the total number of documents processed in each iteration. Used for metrics like documents/second. */
size_t documents_per_iteration();
/** Get the total number of items processed in each iteration. Used for metrics like items/second. */
size_t items_per_iteration();
protected:
I implementation{};
};
}
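As the comment above says, adding a new kind of test means writing one runner plus a templated entry point; the implementations then only supply run(). A sketch of a hypothetical count_statuses benchmark built on file_runner (all names here are illustrative, not part of the commit):
#include "json_benchmark/file_runner.h"
namespace count_statuses {
template<typename I>
struct runner : public json_benchmark::file_runner<I> {
  size_t count{};
  bool setup(benchmark::State &state) {
    return this->load_json(state, json_benchmark::TWITTER_JSON);
  }
  bool run(benchmark::State &) {
    // Each implementation counts the entries of "statuses" in twitter.json.
    return this->implementation.run(this->json, count);
  }
  template<typename R>
  bool diff(benchmark::State &state, runner<R> &reference) {
    return diff_results(state, count, reference.count);
  }
  size_t items_per_iteration() { return count; }
};
struct simdjson_dom; // the reference implementation, declared like the other benchmarks
template<typename I> simdjson_really_inline static void count_statuses(benchmark::State &state) {
  json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
}
} // namespace count_statuses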

View File

@ -1,69 +0,0 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "kostya.h"
namespace kostya {
using namespace simdjson;
class Dom {
public:
simdjson_really_inline bool Run(const padded_string &json);
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
simdjson_really_inline size_t ItemCount() { return container.size(); }
private:
dom::parser parser{};
std::vector<my_point> container{};
};
simdjson_really_inline bool Dom::Run(const padded_string &json) {
container.clear();
for (auto point : parser.parse(json)["coordinates"]) {
container.emplace_back(my_point{point["x"], point["y"], point["z"]});
}
return true;
}
BENCHMARK_TEMPLATE(Kostya, Dom);
namespace sum {
class Dom {
public:
simdjson_really_inline bool Run(const padded_string &json);
simdjson_really_inline my_point &Result() { return sum; }
simdjson_really_inline size_t ItemCount() { return count; }
private:
dom::parser parser{};
my_point sum{};
size_t count{};
};
simdjson_really_inline bool Dom::Run(const padded_string &json) {
sum = { 0, 0, 0 };
count = 0;
for (auto coord : parser.parse(json)["coordinates"]) {
sum.x += double(coord["x"]);
sum.y += double(coord["y"]);
sum.z += double(coord["z"]);
count++;
}
return true;
}
BENCHMARK_TEMPLATE(KostyaSum, Dom);
} // namespace sum
} // namespace kostya
#endif // SIMDJSON_EXCEPTIONS

View File

@ -1,96 +0,0 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "kostya.h"
namespace kostya {
using namespace simdjson;
using namespace simdjson::builtin;
class Iter {
public:
simdjson_really_inline bool Run(const padded_string &json);
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
simdjson_really_inline size_t ItemCount() { return container.size(); }
private:
ondemand::parser parser{};
std::vector<my_point> container{};
simdjson_really_inline simdjson_result<double> first_double(ondemand::json_iterator &iter, const char *key) {
if (!iter.start_object() || ondemand::raw_json_string(iter.field_key()) != key || iter.field_value()) { throw "Invalid field"; }
return iter.consume_double();
}
simdjson_really_inline simdjson_result<double> next_double(ondemand::json_iterator &iter, const char *key) {
if (!iter.has_next_field() || ondemand::raw_json_string(iter.field_key()) != key || iter.field_value()) { throw "Invalid field"; }
return iter.consume_double();
}
};
simdjson_really_inline bool Iter::Run(const padded_string &json) {
container.clear();
using std::cerr;
using std::endl;
auto iter = parser.iterate_raw(json).value();
if (!iter.start_object() || !iter.find_field_raw("coordinates")) { cerr << "find coordinates field failed" << endl; return false; }
if (iter.start_array()) {
do {
container.emplace_back(my_point{first_double(iter, "x"), next_double(iter, "y"), next_double(iter, "z")});
if (iter.skip_container()) { return false; } // Skip the rest of the coordinates object
} while (iter.has_next_element());
}
return true;
}
BENCHMARK_TEMPLATE(Kostya, Iter);
namespace sum {
class Iter {
public:
simdjson_really_inline bool Run(const padded_string &json);
simdjson_really_inline my_point &Result() { return sum; }
simdjson_really_inline size_t ItemCount() { return count; }
private:
ondemand::parser parser{};
my_point sum{};
size_t count{};
};
simdjson_really_inline bool Iter::Run(const padded_string &json) {
sum = {0,0,0};
count = 0;
auto iter = parser.iterate_raw(json).value();
if (!iter.start_object() || !iter.find_field_raw("coordinates")) { return false; }
if (!iter.start_array()) { return false; }
do {
if (!iter.start_object() || !iter.find_field_raw("x")) { return false; }
sum.x += iter.consume_double();
if (!iter.has_next_field() || !iter.find_field_raw("y")) { return false; }
sum.y += iter.consume_double();
if (!iter.has_next_field() || !iter.find_field_raw("z")) { return false; }
sum.z += iter.consume_double();
if (iter.skip_container()) { return false; } // Skip the rest of the coordinates object
count++;
} while (iter.has_next_element());
return true;
}
BENCHMARK_TEMPLATE(KostyaSum, Iter);
} // namespace sum
} // namespace kostya
#endif // SIMDJSON_EXCEPTIONS

View File

@ -2,17 +2,51 @@
#if SIMDJSON_EXCEPTIONS
//
// Interface
//
#include "json_benchmark/const_json_runner.h"
#include <vector>
#include <random>
namespace kostya {
template<typename T> static void Kostya(benchmark::State &state);
namespace sum {
template<typename T> static void KostyaSum(benchmark::State &state);
static const simdjson::padded_string &get_built_json_array();
struct point {
double x;
double y;
double z;
simdjson_really_inline bool operator==(const point &other) const {
return x == other.x && y == other.y && z == other.z;
}
simdjson_really_inline bool operator!=(const point &other) const {
return !(*this == other);
}
};
simdjson_unused static std::ostream &operator<<(std::ostream &o, const point &p) {
return o << p.x << "," << p.y << "," << p.z << std::endl;
}
using namespace simdjson;
template<typename I>
struct runner : public json_benchmark::const_json_runner<I> {
std::vector<point> points;
public:
runner() : json_benchmark::const_json_runner<I>(get_built_json_array()) {}
bool before_run(benchmark::State &state) {
points.clear();
return true;
}
bool run(benchmark::State &) {
return this->implementation.run(this->json, points);
}
template<typename R>
bool diff(benchmark::State &state, runner<R> &reference) {
return diff_results(state, points, reference.points);
}
};
static void append_coordinate(std::default_random_engine &e, std::uniform_real_distribution<> &dis, std::stringstream &myss) {
using std::endl;
@ -49,45 +83,15 @@ static std::string build_json_array(size_t N) {
return answer;
}
static const padded_string &get_built_json_array() {
static padded_string json = build_json_array(524288);
static const simdjson::padded_string &get_built_json_array() {
static simdjson::padded_string json = build_json_array(524288);
return json;
}
struct my_point {
double x;
double y;
double z;
simdjson_really_inline bool operator==(const my_point &other) const {
return x == other.x && y == other.y && z == other.z;
}
simdjson_really_inline bool operator!=(const my_point &other) const { return !(*this == other); }
};
struct simdjson_dom;
simdjson_unused static std::ostream &operator<<(std::ostream &o, const my_point &p) {
return o << p.x << "," << p.y << "," << p.z << std::endl;
}
} // namespace kostya
//
// Implementation
//
#include <vector>
#include "event_counter.h"
#include "dom.h"
#include "json_benchmark.h"
namespace kostya {
template<typename T> static void Kostya(benchmark::State &state) {
JsonBenchmark<T, Dom>(state, get_built_json_array());
}
namespace sum {
template<typename T> static void KostyaSum(benchmark::State &state) {
JsonBenchmark<T, Dom>(state, get_built_json_array());
}
template<typename I> simdjson_really_inline static void kostya(benchmark::State &state) {
json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
}
} // namespace kostya

View File

@ -1,74 +0,0 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "kostya.h"
namespace kostya {
using namespace simdjson;
using namespace simdjson::builtin;
class OnDemand {
public:
simdjson_really_inline bool Run(const padded_string &json);
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
simdjson_really_inline size_t ItemCount() { return container.size(); }
private:
ondemand::parser parser{};
std::vector<my_point> container{};
};
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
container.clear();
using std::cout;
using std::endl;
auto doc = parser.iterate(json);
for (ondemand::object coord : doc.find_field("coordinates")) {
container.emplace_back(my_point{coord.find_field("x"), coord.find_field("y"), coord.find_field("z")});
}
return true;
}
BENCHMARK_TEMPLATE(Kostya, OnDemand);
namespace sum {
class OnDemand {
public:
simdjson_really_inline bool Run(const padded_string &json);
simdjson_really_inline my_point &Result() { return sum; }
simdjson_really_inline size_t ItemCount() { return count; }
private:
ondemand::parser parser{};
my_point sum{};
size_t count{};
};
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
sum = {0,0,0};
count = 0;
auto doc = parser.iterate(json);
for (ondemand::object coord : doc.find_field("coordinates")) {
sum.x += double(coord.find_field("x"));
sum.y += double(coord.find_field("y"));
sum.z += double(coord.find_field("z"));
count++;
}
return true;
}
BENCHMARK_TEMPLATE(KostyaSum, OnDemand);
} // namespace sum
} // namespace kostya
#endif // SIMDJSON_EXCEPTIONS

View File

@ -0,0 +1,26 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "kostya.h"
namespace kostya {
using namespace simdjson;
class simdjson_dom {
dom::parser parser{};
public:
bool run(const simdjson::padded_string &json, std::vector<point> &points) {
for (auto point : parser.parse(json)["coordinates"]) {
points.emplace_back(kostya::point{point["x"], point["y"], point["z"]});
}
return true;
}
};
BENCHMARK_TEMPLATE(kostya, simdjson_dom);
} // namespace kostya
#endif // SIMDJSON_EXCEPTIONS

View File

@ -0,0 +1,28 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "kostya.h"
namespace kostya {
using namespace simdjson;
using namespace simdjson::builtin;
class simdjson_ondemand {
ondemand::parser parser{};
public:
bool run(const simdjson::padded_string &json, std::vector<point> &points) {
auto doc = parser.iterate(json);
for (ondemand::object point : doc.find_field("coordinates")) {
points.emplace_back(kostya::point{point.find_field("x"), point.find_field("y"), point.find_field("z")});
}
return true;
}
};
BENCHMARK_TEMPLATE(kostya, simdjson_ondemand);
} // namespace kostya
#endif // SIMDJSON_EXCEPTIONS

View File

@ -1,28 +1,19 @@
#pragma once
#ifdef SIMDJSON_COMPETITION_YYJSON
#include "kostya.h"
namespace kostya {
class Yyjson {
class yyjson {
public:
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
simdjson_really_inline size_t ItemCount() { return container.size(); }
private:
ondemand::parser parser{};
std::vector<my_point> container{};
simdjson_really_inline double get_double(yyjson_val *obj, std::string_view key) {
yyjson_val *val = yyjson_obj_getn(obj, key.data(), key.length());
return yyjson_get_real(val);
}
public:
simdjson_really_inline bool Run(const padded_string &json) {
container.clear();
// Walk the document, parsing the tweets as we go
bool run(const simdjson::padded_string &json, std::vector<point> &points) {
yyjson_doc *doc = yyjson_read(json.data(), json.size(), 0);
if (!doc) { return false; }
yyjson_val *root = yyjson_doc_get_root(doc);
@ -31,12 +22,16 @@ public:
size_t idx, max;
yyjson_val *coord;
yyjson_arr_foreach(coords, idx, max, coord) {
container.emplace_back(my_point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
points.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
}
return true;
}
};
BENCHMARK_TEMPLATE(Kostya, Yyjson);
BENCHMARK_TEMPLATE(kostya, yyjson);
} // namespace kostya
#endif // SIMDJSON_COMPETITION_YYJSON

View File

@ -0,0 +1,78 @@
#pragma once
#include "json_benchmark/const_json_runner.h"
#include <random>
namespace large_random {
static const simdjson::padded_string &get_built_json_array();
struct point {
double x;
double y;
double z;
simdjson_really_inline bool operator==(const point &other) const {
return x == other.x && y == other.y && z == other.z;
}
simdjson_really_inline bool operator!=(const point &other) const {
return !(*this == other);
}
};
simdjson_unused static std::ostream &operator<<(std::ostream &o, const point &p) {
return o << p.x << "," << p.y << "," << p.z << std::endl;
}
template<typename I>
struct runner : public json_benchmark::const_json_runner<I> {
std::vector<point> points;
public:
runner() : json_benchmark::const_json_runner<I>(get_built_json_array()) {}
bool before_run(benchmark::State &state) {
points.clear();
return true;
}
bool run(benchmark::State &) {
return this->implementation.run(this->json, points);
}
template<typename R>
bool diff(benchmark::State &state, runner<R> &reference) {
return diff_results(state, points, reference.points);
}
};
static std::string build_json_array(size_t N) {
std::default_random_engine e;
std::uniform_real_distribution<> dis(0, 1);
std::stringstream myss;
myss << "[" << std::endl;
if(N > 0) {
myss << "{ \"x\":" << dis(e) << ", \"y\":" << dis(e) << ", \"z\":" << dis(e) << "}" << std::endl;
}
for(size_t i = 1; i < N; i++) {
myss << "," << std::endl;
myss << "{ \"x\":" << dis(e) << ", \"y\":" << dis(e) << ", \"z\":" << dis(e) << "}";
}
myss << std::endl;
myss << "]" << std::endl;
std::string answer = myss.str();
std::cout << "Creating a source file spanning " << (answer.size() + 512) / 1024 << " KB " << std::endl;
return answer;
}
static const simdjson::padded_string &get_built_json_array() {
static simdjson::padded_string json = build_json_array(1000000);
return json;
}
struct simdjson_dom;
template<typename T> static void large_random(benchmark::State &state) {
json_benchmark::run_json_benchmark<runner<T>, runner<simdjson_dom>>(state);
}
} // namespace large_random

View File

@ -0,0 +1,26 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "large_random.h"
namespace large_random {
using namespace simdjson;
class simdjson_dom {
dom::parser parser{};
public:
bool run(const simdjson::padded_string &json, std::vector<point> &points) {
for (auto point : parser.parse(json)) {
points.emplace_back(large_random::point{point["x"], point["y"], point["z"]});
}
return true;
}
};
BENCHMARK_TEMPLATE(large_random, simdjson_dom);
} // namespace large_random
#endif // SIMDJSON_EXCEPTIONS

View File

@ -0,0 +1,28 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "large_random.h"
namespace large_random {
using namespace simdjson;
using namespace simdjson::builtin;
class simdjson_ondemand {
ondemand::parser parser{};
public:
bool run(const simdjson::padded_string &json, std::vector<point> &points) {
auto doc = parser.iterate(json);
for (ondemand::object coord : doc) {
points.emplace_back(point{coord.find_field("x"), coord.find_field("y"), coord.find_field("z")});
}
return true;
}
};
BENCHMARK_TEMPLATE(large_random, simdjson_ondemand);
} // namespace large_random
#endif // SIMDJSON_EXCEPTIONS

View File

@ -0,0 +1,28 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "large_random.h"
namespace large_random {
using namespace simdjson;
using namespace simdjson::builtin;
class simdjson_ondemand_unordered {
ondemand::parser parser{};
public:
bool run(const simdjson::padded_string &json, std::vector<point> &points) {
auto doc = parser.iterate(json);
for (ondemand::object coord : doc) {
points.emplace_back(large_random::point{coord["x"], coord["y"], coord["z"]});
}
return true;
}
};
BENCHMARK_TEMPLATE(large_random, simdjson_ondemand_unordered);
} // namespace large_random
#endif // SIMDJSON_EXCEPTIONS
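The only difference between simdjson_ondemand and simdjson_ondemand_unordered above is the field lookup: find_field consumes keys in the order they appear in the document, while operator[] performs an unordered lookup that still works if the JSON stores the fields in another order, at a small extra cost. In sketch form (assuming that On Demand behaviour):
// Ordered: "x", "y", "z" must be requested in document order.
point a{coord.find_field("x"), coord.find_field("y"), coord.find_field("z")};
// Unordered: operator[] searches for each key, so it does not rely on the order
// in which the generator happened to write the fields.
point b{coord["x"], coord["y"], coord["z"]};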

View File

@ -1,17 +1,13 @@
#pragma once
#include "largerandom.h"
#ifdef SIMDJSON_COMPETITION_YYJSON
namespace largerandom {
#include "large_random.h"
class Yyjson {
public:
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
simdjson_really_inline size_t ItemCount() { return container.size(); }
namespace large_random {
private:
class yyjson {
ondemand::parser parser{};
std::vector<my_point> container{};
simdjson_really_inline double get_double(yyjson_val *obj, std::string_view key) {
yyjson_val *val = yyjson_obj_getn(obj, key.data(), key.length());
@ -19,9 +15,7 @@ private:
}
public:
simdjson_really_inline bool Run(const padded_string &json) {
container.clear();
bool run(const simdjson::padded_string &json, std::vector<point> &points) {
// Walk the document, parsing the tweets as we go
yyjson_doc *doc = yyjson_read(json.data(), json.size(), 0);
if (!doc) { return false; }
@ -30,12 +24,14 @@ public:
size_t idx, max;
yyjson_val *coord;
yyjson_arr_foreach(coords, idx, max, coord) {
container.emplace_back(my_point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
points.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
}
return true;
}
};
BENCHMARK_TEMPLATE(LargeRandom, Yyjson);
BENCHMARK_TEMPLATE(large_random, yyjson);
} // namespace kostya
} // namespace large_random
#endif // SIMDJSON_COMPETITION_YYJSON

View File

@ -1,5 +0,0 @@
if (TARGET benchmark::benchmark)
link_libraries(benchmark::benchmark)
add_executable(bench_ondemand_largerandom bench_ondemand_largerandom.cpp)
add_executable(bench_ondemand_unordered_largerandom bench_ondemand_unordered_largerandom.cpp)
endif()

View File

@ -1,14 +0,0 @@
#include "simdjson.h"
#include <iostream>
#include <sstream>
#include <random>
#include <vector>
SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
#include <benchmark/benchmark.h>
SIMDJSON_POP_DISABLE_WARNINGS
#define BENCHMARK_NO_DOM
#include "largerandom/ondemand.h"
BENCHMARK_MAIN();

View File

@ -1,14 +0,0 @@
#include "simdjson.h"
#include <iostream>
#include <sstream>
#include <random>
#include <vector>
SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
#include <benchmark/benchmark.h>
SIMDJSON_POP_DISABLE_WARNINGS
#define BENCHMARK_NO_DOM
#include "largerandom/ondemand_unordered.h"
BENCHMARK_MAIN();

View File

@ -1,37 +0,0 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "largerandom.h"
namespace largerandom {
using namespace simdjson;
class Dom {
public:
simdjson_really_inline bool Run(const padded_string &json);
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
simdjson_really_inline size_t ItemCount() { return container.size(); }
private:
dom::parser parser{};
std::vector<my_point> container{};
};
simdjson_really_inline bool Dom::Run(const padded_string &json) {
container.clear();
for (auto point : parser.parse(json)) {
container.emplace_back(my_point{point["x"], point["y"], point["z"]});
}
return true;
}
BENCHMARK_TEMPLATE(LargeRandom, Dom);
} // namespace largerandom
#endif // SIMDJSON_EXCEPTIONS

View File

@ -1,53 +0,0 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "largerandom.h"
namespace largerandom {
using namespace simdjson;
using namespace simdjson::builtin;
class Iter {
public:
simdjson_really_inline bool Run(const padded_string &json);
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
simdjson_really_inline size_t ItemCount() { return container.size(); }
private:
ondemand::parser parser{};
std::vector<my_point> container{};
simdjson_really_inline double first_double(ondemand::json_iterator &iter) {
if (iter.start_object().error() || iter.field_key().error() || iter.field_value()) { throw "Invalid field"; }
return iter.consume_double();
}
simdjson_really_inline double next_double(ondemand::json_iterator &iter) {
if (!iter.has_next_field() || iter.field_key().error() || iter.field_value()) { throw "Invalid field"; }
return iter.consume_double();
}
};
simdjson_really_inline bool Iter::Run(const padded_string &json) {
container.clear();
auto iter = parser.iterate_raw(json).value();
if (iter.start_array()) {
do {
container.emplace_back(my_point{first_double(iter), next_double(iter), next_double(iter)});
if (iter.has_next_field()) { throw "Too many fields"; }
} while (iter.has_next_element());
}
return true;
}
BENCHMARK_TEMPLATE(LargeRandom, Iter);
} // namespace largerandom
#endif // SIMDJSON_EXCEPTIONS

View File

@ -1,72 +0,0 @@
#pragma once
//
// Interface
//
namespace largerandom {
template<typename T> static void LargeRandom(benchmark::State &state);
using namespace simdjson;
static std::string build_json_array(size_t N) {
std::default_random_engine e;
std::uniform_real_distribution<> dis(0, 1);
std::stringstream myss;
myss << "[" << std::endl;
if(N > 0) {
myss << "{ \"x\":" << dis(e) << ", \"y\":" << dis(e) << ", \"z\":" << dis(e) << "}" << std::endl;
}
for(size_t i = 1; i < N; i++) {
myss << "," << std::endl;
myss << "{ \"x\":" << dis(e) << ", \"y\":" << dis(e) << ", \"z\":" << dis(e) << "}";
}
myss << std::endl;
myss << "]" << std::endl;
std::string answer = myss.str();
std::cout << "Creating a source file spanning " << (answer.size() + 512) / 1024 << " KB " << std::endl;
return answer;
}
static const padded_string &get_built_json_array() {
static padded_string json = build_json_array(1000000);
return json;
}
struct my_point {
double x;
double y;
double z;
simdjson_really_inline bool operator==(const my_point &other) const {
return x == other.x && y == other.y && z == other.z;
}
simdjson_really_inline bool operator!=(const my_point &other) const { return !(*this == other); }
};
simdjson_unused static std::ostream &operator<<(std::ostream &o, const my_point &p) {
return o << p.x << "," << p.y << "," << p.z << std::endl;
}
} // namespace largerandom
//
// Implementation
//
#include <vector>
#include "event_counter.h"
#ifndef BENCHMARK_NO_DOM
#include "dom.h"
#endif
#include "json_benchmark.h"
namespace largerandom {
template<typename T> static void LargeRandom(benchmark::State &state) {
#ifdef BENCHMARK_NO_DOM
JsonBenchmark<T, T>(state, get_built_json_array());
#else
JsonBenchmark<T, Dom>(state, get_built_json_array());
#endif
}
} // namespace largerandom

View File

@ -1,38 +0,0 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "largerandom.h"
namespace largerandom {
using namespace simdjson;
using namespace simdjson::builtin;
class OnDemand {
public:
simdjson_really_inline bool Run(const padded_string &json);
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
simdjson_really_inline size_t ItemCount() { return container.size(); }
private:
ondemand::parser parser{};
std::vector<my_point> container{};
};
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
container.clear();
auto doc = parser.iterate(json);
for (ondemand::object coord : doc) {
container.emplace_back(my_point{coord.find_field("x"), coord.find_field("y"), coord.find_field("z")});
}
return true;
}
BENCHMARK_TEMPLATE(LargeRandom, OnDemand);
} // namespace largerandom
#endif // SIMDJSON_EXCEPTIONS

View File

@ -1,38 +0,0 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "largerandom.h"
namespace largerandom {
using namespace simdjson;
using namespace simdjson::builtin;
class OnDemandUnordered {
public:
simdjson_really_inline bool Run(const padded_string &json);
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
simdjson_really_inline size_t ItemCount() { return container.size(); }
private:
ondemand::parser parser{};
std::vector<my_point> container{};
};
simdjson_really_inline bool OnDemandUnordered::Run(const padded_string &json) {
container.clear();
auto doc = parser.iterate(json);
for (ondemand::object coord : doc) {
container.emplace_back(my_point{coord["x"], coord["y"], coord["z"]});
}
return true;
}
BENCHMARK_TEMPLATE(LargeRandom, OnDemandUnordered);
} // namespace largerandom
#endif // SIMDJSON_EXCEPTIONS

View File

@ -1,121 +0,0 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "largerandom.h"
namespace largerandom {
using namespace simdjson;
using namespace simdjson::builtin;
using namespace simdjson::builtin::stage2;
class Sax {
public:
simdjson_really_inline bool Run(const padded_string &json) noexcept;
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
simdjson_really_inline size_t ItemCount() { return container.size(); }
private:
simdjson_really_inline error_code RunNoExcept(const padded_string &json) noexcept;
error_code Allocate(size_t new_capacity);
std::unique_ptr<uint8_t[]> string_buf{};
size_t capacity{};
dom_parser_implementation dom_parser{};
std::vector<my_point> container{};
};
struct sax_point_reader_visitor {
public:
std::vector<my_point> &points;
enum {GOT_X=0, GOT_Y=1, GOT_Z=2, GOT_SOMETHING_ELSE=4};
size_t idx{GOT_SOMETHING_ELSE};
double buffer[3]={};
explicit sax_point_reader_visitor(std::vector<my_point> &_points) : points(_points) {}
simdjson_really_inline error_code visit_object_start(json_iterator &) {
idx = 0;
return SUCCESS;
}
simdjson_really_inline error_code visit_primitive(json_iterator &, const uint8_t *value) {
if(idx == GOT_SOMETHING_ELSE) { return simdjson::SUCCESS; }
return numberparsing::parse_double(value).get(buffer[idx]);
}
simdjson_really_inline error_code visit_object_end(json_iterator &) {
points.emplace_back(my_point{buffer[0], buffer[1], buffer[2]});
return SUCCESS;
}
simdjson_really_inline error_code visit_document_start(json_iterator &) { return SUCCESS; }
simdjson_really_inline error_code visit_key(json_iterator &, const uint8_t * key) {
switch(key[1]) {
// Technically, we should check the other characters
// in the key, but we are cheating to go as fast
// as possible.
case 'x':
idx = GOT_X;
break;
case 'y':
idx = GOT_Y;
break;
case 'z':
idx = GOT_Z;
break;
default:
idx = GOT_SOMETHING_ELSE;
}
return SUCCESS;
}
simdjson_really_inline error_code visit_array_start(json_iterator &) { return SUCCESS; }
simdjson_really_inline error_code visit_array_end(json_iterator &) { return SUCCESS; }
simdjson_really_inline error_code visit_document_end(json_iterator &) { return SUCCESS; }
simdjson_really_inline error_code visit_empty_array(json_iterator &) { return SUCCESS; }
simdjson_really_inline error_code visit_empty_object(json_iterator &) { return SUCCESS; }
simdjson_really_inline error_code visit_root_primitive(json_iterator &, const uint8_t *) { return SUCCESS; }
simdjson_really_inline error_code increment_count(json_iterator &) { return SUCCESS; }
};
// NOTE: this assumes the dom_parser is already allocated
bool Sax::Run(const padded_string &json) noexcept {
auto error = RunNoExcept(json);
if (error) { std::cerr << error << std::endl; return false; }
return true;
}
error_code Sax::RunNoExcept(const padded_string &json) noexcept {
container.clear();
// Allocate capacity if needed
if (capacity < json.size()) {
SIMDJSON_TRY( Allocate(json.size()) );
}
// Run stage 1 first.
SIMDJSON_TRY( dom_parser.stage1(json.u8data(), json.size(), false) );
// Then walk the document, parsing the tweets as we go
json_iterator iter(dom_parser, 0);
sax_point_reader_visitor visitor(container);
SIMDJSON_TRY( iter.walk_document<false>(visitor) );
return SUCCESS;
}
error_code Sax::Allocate(size_t new_capacity) {
// string_capacity copied from document::allocate
size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64);
string_buf.reset(new (std::nothrow) uint8_t[string_capacity]);
if (auto error = dom_parser.set_capacity(new_capacity)) { return error; }
if (capacity == 0) { // set max depth the first time only
if (auto error = dom_parser.set_max_depth(DEFAULT_MAX_DEPTH)) { return error; }
}
capacity = new_capacity;
return SUCCESS;
}
BENCHMARK_TEMPLATE(LargeRandom, Sax);
} // namespace largerandom
#endif // SIMDJSON_EXCEPTIONS

View File

@ -1,51 +0,0 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "partial_tweets.h"
namespace partial_tweets {
using namespace simdjson;
class Dom {
public:
simdjson_really_inline bool Run(const padded_string &json);
simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }
simdjson_really_inline size_t ItemCount() { return tweets.size(); }
private:
dom::parser parser{};
std::vector<tweet> tweets{};
simdjson_really_inline uint64_t nullable_int(dom::element element) {
if (element.is_null()) { return 0; }
return element;
}
};
simdjson_really_inline bool Dom::Run(const padded_string &json) {
tweets.clear();
for (dom::element tweet : parser.parse(json)["statuses"]) {
auto user = tweet["user"];
tweets.emplace_back(partial_tweets::tweet{
tweet["created_at"],
tweet["id"],
tweet["text"],
nullable_int(tweet["in_reply_to_status_id"]),
{ user["id"], user["screen_name"] },
tweet["retweet_count"],
tweet["favorite_count"]
});
}
return true;
}
BENCHMARK_TEMPLATE(PartialTweets, Dom);
} // namespace partial_tweets
#endif // SIMDJSON_EXCEPTIONS

View File

@ -1,64 +0,0 @@
#pragma once
#include "partial_tweets.h"
namespace partial_tweets {
using namespace simdjson;
class DomNoExcept {
public:
simdjson_really_inline bool Run(const simdjson::padded_string &json) noexcept;
simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }
simdjson_really_inline size_t ItemCount() { return tweets.size(); }
private:
dom::parser parser{};
std::vector<tweet> tweets{};
simdjson_really_inline simdjson_result<uint64_t> nullable_int(simdjson_result<dom::element> result) noexcept {
dom::element element;
SIMDJSON_TRY( result.get(element) );
if (element.is_null()) { return 0; }
return element.get_uint64();
}
simdjson_really_inline error_code RunNoExcept(const simdjson::padded_string &json) noexcept;
};
simdjson_really_inline bool DomNoExcept::Run(const simdjson::padded_string &json) noexcept {
auto error = RunNoExcept(json);
if (error) { std::cerr << error << std::endl; return false; }
return true;
}
simdjson_really_inline error_code DomNoExcept::RunNoExcept(const simdjson::padded_string &json) noexcept {
tweets.clear();
dom::array tweet_array;
SIMDJSON_TRY( parser.parse(json)["statuses"].get_array().get(tweet_array) );
for (auto tweet_element : tweet_array) {
dom::object tweet;
SIMDJSON_TRY( tweet_element.get_object().get(tweet) );
dom::object user;
SIMDJSON_TRY( tweet["user"].get_object().get(user) );
partial_tweets::tweet t;
SIMDJSON_TRY( tweet["created_at"] .get_string().get(t.created_at) );
SIMDJSON_TRY( tweet["id"] .get_uint64().get(t.id) );
SIMDJSON_TRY( tweet["text"] .get_string().get(t.text) );
SIMDJSON_TRY( nullable_int(tweet["in_reply_to_status_id"]).get(t.in_reply_to_status_id) );
SIMDJSON_TRY( user["id"] .get_uint64().get(t.user.id) );
SIMDJSON_TRY( user["screen_name"] .get_string().get(t.user.screen_name) );
SIMDJSON_TRY( tweet["retweet_count"] .get_uint64().get(t.retweet_count) );
SIMDJSON_TRY( tweet["favorite_count"].get_uint64().get(t.favorite_count) );
tweets.push_back(t);
}
return SUCCESS;
}
} // namespace partial_tweets

View File

@ -1,93 +0,0 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "partial_tweets.h"
namespace partial_tweets {
using namespace simdjson;
using namespace simdjson::builtin;
class Iter {
public:
simdjson_really_inline bool Run(const padded_string &json);
simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }
simdjson_really_inline size_t ItemCount() { return tweets.size(); }
private:
ondemand::parser parser{};
std::vector<tweet> tweets{};
simdjson_really_inline uint64_t nullable_int(ondemand::value && value) {
if (value.is_null()) { return 0; }
return std::move(value);
}
simdjson_really_inline twitter_user read_user(ondemand::object && user) {
// Move user into a local object so it gets destroyed (and moves the iterator)
ondemand::object u = std::move(user);
return { u["id"], u["screen_name"] };
}
};
simdjson_really_inline bool Iter::Run(const padded_string &json) {
tweets.clear();
// Walk the document, parsing the tweets as we go
// { "statuses":
auto iter = parser.iterate_raw(json).value();
if (!iter.start_object() || !iter.find_field_raw("statuses")) { return false; }
// { "statuses": [
if (!iter.start_array()) { return false; }
do {
tweet tweet;
if (!iter.start_object() || !iter.find_field_raw("created_at")) { return false; }
tweet.created_at = iter.consume_string();
if (!iter.has_next_field() || !iter.find_field_raw("id")) { return false; }
tweet.id = iter.consume_uint64();
if (!iter.has_next_field() || !iter.find_field_raw("text")) { return false; }
tweet.text = iter.consume_string();
if (!iter.has_next_field() || !iter.find_field_raw("in_reply_to_status_id")) { return false; }
if (!iter.is_null()) {
tweet.in_reply_to_status_id = iter.consume_uint64();
}
if (!iter.has_next_field() || !iter.find_field_raw("user")) { return false; }
{
if (!iter.start_object() || !iter.find_field_raw("id")) { return false; }
tweet.user.id = iter.consume_uint64();
if (!iter.has_next_field() || !iter.find_field_raw("screen_name")) { return false; }
tweet.user.screen_name = iter.consume_string();
if (iter.skip_container()) { return false; } // Skip the rest of the user object
}
if (!iter.has_next_field() || !iter.find_field_raw("retweet_count")) { return false; }
tweet.retweet_count = iter.consume_uint64();
if (!iter.has_next_field() || !iter.find_field_raw("favorite_count")) { return false; }
tweet.favorite_count = iter.consume_uint64();
tweets.push_back(tweet);
if (iter.skip_container()) { return false; } // Skip the rest of the tweet object
} while (iter.has_next_element());
return true;
}
BENCHMARK_TEMPLATE(PartialTweets, Iter);
} // namespace partial_tweets
#endif // SIMDJSON_EXCEPTIONS

View File

@ -1,64 +0,0 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "partial_tweets.h"
namespace partial_tweets {
using namespace simdjson;
using namespace simdjson::builtin;
class OnDemand {
public:
OnDemand() {
if(!displayed_implementation) {
std::cout << "On Demand implementation: " << builtin_implementation()->name() << std::endl;
displayed_implementation = true;
}
}
simdjson_really_inline bool Run(const padded_string &json);
simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }
simdjson_really_inline size_t ItemCount() { return tweets.size(); }
private:
ondemand::parser parser{};
std::vector<tweet> tweets{};
simdjson_really_inline uint64_t nullable_int(ondemand::value value) {
if (value.is_null()) { return 0; }
return value;
}
simdjson_really_inline twitter_user read_user(ondemand::object user) {
return { user.find_field("id"), user.find_field("screen_name") };
}
static inline bool displayed_implementation = false;
};
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
tweets.clear();
// Walk the document, parsing the tweets as we go
auto doc = parser.iterate(json);
for (ondemand::object tweet : doc.find_field("statuses")) {
tweets.emplace_back(partial_tweets::tweet{
tweet.find_field("created_at"),
tweet.find_field("id"),
tweet.find_field("text"),
nullable_int(tweet.find_field("in_reply_to_status_id")),
read_user(tweet.find_field("user")),
tweet.find_field("retweet_count"),
tweet.find_field("favorite_count")
});
}
return true;
}
BENCHMARK_TEMPLATE(PartialTweets, OnDemand);
} // namespace partial_tweets
#endif // SIMDJSON_EXCEPTIONS

View File

@ -1,41 +1,44 @@
#pragma once
//
// Interface
//
namespace partial_tweets {
template<typename T> static void PartialTweets(benchmark::State &state);
} // namespace partial_tweets
//
// Implementation
//
#include "json_benchmark/file_runner.h"
#include "tweet.h"
#include <vector>
#include "event_counter.h"
#include "domnoexcept.h"
#include "json_benchmark.h"
namespace partial_tweets {
using namespace simdjson;
template<typename I>
struct runner : public json_benchmark::file_runner<I> {
std::vector<tweet> tweets{};
template<typename T> static void PartialTweets(benchmark::State &state) {
//
// Load the JSON file
//
constexpr const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
error_code error;
padded_string json;
if ((error = padded_string::load(TWITTER_JSON).get(json))) {
std::cerr << error << std::endl;
state.SkipWithError("error loading");
return;
public:
bool setup(benchmark::State &state) {
return this->load_json(state, json_benchmark::TWITTER_JSON);
}
JsonBenchmark<T, DomNoExcept>(state, json);
bool before_run(benchmark::State &state) {
tweets.clear();
return true;
}
bool run(benchmark::State &) {
return this->implementation.run(this->json, tweets);
}
template<typename R>
bool diff(benchmark::State &state, runner<R> &reference) {
return diff_results(state, tweets, reference.tweets);
}
size_t items_per_iteration() {
return tweets.size();
}
};
struct simdjson_dom;
template<typename I> simdjson_really_inline static void partial_tweets(benchmark::State &state) {
json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
}
} // namespace partial_tweets

View File

@ -1,69 +0,0 @@
#pragma once
#include "partial_tweets.h"
#include "sax_tweet_reader_visitor.h"
namespace partial_tweets {
using namespace simdjson;
using namespace simdjson::builtin;
using namespace simdjson::builtin::stage2;
class Sax {
public:
simdjson_really_inline bool Run(const padded_string &json) noexcept;
simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }
simdjson_really_inline size_t ItemCount() { return tweets.size(); }
private:
simdjson_really_inline error_code RunNoExcept(const padded_string &json) noexcept;
error_code Allocate(size_t new_capacity);
std::unique_ptr<uint8_t[]> string_buf{};
size_t capacity{};
dom_parser_implementation dom_parser{};
std::vector<tweet> tweets{};
};
// NOTE: this assumes the dom_parser is already allocated
bool Sax::Run(const padded_string &json) noexcept {
auto error = RunNoExcept(json);
if (error) { std::cerr << error << std::endl; return false; }
return true;
}
error_code Sax::RunNoExcept(const padded_string &json) noexcept {
tweets.clear();
// Allocate capacity if needed
if (capacity < json.size()) {
SIMDJSON_TRY( Allocate(json.size()) );
}
// Run stage 1 first.
SIMDJSON_TRY( dom_parser.stage1((uint8_t *)json.data(), json.size(), false) );
// Then walk the document, parsing the tweets as we go
json_iterator iter(dom_parser, 0);
sax_tweet_reader_visitor visitor(tweets, string_buf.get());
SIMDJSON_TRY( iter.walk_document<false>(visitor) );
return SUCCESS;
}
error_code Sax::Allocate(size_t new_capacity) {
// string_capacity copied from document::allocate
size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64);
string_buf.reset(new (std::nothrow) uint8_t[string_capacity]);
if (auto error = dom_parser.set_capacity(new_capacity)) { return error; }
if (capacity == 0) { // set max depth the first time only
if (auto error = dom_parser.set_max_depth(DEFAULT_MAX_DEPTH)) { return error; }
}
capacity = new_capacity;
return SUCCESS;
}
BENCHMARK_TEMPLATE(PartialTweets, Sax);
} // namespace partial_tweets

View File

@ -1,514 +0,0 @@
#pragma once
#include "simdjson.h"
#include "tweet.h"
#include <vector>
namespace partial_tweets {
using namespace simdjson;
using namespace simdjson::builtin;
using namespace simdjson::builtin::stage2;
struct sax_tweet_reader_visitor {
public:
simdjson_really_inline sax_tweet_reader_visitor(std::vector<tweet> &tweets, uint8_t *string_buf);
simdjson_really_inline error_code visit_document_start(json_iterator &iter);
simdjson_really_inline error_code visit_object_start(json_iterator &iter);
simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key);
simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value);
simdjson_really_inline error_code visit_array_start(json_iterator &iter);
simdjson_really_inline error_code visit_array_end(json_iterator &iter);
simdjson_really_inline error_code visit_object_end(json_iterator &iter);
simdjson_really_inline error_code visit_document_end(json_iterator &iter);
simdjson_really_inline error_code visit_empty_array(json_iterator &iter);
simdjson_really_inline error_code visit_empty_object(json_iterator &iter);
simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value);
simdjson_really_inline error_code increment_count(json_iterator &iter);
private:
// Since we only care about one thing at each level, we just use depth as the marker for what
// object/array we're nested inside.
enum class containers {
document = 0, //
top_object = 1, // {
statuses = 2, // { "statuses": [
tweet = 3, // { "statuses": [ {
user = 4 // { "statuses": [ { "user": {
};
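// Example: when a tweet's '{' is visited, container is still containers::statuses (2) and
// iter.depth is 3, so in_container_child() passes and start_container() records
// containers::tweet; the tweet's own keys and primitives are then visited at depth 3.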
/**
* The deepest level we care about.
* There can be content nested deeper than this; we simply ignore it.
*/
static constexpr uint32_t MAX_SUPPORTED_DEPTH = uint32_t(containers::user);
static constexpr const char *STATE_NAMES[] = {
"document",
"top object",
"statuses",
"tweet",
"user"
};
enum class field_type {
any,
unsigned_integer,
string,
nullable_unsigned_integer,
object,
array
};
struct field {
const char * key{};
size_t len{0};
size_t offset{};
containers container{containers::document};
field_type type{field_type::any};
};
std::vector<tweet> &tweets;
containers container{containers::document};
uint8_t *current_string_buf_loc;
const uint8_t *current_key{};
simdjson_really_inline bool in_container(json_iterator &iter);
simdjson_really_inline bool in_container_child(json_iterator &iter);
simdjson_really_inline void start_container(json_iterator &iter);
simdjson_really_inline void end_container(json_iterator &iter);
simdjson_really_inline error_code parse_nullable_unsigned(json_iterator &iter, const uint8_t *value, const field &f);
simdjson_really_inline error_code parse_unsigned(json_iterator &iter, const uint8_t *value, const field &f);
simdjson_really_inline error_code parse_string(json_iterator &iter, const uint8_t *value, const field &f);
struct field_lookup {
field entries[256]{};
field_lookup();
simdjson_really_inline field get(const uint8_t * key, containers container);
private:
simdjson_really_inline uint8_t hash(const char * key, uint32_t depth);
simdjson_really_inline void add(const char * key, size_t len, containers container, field_type type, size_t offset);
simdjson_really_inline void neg(const char * const key, uint32_t depth);
};
static field_lookup fields;
}; // sax_tweet_reader_visitor
simdjson_really_inline sax_tweet_reader_visitor::sax_tweet_reader_visitor(std::vector<tweet> &_tweets, uint8_t *_string_buf)
: tweets{_tweets},
current_string_buf_loc{_string_buf} {
}
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_document_start(json_iterator &iter) {
start_container(iter);
return SUCCESS;
}
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_array_start(json_iterator &iter) {
// If we're not in a container we care about, don't bother with the rest
if (!in_container_child(iter)) { return SUCCESS; }
// Handle fields first
if (current_key) {
switch (fields.get(current_key, container).type) {
case field_type::array: // { "statuses": [
start_container(iter);
current_key = nullptr;
return SUCCESS;
case field_type::any:
return SUCCESS;
case field_type::object:
case field_type::unsigned_integer:
case field_type::nullable_unsigned_integer:
case field_type::string:
iter.log_error("unexpected array field");
return INCORRECT_TYPE;
}
}
// We're not in a field, so it must be a child of an array; nested arrays are not supported here.
iter.log_error("unexpected array");
return INCORRECT_TYPE;
}
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_object_start(json_iterator &iter) {
// If we're not in a container we care about, don't bother with the rest
if (!in_container_child(iter)) { return SUCCESS; }
// Handle known fields
if (current_key) {
auto f = fields.get(current_key, container);
switch (f.type) {
case field_type::object: // { "statuses": [ { "user": {
start_container(iter);
return SUCCESS;
case field_type::any:
return SUCCESS;
case field_type::array:
case field_type::unsigned_integer:
case field_type::nullable_unsigned_integer:
case field_type::string:
iter.log_error("unexpected object field");
return INCORRECT_TYPE;
}
}
// It's not a field, so it's a child of an array or document
switch (container) {
case containers::document: // top_object: {
case containers::statuses: // tweet: { "statuses": [ {
start_container(iter);
return SUCCESS;
case containers::top_object:
case containers::tweet:
case containers::user:
iter.log_error("unexpected object");
return INCORRECT_TYPE;
}
SIMDJSON_UNREACHABLE();
return UNINITIALIZED;
}
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_key(json_iterator &, const uint8_t *key) {
current_key = key;
return SUCCESS;
}
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_primitive(json_iterator &iter, const uint8_t *value) {
// Don't bother unless we're in a container we care about
if (!in_container(iter)) { return SUCCESS; }
// Handle fields first
if (current_key) {
auto f = fields.get(current_key, container);
switch (f.type) {
case field_type::unsigned_integer:
return parse_unsigned(iter, value, f);
case field_type::nullable_unsigned_integer:
return parse_nullable_unsigned(iter, value, f);
case field_type::string:
return parse_string(iter, value, f);
case field_type::any:
return SUCCESS;
case field_type::array:
case field_type::object:
iter.log_error("unexpected primitive");
return INCORRECT_TYPE;
}
current_key = nullptr;
}
// If it's not a field, it's a child of an array.
// The only array we support is statuses, which must contain objects.
iter.log_error("unexpected primitive");
return INCORRECT_TYPE;
}
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_array_end(json_iterator &iter) {
if (in_container(iter)) { end_container(iter); }
return SUCCESS;
}
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_object_end(json_iterator &iter) {
current_key = nullptr;
if (in_container(iter)) { end_container(iter); }
return SUCCESS;
}
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_document_end(json_iterator &) {
return SUCCESS;
}
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_empty_array(json_iterator &) {
current_key = nullptr;
return SUCCESS;
}
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_empty_object(json_iterator &) {
return SUCCESS;
}
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_root_primitive(json_iterator &iter, const uint8_t *) {
iter.log_error("unexpected root primitive");
return INCORRECT_TYPE;
}
simdjson_really_inline error_code sax_tweet_reader_visitor::increment_count(json_iterator &) { return SUCCESS; }
simdjson_really_inline bool sax_tweet_reader_visitor::in_container(json_iterator &iter) {
return iter.depth == uint32_t(container);
}
simdjson_really_inline bool sax_tweet_reader_visitor::in_container_child(json_iterator &iter) {
return iter.depth == uint32_t(container) + 1;
}
simdjson_really_inline void sax_tweet_reader_visitor::start_container(json_iterator &iter) {
SIMDJSON_ASSUME(iter.depth <= MAX_SUPPORTED_DEPTH); // Asserts in debug mode
container = containers(iter.depth);
if (logger::LOG_ENABLED) { iter.log_value(STATE_NAMES[iter.depth]); }
if (container == containers::tweet) { tweets.push_back({}); }
}
simdjson_really_inline void sax_tweet_reader_visitor::end_container(json_iterator &) {
container = containers(int(container) - 1);
}
simdjson_really_inline error_code sax_tweet_reader_visitor::parse_nullable_unsigned(json_iterator &iter, const uint8_t *value, const field &f) {
iter.log_value(f.key);
auto i = reinterpret_cast<uint64_t *>(reinterpret_cast<char *>(&tweets.back()) + f.offset);
if (auto error = numberparsing::parse_unsigned(value).get(*i)) {
// If number parsing failed, check if it's null before returning the error
if (!atomparsing::is_valid_null_atom(value)) { iter.log_error("expected number or null"); return error; }
*i = 0; // null: store 0 in the tweet field
}
return SUCCESS;
}
simdjson_really_inline error_code sax_tweet_reader_visitor::parse_unsigned(json_iterator &iter, const uint8_t *value, const field &f) {
iter.log_value(f.key);
auto i = reinterpret_cast<uint64_t *>(reinterpret_cast<char *>(&tweets.back()) + f.offset);
return numberparsing::parse_unsigned(value).get(*i);
}
simdjson_really_inline error_code sax_tweet_reader_visitor::parse_string(json_iterator &iter, const uint8_t *value, const field &f) {
iter.log_value(f.key);
auto s = reinterpret_cast<std::string_view *>(reinterpret_cast<char *>(&tweets.back()) + f.offset);
return stringparsing::parse_string_to_buffer(value, current_string_buf_loc, *s);
}
sax_tweet_reader_visitor::field_lookup sax_tweet_reader_visitor::fields{};
simdjson_really_inline uint8_t sax_tweet_reader_visitor::field_lookup::hash(const char * key, uint32_t depth) {
// These shift numbers were chosen specifically because this yields only 2 collisions between
// keys in twitter.json, leaves 0 as a distinct value, and has 0 collisions between keys we
// actually care about.
return uint8_t((key[0] << 0) ^ (key[1] << 3) ^ (key[2] << 3) ^ (key[3] << 1) ^ depth);
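// Keys are hashed with their quotes included: key[0] is the opening '"' and the following
// bytes come straight from the document, which is why the negative entries below include
// strings like "\"h\":".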
}
simdjson_really_inline sax_tweet_reader_visitor::field sax_tweet_reader_visitor::field_lookup::get(const uint8_t * key, containers c) {
auto index = hash((const char *)key, uint32_t(c));
auto entry = entries[index];
// TODO if any key is > SIMDJSON_PADDING, this will access inaccessible memory!
if (c != entry.container || memcmp(key, entry.key, entry.len)) { return entries[0]; }
return entry;
}
simdjson_really_inline void sax_tweet_reader_visitor::field_lookup::add(const char * key, size_t len, containers c, field_type type, size_t offset) {
auto index = hash(key, uint32_t(c));
if (index == 0) {
fprintf(stderr, "%s (depth %d) hashes to zero, which is used as 'missing value'\n", key, int(c));
assert(false);
}
if (entries[index].key) {
fprintf(stderr, "%s (depth %d) collides with %s (depth %d) !\n", key, int(c), entries[index].key, int(entries[index].container));
assert(false);
}
entries[index] = { key, len, offset, c, type };
}
simdjson_really_inline void sax_tweet_reader_visitor::field_lookup::neg(const char * const key, uint32_t depth) {
auto index = hash(key, depth);
if (entries[index].key) {
fprintf(stderr, "%s (depth %d) conflicts with %s (depth %d) !\n", key, depth, entries[index].key, int(entries[index].container));
}
}
sax_tweet_reader_visitor::field_lookup::field_lookup() {
add("\"statuses\"", std::strlen("\"statuses\""), containers::top_object, field_type::array, 0); // { "statuses": [...]
#define TWEET_FIELD(KEY, TYPE) add("\"" #KEY "\"", std::strlen("\"" #KEY "\""), containers::tweet, TYPE, offsetof(tweet, KEY));
TWEET_FIELD(id, field_type::unsigned_integer);
TWEET_FIELD(in_reply_to_status_id, field_type::nullable_unsigned_integer);
TWEET_FIELD(retweet_count, field_type::unsigned_integer);
TWEET_FIELD(favorite_count, field_type::unsigned_integer);
TWEET_FIELD(text, field_type::string);
TWEET_FIELD(created_at, field_type::string);
TWEET_FIELD(user, field_type::object)
#undef TWEET_FIELD
#define USER_FIELD(KEY, TYPE) add("\"" #KEY "\"", std::strlen("\"" #KEY "\""), containers::user, TYPE, offsetof(tweet, user)+offsetof(twitter_user, KEY));
USER_FIELD(id, field_type::unsigned_integer);
USER_FIELD(screen_name, field_type::string);
#undef USER_FIELD
// Check for collisions with other (unused) hash keys in typical twitter JSON
#define NEG(key, depth) neg("\"" #key "\"", depth);
NEG(display_url, 9);
NEG(expanded_url, 9);
neg("\"h\":", 9);
NEG(indices, 9);
NEG(resize, 9);
NEG(url, 9);
neg("\"w\":", 9);
NEG(display_url, 8);
NEG(expanded_url, 8);
neg("\"h\":", 8);
NEG(indices, 8);
NEG(large, 8);
NEG(medium, 8);
NEG(resize, 8);
NEG(small, 8);
NEG(thumb, 8);
NEG(url, 8);
neg("\"w\":", 8);
NEG(display_url, 7);
NEG(expanded_url, 7);
NEG(id_str, 7);
NEG(id, 7);
NEG(indices, 7);
NEG(large, 7);
NEG(media_url_https, 7);
NEG(media_url, 7);
NEG(medium, 7);
NEG(name, 7);
NEG(sizes, 7);
NEG(small, 7);
NEG(source_status_id_str, 7);
NEG(source_status_id, 7);
NEG(thumb, 7);
NEG(type, 7);
NEG(url, 7);
NEG(urls, 7);
NEG(description, 6);
NEG(display_url, 6);
NEG(expanded_url, 6);
NEG(id_str, 6);
NEG(id, 6);
NEG(indices, 6);
NEG(media_url_https, 6);
NEG(media_url, 6);
NEG(name, 6);
NEG(sizes, 6);
NEG(source_status_id_str, 6);
NEG(source_status_id, 6);
NEG(type, 6);
NEG(url, 6);
NEG(urls, 6);
NEG(contributors_enabled, 5);
NEG(default_profile_image, 5);
NEG(default_profile, 5);
NEG(description, 5);
NEG(entities, 5);
NEG(favourites_count, 5);
NEG(follow_request_sent, 5);
NEG(followers_count, 5);
NEG(following, 5);
NEG(friends_count, 5);
NEG(geo_enabled, 5);
NEG(hashtags, 5);
NEG(id_str, 5);
NEG(id, 5);
NEG(is_translation_enabled, 5);
NEG(is_translator, 5);
NEG(iso_language_code, 5);
NEG(lang, 5);
NEG(listed_count, 5);
NEG(location, 5);
NEG(media, 5);
NEG(name, 5);
NEG(notifications, 5);
NEG(profile_background_color, 5);
NEG(profile_background_image_url_https, 5);
NEG(profile_background_image_url, 5);
NEG(profile_background_tile, 5);
NEG(profile_banner_url, 5);
NEG(profile_image_url_https, 5);
NEG(profile_image_url, 5);
NEG(profile_link_color, 5);
NEG(profile_sidebar_border_color, 5);
NEG(profile_sidebar_fill_color, 5);
NEG(profile_text_color, 5);
NEG(profile_use_background_image, 5);
NEG(protected, 5);
NEG(result_type, 5);
NEG(statuses_count, 5);
NEG(symbols, 5);
NEG(time_zone, 5);
NEG(url, 5);
NEG(urls, 5);
NEG(user_mentions, 5);
NEG(utc_offset, 5);
NEG(verified, 5);
NEG(contributors_enabled, 4);
NEG(contributors, 4);
NEG(coordinates, 4);
NEG(default_profile_image, 4);
NEG(default_profile, 4);
NEG(description, 4);
NEG(entities, 4);
NEG(favorited, 4);
NEG(favourites_count, 4);
NEG(follow_request_sent, 4);
NEG(followers_count, 4);
NEG(following, 4);
NEG(friends_count, 4);
NEG(geo_enabled, 4);
NEG(geo, 4);
NEG(hashtags, 4);
NEG(id_str, 4);
NEG(in_reply_to_screen_name, 4);
NEG(in_reply_to_status_id_str, 4);
NEG(in_reply_to_user_id_str, 4);
NEG(in_reply_to_user_id, 4);
NEG(is_translation_enabled, 4);
NEG(is_translator, 4);
NEG(iso_language_code, 4);
NEG(lang, 4);
NEG(listed_count, 4);
NEG(location, 4);
NEG(media, 4);
NEG(metadata, 4);
NEG(name, 4);
NEG(notifications, 4);
NEG(place, 4);
NEG(possibly_sensitive, 4);
NEG(profile_background_color, 4);
NEG(profile_background_image_url_https, 4);
NEG(profile_background_image_url, 4);
NEG(profile_background_tile, 4);
NEG(profile_banner_url, 4);
NEG(profile_image_url_https, 4);
NEG(profile_image_url, 4);
NEG(profile_link_color, 4);
NEG(profile_sidebar_border_color, 4);
NEG(profile_sidebar_fill_color, 4);
NEG(profile_text_color, 4);
NEG(profile_use_background_image, 4);
NEG(protected, 4);
NEG(result_type, 4);
NEG(retweeted, 4);
NEG(source, 4);
NEG(statuses_count, 4);
NEG(symbols, 4);
NEG(time_zone, 4);
NEG(truncated, 4);
NEG(url, 4);
NEG(urls, 4);
NEG(user_mentions, 4);
NEG(utc_offset, 4);
NEG(verified, 4);
NEG(contributors, 3);
NEG(coordinates, 3);
NEG(entities, 3);
NEG(favorited, 3);
NEG(geo, 3);
NEG(id_str, 3);
NEG(in_reply_to_screen_name, 3);
NEG(in_reply_to_status_id_str, 3);
NEG(in_reply_to_user_id_str, 3);
NEG(in_reply_to_user_id, 3);
NEG(lang, 3);
NEG(metadata, 3);
NEG(place, 3);
NEG(possibly_sensitive, 3);
NEG(retweeted_status, 3);
NEG(retweeted, 3);
NEG(source, 3);
NEG(truncated, 3);
NEG(completed_in, 2);
NEG(count, 2);
NEG(max_id_str, 2);
NEG(max_id, 2);
NEG(next_results, 2);
NEG(query, 2);
NEG(refresh_url, 2);
NEG(since_id_str, 2);
NEG(since_id, 2);
NEG(search_metadata, 1);
#undef NEG
}
// sax_tweet_reader_visitor::field_lookup::find_min() {
// int min_count = 100000;
// for (int a=0;a<4;a++) {
// for (int b=0;b<4;b++) {
// for (int c=0;c<4;c++) {
// sax_tweet_reader_visitor::field_lookup fields(a,b,c);
// if (fields.collision_count) { continue; }
// if (fields.zero_emission) { continue; }
// if (fields.conflict_count < min_count) { printf("min=%d,%d,%d (%d)", a, b, c, fields.conflict_count); }
// }
// }
// }
// }
} // namespace partial_tweets

View File

@ -0,0 +1,42 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "partial_tweets.h"
namespace partial_tweets {
using namespace simdjson;
class simdjson_dom {
dom::parser parser{};
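// in_reply_to_status_id is null for tweets that are not replies; record 0 in that case.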
simdjson_really_inline uint64_t nullable_int(dom::element element) {
if (element.is_null()) { return 0; }
return element;
}
public:
bool run(const padded_string &json, std::vector<tweet> &tweets) {
for (dom::element tweet : parser.parse(json)["statuses"]) {
auto user = tweet["user"];
tweets.emplace_back(partial_tweets::tweet{
tweet["created_at"],
tweet["id"],
tweet["text"],
nullable_int(tweet["in_reply_to_status_id"]),
{ user["id"], user["screen_name"] },
tweet["retweet_count"],
tweet["favorite_count"]
});
}
return true;
}
};
BENCHMARK_TEMPLATE(partial_tweets, simdjson_dom);
} // namespace partial_tweets
#endif // SIMDJSON_EXCEPTIONS
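Under the new layout, adding a competitor to a benchmark is mostly mechanical: a header defines a class with a run() method that fills the result vector, then registers it with BENCHMARK_TEMPLATE against the benchmark function of the same namespace. A minimal sketch of what an additional implementation header could look like (the my_parser class and the SIMDJSON_COMPETITION_MYPARSER guard are hypothetical names used only for illustration, not part of the repository):

#pragma once
#ifdef SIMDJSON_COMPETITION_MYPARSER // hypothetical guard, by analogy with SIMDJSON_COMPETITION_YYJSON

#include "partial_tweets.h"

namespace partial_tweets {

// Any class exposing run(json, results) can plug into the shared runner.
class my_parser {
public:
  bool run(const simdjson::padded_string &json, std::vector<tweet> &tweets) {
    // Parse `json` with the third-party library and push one tweet per status.
    (void)json; (void)tweets;
    return false; // report failure until the parser is actually wired up
  }
};

BENCHMARK_TEMPLATE(partial_tweets, my_parser);

} // namespace partial_tweets

#endif // SIMDJSON_COMPETITION_MYPARSER

The runner supplied by partial_tweets.h then handles loading twitter.json, clearing the result vector between iterations, and diffing the results against the simdjson_dom reference.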

View File

@ -0,0 +1,48 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "partial_tweets.h"
namespace partial_tweets {
using namespace simdjson;
using namespace simdjson::builtin;
class simdjson_ondemand {
ondemand::parser parser{};
simdjson_really_inline uint64_t nullable_int(ondemand::value value) {
if (value.is_null()) { return 0; }
return value;
}
simdjson_really_inline twitter_user read_user(ondemand::object user) {
return { user.find_field("id"), user.find_field("screen_name") };
}
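// Note: ondemand::find_field() matches fields in document order (unlike operator[] /
// find_field_unordered()), so these lookups follow the field order used in twitter.json.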
public:
bool run(const padded_string &json, std::vector<tweet> &tweets) {
// Walk the document, parsing the tweets as we go
auto doc = parser.iterate(json);
for (ondemand::object tweet : doc.find_field("statuses")) {
tweets.emplace_back(partial_tweets::tweet{
tweet.find_field("created_at"),
tweet.find_field("id"),
tweet.find_field("text"),
nullable_int(tweet.find_field("in_reply_to_status_id")),
read_user(tweet.find_field("user")),
tweet.find_field("retweet_count"),
tweet.find_field("favorite_count")
});
}
return true;
}
};
BENCHMARK_TEMPLATE(partial_tweets, simdjson_ondemand);
} // namespace partial_tweets
#endif // SIMDJSON_EXCEPTIONS

View File

@ -1,16 +1,13 @@
#pragma once
#ifdef SIMDJSON_COMPETITION_YYJSON
#include "partial_tweets.h"
namespace partial_tweets {
class Yyjson {
public:
simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }
simdjson_really_inline size_t ItemCount() { return tweets.size(); }
private:
std::vector<tweet> tweets{};
class yyjson {
dom::parser parser{};
simdjson_really_inline std::string_view get_string_view(yyjson_val *obj, std::string_view key) {
auto val = yyjson_obj_getn(obj, key.data(), key.length());
@ -20,15 +17,17 @@ private:
auto val = yyjson_obj_getn(obj, key.data(), key.length());
return yyjson_get_uint(val);
}
simdjson_really_inline uint64_t get_nullable_uint64(yyjson_val *obj, std::string_view key) {
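// yyjson_get_uint() returns 0 for non-integer values (including null), so a null
// in_reply_to_status_id comes back as 0, matching nullable_int() in the simdjson implementations.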
auto val = yyjson_obj_getn(obj, key.data(), key.length());
return yyjson_get_uint(val);
}
simdjson_really_inline partial_tweets::twitter_user get_user(yyjson_val *obj, std::string_view key) {
auto user = yyjson_obj_getn(obj, key.data(), key.length());
return { get_uint64(user, "id"), get_string_view(user, "screen_name") };
}
public:
simdjson_really_inline bool Run(const padded_string &json) {
tweets.clear();
bool run(const padded_string &json, std::vector<tweet> &tweets) {
// Walk the document, parsing the tweets as we go
yyjson_doc *doc = yyjson_read(json.data(), json.size(), 0);
if (!doc) { return false; }
@ -43,17 +42,20 @@ public:
get_string_view(tweet, "created_at"),
get_uint64 (tweet, "id"),
get_string_view(tweet, "text"),
get_uint64 (tweet, "in_reply_to_status_id"),
get_nullable_uint64 (tweet, "in_reply_to_status_id"),
get_user (tweet, "user"),
get_uint64 (tweet, "retweet_count"),
get_uint64 (tweet, "favorite_count")
});
}
return true;
}
};
BENCHMARK_TEMPLATE(PartialTweets, Yyjson);
BENCHMARK_TEMPLATE(partial_tweets, yyjson);
} // namespace partial_tweets
#endif // SIMDJSON_COMPETITION_YYJSON