Rearrange benchmarks to be easier to create
This commit is contained in:
parent
3af54a9978
commit
5add8ac255
|
@ -1,13 +1,5 @@
|
|||
include_directories( . linux )
|
||||
link_libraries(simdjson-windows-headers test-data)
|
||||
|
||||
# bench_sax links against the source
|
||||
if (TARGET benchmark::benchmark)
|
||||
add_executable(bench_sax bench_sax.cpp)
|
||||
target_link_libraries(bench_sax PRIVATE simdjson-internal-flags simdjson-include-source benchmark::benchmark)
|
||||
endif (TARGET benchmark::benchmark)
|
||||
|
||||
# Everything else links against simdjson proper
|
||||
link_libraries(simdjson simdjson-flags)
|
||||
|
||||
add_executable(benchfeatures benchfeatures.cpp)
|
||||
|
@ -44,7 +36,6 @@ endif()
|
|||
|
||||
if (TARGET benchmark::benchmark)
|
||||
link_libraries(benchmark::benchmark)
|
||||
add_subdirectory(largerandom)
|
||||
add_executable(bench_parse_call bench_parse_call.cpp)
|
||||
add_executable(bench_dom_api bench_dom_api.cpp)
|
||||
if (SIMDJSON_EXCEPTIONS)
|
||||
|
|
|
@ -1,40 +1,35 @@
|
|||
#include "simdjson.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
|
||||
SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_YYJSON
|
||||
#include "yyjson.h"
|
||||
#endif
|
||||
|
||||
// This has to be last, for reasons I don't yet understand
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
SIMDJSON_POP_DISABLE_WARNINGS
|
||||
|
||||
// simdjson ondemand
|
||||
#include "partial_tweets/ondemand.h"
|
||||
#include "largerandom/ondemand.h"
|
||||
#include "largerandom/ondemand_unordered.h"
|
||||
#include "kostya/ondemand.h"
|
||||
#include "distinctuserid/ondemand.h"
|
||||
#include "find_tweet/ondemand.h"
|
||||
|
||||
// simdjson dom
|
||||
#include "partial_tweets/dom.h"
|
||||
#include "largerandom/dom.h"
|
||||
#include "kostya/dom.h"
|
||||
#include "distinctuserid/dom.h"
|
||||
#include "find_tweet/dom.h"
|
||||
|
||||
// yyjson
|
||||
#ifdef SIMDJSON_COMPETITION_YYJSON
|
||||
#include "partial_tweets/simdjson_dom.h"
|
||||
#include "partial_tweets/simdjson_ondemand.h"
|
||||
#include "partial_tweets/yyjson.h"
|
||||
#include "largerandom/yyjson.h"
|
||||
#include "kostya/yyjson.h"
|
||||
#include "distinctuserid/yyjson.h"
|
||||
#include "find_tweet/yyjson.h"
|
||||
#endif
|
||||
|
||||
#include "large_random/simdjson_dom.h"
|
||||
#include "large_random/simdjson_ondemand.h"
|
||||
#include "large_random/simdjson_ondemand_unordered.h"
|
||||
#include "large_random/yyjson.h"
|
||||
|
||||
#include "kostya/simdjson_dom.h"
|
||||
#include "kostya/simdjson_ondemand.h"
|
||||
#include "kostya/yyjson.h"
|
||||
|
||||
#include "distinct_user_id/simdjson_dom.h"
|
||||
#include "distinct_user_id/simdjson_ondemand.h"
|
||||
#include "distinct_user_id/yyjson.h"
|
||||
|
||||
#include "find_tweet/simdjson_dom.h"
|
||||
#include "find_tweet/simdjson_ondemand.h"
|
||||
#include "find_tweet/yyjson.h"
|
||||
|
||||
BENCHMARK_MAIN();
|
||||
|
|
|
@ -1,14 +0,0 @@
|
|||
#include "simdjson.h"
|
||||
#include "simdjson.cpp"
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
|
||||
#include <benchmark/benchmark.h>
|
||||
SIMDJSON_POP_DISABLE_WARNINGS
|
||||
|
||||
#include "partial_tweets/sax.h"
|
||||
#include "largerandom/sax.h"
|
||||
|
||||
BENCHMARK_MAIN();
|
|
@ -0,0 +1,50 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "json_benchmark/file_runner.h"
|
||||
#include <vector>
|
||||
|
||||
namespace distinct_user_id {
|
||||
|
||||
template<typename I>
|
||||
struct runner : public json_benchmark::file_runner<I> {
|
||||
std::vector<uint64_t> ids{};
|
||||
|
||||
public:
|
||||
bool setup(benchmark::State &state) {
|
||||
return this->load_json(state, json_benchmark::TWITTER_JSON);
|
||||
}
|
||||
|
||||
bool before_run(benchmark::State &state) {
|
||||
ids.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool run(benchmark::State &) {
|
||||
return this->implementation.run(this->json, ids);
|
||||
}
|
||||
|
||||
bool after_run(benchmark::State &state) {
|
||||
std::sort(ids.begin(), ids.end());
|
||||
auto last = std::unique(ids.begin(), ids.end());
|
||||
ids.erase(last, ids.end());
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
bool diff(benchmark::State &state, runner<R> &reference) {
|
||||
return diff_results(state, ids, reference.ids);
|
||||
}
|
||||
|
||||
size_t items_per_iteration() {
|
||||
return ids.size();
|
||||
}
|
||||
};
|
||||
|
||||
struct simdjson_dom;
|
||||
|
||||
template<typename I> simdjson_really_inline static void distinct_user_id(benchmark::State &state) {
|
||||
json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
|
||||
}
|
||||
|
||||
} // namespace distinct_user_id
|
|
@ -0,0 +1,37 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "distinct_user_id.h"
|
||||
|
||||
namespace distinct_user_id {
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
class simdjson_dom {
|
||||
dom::parser parser{};
|
||||
|
||||
public:
|
||||
bool run(const simdjson::padded_string &json, std::vector<uint64_t> &ids) {
|
||||
// Walk the document, parsing as we go
|
||||
auto doc = parser.parse(json);
|
||||
for (dom::object tweet : doc["statuses"]) {
|
||||
// We believe that all statuses have a matching
|
||||
// user, and we are willing to throw when they do not.
|
||||
ids.push_back(tweet["user"]["id"]);
|
||||
// Not all tweets have a "retweeted_status", but when they do
|
||||
// we want to go and find the user within.
|
||||
auto retweet = tweet["retweeted_status"];
|
||||
if(retweet.error() != NO_SUCH_FIELD) {
|
||||
ids.push_back(retweet["user"]["id"]);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(distinct_user_id, simdjson_dom);
|
||||
|
||||
} // namespace distinct_user_id
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,38 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "distinct_user_id.h"
|
||||
|
||||
namespace distinct_user_id {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
class simdjson_ondemand {
|
||||
ondemand::parser parser{};
|
||||
public:
|
||||
bool run(const simdjson::padded_string &json, std::vector<uint64_t> &ids) {
|
||||
// Walk the document, parsing as we go
|
||||
auto doc = parser.iterate(json);
|
||||
for (ondemand::object tweet : doc.find_field("statuses")) {
|
||||
// We believe that all statuses have a matching
|
||||
// user, and we are willing to throw when they do not.
|
||||
ids.push_back(tweet.find_field("user").find_field("id"));
|
||||
// Not all tweets have a "retweeted_status", but when they do
|
||||
// we want to go and find the user within.
|
||||
auto retweet = tweet.find_field("retweeted_status");
|
||||
if(!retweet.error()) {
|
||||
ids.push_back(retweet.find_field("user").find_field("id"));
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(distinct_user_id, simdjson_ondemand);
|
||||
|
||||
} // namespace distinct_user_id
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -1,21 +1,14 @@
|
|||
#pragma once
|
||||
|
||||
#include "distinctuserid.h"
|
||||
#ifdef SIMDJSON_COMPETITION_YYJSON
|
||||
|
||||
#include "distinct_user_id.h"
|
||||
|
||||
namespace distinct_user_id {
|
||||
|
||||
class Yyjson {
|
||||
class yyjson {
|
||||
public:
|
||||
simdjson_really_inline const std::vector<int64_t> &Result() { return ids; }
|
||||
simdjson_really_inline size_t ItemCount() { return ids.size(); }
|
||||
|
||||
private:
|
||||
std::vector<int64_t> ids{};
|
||||
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json) {
|
||||
ids.clear();
|
||||
|
||||
bool run(const simdjson::padded_string &json, std::vector<uint64_t> &ids) {
|
||||
// Walk the document, parsing the tweets as we go
|
||||
yyjson_doc *doc = yyjson_read(json.data(), json.size(), 0);
|
||||
if (!doc) { return false; }
|
||||
|
@ -38,12 +31,14 @@ public:
|
|||
ids.push_back(yyjson_get_sint(id));
|
||||
}
|
||||
}
|
||||
remove_duplicates(ids);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(DistinctUserID, Yyjson);
|
||||
BENCHMARK_TEMPLATE(distinct_user_id, yyjson);
|
||||
|
||||
} // namespace partial_tweets
|
||||
} // namespace distinct_user_id
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_YYJSON
|
|
@ -1,52 +0,0 @@
|
|||
|
||||
#pragma once
|
||||
#include <vector>
|
||||
#include <cstdint>
|
||||
#include "event_counter.h"
|
||||
#include "json_benchmark.h"
|
||||
|
||||
|
||||
//
|
||||
// Interface
|
||||
//
|
||||
|
||||
namespace distinct_user_id {
|
||||
template<typename T> static void DistinctUserID(benchmark::State &state);
|
||||
|
||||
/**
 * Returns true when the two NUL-terminated strings have identical contents.
 * Declared `inline` because it is defined in a header; a plain definition
 * would be duplicated in every translation unit that includes it.
 */
inline bool equals(const char *s1, const char *s2) { return strcmp(s1, s2) == 0; }
|
||||
|
||||
/**
 * Sorts `v` in ascending order and drops repeated values in place.
 * Declared `inline` because it is defined in a header; a plain definition
 * would be duplicated in every translation unit that includes it.
 */
inline void remove_duplicates(std::vector<int64_t> &v) {
  std::sort(v.begin(), v.end());
  // After sorting, equal values are adjacent, so unique() can compact them.
  v.erase(std::unique(v.begin(), v.end()), v.end());
}
|
||||
} // namespace
|
||||
|
||||
//
|
||||
// Implementation
|
||||
//
|
||||
|
||||
#include "dom.h"
|
||||
|
||||
|
||||
namespace distinct_user_id {
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
/**
 * Benchmark entry point: loads twitter.json and hands it to JsonBenchmark,
 * running implementation `T` with `Dom` as the reference.
 */
template<typename T> static void DistinctUserID(benchmark::State &state) {
  constexpr const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";

  // Read the input document; report the error and skip the benchmark on failure.
  padded_string json;
  error_code error = padded_string::load(TWITTER_JSON).get(json);
  if (error) {
    std::cerr << error << std::endl;
    state.SkipWithError("error loading");
    return;
  }

  JsonBenchmark<T, Dom>(state, json);
}
|
||||
|
||||
} // namespace distinct_user_id
|
|
@ -1,45 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "distinctuserid.h"
|
||||
|
||||
namespace distinct_user_id {
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
class Dom {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
simdjson_really_inline const std::vector<int64_t> &Result() { return ids; }
|
||||
simdjson_really_inline size_t ItemCount() { return ids.size(); }
|
||||
|
||||
private:
|
||||
dom::parser parser{};
|
||||
std::vector<int64_t> ids{};
|
||||
};
|
||||
|
||||
simdjson_really_inline bool Dom::Run(const padded_string &json) {
|
||||
ids.clear();
|
||||
// Walk the document, parsing as we go
|
||||
auto doc = parser.parse(json);
|
||||
for (dom::object tweet : doc["statuses"]) {
|
||||
// We believe that all statuses have a matching
|
||||
// user, and we are willing to throw when they do not.
|
||||
ids.push_back(tweet["user"]["id"]);
|
||||
// Not all tweets have a "retweeted_status", but when they do
|
||||
// we want to go and find the user within.
|
||||
auto retweet = tweet["retweeted_status"];
|
||||
if(retweet.error() != NO_SUCH_FIELD) {
|
||||
ids.push_back(retweet["user"]["id"]);
|
||||
}
|
||||
}
|
||||
remove_duplicates(ids);
|
||||
return true;
|
||||
}
|
||||
|
||||
BENCHMARK_TEMPLATE(DistinctUserID, Dom);
|
||||
|
||||
} // namespace distinct_user_id
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -1,55 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "distinctuserid.h"
|
||||
|
||||
namespace distinct_user_id {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
|
||||
class OnDemand {
|
||||
public:
|
||||
OnDemand() {
|
||||
if(!displayed_implementation) {
|
||||
std::cout << "On Demand implementation: " << builtin_implementation()->name() << std::endl;
|
||||
displayed_implementation = true;
|
||||
}
|
||||
}
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
simdjson_really_inline const std::vector<int64_t> &Result() { return ids; }
|
||||
simdjson_really_inline size_t ItemCount() { return ids.size(); }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
std::vector<int64_t> ids{};
|
||||
|
||||
static inline bool displayed_implementation = false;
|
||||
};
|
||||
|
||||
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
|
||||
ids.clear();
|
||||
// Walk the document, parsing as we go
|
||||
auto doc = parser.iterate(json);
|
||||
for (ondemand::object tweet : doc.find_field("statuses")) {
|
||||
// We believe that all statuses have a matching
|
||||
// user, and we are willing to throw when they do not.
|
||||
ids.push_back(tweet.find_field("user").find_field("id"));
|
||||
// Not all tweets have a "retweeted_status", but when they do
|
||||
// we want to go and find the user within.
|
||||
auto retweet = tweet.find_field("retweeted_status");
|
||||
if(!retweet.error()) {
|
||||
ids.push_back(retweet.find_field("user").find_field("id"));
|
||||
}
|
||||
}
|
||||
remove_duplicates(ids);
|
||||
return true;
|
||||
}
|
||||
|
||||
BENCHMARK_TEMPLATE(DistinctUserID, OnDemand);
|
||||
|
||||
} // namespace distinct_user_id
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -1,38 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "find_tweet.h"
|
||||
|
||||
namespace find_tweet {
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
class Dom {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
simdjson_really_inline std::string_view Result() { return text; }
|
||||
simdjson_really_inline size_t ItemCount() { return 1; }
|
||||
|
||||
private:
|
||||
dom::parser parser{};
|
||||
std::string_view text{};
|
||||
};
|
||||
|
||||
simdjson_really_inline bool Dom::Run(const padded_string &json) {
|
||||
text = "";
|
||||
auto doc = parser.parse(json);
|
||||
for (dom::object tweet : doc["statuses"]) {
|
||||
if (uint64_t(tweet["id"]) == TWEET_ID) {
|
||||
text = tweet["text"];
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
BENCHMARK_TEMPLATE(FindTweet, Dom);
|
||||
|
||||
} // namespace find_tweet
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -1,45 +1,38 @@
|
|||
|
||||
#pragma once
|
||||
#include <vector>
|
||||
#include <cstdint>
|
||||
#include "event_counter.h"
|
||||
#include "json_benchmark.h"
|
||||
|
||||
|
||||
//
|
||||
// Interface
|
||||
//
|
||||
|
||||
namespace find_tweet {
|
||||
template<typename T> static void FindTweet(benchmark::State &state);
|
||||
const uint64_t TWEET_ID = 505874901689851900;
|
||||
} // namespace
|
||||
|
||||
//
|
||||
// Implementation
|
||||
//
|
||||
|
||||
#include "dom.h"
|
||||
|
||||
#include "json_benchmark/file_runner.h"
|
||||
|
||||
namespace find_tweet {
|
||||
|
||||
using namespace simdjson;
|
||||
template<typename I>
|
||||
struct runner : public json_benchmark::file_runner<I> {
|
||||
std::string_view text;
|
||||
|
||||
template<typename T> static void FindTweet(benchmark::State &state) {
|
||||
//
|
||||
// Load the JSON file
|
||||
//
|
||||
constexpr const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
|
||||
error_code error;
|
||||
padded_string json;
|
||||
if ((error = padded_string::load(TWITTER_JSON).get(json))) {
|
||||
std::cerr << error << std::endl;
|
||||
state.SkipWithError("error loading");
|
||||
return;
|
||||
public:
|
||||
bool setup(benchmark::State &state) {
|
||||
return this->load_json(state, json_benchmark::TWITTER_JSON);
|
||||
}
|
||||
|
||||
JsonBenchmark<T, Dom>(state, json);
|
||||
bool before_run(benchmark::State &state) {
|
||||
text = "";
|
||||
return true;
|
||||
}
|
||||
|
||||
bool run(benchmark::State &) {
|
||||
return this->implementation.run(this->json, 505874901689851900ULL, text);
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
bool diff(benchmark::State &state, runner<R> &reference) {
|
||||
return diff_results(state, text, reference.text);
|
||||
}
|
||||
};
|
||||
|
||||
struct simdjson_dom;
|
||||
|
||||
template<typename I> simdjson_really_inline static void find_tweet(benchmark::State &state) {
|
||||
json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
|
||||
}
|
||||
|
||||
} // namespace find_tweet
|
||||
|
|
|
@ -1,49 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "find_tweet.h"
|
||||
|
||||
namespace find_tweet {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
|
||||
class OnDemand {
|
||||
public:
|
||||
OnDemand() {
|
||||
if(!displayed_implementation) {
|
||||
std::cout << "On Demand implementation: " << builtin_implementation()->name() << std::endl;
|
||||
displayed_implementation = true;
|
||||
}
|
||||
}
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
simdjson_really_inline std::string_view Result() { return text; }
|
||||
simdjson_really_inline size_t ItemCount() { return 1; }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
std::string_view text{};
|
||||
|
||||
static inline bool displayed_implementation = false;
|
||||
};
|
||||
|
||||
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
|
||||
text = "";
|
||||
// Walk the document, parsing as we go
|
||||
auto doc = parser.iterate(json);
|
||||
for (ondemand::object tweet : doc.find_field("statuses")) {
|
||||
if (uint64_t(tweet.find_field("id")) == TWEET_ID) {
|
||||
text = tweet.find_field("text");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
BENCHMARK_TEMPLATE(FindTweet, OnDemand);
|
||||
|
||||
} // namespace find_tweet
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,31 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "find_tweet.h"
|
||||
|
||||
namespace find_tweet {
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
class simdjson_dom {
|
||||
dom::parser parser{};
|
||||
public:
|
||||
bool run(const simdjson::padded_string &json, uint64_t find_id, std::string_view &text) {
|
||||
text = "";
|
||||
auto doc = parser.parse(json);
|
||||
for (auto tweet : doc["statuses"]) {
|
||||
if (uint64_t(tweet["id"]) == find_id) {
|
||||
text = tweet["text"];
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(find_tweet, simdjson_dom);
|
||||
|
||||
} // namespace find_tweet
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,32 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "find_tweet.h"
|
||||
|
||||
namespace find_tweet {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
class simdjson_ondemand {
|
||||
ondemand::parser parser{};
|
||||
public:
|
||||
bool run(const simdjson::padded_string &json, uint64_t find_id, std::string_view &text) {
|
||||
// Walk the document, parsing as we go
|
||||
auto doc = parser.iterate(json);
|
||||
for (auto tweet : doc.find_field("statuses")) {
|
||||
if (uint64_t(tweet.find_field("id")) == find_id) {
|
||||
text = tweet.find_field("text");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(find_tweet, simdjson_ondemand);
|
||||
|
||||
} // namespace find_tweet
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -1,21 +1,21 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_YYJSON
|
||||
|
||||
#include "find_tweet.h"
|
||||
|
||||
namespace find_tweet {
|
||||
|
||||
class Yyjson {
|
||||
class yyjson {
|
||||
public:
|
||||
simdjson_really_inline std::string_view Result() { return text; }
|
||||
simdjson_really_inline size_t ItemCount() { return 1; }
|
||||
simdjson_really_inline std::string_view result() { return text; }
|
||||
simdjson_really_inline size_t item_count() { return 1; }
|
||||
|
||||
private:
|
||||
std::string_view text{};
|
||||
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json) {
|
||||
text = "";
|
||||
|
||||
bool run(const simdjson::padded_string &json, uint64_t find_id, std::string_view &text) {
|
||||
// Walk the document, parsing the tweets as we go
|
||||
yyjson_doc *doc = yyjson_read(json.data(), json.size(), 0);
|
||||
if (!doc) { return false; }
|
||||
|
@ -26,7 +26,7 @@ public:
|
|||
yyjson_val *tweet;
|
||||
yyjson_arr_foreach(statuses, tweet_idx, tweets_max, tweet) {
|
||||
auto id = yyjson_obj_get(tweet, "id");
|
||||
if (yyjson_get_uint(id) == TWEET_ID) {
|
||||
if (yyjson_get_uint(id) == find_id) {
|
||||
auto _text = yyjson_obj_get(tweet, "text");
|
||||
text = yyjson_get_str(_text);
|
||||
return true;
|
||||
|
@ -36,6 +36,8 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(FindTweet, Yyjson);
|
||||
BENCHMARK_TEMPLATE(find_tweet, yyjson);
|
||||
|
||||
} // namespace find_tweet
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_YYJSON
|
||||
|
|
|
@ -1,126 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
template<typename T>
|
||||
static bool DiffResults(benchmark::State &state, const T &result, const T &reference);
|
||||
|
||||
template<typename T>
|
||||
struct ResultDiffer {
|
||||
static bool Diff(benchmark::State &state, const T &result, const T &reference) {
|
||||
if (result != reference) {
|
||||
std::stringstream str;
|
||||
str << "result incorrect: " << result << " ... reference: " << reference;
|
||||
state.SkipWithError(str.str().data());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct ResultDiffer<std::vector<T>> {
|
||||
static bool Diff(benchmark::State &state, const std::vector<T> &result, const std::vector<T> &reference) {
|
||||
auto result_iter = result.begin();
|
||||
auto reference_iter = reference.begin();
|
||||
while (result_iter != result.end() && reference_iter != reference.end()) {
|
||||
if (!DiffResults(state, *result_iter, *reference_iter)) { return false; }
|
||||
result_iter++;
|
||||
reference_iter++;
|
||||
}
|
||||
if (result_iter != result.end()) {
|
||||
std::stringstream str;
|
||||
str << "extra results (got " << result.size() << ", expected " << reference.size() << "): first extra element: " << *result_iter;
|
||||
state.SkipWithError(str.str().data());
|
||||
return false;
|
||||
} else if (reference_iter != reference.end()) {
|
||||
std::stringstream str;
|
||||
str << "missing results (got " << result.size() << ", expected " << reference.size() << "): first missing element: " << *reference_iter;
|
||||
state.SkipWithError(str.str().data());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
static bool DiffResults(benchmark::State &state, const T &result, const T &reference) {
|
||||
return ResultDiffer<T>::Diff(state, result, reference);
|
||||
}
|
||||
|
||||
template<typename B, typename R> static void JsonBenchmark(benchmark::State &state, const simdjson::padded_string &json) {
|
||||
event_collector collector(true);
|
||||
event_aggregate events;
|
||||
|
||||
// Warmup and equality check (make sure the data is right!)
|
||||
B bench;
|
||||
if (!bench.Run(json)) { state.SkipWithError("warmup document reading failed"); return; }
|
||||
{
|
||||
R reference;
|
||||
if (!reference.Run(json)) { state.SkipWithError("reference document reading failed"); return; }
|
||||
if (!DiffResults(state, bench.Result(), reference.Result())) { return; }
|
||||
}
|
||||
|
||||
// Run the benchmark
|
||||
for (simdjson_unused auto _ : state) {
|
||||
collector.start();
|
||||
|
||||
if (!bench.Run(json)) { state.SkipWithError("document reading failed"); return; }
|
||||
|
||||
events << collector.end();
|
||||
}
|
||||
|
||||
state.SetBytesProcessed(json.size() * state.iterations());
|
||||
state.SetItemsProcessed(bench.ItemCount() * state.iterations());
|
||||
state.counters["best_bytes_per_sec"] = benchmark::Counter(double(json.size()) / events.best.elapsed_sec());
|
||||
state.counters["best_items_per_sec"] = benchmark::Counter(double(bench.ItemCount()) / events.best.elapsed_sec());
|
||||
|
||||
state.counters["docs_per_sec"] = benchmark::Counter(1.0, benchmark::Counter::kIsIterationInvariantRate);
|
||||
state.counters["best_docs_per_sec"] = benchmark::Counter(1.0 / events.best.elapsed_sec());
|
||||
|
||||
if (collector.has_events()) {
|
||||
state.counters["instructions"] = events.instructions();
|
||||
state.counters["cycles"] = events.cycles();
|
||||
state.counters["branch_miss"] = events.branch_misses();
|
||||
state.counters["cache_miss"] = events.cache_misses();
|
||||
state.counters["cache_ref"] = events.cache_references();
|
||||
|
||||
state.counters["instructions_per_byte"] = events.instructions() / double(json.size());
|
||||
state.counters["instructions_per_cycle"] = events.instructions() / events.cycles();
|
||||
state.counters["cycles_per_byte"] = events.cycles() / double(json.size());
|
||||
state.counters["frequency"] = benchmark::Counter(events.cycles(), benchmark::Counter::kIsIterationInvariantRate);
|
||||
|
||||
state.counters["best_instructions"] = events.best.instructions();
|
||||
state.counters["best_cycles"] = events.best.cycles();
|
||||
state.counters["best_branch_miss"] = events.best.branch_misses();
|
||||
state.counters["best_cache_miss"] = events.best.cache_misses();
|
||||
state.counters["best_cache_ref"] = events.best.cache_references();
|
||||
|
||||
state.counters["best_instructions_per_byte"] = events.best.instructions() / double(json.size());
|
||||
state.counters["best_instructions_per_cycle"] = events.best.instructions() / events.best.cycles();
|
||||
state.counters["best_cycles_per_byte"] = events.best.cycles() / double(json.size());
|
||||
state.counters["best_frequency"] = events.best.cycles() / events.best.elapsed_sec();
|
||||
}
|
||||
state.counters["bytes"] = benchmark::Counter(double(json.size()));
|
||||
state.counters["items"] = benchmark::Counter(double(bench.ItemCount()));
|
||||
|
||||
// Build the label
|
||||
using namespace std;
|
||||
stringstream label;
|
||||
label << fixed << setprecision(2);
|
||||
label << "[best:";
|
||||
label << " throughput=" << setw(6) << (double(json.size()) / 1000000000.0 / events.best.elapsed_sec()) << " GB/s";
|
||||
label << " doc_throughput=" << setw(6) << uint64_t(1.0 / events.best.elapsed_sec()) << " docs/s";
|
||||
|
||||
if (collector.has_events()) {
|
||||
label << " instructions=" << setw(12) << uint64_t(events.best.instructions()) << setw(0);
|
||||
label << " cycles=" << setw(12) << uint64_t(events.best.cycles()) << setw(0);
|
||||
label << " branch_miss=" << setw(8) << uint64_t(events.best.branch_misses()) << setw(0);
|
||||
label << " cache_miss=" << setw(8) << uint64_t(events.best.cache_misses()) << setw(0);
|
||||
label << " cache_ref=" << setw(10) << uint64_t(events.best.cache_references()) << setw(0);
|
||||
}
|
||||
|
||||
label << " items=" << setw(10) << bench.ItemCount() << setw(0);
|
||||
label << " avg_time=" << setw(10) << uint64_t(events.elapsed_ns()) << setw(0) << " ns";
|
||||
label << "]";
|
||||
|
||||
state.SetLabel(label.str());
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
#pragma once
|
||||
|
||||
#include "runner_base.h"
|
||||
#include "simdjson.h"
|
||||
|
||||
namespace json_benchmark {
|
||||
|
||||
template<typename I>
|
||||
struct const_json_runner : public runner_base<I> {
|
||||
protected:
|
||||
const simdjson::padded_string &json;
|
||||
const_json_runner(const simdjson::padded_string &_json) : json{_json} {}
|
||||
|
||||
public:
|
||||
/** Get the total number of bytes processed in each iteration. Used for metrics like bytes/second. */
|
||||
size_t bytes_per_iteration() {
|
||||
return json.size();
|
||||
}
|
||||
|
||||
/** Get the total number of documents processed in each iteration. Used for metrics like documents/second. */
|
||||
size_t documents_per_iteration() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/** Get the total number of items processed in each iteration. Used for metrics like items/second. */
|
||||
size_t items_per_iteration() {
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace json_benchmark
|
|
@ -0,0 +1,8 @@
|
|||
#pragma once
|
||||
|
||||
namespace json_benchmark {
|
||||
|
||||
static constexpr const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
|
||||
static constexpr const char *NUMBERS_JSON = SIMDJSON_BENCHMARK_DATA_DIR "numbers.json";
|
||||
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
|
||||
template<typename T>
|
||||
static bool diff_results(benchmark::State &state, const T &result, const T &reference);
|
||||
|
||||
template<typename T>
|
||||
struct result_differ {
|
||||
static bool diff(benchmark::State &state, const T &result, const T &reference) {
|
||||
if (result != reference) {
|
||||
std::stringstream str;
|
||||
str << "result incorrect: " << result << " ... reference: " << reference;
|
||||
state.SkipWithError(str.str().data());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct result_differ<std::vector<T>> {
|
||||
static bool diff(benchmark::State &state, const std::vector<T> &result, const std::vector<T> &reference) {
|
||||
auto result_iter = result.begin();
|
||||
auto reference_iter = reference.begin();
|
||||
while (result_iter != result.end() && reference_iter != reference.end()) {
|
||||
if (!diff_results(state, *result_iter, *reference_iter)) { return false; }
|
||||
result_iter++;
|
||||
reference_iter++;
|
||||
}
|
||||
if (result_iter != result.end()) {
|
||||
std::stringstream str;
|
||||
str << "extra results (got " << result.size() << ", expected " << reference.size() << "): first extra element: " << *result_iter;
|
||||
state.SkipWithError(str.str().data());
|
||||
return false;
|
||||
} else if (reference_iter != reference.end()) {
|
||||
std::stringstream str;
|
||||
str << "missing results (got " << result.size() << ", expected " << reference.size() << "): first missing element: " << *reference_iter;
|
||||
state.SkipWithError(str.str().data());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
static bool diff_results(benchmark::State &state, const T &result, const T &reference) {
|
||||
return result_differ<T>::diff(state, result, reference);
|
||||
}
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
#pragma once
|
||||
|
||||
#include "json_benchmark/runner_base.h"
|
||||
#include "simdjson.h"
|
||||
|
||||
namespace json_benchmark {
|
||||
|
||||
template<typename I>
|
||||
struct file_runner : public runner_base<I> {
|
||||
protected:
|
||||
simdjson::padded_string json{};
|
||||
|
||||
bool load_json(benchmark::State &state, const char *file) {
|
||||
simdjson::error_code error;
|
||||
if ((error = simdjson::padded_string::load(file).get(json))) {
|
||||
std::stringstream err;
|
||||
err << "error loading " << file << ": " << error;
|
||||
state.SkipWithError(err.str().data());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public:
|
||||
/** Get the total number of bytes processed in each iteration. Used for metrics like bytes/second. */
|
||||
size_t bytes_per_iteration() {
|
||||
return json.size();
|
||||
}
|
||||
|
||||
/** Get the total number of documents processed in each iteration. Used for metrics like documents/second. */
|
||||
size_t documents_per_iteration() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/** Get the total number of items processed in each iteration. Used for metrics like items/second. */
|
||||
size_t items_per_iteration() {
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace json_benchmark
|
|
@ -0,0 +1,100 @@
|
|||
#pragma once
|
||||
|
||||
#include "simdjson.h"
|
||||
#include "event_counter.h"
|
||||
#include <iostream>
|
||||
|
||||
namespace json_benchmark {
|
||||
|
||||
void maybe_display_implementation() {
|
||||
static bool displayed_implementation = false;
|
||||
if(!displayed_implementation) {
|
||||
displayed_implementation = true;
|
||||
std::cout << "simdjson::dom implementation: " << simdjson::active_implementation->name() << std::endl;
|
||||
std::cout << "simdjson::ondemand implementation: " << simdjson::builtin_implementation()->name() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Drive one Google Benchmark run of runner `B`, validated against reference runner `R`.
 *
 * Steps, in order:
 *  1. Warm-up: bench.setup() and one bench.run().
 *  2. Reference: reference.setup(), one reference.run(), then bench.diff(state, reference).
 *  3. Timed loop: for every benchmark iteration call before_run(), then run() inside the
 *     collector.start()/collector.end() window, then after_run().
 *  4. Publish counters (overall and "best" iteration) and a human-readable label on `state`.
 *
 * Every failing step returns immediately. Failures of run/before_run/after_run are recorded
 * here with state.SkipWithError(); setup() and diff() failures are returned without an
 * additional message from this function.
 *
 * @tparam B runner under test; must provide setup, before_run, run, after_run, diff and the
 *           bytes_per_iteration / documents_per_iteration / items_per_iteration accessors.
 * @tparam R reference runner; must provide setup and run and be accepted by B::diff.
 */
template<typename B, typename R> static void run_json_benchmark(benchmark::State &state) {
  maybe_display_implementation();

  event_collector collector(true);
  event_aggregate events;

  // Warmup and equality check (make sure the data is right!)
  B bench;
  if (!bench.setup(state)) { return; }
  if (!bench.run(state)) { state.SkipWithError("warmup document reading failed"); return; }
  {
    R reference;
    if (!reference.setup(state)) { return; }
    if (!reference.run(state)) { state.SkipWithError("reference document reading failed"); return; }
    if (!bench.diff(state, reference)) { return; }
  }

  // Run the benchmark
  for (simdjson_unused auto _ : state) {
    // Bail out on failure, like every other step: otherwise run() would still be executed
    // (and measured) on state that before_run() failed to prepare.
    if (!bench.before_run(state)) { state.SkipWithError("before_run failed"); return; }
    collector.start();
    if (!bench.run(state)) { state.SkipWithError("run failed"); return; }
    events << collector.end();
    if (!bench.after_run(state)) { state.SkipWithError("after_run failed"); return; }
  }

  state.SetBytesProcessed(bench.bytes_per_iteration() * state.iterations());
  state.SetItemsProcessed(bench.items_per_iteration() * state.iterations());
  state.counters["best_docs_per_sec"] = benchmark::Counter(double(bench.documents_per_iteration()) / events.best.elapsed_sec());
  state.counters["best_bytes_per_sec"] = benchmark::Counter(double(bench.bytes_per_iteration()) / events.best.elapsed_sec());
  state.counters["best_items_per_sec"] = benchmark::Counter(double(bench.items_per_iteration()) / events.best.elapsed_sec());
  state.counters["docs_per_sec"] = benchmark::Counter(double(bench.documents_per_iteration()), benchmark::Counter::kIsIterationInvariantRate);

  // Hardware-counter metrics are only published when the collector reports it has events.
  if (collector.has_events()) {
    state.counters["instructions"] = events.instructions();
    state.counters["cycles"]       = events.cycles();
    state.counters["branch_miss"]  = events.branch_misses();
    state.counters["cache_miss"]   = events.cache_misses();
    state.counters["cache_ref"]    = events.cache_references();

    state.counters["instructions_per_byte"]  = events.instructions() / double(bench.bytes_per_iteration());
    state.counters["instructions_per_cycle"] = events.instructions() / events.cycles();
    state.counters["cycles_per_byte"]        = events.cycles()       / double(bench.bytes_per_iteration());
    state.counters["frequency"]              = benchmark::Counter(events.cycles(), benchmark::Counter::kIsIterationInvariantRate);

    state.counters["best_instructions"] = events.best.instructions();
    state.counters["best_cycles"]       = events.best.cycles();
    state.counters["best_branch_miss"]  = events.best.branch_misses();
    state.counters["best_cache_miss"]   = events.best.cache_misses();
    state.counters["best_cache_ref"]    = events.best.cache_references();

    state.counters["best_instructions_per_byte"]  = events.best.instructions() / double(bench.bytes_per_iteration());
    state.counters["best_instructions_per_cycle"] = events.best.instructions() / events.best.cycles();
    state.counters["best_cycles_per_byte"]        = events.best.cycles()       / double(bench.bytes_per_iteration());
    state.counters["best_frequency"]              = events.best.cycles()       / events.best.elapsed_sec();
  }
  state.counters["bytes"] = benchmark::Counter(double(bench.bytes_per_iteration()));
  state.counters["items"] = benchmark::Counter(double(bench.items_per_iteration()));

  // Build the label
  using namespace std;
  stringstream label;
  label << fixed << setprecision(2);
  label << "[BEST:";
  label << " throughput="  << setw(6) << (double(bench.bytes_per_iteration()) / 1000000000.0 / events.best.elapsed_sec()) << " GB/s";
  label << " doc_throughput=" << setw(6) << uint64_t(bench.documents_per_iteration() / events.best.elapsed_sec()) << " docs/s";

  if (collector.has_events()) {
    label << " instructions=" << setw(12) << uint64_t(events.best.instructions()) << setw(0);
    label << " cycles="       << setw(12) << uint64_t(events.best.cycles()) << setw(0);
    label << " branch_miss="  << setw(8)  << uint64_t(events.best.branch_misses()) << setw(0);
    label << " cache_miss="   << setw(8)  << uint64_t(events.best.cache_misses()) << setw(0);
    label << " cache_ref="    << setw(10) << uint64_t(events.best.cache_references()) << setw(0);
  }

  label << " items=" << setw(10) << bench.items_per_iteration() << setw(0);
  label << " avg_time=" << setw(10) << uint64_t(events.elapsed_ns()) << setw(0) << " ns";
  label << "]";

  state.SetLabel(label.str());
}
|
||||
|
||||
} // namespace json_benchmark
|
|
@ -0,0 +1,42 @@
|
|||
#pragma once
|
||||
|
||||
#include "constants.h"
|
||||
#include "run_json_benchmark.h"
|
||||
#include "diff_results.h"
|
||||
|
||||
namespace json_benchmark {
|
||||
|
||||
//
|
||||
// Extend this to create a new type of test (e.g. partial_tweets).
|
||||
//
|
||||
template<typename I>
|
||||
struct runner_base {
|
||||
public:
|
||||
/** Run once, before all iterations. */
|
||||
simdjson_warn_unused bool setup(benchmark::State &) { return true; }
|
||||
|
||||
/** Run on each iteration. This is what gets benchmarked. */
|
||||
simdjson_warn_unused bool run(benchmark::State &state) {
|
||||
return implementation.run(state);
|
||||
}
|
||||
|
||||
/** Called before each iteration, to clear / set up state. */
|
||||
simdjson_warn_unused bool before_run(benchmark::State &state) { return true; }
|
||||
|
||||
/** Called after each iteration, to tear down / massage state. */
|
||||
simdjson_warn_unused bool after_run(benchmark::State &) { return true; }
|
||||
|
||||
/** Get the total number of bytes processed in each iteration. Used for metrics like bytes/second. */
|
||||
size_t bytes_per_iteration();
|
||||
|
||||
/** Get the total number of documents processed in each iteration. Used for metrics like documents/second. */
|
||||
size_t documents_per_iteration();
|
||||
|
||||
/** Get the total number of items processed in each iteration. Used for metrics like items/second. */
|
||||
size_t items_per_iteration();
|
||||
|
||||
protected:
|
||||
I implementation{};
|
||||
};
|
||||
|
||||
}
|
|
@ -1,69 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "kostya.h"
|
||||
|
||||
namespace kostya {
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
class Dom {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
|
||||
simdjson_really_inline size_t ItemCount() { return container.size(); }
|
||||
|
||||
private:
|
||||
dom::parser parser{};
|
||||
std::vector<my_point> container{};
|
||||
};
|
||||
|
||||
simdjson_really_inline bool Dom::Run(const padded_string &json) {
|
||||
container.clear();
|
||||
|
||||
for (auto point : parser.parse(json)["coordinates"]) {
|
||||
container.emplace_back(my_point{point["x"], point["y"], point["z"]});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
BENCHMARK_TEMPLATE(Kostya, Dom);
|
||||
|
||||
namespace sum {
|
||||
|
||||
class Dom {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline my_point &Result() { return sum; }
|
||||
simdjson_really_inline size_t ItemCount() { return count; }
|
||||
|
||||
private:
|
||||
dom::parser parser{};
|
||||
my_point sum{};
|
||||
size_t count{};
|
||||
};
|
||||
|
||||
simdjson_really_inline bool Dom::Run(const padded_string &json) {
|
||||
sum = { 0, 0, 0 };
|
||||
count = 0;
|
||||
|
||||
for (auto coord : parser.parse(json)["coordinates"]) {
|
||||
sum.x += double(coord["x"]);
|
||||
sum.y += double(coord["y"]);
|
||||
sum.z += double(coord["z"]);
|
||||
count++;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
BENCHMARK_TEMPLATE(KostyaSum, Dom);
|
||||
|
||||
} // namespace sum
|
||||
} // namespace kostya
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -1,96 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "kostya.h"
|
||||
|
||||
namespace kostya {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
class Iter {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
|
||||
simdjson_really_inline size_t ItemCount() { return container.size(); }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
std::vector<my_point> container{};
|
||||
|
||||
simdjson_really_inline simdjson_result<double> first_double(ondemand::json_iterator &iter, const char *key) {
|
||||
if (!iter.start_object() || ondemand::raw_json_string(iter.field_key()) != key || iter.field_value()) { throw "Invalid field"; }
|
||||
return iter.consume_double();
|
||||
}
|
||||
|
||||
simdjson_really_inline simdjson_result<double> next_double(ondemand::json_iterator &iter, const char *key) {
|
||||
if (!iter.has_next_field() || ondemand::raw_json_string(iter.field_key()) != key || iter.field_value()) { throw "Invalid field"; }
|
||||
return iter.consume_double();
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
simdjson_really_inline bool Iter::Run(const padded_string &json) {
|
||||
container.clear();
|
||||
|
||||
using std::cerr;
|
||||
using std::endl;
|
||||
auto iter = parser.iterate_raw(json).value();
|
||||
if (!iter.start_object() || !iter.find_field_raw("coordinates")) { cerr << "find coordinates field failed" << endl; return false; }
|
||||
if (iter.start_array()) {
|
||||
do {
|
||||
container.emplace_back(my_point{first_double(iter, "x"), next_double(iter, "y"), next_double(iter, "z")});
|
||||
if (iter.skip_container()) { return false; } // Skip the rest of the coordinates object
|
||||
} while (iter.has_next_element());
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
BENCHMARK_TEMPLATE(Kostya, Iter);
|
||||
|
||||
|
||||
namespace sum {
|
||||
|
||||
class Iter {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline my_point &Result() { return sum; }
|
||||
simdjson_really_inline size_t ItemCount() { return count; }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
my_point sum{};
|
||||
size_t count{};
|
||||
};
|
||||
|
||||
simdjson_really_inline bool Iter::Run(const padded_string &json) {
|
||||
sum = {0,0,0};
|
||||
count = 0;
|
||||
|
||||
auto iter = parser.iterate_raw(json).value();
|
||||
if (!iter.start_object() || !iter.find_field_raw("coordinates")) { return false; }
|
||||
if (!iter.start_array()) { return false; }
|
||||
do {
|
||||
if (!iter.start_object() || !iter.find_field_raw("x")) { return false; }
|
||||
sum.x += iter.consume_double();
|
||||
if (!iter.has_next_field() || !iter.find_field_raw("y")) { return false; }
|
||||
sum.y += iter.consume_double();
|
||||
if (!iter.has_next_field() || !iter.find_field_raw("z")) { return false; }
|
||||
sum.z += iter.consume_double();
|
||||
if (iter.skip_container()) { return false; } // Skip the rest of the coordinates object
|
||||
count++;
|
||||
} while (iter.has_next_element());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
BENCHMARK_TEMPLATE(KostyaSum, Iter);
|
||||
|
||||
} // namespace sum
|
||||
} // namespace kostya
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -2,17 +2,51 @@
|
|||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
//
|
||||
// Interface
|
||||
//
|
||||
#include "json_benchmark/const_json_runner.h"
|
||||
#include <vector>
|
||||
#include <random>
|
||||
|
||||
namespace kostya {
|
||||
template<typename T> static void Kostya(benchmark::State &state);
|
||||
namespace sum {
|
||||
template<typename T> static void KostyaSum(benchmark::State &state);
|
||||
|
||||
static const simdjson::padded_string &get_built_json_array();
|
||||
|
||||
struct point {
|
||||
double x;
|
||||
double y;
|
||||
double z;
|
||||
simdjson_really_inline bool operator==(const point &other) const {
|
||||
return x == other.x && y == other.y && z == other.z;
|
||||
}
|
||||
simdjson_really_inline bool operator!=(const point &other) const {
|
||||
return !(*this == other);
|
||||
}
|
||||
};
|
||||
|
||||
/** Write `p` as "x,y,z" followed by std::endl; returns the stream. */
simdjson_unused static std::ostream &operator<<(std::ostream &o, const point &p) {
  o << p.x << ",";
  o << p.y << ",";
  o << p.z;
  return o << std::endl;
}
|
||||
|
||||
using namespace simdjson;
|
||||
template<typename I>
|
||||
struct runner : public json_benchmark::const_json_runner<I> {
|
||||
std::vector<point> points;
|
||||
|
||||
public:
|
||||
runner() : json_benchmark::const_json_runner<I>(get_built_json_array()) {}
|
||||
|
||||
bool before_run(benchmark::State &state) {
|
||||
points.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool run(benchmark::State &) {
|
||||
return this->implementation.run(this->json, points);
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
bool diff(benchmark::State &state, runner<R> &reference) {
|
||||
return diff_results(state, points, reference.points);
|
||||
}
|
||||
};
|
||||
|
||||
static void append_coordinate(std::default_random_engine &e, std::uniform_real_distribution<> &dis, std::stringstream &myss) {
|
||||
using std::endl;
|
||||
|
@ -49,45 +83,15 @@ static std::string build_json_array(size_t N) {
|
|||
return answer;
|
||||
}
|
||||
|
||||
static const padded_string &get_built_json_array() {
|
||||
static padded_string json = build_json_array(524288);
|
||||
static const simdjson::padded_string &get_built_json_array() {
|
||||
static simdjson::padded_string json = build_json_array(524288);
|
||||
return json;
|
||||
}
|
||||
|
||||
struct my_point {
|
||||
double x;
|
||||
double y;
|
||||
double z;
|
||||
simdjson_really_inline bool operator==(const my_point &other) const {
|
||||
return x == other.x && y == other.y && z == other.z;
|
||||
}
|
||||
simdjson_really_inline bool operator!=(const my_point &other) const { return !(*this == other); }
|
||||
};
|
||||
struct simdjson_dom;
|
||||
|
||||
/** Write `p` as "x,y,z" followed by std::endl; returns the stream. */
simdjson_unused static std::ostream &operator<<(std::ostream &o, const my_point &p) {
  o << p.x << ",";
  o << p.y << ",";
  o << p.z;
  return o << std::endl;
}
|
||||
|
||||
} // namespace kostya
|
||||
|
||||
//
|
||||
// Implementation
|
||||
//
|
||||
#include <vector>
|
||||
#include "event_counter.h"
|
||||
#include "dom.h"
|
||||
#include "json_benchmark.h"
|
||||
|
||||
namespace kostya {
|
||||
|
||||
/** Benchmark entry point: run implementation T against the Dom reference on the generated JSON. */
template<typename T> static void Kostya(benchmark::State &state) {
  const auto &input = get_built_json_array();
  JsonBenchmark<T, Dom>(state, input);
}
|
||||
|
||||
namespace sum {
|
||||
/** Benchmark entry point: run implementation T against the Dom reference on the generated JSON. */
template<typename T> static void KostyaSum(benchmark::State &state) {
  const auto &input = get_built_json_array();
  JsonBenchmark<T, Dom>(state, input);
}
|
||||
/** Benchmark entry point: run runner<I>, checked against runner<simdjson_dom> as the reference. */
template<typename I> simdjson_really_inline static void kostya(benchmark::State &state) {
  using bench_runner = runner<I>;
  using reference_runner = runner<simdjson_dom>;
  json_benchmark::run_json_benchmark<bench_runner, reference_runner>(state);
}
|
||||
|
||||
} // namespace kostya
|
||||
|
|
|
@ -1,74 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "kostya.h"
|
||||
|
||||
namespace kostya {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
class OnDemand {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
|
||||
simdjson_really_inline size_t ItemCount() { return container.size(); }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
std::vector<my_point> container{};
|
||||
};
|
||||
|
||||
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
|
||||
container.clear();
|
||||
|
||||
using std::cout;
|
||||
using std::endl;
|
||||
|
||||
auto doc = parser.iterate(json);
|
||||
for (ondemand::object coord : doc.find_field("coordinates")) {
|
||||
container.emplace_back(my_point{coord.find_field("x"), coord.find_field("y"), coord.find_field("z")});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
BENCHMARK_TEMPLATE(Kostya, OnDemand);
|
||||
|
||||
|
||||
namespace sum {
|
||||
|
||||
class OnDemand {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
simdjson_really_inline my_point &Result() { return sum; }
|
||||
simdjson_really_inline size_t ItemCount() { return count; }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
my_point sum{};
|
||||
size_t count{};
|
||||
};
|
||||
|
||||
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
|
||||
sum = {0,0,0};
|
||||
count = 0;
|
||||
|
||||
auto doc = parser.iterate(json);
|
||||
for (ondemand::object coord : doc.find_field("coordinates")) {
|
||||
sum.x += double(coord.find_field("x"));
|
||||
sum.y += double(coord.find_field("y"));
|
||||
sum.z += double(coord.find_field("z"));
|
||||
count++;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
BENCHMARK_TEMPLATE(KostyaSum, OnDemand);
|
||||
|
||||
} // namespace sum
|
||||
} // namespace kostya
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,26 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "kostya.h"
|
||||
|
||||
namespace kostya {
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
class simdjson_dom {
|
||||
dom::parser parser{};
|
||||
public:
|
||||
bool run(const simdjson::padded_string &json, std::vector<point> &points) {
|
||||
for (auto point : parser.parse(json)["coordinates"]) {
|
||||
points.emplace_back(kostya::point{point["x"], point["y"], point["z"]});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(kostya, simdjson_dom);
|
||||
|
||||
} // namespace kostya
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,28 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "kostya.h"
|
||||
|
||||
namespace kostya {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
class simdjson_ondemand {
|
||||
ondemand::parser parser{};
|
||||
public:
|
||||
bool run(const simdjson::padded_string &json, std::vector<point> &points) {
|
||||
auto doc = parser.iterate(json);
|
||||
for (ondemand::object point : doc.find_field("coordinates")) {
|
||||
points.emplace_back(kostya::point{point.find_field("x"), point.find_field("y"), point.find_field("z")});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(kostya, simdjson_ondemand);
|
||||
|
||||
} // namespace kostya
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -1,28 +1,19 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_YYJSON
|
||||
|
||||
#include "kostya.h"
|
||||
|
||||
namespace kostya {
|
||||
|
||||
class Yyjson {
|
||||
class yyjson {
|
||||
public:
|
||||
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
|
||||
simdjson_really_inline size_t ItemCount() { return container.size(); }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
std::vector<my_point> container{};
|
||||
|
||||
/** Look up `key` in the yyjson object `obj` and return the found value via yyjson_get_real(). */
simdjson_really_inline double get_double(yyjson_val *obj, std::string_view key) {
  return yyjson_get_real(yyjson_obj_getn(obj, key.data(), key.length()));
}
|
||||
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json) {
|
||||
container.clear();
|
||||
|
||||
// Walk the document, parsing the tweets as we go
|
||||
bool run(const simdjson::padded_string &json, std::vector<point> &points) {
|
||||
yyjson_doc *doc = yyjson_read(json.data(), json.size(), 0);
|
||||
if (!doc) { return false; }
|
||||
yyjson_val *root = yyjson_doc_get_root(doc);
|
||||
|
@ -31,12 +22,16 @@ public:
|
|||
size_t idx, max;
|
||||
yyjson_val *coord;
|
||||
yyjson_arr_foreach(coords, idx, max, coord) {
|
||||
container.emplace_back(my_point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
||||
points.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(Kostya, Yyjson);
|
||||
BENCHMARK_TEMPLATE(kostya, yyjson);
|
||||
|
||||
} // namespace kostya
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_YYJSON
|
||||
|
|
|
@ -0,0 +1,78 @@
|
|||
#pragma once
|
||||
|
||||
#include "json_benchmark/const_json_runner.h"
|
||||
#include <random>
|
||||
|
||||
namespace large_random {
|
||||
|
||||
static const simdjson::padded_string &get_built_json_array();
|
||||
|
||||
struct point {
|
||||
double x;
|
||||
double y;
|
||||
double z;
|
||||
simdjson_really_inline bool operator==(const point &other) const {
|
||||
return x == other.x && y == other.y && z == other.z;
|
||||
}
|
||||
simdjson_really_inline bool operator!=(const point &other) const {
|
||||
return !(*this == other);
|
||||
}
|
||||
};
|
||||
|
||||
/** Write `p` as "x,y,z" followed by std::endl; returns the stream. */
simdjson_unused static std::ostream &operator<<(std::ostream &o, const point &p) {
  o << p.x << ",";
  o << p.y << ",";
  o << p.z;
  return o << std::endl;
}
|
||||
|
||||
template<typename I>
|
||||
struct runner : public json_benchmark::const_json_runner<I> {
|
||||
std::vector<point> points;
|
||||
|
||||
public:
|
||||
runner() : json_benchmark::const_json_runner<I>(get_built_json_array()) {}
|
||||
|
||||
bool before_run(benchmark::State &state) {
|
||||
points.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool run(benchmark::State &) {
|
||||
return this->implementation.run(this->json, points);
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
bool diff(benchmark::State &state, runner<R> &reference) {
|
||||
return diff_results(state, points, reference.points);
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Build the text of a JSON array holding N objects of the form
 * `{ "x":<r>, "y":<r>, "z":<r>}`, where each <r> is drawn from a default-seeded
 * std::default_random_engine through a uniform_real_distribution over [0, 1).
 *
 * Also prints the size of the generated text, rounded to the nearest KB, to stdout.
 *
 * @param N number of objects to generate (0 yields an empty array).
 * @return the generated JSON text.
 */
static std::string build_json_array(size_t N) {
  std::default_random_engine e;
  std::uniform_real_distribution<> dis(0, 1);
  std::stringstream myss;
  // Plain '\n' is written instead of std::endl: the characters produced are the same,
  // but the stream is not flushed once per element.
  myss << "[\n";
  for(size_t i = 0; i < N; i++) {
    if(i > 0) { myss << ",\n"; }
    myss << "{ \"x\":" << dis(e) << ", \"y\":" << dis(e) << ", \"z\":" << dis(e) << "}";
    // Only the first element is followed by a newline of its own (layout kept as is).
    if(i == 0) { myss << '\n'; }
  }
  myss << "\n]\n";
  std::string answer = myss.str();
  std::cout << "Creating a source file spanning " << (answer.size() + 512) / 1024 << " KB " << std::endl;
  return answer;
}
|
||||
|
||||
/**
 * Return the benchmark input: a padded copy of build_json_array(1000000), built on the
 * first call and reused on every later call.
 */
static const simdjson::padded_string &get_built_json_array() {
  constexpr size_t element_count = 1000000;
  static simdjson::padded_string json = build_json_array(element_count);
  return json;
}
|
||||
|
||||
struct simdjson_dom;
|
||||
|
||||
/** Benchmark entry point: run runner<T>, checked against runner<simdjson_dom> as the reference. */
template<typename T> static void large_random(benchmark::State &state) {
  using bench_runner = runner<T>;
  using reference_runner = runner<simdjson_dom>;
  json_benchmark::run_json_benchmark<bench_runner, reference_runner>(state);
}
|
||||
|
||||
} // namespace large_random
|
|
@ -0,0 +1,26 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "large_random.h"
|
||||
|
||||
namespace large_random {
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
class simdjson_dom {
|
||||
dom::parser parser{};
|
||||
public:
|
||||
bool run(const simdjson::padded_string &json, std::vector<point> &points) {
|
||||
for (auto point : parser.parse(json)) {
|
||||
points.emplace_back(large_random::point{point["x"], point["y"], point["z"]});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(large_random, simdjson_dom);
|
||||
|
||||
} // namespace large_random
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,28 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "large_random.h"
|
||||
|
||||
namespace large_random {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
class simdjson_ondemand {
|
||||
ondemand::parser parser{};
|
||||
public:
|
||||
bool run(const simdjson::padded_string &json, std::vector<point> &points) {
|
||||
auto doc = parser.iterate(json);
|
||||
for (ondemand::object coord : doc) {
|
||||
points.emplace_back(point{coord.find_field("x"), coord.find_field("y"), coord.find_field("z")});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(large_random, simdjson_ondemand);
|
||||
|
||||
} // namespace large_random
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,28 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "large_random.h"
|
||||
|
||||
namespace large_random {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
class simdjson_ondemand_unordered {
|
||||
ondemand::parser parser{};
|
||||
public:
|
||||
bool run(const simdjson::padded_string &json, std::vector<point> &points) {
|
||||
auto doc = parser.iterate(json);
|
||||
for (ondemand::object coord : doc) {
|
||||
points.emplace_back(large_random::point{coord["x"], coord["y"], coord["z"]});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(large_random, simdjson_ondemand_unordered);
|
||||
|
||||
} // namespace large_random
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -1,17 +1,13 @@
|
|||
#pragma once
|
||||
|
||||
#include "largerandom.h"
|
||||
#ifdef SIMDJSON_COMPETITION_YYJSON
|
||||
|
||||
namespace largerandom {
|
||||
#include "large_random.h"
|
||||
|
||||
class Yyjson {
|
||||
public:
|
||||
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
|
||||
simdjson_really_inline size_t ItemCount() { return container.size(); }
|
||||
namespace large_random {
|
||||
|
||||
private:
|
||||
class yyjson {
|
||||
ondemand::parser parser{};
|
||||
std::vector<my_point> container{};
|
||||
|
||||
simdjson_really_inline double get_double(yyjson_val *obj, std::string_view key) {
|
||||
yyjson_val *val = yyjson_obj_getn(obj, key.data(), key.length());
|
||||
|
@ -19,9 +15,7 @@ private:
|
|||
}
|
||||
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json) {
|
||||
container.clear();
|
||||
|
||||
bool run(const simdjson::padded_string &json, std::vector<point> &points) {
|
||||
// Walk the document, parsing the tweets as we go
|
||||
yyjson_doc *doc = yyjson_read(json.data(), json.size(), 0);
|
||||
if (!doc) { return false; }
|
||||
|
@ -30,12 +24,14 @@ public:
|
|||
size_t idx, max;
|
||||
yyjson_val *coord;
|
||||
yyjson_arr_foreach(coords, idx, max, coord) {
|
||||
container.emplace_back(my_point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
||||
points.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(LargeRandom, Yyjson);
|
||||
BENCHMARK_TEMPLATE(large_random, yyjson);
|
||||
|
||||
} // namespace kostya
|
||||
} // namespace large_random
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_YYJSON
|
|
@ -1,5 +0,0 @@
|
|||
# Define the two large-random On Demand benchmark executables, but only when the
# benchmark::benchmark target exists; both are linked against it via link_libraries().
if (TARGET benchmark::benchmark)
|
||||
link_libraries(benchmark::benchmark)
|
||||
add_executable(bench_ondemand_largerandom bench_ondemand_largerandom.cpp)
|
||||
add_executable(bench_ondemand_unordered_largerandom bench_ondemand_unordered_largerandom.cpp)
|
||||
endif()
|
|
@ -1,14 +0,0 @@
|
|||
#include "simdjson.h"
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
|
||||
#include <benchmark/benchmark.h>
|
||||
SIMDJSON_POP_DISABLE_WARNINGS
|
||||
|
||||
#define BENCHMARK_NO_DOM
|
||||
|
||||
#include "largerandom/ondemand.h"
|
||||
|
||||
BENCHMARK_MAIN();
|
|
@ -1,14 +0,0 @@
|
|||
#include "simdjson.h"
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
|
||||
#include <benchmark/benchmark.h>
|
||||
SIMDJSON_POP_DISABLE_WARNINGS
|
||||
|
||||
#define BENCHMARK_NO_DOM
|
||||
|
||||
#include "largerandom/ondemand_unordered.h"
|
||||
|
||||
BENCHMARK_MAIN();
|
|
@ -1,37 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "largerandom.h"
|
||||
|
||||
namespace largerandom {
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
class Dom {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
|
||||
simdjson_really_inline size_t ItemCount() { return container.size(); }
|
||||
|
||||
private:
|
||||
dom::parser parser{};
|
||||
std::vector<my_point> container{};
|
||||
};
|
||||
|
||||
simdjson_really_inline bool Dom::Run(const padded_string &json) {
|
||||
container.clear();
|
||||
|
||||
for (auto point : parser.parse(json)) {
|
||||
container.emplace_back(my_point{point["x"], point["y"], point["z"]});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
BENCHMARK_TEMPLATE(LargeRandom, Dom);
|
||||
|
||||
} // namespace largerandom
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -1,53 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "largerandom.h"
|
||||
|
||||
namespace largerandom {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
class Iter {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
|
||||
simdjson_really_inline size_t ItemCount() { return container.size(); }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
std::vector<my_point> container{};
|
||||
|
||||
simdjson_really_inline double first_double(ondemand::json_iterator &iter) {
|
||||
if (iter.start_object().error() || iter.field_key().error() || iter.field_value()) { throw "Invalid field"; }
|
||||
return iter.consume_double();
|
||||
}
|
||||
|
||||
simdjson_really_inline double next_double(ondemand::json_iterator &iter) {
|
||||
if (!iter.has_next_field() || iter.field_key().error() || iter.field_value()) { throw "Invalid field"; }
|
||||
return iter.consume_double();
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
simdjson_really_inline bool Iter::Run(const padded_string &json) {
|
||||
container.clear();
|
||||
|
||||
auto iter = parser.iterate_raw(json).value();
|
||||
if (iter.start_array()) {
|
||||
do {
|
||||
container.emplace_back(my_point{first_double(iter), next_double(iter), next_double(iter)});
|
||||
if (iter.has_next_field()) { throw "Too many fields"; }
|
||||
} while (iter.has_next_element());
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
BENCHMARK_TEMPLATE(LargeRandom, Iter);
|
||||
|
||||
} // namespace largerandom
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -1,72 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
//
|
||||
// Interface
|
||||
//
|
||||
|
||||
namespace largerandom {
|
||||
template<typename T> static void LargeRandom(benchmark::State &state);
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
/**
 * Builds the text of a JSON array containing N objects of the form
 * `{ "x":<double>, "y":<double>, "z":<double>}`.
 *
 * The coordinates come from a default-constructed std::default_random_engine
 * fed through std::uniform_real_distribution<>(0, 1), so repeated calls with
 * the same N produce the same text. The size of the generated text is
 * reported on std::cout.
 *
 * @param N number of objects to emit (0 yields an empty array).
 * @return the generated JSON text.
 */
static std::string build_json_array(size_t N) {
  std::default_random_engine e;
  std::uniform_real_distribution<> dis(0, 1);
  std::stringstream myss;

  // Emits one object; shared by the first element and all following ones.
  const auto write_point = [&]() {
    myss << "{ \"x\":" << dis(e) << ", \"y\":" << dis(e) << ", \"z\":" << dis(e) << "}";
  };

  // '\n' is used instead of std::endl: flushing an in-memory stream on every
  // element is pure overhead and the resulting characters are the same.
  myss << "[\n";
  if (N > 0) {
    write_point();
    myss << '\n';
  }
  for (size_t i = 1; i < N; i++) {
    myss << ",\n";
    write_point();
  }
  myss << "\n]\n";

  std::string answer = myss.str();
  std::cout << "Creating a source file spanning " << (answer.size() + 512) / 1024 << " KB " << std::endl;
  return answer;
}
|
||||
|
||||
/**
 * Returns the shared benchmark input: a padded copy of
 * build_json_array(1000000), created on the first call and reused afterwards.
 */
static const padded_string &get_built_json_array() {
  static padded_string json(build_json_array(1000000));
  return json;
}
|
||||
|
||||
struct my_point {
|
||||
double x;
|
||||
double y;
|
||||
double z;
|
||||
simdjson_really_inline bool operator==(const my_point &other) const {
|
||||
return x == other.x && y == other.y && z == other.z;
|
||||
}
|
||||
simdjson_really_inline bool operator!=(const my_point &other) const { return !(*this == other); }
|
||||
};
|
||||
|
||||
/** Writes a point as `x,y,z` followed by std::endl. */
simdjson_unused static std::ostream &operator<<(std::ostream &o, const my_point &p) {
  o << p.x << ",";
  o << p.y << ",";
  o << p.z;
  return o << std::endl;
}
|
||||
|
||||
} // namespace largerandom
|
||||
|
||||
//
|
||||
// Implementation
|
||||
//
|
||||
#include <vector>
|
||||
#include "event_counter.h"
|
||||
#ifndef BENCHMARK_NO_DOM
|
||||
#include "dom.h"
|
||||
#endif
|
||||
#include "json_benchmark.h"
|
||||
|
||||
namespace largerandom {
|
||||
|
||||
/**
 * Benchmark body for implementation T over the generated point array.
 *
 * JsonBenchmark's second template argument is Dom, or T itself when
 * BENCHMARK_NO_DOM is defined (presumably the implementation T is checked
 * against -- confirm in json_benchmark.h).
 */
template<typename T> static void LargeRandom(benchmark::State &state) {
  const padded_string &json = get_built_json_array();
#ifndef BENCHMARK_NO_DOM
  JsonBenchmark<T, Dom>(state, json);
#else
  JsonBenchmark<T, T>(state, json);
#endif
}
|
||||
|
||||
} // namespace largerandom
|
|
@ -1,38 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "largerandom.h"
|
||||
|
||||
namespace largerandom {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
class OnDemand {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
|
||||
simdjson_really_inline size_t ItemCount() { return container.size(); }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
std::vector<my_point> container{};
|
||||
};
|
||||
|
||||
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
|
||||
container.clear();
|
||||
|
||||
auto doc = parser.iterate(json);
|
||||
for (ondemand::object coord : doc) {
|
||||
container.emplace_back(my_point{coord.find_field("x"), coord.find_field("y"), coord.find_field("z")});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
BENCHMARK_TEMPLATE(LargeRandom, OnDemand);
|
||||
|
||||
} // namespace largerandom
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -1,38 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "largerandom.h"
|
||||
|
||||
namespace largerandom {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
class OnDemandUnordered {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
|
||||
simdjson_really_inline size_t ItemCount() { return container.size(); }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
std::vector<my_point> container{};
|
||||
};
|
||||
|
||||
simdjson_really_inline bool OnDemandUnordered::Run(const padded_string &json) {
|
||||
container.clear();
|
||||
|
||||
auto doc = parser.iterate(json);
|
||||
for (ondemand::object coord : doc) {
|
||||
container.emplace_back(my_point{coord["x"], coord["y"], coord["z"]});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
BENCHMARK_TEMPLATE(LargeRandom, OnDemandUnordered);
|
||||
|
||||
} // namespace largerandom
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -1,121 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "largerandom.h"
|
||||
|
||||
namespace largerandom {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
using namespace simdjson::builtin::stage2;
|
||||
|
||||
class Sax {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json) noexcept;
|
||||
|
||||
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
|
||||
simdjson_really_inline size_t ItemCount() { return container.size(); }
|
||||
|
||||
private:
|
||||
simdjson_really_inline error_code RunNoExcept(const padded_string &json) noexcept;
|
||||
error_code Allocate(size_t new_capacity);
|
||||
std::unique_ptr<uint8_t[]> string_buf{};
|
||||
size_t capacity{};
|
||||
dom_parser_implementation dom_parser{};
|
||||
std::vector<my_point> container{};
|
||||
};
|
||||
|
||||
struct sax_point_reader_visitor {
|
||||
public:
|
||||
std::vector<my_point> &points;
|
||||
enum {GOT_X=0, GOT_Y=1, GOT_Z=2, GOT_SOMETHING_ELSE=4};
|
||||
size_t idx{GOT_SOMETHING_ELSE};
|
||||
double buffer[3]={};
|
||||
|
||||
explicit sax_point_reader_visitor(std::vector<my_point> &_points) : points(_points) {}
|
||||
|
||||
simdjson_really_inline error_code visit_object_start(json_iterator &) {
|
||||
idx = 0;
|
||||
return SUCCESS;
|
||||
}
|
||||
simdjson_really_inline error_code visit_primitive(json_iterator &, const uint8_t *value) {
|
||||
if(idx == GOT_SOMETHING_ELSE) { return simdjson::SUCCESS; }
|
||||
return numberparsing::parse_double(value).get(buffer[idx]);
|
||||
}
|
||||
simdjson_really_inline error_code visit_object_end(json_iterator &) {
|
||||
points.emplace_back(my_point{buffer[0], buffer[1], buffer[2]});
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
simdjson_really_inline error_code visit_document_start(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_key(json_iterator &, const uint8_t * key) {
|
||||
switch(key[1]) {
|
||||
// Technically, we should check the other characters
|
||||
// in the key, but we are cheating to go as fast
|
||||
// as possible.
|
||||
case 'x':
|
||||
idx = GOT_X;
|
||||
break;
|
||||
case 'y':
|
||||
idx = GOT_Y;
|
||||
break;
|
||||
case 'z':
|
||||
idx = GOT_Z;
|
||||
break;
|
||||
default:
|
||||
idx = GOT_SOMETHING_ELSE;
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
simdjson_really_inline error_code visit_array_start(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_array_end(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_document_end(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_empty_array(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_empty_object(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_root_primitive(json_iterator &, const uint8_t *) { return SUCCESS; }
|
||||
simdjson_really_inline error_code increment_count(json_iterator &) { return SUCCESS; }
|
||||
};
|
||||
|
||||
// NOTE: this assumes the dom_parser is already allocated
|
||||
bool Sax::Run(const padded_string &json) noexcept {
|
||||
auto error = RunNoExcept(json);
|
||||
if (error) { std::cerr << error << std::endl; return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
// Clears the previous points, grows the parser if json is larger than the
// current capacity, runs stage 1 and then walks the document with a
// sax_point_reader_visitor that fills `container`.
error_code Sax::RunNoExcept(const padded_string &json) noexcept {
  container.clear();

  // Grow only when the document no longer fits.
  if (json.size() > capacity) {
    SIMDJSON_TRY( Allocate(json.size()) );
  }

  // Stage 1 has to run before the document can be walked.
  SIMDJSON_TRY( dom_parser.stage1(json.u8data(), json.size(), false) );

  // Walk the document, collecting the points as we go.
  json_iterator iter(dom_parser, 0);
  sax_point_reader_visitor visitor(container);
  SIMDJSON_TRY( iter.walk_document<false>(visitor) );

  return SUCCESS;
}
|
||||
|
||||
// (Re)allocates the string buffer and sizes dom_parser for documents of up
// to new_capacity bytes. Returns MEMALLOC when the string buffer cannot be
// allocated, or whatever error dom_parser reports; `capacity` is only
// updated on success.
error_code Sax::Allocate(size_t new_capacity) {
  // string_capacity copied from document::allocate
  size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64);
  string_buf.reset(new (std::nothrow) uint8_t[string_capacity]);
  // new (std::nothrow) signals failure by returning nullptr rather than
  // throwing, so the result has to be checked explicitly.
  if (!string_buf) { return MEMALLOC; }
  if (auto error = dom_parser.set_capacity(new_capacity)) { return error; }
  if (capacity == 0) { // set max depth the first time only
    if (auto error = dom_parser.set_max_depth(DEFAULT_MAX_DEPTH)) { return error; }
  }
  capacity = new_capacity;
  return SUCCESS;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(LargeRandom, Sax);
|
||||
|
||||
} // namespace largerandom
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -1,51 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "partial_tweets.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
class Dom {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }
|
||||
simdjson_really_inline size_t ItemCount() { return tweets.size(); }
|
||||
|
||||
private:
|
||||
dom::parser parser{};
|
||||
std::vector<tweet> tweets{};
|
||||
|
||||
simdjson_really_inline uint64_t nullable_int(dom::element element) {
|
||||
if (element.is_null()) { return 0; }
|
||||
return element;
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Parses json and appends one tweet per element of its "statuses" array.
 * Always returns true; failures surface as exceptions from the simdjson calls.
 */
simdjson_really_inline bool Dom::Run(const padded_string &json) {
  tweets.clear();

  for (dom::element status : parser.parse(json)["statuses"]) {
    auto author = status["user"];
    tweets.push_back(partial_tweets::tweet{
      status["created_at"],
      status["id"],
      status["text"],
      nullable_int(status["in_reply_to_status_id"]),
      { author["id"], author["screen_name"] },
      status["retweet_count"],
      status["favorite_count"]
    });
  }

  return true;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(PartialTweets, Dom);
|
||||
|
||||
} // namespace partial_tweets
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -1,64 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "partial_tweets.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
class DomNoExcept {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const simdjson::padded_string &json) noexcept;
|
||||
|
||||
simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }
|
||||
simdjson_really_inline size_t ItemCount() { return tweets.size(); }
|
||||
|
||||
private:
|
||||
dom::parser parser{};
|
||||
std::vector<tweet> tweets{};
|
||||
|
||||
simdjson_really_inline simdjson_result<uint64_t> nullable_int(simdjson_result<dom::element> result) noexcept {
|
||||
dom::element element;
|
||||
SIMDJSON_TRY( result.get(element) );
|
||||
if (element.is_null()) { return 0; }
|
||||
return element.get_uint64();
|
||||
}
|
||||
|
||||
simdjson_really_inline error_code RunNoExcept(const simdjson::padded_string &json) noexcept;
|
||||
};
|
||||
|
||||
/** Runs RunNoExcept() and maps its error code to a bool, printing errors on std::cerr. */
simdjson_really_inline bool DomNoExcept::Run(const simdjson::padded_string &json) noexcept {
  const auto error = RunNoExcept(json);
  if (!error) { return true; }
  std::cerr << error << std::endl;
  return false;
}
|
||||
|
||||
/**
 * Parses json and appends one tweet per element of its "statuses" array,
 * returning the first error encountered (SUCCESS otherwise).
 */
simdjson_really_inline error_code DomNoExcept::RunNoExcept(const simdjson::padded_string &json) noexcept {
  tweets.clear();

  dom::array statuses;
  SIMDJSON_TRY( parser.parse(json)["statuses"].get_array().get(statuses) );

  for (auto status_element : statuses) {
    dom::object status;
    SIMDJSON_TRY( status_element.get_object().get(status) );

    dom::object author;
    SIMDJSON_TRY( status["user"].get_object().get(author) );

    // Fields are read in the same order as the members are listed here.
    partial_tweets::tweet t;
    SIMDJSON_TRY( status["created_at"].get_string().get(t.created_at) );
    SIMDJSON_TRY( status["id"].get_uint64().get(t.id) );
    SIMDJSON_TRY( status["text"].get_string().get(t.text) );
    SIMDJSON_TRY( nullable_int(status["in_reply_to_status_id"]).get(t.in_reply_to_status_id) );
    SIMDJSON_TRY( author["id"].get_uint64().get(t.user.id) );
    SIMDJSON_TRY( author["screen_name"].get_string().get(t.user.screen_name) );
    SIMDJSON_TRY( status["retweet_count"].get_uint64().get(t.retweet_count) );
    SIMDJSON_TRY( status["favorite_count"].get_uint64().get(t.favorite_count) );

    tweets.push_back(t);
  }
  return SUCCESS;
}
|
||||
|
||||
} // namespace partial_tweets
|
|
@ -1,93 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "partial_tweets.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
class Iter {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }
|
||||
simdjson_really_inline size_t ItemCount() { return tweets.size(); }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
std::vector<tweet> tweets{};
|
||||
|
||||
simdjson_really_inline uint64_t nullable_int(ondemand::value && value) {
|
||||
if (value.is_null()) { return 0; }
|
||||
return std::move(value);
|
||||
}
|
||||
|
||||
simdjson_really_inline twitter_user read_user(ondemand::object && user) {
|
||||
// Move user into a local object so it gets destroyed (and moves the iterator)
|
||||
ondemand::object u = std::move(user);
|
||||
return { u["id"], u["screen_name"] };
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Walks `{ "statuses": [ {...}, ... ] }` by hand, pulling the fields of each
 * tweet in the fixed order created_at, id, text, in_reply_to_status_id,
 * user{id, screen_name}, retweet_count, favorite_count.
 *
 * Returns false as soon as an expected container or field cannot be found.
 */
simdjson_really_inline bool Iter::Run(const padded_string &json) {
  tweets.clear();

  auto iter = parser.iterate_raw(json).value();

  // { "statuses":
  if (!iter.start_object()) { return false; }
  if (!iter.find_field_raw("statuses")) { return false; }
  // { "statuses": [
  if (!iter.start_array()) { return false; }

  for (;;) {
    tweet t;

    if (!iter.start_object()) { return false; }
    if (!iter.find_field_raw("created_at")) { return false; }
    t.created_at = iter.consume_string();

    if (!iter.has_next_field()) { return false; }
    if (!iter.find_field_raw("id")) { return false; }
    t.id = iter.consume_uint64();

    if (!iter.has_next_field()) { return false; }
    if (!iter.find_field_raw("text")) { return false; }
    t.text = iter.consume_string();

    if (!iter.has_next_field()) { return false; }
    if (!iter.find_field_raw("in_reply_to_status_id")) { return false; }
    // A null reply id leaves the member at whatever `tweet t;` gave it.
    if (!iter.is_null()) {
      t.in_reply_to_status_id = iter.consume_uint64();
    }

    if (!iter.has_next_field()) { return false; }
    if (!iter.find_field_raw("user")) { return false; }
    {
      if (!iter.start_object()) { return false; }
      if (!iter.find_field_raw("id")) { return false; }
      t.user.id = iter.consume_uint64();

      if (!iter.has_next_field()) { return false; }
      if (!iter.find_field_raw("screen_name")) { return false; }
      t.user.screen_name = iter.consume_string();

      // Skip the rest of the user object
      if (iter.skip_container()) { return false; }
    }

    if (!iter.has_next_field()) { return false; }
    if (!iter.find_field_raw("retweet_count")) { return false; }
    t.retweet_count = iter.consume_uint64();

    if (!iter.has_next_field()) { return false; }
    if (!iter.find_field_raw("favorite_count")) { return false; }
    t.favorite_count = iter.consume_uint64();

    tweets.push_back(t);

    // Skip the rest of the tweet object
    if (iter.skip_container()) { return false; }

    if (!iter.has_next_element()) { break; }
  }

  return true;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(PartialTweets, Iter);
|
||||
|
||||
} // namespace partial_tweets
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -1,64 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "partial_tweets.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
|
||||
class OnDemand {
|
||||
public:
|
||||
OnDemand() {
|
||||
if(!displayed_implementation) {
|
||||
std::cout << "On Demand implementation: " << builtin_implementation()->name() << std::endl;
|
||||
displayed_implementation = true;
|
||||
}
|
||||
}
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }
|
||||
simdjson_really_inline size_t ItemCount() { return tweets.size(); }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
std::vector<tweet> tweets{};
|
||||
|
||||
simdjson_really_inline uint64_t nullable_int(ondemand::value value) {
|
||||
if (value.is_null()) { return 0; }
|
||||
return value;
|
||||
}
|
||||
|
||||
simdjson_really_inline twitter_user read_user(ondemand::object user) {
|
||||
return { user.find_field("id"), user.find_field("screen_name") };
|
||||
}
|
||||
|
||||
static inline bool displayed_implementation = false;
|
||||
};
|
||||
|
||||
/**
 * Iterates the "statuses" array and appends one tweet per object. The
 * find_field() calls are issued in the order listed in the initializer.
 */
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
  tweets.clear();

  // Walk the document, building the tweets as we go
  auto doc = parser.iterate(json);
  for (ondemand::object status : doc.find_field("statuses")) {
    tweets.push_back(partial_tweets::tweet{
      status.find_field("created_at"),
      status.find_field("id"),
      status.find_field("text"),
      nullable_int(status.find_field("in_reply_to_status_id")),
      read_user(status.find_field("user")),
      status.find_field("retweet_count"),
      status.find_field("favorite_count")
    });
  }
  return true;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(PartialTweets, OnDemand);
|
||||
|
||||
} // namespace partial_tweets
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -1,41 +1,44 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
//
|
||||
// Interface
|
||||
//
|
||||
|
||||
namespace partial_tweets {
|
||||
template<typename T> static void PartialTweets(benchmark::State &state);
|
||||
} // namespace partial_tweets
|
||||
|
||||
//
|
||||
// Implementation
|
||||
//
|
||||
|
||||
#include "json_benchmark/file_runner.h"
|
||||
#include "tweet.h"
|
||||
#include <vector>
|
||||
#include "event_counter.h"
|
||||
#include "domnoexcept.h"
|
||||
#include "json_benchmark.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
using namespace simdjson;
|
||||
template<typename I>
|
||||
struct runner : public json_benchmark::file_runner<I> {
|
||||
std::vector<tweet> tweets{};
|
||||
|
||||
template<typename T> static void PartialTweets(benchmark::State &state) {
|
||||
//
|
||||
// Load the JSON file
|
||||
//
|
||||
constexpr const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
|
||||
error_code error;
|
||||
padded_string json;
|
||||
if ((error = padded_string::load(TWITTER_JSON).get(json))) {
|
||||
std::cerr << error << std::endl;
|
||||
state.SkipWithError("error loading");
|
||||
return;
|
||||
public:
|
||||
bool setup(benchmark::State &state) {
|
||||
return this->load_json(state, json_benchmark::TWITTER_JSON);
|
||||
}
|
||||
|
||||
JsonBenchmark<T, DomNoExcept>(state, json);
|
||||
bool before_run(benchmark::State &state) {
|
||||
tweets.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool run(benchmark::State &) {
|
||||
return this->implementation.run(this->json, tweets);
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
bool diff(benchmark::State &state, runner<R> &reference) {
|
||||
return diff_results(state, tweets, reference.tweets);
|
||||
}
|
||||
|
||||
size_t items_per_iteration() {
|
||||
return tweets.size();
|
||||
}
|
||||
};
|
||||
|
||||
struct simdjson_dom;
|
||||
|
||||
template<typename I> simdjson_really_inline static void partial_tweets(benchmark::State &state) {
|
||||
json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
|
||||
}
|
||||
|
||||
} // namespace partial_tweets
|
||||
|
|
|
@ -1,69 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
|
||||
#include "partial_tweets.h"
|
||||
#include "sax_tweet_reader_visitor.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
using namespace simdjson::builtin::stage2;
|
||||
|
||||
class Sax {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json) noexcept;
|
||||
|
||||
simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }
|
||||
simdjson_really_inline size_t ItemCount() { return tweets.size(); }
|
||||
|
||||
private:
|
||||
simdjson_really_inline error_code RunNoExcept(const padded_string &json) noexcept;
|
||||
error_code Allocate(size_t new_capacity);
|
||||
std::unique_ptr<uint8_t[]> string_buf{};
|
||||
size_t capacity{};
|
||||
dom_parser_implementation dom_parser{};
|
||||
std::vector<tweet> tweets{};
|
||||
};
|
||||
|
||||
// NOTE: this assumes the dom_parser is already allocated
|
||||
bool Sax::Run(const padded_string &json) noexcept {
|
||||
auto error = RunNoExcept(json);
|
||||
if (error) { std::cerr << error << std::endl; return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
// Clears the previous tweets, grows the parser if json is larger than the
// current capacity, runs stage 1 and then walks the document with a
// sax_tweet_reader_visitor that fills `tweets` (string data is written
// into string_buf).
error_code Sax::RunNoExcept(const padded_string &json) noexcept {
  tweets.clear();

  // Allocate capacity if needed
  if (capacity < json.size()) {
    SIMDJSON_TRY( Allocate(json.size()) );
  }

  // Run stage 1 first. u8data() is used instead of a C-style cast of data(),
  // which silently dropped the const qualifier of the input buffer.
  SIMDJSON_TRY( dom_parser.stage1(json.u8data(), json.size(), false) );

  // Then walk the document, parsing the tweets as we go
  json_iterator iter(dom_parser, 0);
  sax_tweet_reader_visitor visitor(tweets, string_buf.get());
  SIMDJSON_TRY( iter.walk_document<false>(visitor) );
  return SUCCESS;
}
|
||||
|
||||
// (Re)allocates the string buffer and sizes dom_parser for documents of up
// to new_capacity bytes. Returns MEMALLOC when the string buffer cannot be
// allocated, or whatever error dom_parser reports; `capacity` is only
// updated on success.
error_code Sax::Allocate(size_t new_capacity) {
  // string_capacity copied from document::allocate
  size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64);
  string_buf.reset(new (std::nothrow) uint8_t[string_capacity]);
  // new (std::nothrow) signals failure by returning nullptr rather than
  // throwing; without this check the visitor would later write through a
  // null string buffer.
  if (!string_buf) { return MEMALLOC; }
  if (auto error = dom_parser.set_capacity(new_capacity)) { return error; }
  if (capacity == 0) { // set max depth the first time only
    if (auto error = dom_parser.set_max_depth(DEFAULT_MAX_DEPTH)) { return error; }
  }
  capacity = new_capacity;
  return SUCCESS;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(PartialTweets, Sax);
|
||||
|
||||
} // namespace partial_tweets
|
||||
|
|
@ -1,514 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "simdjson.h"
|
||||
#include "tweet.h"
|
||||
#include <vector>
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
using namespace simdjson::builtin::stage2;
|
||||
|
||||
struct sax_tweet_reader_visitor {
|
||||
public:
|
||||
simdjson_really_inline sax_tweet_reader_visitor(std::vector<tweet> &tweets, uint8_t *string_buf);
|
||||
|
||||
simdjson_really_inline error_code visit_document_start(json_iterator &iter);
|
||||
simdjson_really_inline error_code visit_object_start(json_iterator &iter);
|
||||
simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key);
|
||||
simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value);
|
||||
simdjson_really_inline error_code visit_array_start(json_iterator &iter);
|
||||
simdjson_really_inline error_code visit_array_end(json_iterator &iter);
|
||||
simdjson_really_inline error_code visit_object_end(json_iterator &iter);
|
||||
simdjson_really_inline error_code visit_document_end(json_iterator &iter);
|
||||
simdjson_really_inline error_code visit_empty_array(json_iterator &iter);
|
||||
simdjson_really_inline error_code visit_empty_object(json_iterator &iter);
|
||||
simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value);
|
||||
simdjson_really_inline error_code increment_count(json_iterator &iter);
|
||||
|
||||
private:
|
||||
// Since we only care about one thing at each level, we just use depth as the marker for what
|
||||
// object/array we're nested inside.
|
||||
enum class containers {
|
||||
document = 0, //
|
||||
top_object = 1, // {
|
||||
statuses = 2, // { "statuses": [
|
||||
tweet = 3, // { "statuses": [ {
|
||||
user = 4 // { "statuses": [ { "user": {
|
||||
};
|
||||
/**
|
||||
* The largest depth we care about.
|
||||
* There can be things at lower depths.
|
||||
*/
|
||||
static constexpr uint32_t MAX_SUPPORTED_DEPTH = uint32_t(containers::user);
|
||||
static constexpr const char *STATE_NAMES[] = {
|
||||
"document",
|
||||
"top object",
|
||||
"statuses",
|
||||
"tweet",
|
||||
"user"
|
||||
};
|
||||
enum class field_type {
|
||||
any,
|
||||
unsigned_integer,
|
||||
string,
|
||||
nullable_unsigned_integer,
|
||||
object,
|
||||
array
|
||||
};
|
||||
struct field {
|
||||
const char * key{};
|
||||
size_t len{0};
|
||||
size_t offset;
|
||||
containers container{containers::document};
|
||||
field_type type{field_type::any};
|
||||
};
|
||||
|
||||
std::vector<tweet> &tweets;
|
||||
containers container{containers::document};
|
||||
uint8_t *current_string_buf_loc;
|
||||
const uint8_t *current_key{};
|
||||
|
||||
simdjson_really_inline bool in_container(json_iterator &iter);
|
||||
simdjson_really_inline bool in_container_child(json_iterator &iter);
|
||||
simdjson_really_inline void start_container(json_iterator &iter);
|
||||
simdjson_really_inline void end_container(json_iterator &iter);
|
||||
simdjson_really_inline error_code parse_nullable_unsigned(json_iterator &iter, const uint8_t *value, const field &f);
|
||||
simdjson_really_inline error_code parse_unsigned(json_iterator &iter, const uint8_t *value, const field &f);
|
||||
simdjson_really_inline error_code parse_string(json_iterator &iter, const uint8_t *value, const field &f);
|
||||
|
||||
struct field_lookup {
|
||||
field entries[256]{};
|
||||
|
||||
field_lookup();
|
||||
simdjson_really_inline field get(const uint8_t * key, containers container);
|
||||
private:
|
||||
simdjson_really_inline uint8_t hash(const char * key, uint32_t depth);
|
||||
simdjson_really_inline void add(const char * key, size_t len, containers container, field_type type, size_t offset);
|
||||
simdjson_really_inline void neg(const char * const key, uint32_t depth);
|
||||
};
|
||||
static field_lookup fields;
|
||||
}; // sax_tweet_reader_visitor
|
||||
|
||||
simdjson_really_inline sax_tweet_reader_visitor::sax_tweet_reader_visitor(std::vector<tweet> &_tweets, uint8_t *_string_buf)
|
||||
: tweets{_tweets},
|
||||
current_string_buf_loc{_string_buf} {
|
||||
}
|
||||
|
||||
/** Enters the container that corresponds to the iterator's current depth. */
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_document_start(json_iterator &iter) {
  start_container(iter);
  return SUCCESS;
}
|
||||
/**
 * Handles the start of an array. Arrays that are not direct children of the
 * current container are ignored; a keyed array is entered when its field is
 * declared as an array, tolerated when the field type is `any`, and rejected
 * otherwise. Arrays without a key are rejected.
 */
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_array_start(json_iterator &iter) {
  // If we're not in a container we care about, don't bother with the rest
  if (!in_container_child(iter)) { return SUCCESS; }

  // Handle fields first
  if (current_key != nullptr) {
    const field_type type = fields.get(current_key, container).type;
    if (type == field_type::array) { // { "statuses": [
      start_container(iter);
      current_key = nullptr;
      return SUCCESS;
    }
    if (type == field_type::any) {
      return SUCCESS;
    }
    if (type == field_type::object
        || type == field_type::unsigned_integer
        || type == field_type::nullable_unsigned_integer
        || type == field_type::string) {
      iter.log_error("unexpected array field");
      return INCORRECT_TYPE;
    }
  }

  // We're not in a field, so it must be a child of an array. None of those are supported.
  iter.log_error("unexpected array");
  return INCORRECT_TYPE;
}
|
||||
/**
 * Handles the start of an object. Objects that are not direct children of the
 * current container are ignored; a keyed object is entered when its field is
 * declared as an object, tolerated when the field type is `any`, and rejected
 * otherwise. Unkeyed objects are entered only directly under the document or
 * the statuses array.
 */
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_object_start(json_iterator &iter) {
  // If we're not in a container we care about, don't bother with the rest
  if (!in_container_child(iter)) { return SUCCESS; }

  // Handle known fields
  if (current_key != nullptr) {
    const field_type type = fields.get(current_key, container).type;
    if (type == field_type::object) { // { "statuses": [ { "user": {
      start_container(iter);
      return SUCCESS;
    }
    if (type == field_type::any) {
      return SUCCESS;
    }
    if (type == field_type::array
        || type == field_type::unsigned_integer
        || type == field_type::nullable_unsigned_integer
        || type == field_type::string) {
      iter.log_error("unexpected object field");
      return INCORRECT_TYPE;
    }
  }

  // It's not a field, so it's a child of an array or document
  if (container == containers::document      // top_object: {
      || container == containers::statuses) { // tweet:      { "statuses": [ {
    start_container(iter);
    return SUCCESS;
  }
  if (container == containers::top_object
      || container == containers::tweet
      || container == containers::user) {
    iter.log_error("unexpected object");
    return INCORRECT_TYPE;
  }
  SIMDJSON_UNREACHABLE();
  return UNINITIALIZED;
}
|
||||
/** Remembers the key so the value that follows can be matched against the field table. */
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_key(json_iterator &, const uint8_t *key) {
  this->current_key = key;
  return SUCCESS;
}
|
||||
/**
 * Handles a primitive value. Values outside the current container are
 * ignored; keyed values are parsed according to the field's declared type
 * (`any` is accepted without parsing); everything else is rejected.
 */
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_primitive(json_iterator &iter, const uint8_t *value) {
  // Don't bother unless we're in a container we care about
  if (!in_container(iter)) { return SUCCESS; }

  // Handle fields first
  if (current_key != nullptr) {
    const auto f = fields.get(current_key, container);
    if (f.type == field_type::unsigned_integer) {
      return parse_unsigned(iter, value, f);
    }
    if (f.type == field_type::nullable_unsigned_integer) {
      return parse_nullable_unsigned(iter, value, f);
    }
    if (f.type == field_type::string) {
      return parse_string(iter, value, f);
    }
    if (f.type == field_type::any) {
      return SUCCESS;
    }
    if (f.type == field_type::array || f.type == field_type::object) {
      iter.log_error("unexpected primitive");
      return INCORRECT_TYPE;
    }
    // Only reached when f.type matches none of the enumerators above.
    current_key = nullptr;
  }

  // If it's not a field, it's a child of an array.
  // The only array we support is statuses, which must contain objects.
  iter.log_error("unexpected primitive");
  return INCORRECT_TYPE;
}
|
||||
/** Leaves the current container when the array that just ended is that container. */
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_array_end(json_iterator &iter) {
  const bool closes_current = in_container(iter);
  if (closes_current) {
    end_container(iter);
  }
  return SUCCESS;
}
|
||||
/** Forgets the pending key and leaves the current container when the object that just ended is that container. */
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_object_end(json_iterator &iter) {
  current_key = nullptr;
  const bool closes_current = in_container(iter);
  if (closes_current) {
    end_container(iter);
  }
  return SUCCESS;
}
|
||||
|
||||
/** Nothing to do at the end of the document. */
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_document_end(json_iterator &) { return SUCCESS; }
|
||||
|
||||
/** An empty array carries no data; only the pending key is forgotten. */
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_empty_array(json_iterator &) {
  this->current_key = nullptr;
  return SUCCESS;
}
|
||||
/** An empty object carries no data; it is accepted and otherwise ignored. */
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_empty_object(json_iterator &) { return SUCCESS; }
|
||||
/** A document whose root is a primitive is rejected with INCORRECT_TYPE. */
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_root_primitive(json_iterator &iter, const uint8_t *) {
  const error_code result = INCORRECT_TYPE;
  iter.log_error("unexpected root primitive");
  return result;
}
|
||||
|
||||
/** Element counts are not tracked by this visitor. */
simdjson_really_inline error_code sax_tweet_reader_visitor::increment_count(json_iterator &) {
  return SUCCESS;
}
|
||||
|
||||
/** True when the iterator's depth equals the depth of the current container. */
simdjson_really_inline bool sax_tweet_reader_visitor::in_container(json_iterator &iter) {
  const uint32_t container_depth = uint32_t(container);
  return iter.depth == container_depth;
}
|
||||
/** True when the iterator's depth is exactly one more than the depth of the current container. */
simdjson_really_inline bool sax_tweet_reader_visitor::in_container_child(json_iterator &iter) {
  const uint32_t child_depth = uint32_t(container) + 1;
  return iter.depth == child_depth;
}
|
||||
/**
 * Makes the container at the iterator's depth the current one. Entering a
 * tweet also appends a fresh, empty tweet for the field parsers to fill in.
 */
simdjson_really_inline void sax_tweet_reader_visitor::start_container(json_iterator &iter) {
  SIMDJSON_ASSUME(iter.depth <= MAX_SUPPORTED_DEPTH); // Asserts in debug mode
  container = static_cast<containers>(iter.depth);
  if (logger::LOG_ENABLED) {
    iter.log_value(STATE_NAMES[iter.depth]);
  }
  if (container != containers::tweet) { return; }
  tweets.push_back({});
}
|
||||
/** Steps back to the enclosing container, i.e. the one with the next smaller depth value. */
simdjson_really_inline void sax_tweet_reader_visitor::end_container(json_iterator &) {
  const int parent_depth = int(container) - 1;
  container = containers(parent_depth);
}
|
||||
/**
 * Parses `value` as an unsigned integer into the uint64_t member of the most
 * recently added tweet located at byte offset f.offset. A JSON null is
 * accepted and stored as 0; anything else that fails to parse returns the
 * number-parsing error.
 */
simdjson_really_inline error_code sax_tweet_reader_visitor::parse_nullable_unsigned(json_iterator &iter, const uint8_t *value, const field &f) {
  iter.log_value(f.key);
  auto i = reinterpret_cast<uint64_t *>(reinterpret_cast<char *>(&tweets.back()) + f.offset);
  if (auto error = numberparsing::parse_unsigned(value).get(*i)) {
    // If number parsing failed, check if it's null before returning the error
    if (!atomparsing::is_valid_null_atom(value)) { iter.log_error("expected number or null"); return error; }
    // Store 0 in the member itself. The previous `i = 0;` only nulled the
    // local pointer and left the member untouched.
    *i = 0;
  }
  return SUCCESS;
}
|
||||
/// Parses `value` as an unsigned integer straight into the uint64_t located
/// `f.offset` bytes into the most recently added tweet, returning the parser's
/// error code.
simdjson_really_inline error_code sax_tweet_reader_visitor::parse_unsigned(json_iterator &iter, const uint8_t *value, const field &f) {
  iter.log_value(f.key);
  char *const tweet_bytes = reinterpret_cast<char *>(&tweets.back());
  uint64_t *const destination = reinterpret_cast<uint64_t *>(tweet_bytes + f.offset);
  return numberparsing::parse_unsigned(value).get(*destination);
}
|
||||
/// Parses the JSON string at `value` into the string buffer at
/// `current_string_buf_loc`, storing the resulting view in the
/// std::string_view located `f.offset` bytes into the most recently added
/// tweet. Returns the string parser's error code.
simdjson_really_inline error_code sax_tweet_reader_visitor::parse_string(json_iterator &iter, const uint8_t *value, const field &f) {
  iter.log_value(f.key);
  char *const tweet_bytes = reinterpret_cast<char *>(&tweets.back());
  std::string_view *const destination = reinterpret_cast<std::string_view *>(tweet_bytes + f.offset);
  return stringparsing::parse_string_to_buffer(value, current_string_buf_loc, *destination);
}
|
||||
|
||||
// Out-of-class definition of the `fields` lookup table; the `{}` initializer runs
// field_lookup's default constructor, which registers the keys of interest.
sax_tweet_reader_visitor::field_lookup sax_tweet_reader_visitor::fields{};
|
||||
|
||||
/// Hashes the first four bytes of `key` together with `depth` into an 8-bit
/// table index. Exactly key[0]..key[3] are read.
simdjson_really_inline uint8_t sax_tweet_reader_visitor::field_lookup::hash(const char * key, uint32_t depth) {
  // The shift amounts below were picked on purpose: they produce only 2 collisions
  // among the keys in twitter.json, keep 0 free as a distinct value, and produce
  // 0 collisions among the keys we actually care about.
  const int byte0 = key[0] << 0;
  const int byte1 = key[1] << 3;
  const int byte2 = key[2] << 3;
  const int byte3 = key[3] << 1;
  return uint8_t(byte0 ^ byte1 ^ byte2 ^ byte3 ^ depth);
}
|
||||
/// Looks up the field registered for `key` within container `c`.
/// The hashed slot is accepted only if its container equals `c` and its stored
/// key matches the first `len` bytes of `key`; otherwise entries[0] (the
/// "missing" slot) is returned.
simdjson_really_inline sax_tweet_reader_visitor::field sax_tweet_reader_visitor::field_lookup::get(const uint8_t * key, containers c) {
  const auto slot = hash((const char *)key, uint32_t(c));
  const field candidate = entries[slot];
  // TODO if any key is > SIMDJSON_PADDING, this will access inaccessible memory!
  const bool matches = (c == candidate.container) && (memcmp(key, candidate.key, candidate.len) == 0);
  if (matches) {
    return candidate;
  }
  return entries[0];
}
|
||||
/// Registers a key of interest in the lookup table.
///
/// @param key    the key text used for hashing and comparison
/// @param len    number of bytes of `key` to compare on lookup
/// @param c      container the key belongs to (also mixed into the hash)
/// @param type   how the field's value should be handled
/// @param offset byte offset stored with the field
///
/// A message is printed and an assertion fires when the key hashes to slot 0
/// (reserved as the "missing value" slot) or to a slot that is already taken.
simdjson_really_inline void sax_tweet_reader_visitor::field_lookup::add(const char * key, size_t len, containers c, field_type type, size_t offset) {
  const auto slot = hash(key, uint32_t(c));
  if (slot == 0) {
    fprintf(stderr, "%s (depth %d) hashes to zero, which is used as 'missing value'\n", key, int(c));
    assert(false);
  }
  field &target = entries[slot];
  if (target.key) {
    fprintf(stderr, "%s (depth %d) collides with %s (depth %d) !\n", key, int(c), target.key, int(target.container));
    assert(false);
  }
  target = { key, len, offset, c, type };
}
|
||||
/// Negative check: reports (to stderr) when a key we do NOT want, at the given
/// depth, hashes to a slot that already holds a registered field.
simdjson_really_inline void sax_tweet_reader_visitor::field_lookup::neg(const char * const key, uint32_t depth) {
  const auto slot = hash(key, depth);
  if (!entries[slot].key) {
    return;
  }
  fprintf(stderr, "%s (depth %d) conflicts with %s (depth %d) !\n", key, depth, entries[slot].key, int(entries[slot].container));
}
|
||||
|
||||
/// Builds the lookup table: registers every key this reader extracts, then
/// runs negative checks for the other keys listed below so that any hash
/// conflict with a registered field is reported.
sax_tweet_reader_visitor::field_lookup::field_lookup() {
  // { "statuses": [...]
  add("\"statuses\"", std::strlen("\"statuses\""), containers::top_object, field_type::array, 0);

  // Fields stored directly in a tweet; the offset is the member's position inside `tweet`.
#define TWEET_FIELD(KEY, TYPE) \
  add("\"" #KEY "\"", std::strlen("\"" #KEY "\""), containers::tweet, TYPE, offsetof(tweet, KEY))
  TWEET_FIELD(id, field_type::unsigned_integer);
  TWEET_FIELD(in_reply_to_status_id, field_type::nullable_unsigned_integer);
  TWEET_FIELD(retweet_count, field_type::unsigned_integer);
  TWEET_FIELD(favorite_count, field_type::unsigned_integer);
  TWEET_FIELD(text, field_type::string);
  TWEET_FIELD(created_at, field_type::string);
  TWEET_FIELD(user, field_type::object);
#undef TWEET_FIELD

  // Fields of the nested user; the offset is relative to the enclosing `tweet`.
#define USER_FIELD(KEY, TYPE) \
  add("\"" #KEY "\"", std::strlen("\"" #KEY "\""), containers::user, TYPE, offsetof(tweet, user)+offsetof(twitter_user, KEY))
  USER_FIELD(id, field_type::unsigned_integer);
  USER_FIELD(screen_name, field_type::string);
#undef USER_FIELD

  // Check for collisions with other (unused) hash keys in typical twitter JSON
#define UNUSED_KEY(DEPTH, KEY) neg("\"" #KEY "\"", DEPTH)

  // depth 9
  UNUSED_KEY(9, display_url);
  UNUSED_KEY(9, expanded_url);
  neg("\"h\":", 9);
  UNUSED_KEY(9, indices);
  UNUSED_KEY(9, resize);
  UNUSED_KEY(9, url);
  neg("\"w\":", 9);

  // depth 8
  UNUSED_KEY(8, display_url);
  UNUSED_KEY(8, expanded_url);
  neg("\"h\":", 8);
  UNUSED_KEY(8, indices);
  UNUSED_KEY(8, large);
  UNUSED_KEY(8, medium);
  UNUSED_KEY(8, resize);
  UNUSED_KEY(8, small);
  UNUSED_KEY(8, thumb);
  UNUSED_KEY(8, url);
  neg("\"w\":", 8);

  // depth 7
  UNUSED_KEY(7, display_url);
  UNUSED_KEY(7, expanded_url);
  UNUSED_KEY(7, id_str);
  UNUSED_KEY(7, id);
  UNUSED_KEY(7, indices);
  UNUSED_KEY(7, large);
  UNUSED_KEY(7, media_url_https);
  UNUSED_KEY(7, media_url);
  UNUSED_KEY(7, medium);
  UNUSED_KEY(7, name);
  UNUSED_KEY(7, sizes);
  UNUSED_KEY(7, small);
  UNUSED_KEY(7, source_status_id_str);
  UNUSED_KEY(7, source_status_id);
  UNUSED_KEY(7, thumb);
  UNUSED_KEY(7, type);
  UNUSED_KEY(7, url);
  UNUSED_KEY(7, urls);

  // depth 6
  UNUSED_KEY(6, description);
  UNUSED_KEY(6, display_url);
  UNUSED_KEY(6, expanded_url);
  UNUSED_KEY(6, id_str);
  UNUSED_KEY(6, id);
  UNUSED_KEY(6, indices);
  UNUSED_KEY(6, media_url_https);
  UNUSED_KEY(6, media_url);
  UNUSED_KEY(6, name);
  UNUSED_KEY(6, sizes);
  UNUSED_KEY(6, source_status_id_str);
  UNUSED_KEY(6, source_status_id);
  UNUSED_KEY(6, type);
  UNUSED_KEY(6, url);
  UNUSED_KEY(6, urls);

  // depth 5
  UNUSED_KEY(5, contributors_enabled);
  UNUSED_KEY(5, default_profile_image);
  UNUSED_KEY(5, default_profile);
  UNUSED_KEY(5, description);
  UNUSED_KEY(5, entities);
  UNUSED_KEY(5, favourites_count);
  UNUSED_KEY(5, follow_request_sent);
  UNUSED_KEY(5, followers_count);
  UNUSED_KEY(5, following);
  UNUSED_KEY(5, friends_count);
  UNUSED_KEY(5, geo_enabled);
  UNUSED_KEY(5, hashtags);
  UNUSED_KEY(5, id_str);
  UNUSED_KEY(5, id);
  UNUSED_KEY(5, is_translation_enabled);
  UNUSED_KEY(5, is_translator);
  UNUSED_KEY(5, iso_language_code);
  UNUSED_KEY(5, lang);
  UNUSED_KEY(5, listed_count);
  UNUSED_KEY(5, location);
  UNUSED_KEY(5, media);
  UNUSED_KEY(5, name);
  UNUSED_KEY(5, notifications);
  UNUSED_KEY(5, profile_background_color);
  UNUSED_KEY(5, profile_background_image_url_https);
  UNUSED_KEY(5, profile_background_image_url);
  UNUSED_KEY(5, profile_background_tile);
  UNUSED_KEY(5, profile_banner_url);
  UNUSED_KEY(5, profile_image_url_https);
  UNUSED_KEY(5, profile_image_url);
  UNUSED_KEY(5, profile_link_color);
  UNUSED_KEY(5, profile_sidebar_border_color);
  UNUSED_KEY(5, profile_sidebar_fill_color);
  UNUSED_KEY(5, profile_text_color);
  UNUSED_KEY(5, profile_use_background_image);
  UNUSED_KEY(5, protected);
  UNUSED_KEY(5, result_type);
  UNUSED_KEY(5, statuses_count);
  UNUSED_KEY(5, symbols);
  UNUSED_KEY(5, time_zone);
  UNUSED_KEY(5, url);
  UNUSED_KEY(5, urls);
  UNUSED_KEY(5, user_mentions);
  UNUSED_KEY(5, utc_offset);
  UNUSED_KEY(5, verified);

  // depth 4
  UNUSED_KEY(4, contributors_enabled);
  UNUSED_KEY(4, contributors);
  UNUSED_KEY(4, coordinates);
  UNUSED_KEY(4, default_profile_image);
  UNUSED_KEY(4, default_profile);
  UNUSED_KEY(4, description);
  UNUSED_KEY(4, entities);
  UNUSED_KEY(4, favorited);
  UNUSED_KEY(4, favourites_count);
  UNUSED_KEY(4, follow_request_sent);
  UNUSED_KEY(4, followers_count);
  UNUSED_KEY(4, following);
  UNUSED_KEY(4, friends_count);
  UNUSED_KEY(4, geo_enabled);
  UNUSED_KEY(4, geo);
  UNUSED_KEY(4, hashtags);
  UNUSED_KEY(4, id_str);
  UNUSED_KEY(4, in_reply_to_screen_name);
  UNUSED_KEY(4, in_reply_to_status_id_str);
  UNUSED_KEY(4, in_reply_to_user_id_str);
  UNUSED_KEY(4, in_reply_to_user_id);
  UNUSED_KEY(4, is_translation_enabled);
  UNUSED_KEY(4, is_translator);
  UNUSED_KEY(4, iso_language_code);
  UNUSED_KEY(4, lang);
  UNUSED_KEY(4, listed_count);
  UNUSED_KEY(4, location);
  UNUSED_KEY(4, media);
  UNUSED_KEY(4, metadata);
  UNUSED_KEY(4, name);
  UNUSED_KEY(4, notifications);
  UNUSED_KEY(4, place);
  UNUSED_KEY(4, possibly_sensitive);
  UNUSED_KEY(4, profile_background_color);
  UNUSED_KEY(4, profile_background_image_url_https);
  UNUSED_KEY(4, profile_background_image_url);
  UNUSED_KEY(4, profile_background_tile);
  UNUSED_KEY(4, profile_banner_url);
  UNUSED_KEY(4, profile_image_url_https);
  UNUSED_KEY(4, profile_image_url);
  UNUSED_KEY(4, profile_link_color);
  UNUSED_KEY(4, profile_sidebar_border_color);
  UNUSED_KEY(4, profile_sidebar_fill_color);
  UNUSED_KEY(4, profile_text_color);
  UNUSED_KEY(4, profile_use_background_image);
  UNUSED_KEY(4, protected);
  UNUSED_KEY(4, result_type);
  UNUSED_KEY(4, retweeted);
  UNUSED_KEY(4, source);
  UNUSED_KEY(4, statuses_count);
  UNUSED_KEY(4, symbols);
  UNUSED_KEY(4, time_zone);
  UNUSED_KEY(4, truncated);
  UNUSED_KEY(4, url);
  UNUSED_KEY(4, urls);
  UNUSED_KEY(4, user_mentions);
  UNUSED_KEY(4, utc_offset);
  UNUSED_KEY(4, verified);

  // depth 3
  UNUSED_KEY(3, contributors);
  UNUSED_KEY(3, coordinates);
  UNUSED_KEY(3, entities);
  UNUSED_KEY(3, favorited);
  UNUSED_KEY(3, geo);
  UNUSED_KEY(3, id_str);
  UNUSED_KEY(3, in_reply_to_screen_name);
  UNUSED_KEY(3, in_reply_to_status_id_str);
  UNUSED_KEY(3, in_reply_to_user_id_str);
  UNUSED_KEY(3, in_reply_to_user_id);
  UNUSED_KEY(3, lang);
  UNUSED_KEY(3, metadata);
  UNUSED_KEY(3, place);
  UNUSED_KEY(3, possibly_sensitive);
  UNUSED_KEY(3, retweeted_status);
  UNUSED_KEY(3, retweeted);
  UNUSED_KEY(3, source);
  UNUSED_KEY(3, truncated);

  // depth 2
  UNUSED_KEY(2, completed_in);
  UNUSED_KEY(2, count);
  UNUSED_KEY(2, max_id_str);
  UNUSED_KEY(2, max_id);
  UNUSED_KEY(2, next_results);
  UNUSED_KEY(2, query);
  UNUSED_KEY(2, refresh_url);
  UNUSED_KEY(2, since_id_str);
  UNUSED_KEY(2, since_id);

  // depth 1
  UNUSED_KEY(1, search_metadata);

#undef UNUSED_KEY
}
|
||||
|
||||
// sax_tweet_reader_visitor::field_lookup::find_min() {
|
||||
// int min_count = 100000;
|
||||
// for (int a=0;a<4;a++) {
|
||||
// for (int b=0;b<4;b++) {
|
||||
// for (int c=0;c<4;c++) {
|
||||
// sax_tweet_reader_visitor::field_lookup fields(a,b,c);
|
||||
// if (fields.collision_count) { continue; }
|
||||
// if (fields.zero_emission) { continue; }
|
||||
// if (fields.conflict_count < min_count) { printf("min=%d,%d,%d (%d)", a, b, c, fields.conflict_count); }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
} // namespace partial_tweets
|
|
@ -0,0 +1,42 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "partial_tweets.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
class simdjson_dom {
|
||||
dom::parser parser{};
|
||||
|
||||
simdjson_really_inline uint64_t nullable_int(dom::element element) {
|
||||
if (element.is_null()) { return 0; }
|
||||
return element;
|
||||
}
|
||||
|
||||
public:
|
||||
bool run(const padded_string &json, std::vector<tweet> &tweets) {
|
||||
for (dom::element tweet : parser.parse(json)["statuses"]) {
|
||||
auto user = tweet["user"];
|
||||
tweets.emplace_back(partial_tweets::tweet{
|
||||
tweet["created_at"],
|
||||
tweet["id"],
|
||||
tweet["text"],
|
||||
nullable_int(tweet["in_reply_to_status_id"]),
|
||||
{ user["id"], user["screen_name"] },
|
||||
tweet["retweet_count"],
|
||||
tweet["favorite_count"]
|
||||
});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
// Registers the partial_tweets benchmark instantiated with the simdjson_dom implementation.
BENCHMARK_TEMPLATE(partial_tweets, simdjson_dom);
|
||||
|
||||
} // namespace partial_tweets
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,48 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "partial_tweets.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
class simdjson_ondemand {
|
||||
ondemand::parser parser{};
|
||||
|
||||
simdjson_really_inline uint64_t nullable_int(ondemand::value value) {
|
||||
if (value.is_null()) { return 0; }
|
||||
return value;
|
||||
}
|
||||
|
||||
simdjson_really_inline twitter_user read_user(ondemand::object user) {
|
||||
return { user.find_field("id"), user.find_field("screen_name") };
|
||||
}
|
||||
|
||||
public:
|
||||
bool run(const padded_string &json, std::vector<tweet> &tweets) {
|
||||
// Walk the document, parsing the tweets as we go
|
||||
auto doc = parser.iterate(json);
|
||||
for (ondemand::object tweet : doc.find_field("statuses")) {
|
||||
tweets.emplace_back(partial_tweets::tweet{
|
||||
tweet.find_field("created_at"),
|
||||
tweet.find_field("id"),
|
||||
tweet.find_field("text"),
|
||||
nullable_int(tweet.find_field("in_reply_to_status_id")),
|
||||
read_user(tweet.find_field("user")),
|
||||
tweet.find_field("retweet_count"),
|
||||
tweet.find_field("favorite_count")
|
||||
});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
// Registers the partial_tweets benchmark instantiated with the simdjson_ondemand implementation.
BENCHMARK_TEMPLATE(partial_tweets, simdjson_ondemand);
|
||||
|
||||
} // namespace partial_tweets
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -1,16 +1,13 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_YYJSON
|
||||
|
||||
#include "partial_tweets.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
class Yyjson {
|
||||
public:
|
||||
simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }
|
||||
simdjson_really_inline size_t ItemCount() { return tweets.size(); }
|
||||
|
||||
private:
|
||||
std::vector<tweet> tweets{};
|
||||
class yyjson {
|
||||
dom::parser parser{};
|
||||
|
||||
simdjson_really_inline std::string_view get_string_view(yyjson_val *obj, std::string_view key) {
|
||||
auto val = yyjson_obj_getn(obj, key.data(), key.length());
|
||||
|
@ -20,15 +17,17 @@ private:
|
|||
auto val = yyjson_obj_getn(obj, key.data(), key.length());
|
||||
return yyjson_get_uint(val);
|
||||
}
|
||||
/// Looks up `key` in the yyjson object `obj` and returns whatever
/// yyjson_get_uint yields for the value that was found.
simdjson_really_inline uint64_t get_nullable_uint64(yyjson_val *obj, std::string_view key) {
  yyjson_val *const found = yyjson_obj_getn(obj, key.data(), key.length());
  return yyjson_get_uint(found);
}
|
||||
/// Looks up the object stored under `key` in `obj` and builds a twitter_user
/// from its "id" and "screen_name" members (read in that order).
simdjson_really_inline partial_tweets::twitter_user get_user(yyjson_val *obj, std::string_view key) {
  yyjson_val *const user_obj = yyjson_obj_getn(obj, key.data(), key.length());
  const auto id = get_uint64(user_obj, "id");
  const auto screen_name = get_string_view(user_obj, "screen_name");
  return { id, screen_name };
}
|
||||
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json) {
|
||||
tweets.clear();
|
||||
|
||||
bool run(const padded_string &json, std::vector<tweet> &tweets) {
|
||||
// Walk the document, parsing the tweets as we go
|
||||
yyjson_doc *doc = yyjson_read(json.data(), json.size(), 0);
|
||||
if (!doc) { return false; }
|
||||
|
@ -43,17 +42,20 @@ public:
|
|||
get_string_view(tweet, "created_at"),
|
||||
get_uint64 (tweet, "id"),
|
||||
get_string_view(tweet, "text"),
|
||||
get_uint64 (tweet, "in_reply_to_status_id"),
|
||||
get_nullable_uint64 (tweet, "in_reply_to_status_id"),
|
||||
get_user (tweet, "user"),
|
||||
get_uint64 (tweet, "retweet_count"),
|
||||
get_uint64 (tweet, "favorite_count")
|
||||
});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(PartialTweets, Yyjson);
|
||||
BENCHMARK_TEMPLATE(partial_tweets, yyjson);
|
||||
|
||||
} // namespace partial_tweets
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_YYJSON
|
||||
|
||||
|
|
Loading…
Reference in New Issue