Add top_tweet benchmark to test laziness
This commit is contained in:
parent
3279c2f15b
commit
be61650102
|
@ -62,4 +62,11 @@ SIMDJSON_POP_DISABLE_WARNINGS
|
||||||
#include "find_tweet/rapidjson.h"
|
#include "find_tweet/rapidjson.h"
|
||||||
#include "find_tweet/nlohmann_json.h"
|
#include "find_tweet/nlohmann_json.h"
|
||||||
|
|
||||||
|
#include "top_tweet/simdjson_dom.h"
|
||||||
|
#include "top_tweet/simdjson_ondemand.h"
|
||||||
|
#include "top_tweet/yyjson.h"
|
||||||
|
#include "top_tweet/sajson.h"
|
||||||
|
#include "top_tweet/rapidjson.h"
|
||||||
|
#include "top_tweet/nlohmann_json.h"
|
||||||
|
|
||||||
BENCHMARK_MAIN();
|
BENCHMARK_MAIN();
|
||||||
|
|
|
@ -12,7 +12,7 @@ struct yyjson_base {
|
||||||
yyjson_val *root = yyjson_doc_get_root(doc);
|
yyjson_val *root = yyjson_doc_get_root(doc);
|
||||||
if (!yyjson_is_obj(root)) { return false; }
|
if (!yyjson_is_obj(root)) { return false; }
|
||||||
yyjson_val *statuses = yyjson_obj_get(root, "statuses");
|
yyjson_val *statuses = yyjson_obj_get(root, "statuses");
|
||||||
if (!yyjson_is_arr(statuses)) { return "Statuses is not an array!"; }
|
if (!yyjson_is_arr(statuses)) { return false; }
|
||||||
|
|
||||||
// Walk the document, parsing the tweets as we go
|
// Walk the document, parsing the tweets as we go
|
||||||
size_t tweet_idx, tweets_max;
|
size_t tweet_idx, tweets_max;
|
||||||
|
|
|
@ -14,7 +14,7 @@ struct yyjson_base {
|
||||||
yyjson_val *root = yyjson_doc_get_root(doc);
|
yyjson_val *root = yyjson_doc_get_root(doc);
|
||||||
if (!yyjson_is_obj(root)) { return false; }
|
if (!yyjson_is_obj(root)) { return false; }
|
||||||
yyjson_val *statuses = yyjson_obj_get(root, "statuses");
|
yyjson_val *statuses = yyjson_obj_get(root, "statuses");
|
||||||
if (!yyjson_is_arr(statuses)) { return "Statuses is not an array!"; }
|
if (!yyjson_is_arr(statuses)) { return false; }
|
||||||
|
|
||||||
// Walk the document, parsing the tweets as we go
|
// Walk the document, parsing the tweets as we go
|
||||||
size_t tweet_idx, tweets_max;
|
size_t tweet_idx, tweets_max;
|
||||||
|
|
|
@ -37,7 +37,7 @@ struct yyjson_base {
|
||||||
yyjson_val *root = yyjson_doc_get_root(doc);
|
yyjson_val *root = yyjson_doc_get_root(doc);
|
||||||
if (!yyjson_is_obj(root)) { return false; }
|
if (!yyjson_is_obj(root)) { return false; }
|
||||||
yyjson_val *statuses = yyjson_obj_get(root, "statuses");
|
yyjson_val *statuses = yyjson_obj_get(root, "statuses");
|
||||||
if (!yyjson_is_arr(statuses)) { return "Statuses is not an array!"; }
|
if (!yyjson_is_arr(statuses)) { return false; }
|
||||||
|
|
||||||
// Walk the document, parsing the tweets as we go
|
// Walk the document, parsing the tweets as we go
|
||||||
size_t tweet_idx, tweets_max;
|
size_t tweet_idx, tweets_max;
|
||||||
|
|
|
@ -0,0 +1,49 @@
|
||||||
|
# Top Tweet Benchmark
|
||||||
|
|
||||||
|
The top_tweet benchmark finds the most-retweeted tweet in a twitter API response.
|
||||||
|
|
||||||
|
## Purpose
|
||||||
|
|
||||||
|
This scenario tends to measure an implementation's laziness: its ability to avoid parsing unneeded
|
||||||
|
values, without knowing beforehand which values are needed.
|
||||||
|
|
||||||
|
To find the top tweet, an implementation needs to iterate through all tweets, remembering which one
|
||||||
|
had the highest retweet count. While it scans, it will find many "candidate" tweets with the highest
|
||||||
|
retweet count *up to that point*. Essentially, it has to keep track of the "top tweet so far" while
|
||||||
|
it searches. However, only the text and screen_name of the *final* top tweet need to be parsed.
|
||||||
|
Therefore, JSON parsers that can only parse values on the first pass (such as DOM or streaming
|
||||||
|
parsers) will be forced to parse text and screen_name of every candidate (if not every single
|
||||||
|
tweet). Parsers which can delay parsing of values until later will therefore shine in scenarios like
|
||||||
|
this.
|
||||||
|
|
||||||
|
## Rules
|
||||||
|
|
||||||
|
The benchmark will be called with `run(padded_string &json, int64_t max_retweet_count, top_tweet_result &result)`.
|
||||||
|
The benchmark must:
|
||||||
|
- Find the tweet with the highest retweet_count at the top level of the "statuses" array.
|
||||||
|
- Find the *last* such tweet: if multiple tweets have the same top retweet_count, the last one
|
||||||
|
should be returned.
|
||||||
|
- Exclude tweets with retweet_count above max_retweet_count. This restriction is solely here because
|
||||||
|
the default twitter.json has a rather high retweet count in the third tweet, and to test laziness
|
||||||
|
the matching tweet needs to be further down in the file.
|
||||||
|
- Fill in top_tweet_result with the corresponding fields from the matching tweet.
|
||||||
|
|
||||||
|
### Abridged Schema
|
||||||
|
|
||||||
|
The abridged schema (objects contain more fields than listed here):
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"statuses": [
|
||||||
|
{
|
||||||
|
"text": "i like to tweet", // text containing UTF-8 and escape characters
|
||||||
|
"user": {
|
||||||
|
"screen_name": "AlexanderHamilton" // string containing UTF-8 (and escape characters?)
|
||||||
|
},
|
||||||
|
"retweet_count": 2 // uint32
|
||||||
|
},
|
||||||
|
...
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
|
@ -0,0 +1,39 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#if SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||||
|
|
||||||
|
#include "top_tweet.h"
|
||||||
|
|
||||||
|
namespace top_tweet {
|
||||||
|
|
||||||
|
using namespace simdjson;
|
||||||
|
|
||||||
|
struct nlohmann_json {
|
||||||
|
using StringType=std::string;
|
||||||
|
|
||||||
|
dom::parser parser{};
|
||||||
|
|
||||||
|
bool run(simdjson::padded_string &json, int64_t max_retweet_count, top_tweet_result<StringType> &result) {
|
||||||
|
result.retweet_count = -1;
|
||||||
|
nlohmann::json top_tweet{};
|
||||||
|
|
||||||
|
auto root = nlohmann::json::parse(json.data(), json.data() + json.size());
|
||||||
|
for (auto tweet : root["statuses"]) {
|
||||||
|
int64_t retweet_count = tweet["retweet_count"];
|
||||||
|
if (retweet_count <= max_retweet_count && retweet_count >= result.retweet_count) {
|
||||||
|
result.retweet_count = retweet_count;
|
||||||
|
top_tweet = tweet;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result.text = top_tweet["text"];
|
||||||
|
result.screen_name = top_tweet["user"]["screen_name"];
|
||||||
|
return result.retweet_count != -1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(top_tweet, nlohmann_json)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace top_tweet
|
||||||
|
|
||||||
|
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON
|
|
@ -0,0 +1,69 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_RAPIDJSON
|
||||||
|
|
||||||
|
#include "top_tweet.h"
|
||||||
|
|
||||||
|
namespace top_tweet {
|
||||||
|
|
||||||
|
using namespace rapidjson;
|
||||||
|
|
||||||
|
struct rapidjson_base {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
|
Document doc{};
|
||||||
|
|
||||||
|
bool run(Document &root, int64_t max_retweet_count, top_tweet_result<StringType> &result) {
|
||||||
|
result.retweet_count = -1;
|
||||||
|
|
||||||
|
// Loop over the tweets
|
||||||
|
if (root.HasParseError() || !root.IsObject()) { return false; }
|
||||||
|
const auto &statuses = root.FindMember("statuses");
|
||||||
|
if (statuses == root.MemberEnd() || !statuses->value.IsArray()) { return false; }
|
||||||
|
for (const Value &tweet : statuses->value.GetArray()) {
|
||||||
|
if (!tweet.IsObject()) { return false; }
|
||||||
|
|
||||||
|
// Check if this tweet has a higher retweet count than the current top tweet
|
||||||
|
const auto &retweet_count_json = tweet.FindMember("retweet_count");
|
||||||
|
if (retweet_count_json == tweet.MemberEnd() || !retweet_count_json->value.IsInt64()) { return false; }
|
||||||
|
int64_t retweet_count = retweet_count_json->value.GetInt64();
|
||||||
|
if (retweet_count <= max_retweet_count && retweet_count >= result.retweet_count) {
|
||||||
|
result.retweet_count = retweet_count;
|
||||||
|
|
||||||
|
// TODO I can't figure out if there's a way to keep the Value to use outside the loop ...
|
||||||
|
|
||||||
|
// Get text and screen_name of top tweet
|
||||||
|
const auto &text = tweet.FindMember("text");
|
||||||
|
if (text == tweet.MemberEnd() || !text->value.IsString()) { return false; }
|
||||||
|
result.text = { text->value.GetString(), text->value.GetStringLength() };
|
||||||
|
|
||||||
|
const auto &user = tweet.FindMember("user");
|
||||||
|
if (user == tweet.MemberEnd() || !user->value.IsObject()) { return false; }
|
||||||
|
const auto &screen_name = user->value.FindMember("screen_name");
|
||||||
|
if (screen_name == user->value.MemberEnd() || !screen_name->value.IsString()) { return false; }
|
||||||
|
result.screen_name = { screen_name->value.GetString(), screen_name->value.GetStringLength() };
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result.retweet_count != -1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct rapidjson : rapidjson_base {
|
||||||
|
bool run(simdjson::padded_string &json, int64_t max_retweet_count, top_tweet_result<StringType> &result) {
|
||||||
|
return rapidjson_base::run(doc.Parse<kParseValidateEncodingFlag>(json.data()), max_retweet_count, result);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
BENCHMARK_TEMPLATE(top_tweet, rapidjson)->UseManualTime();
|
||||||
|
|
||||||
|
struct rapidjson_insitu : rapidjson_base {
|
||||||
|
bool run(simdjson::padded_string &json, int64_t max_retweet_count, top_tweet_result<StringType> &result) {
|
||||||
|
return rapidjson_base::run(doc.ParseInsitu<kParseValidateEncodingFlag>(json.data()), max_retweet_count, result);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
BENCHMARK_TEMPLATE(top_tweet, rapidjson_insitu)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace top_tweet
|
||||||
|
|
||||||
|
#endif // SIMDJSON_COMPETITION_RAPIDJSON
|
|
@ -0,0 +1,62 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_SAJSON
|
||||||
|
|
||||||
|
#include "top_tweet.h"
|
||||||
|
|
||||||
|
namespace top_tweet {
|
||||||
|
|
||||||
|
struct sajson {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
|
size_t ast_buffer_size{0};
|
||||||
|
size_t *ast_buffer{nullptr};
|
||||||
|
|
||||||
|
bool run(simdjson::padded_string &json, int32_t max_retweet_count, top_tweet_result<StringType> &result) {
|
||||||
|
if (!ast_buffer) {
|
||||||
|
ast_buffer_size = json.size();
|
||||||
|
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t));
|
||||||
|
}
|
||||||
|
auto doc = ::sajson::parse(
|
||||||
|
::sajson::bounded_allocation(ast_buffer, ast_buffer_size),
|
||||||
|
::sajson::mutable_string_view(json.size(), json.data())
|
||||||
|
);
|
||||||
|
if (!doc.is_valid()) { return false; }
|
||||||
|
|
||||||
|
auto root = doc.get_root();
|
||||||
|
if (root.get_type() != ::sajson::TYPE_OBJECT) { return false; }
|
||||||
|
auto statuses = root.get_value_of_key({ "statuses", strlen("statuses") });
|
||||||
|
if (statuses.get_type() != ::sajson::TYPE_ARRAY) { return false; }
|
||||||
|
|
||||||
|
for (size_t i=0; i<statuses.get_length(); i++) {
|
||||||
|
auto tweet = statuses.get_array_element(i);
|
||||||
|
if (tweet.get_type() != ::sajson::TYPE_OBJECT) { return false; }
|
||||||
|
|
||||||
|
// We can't keep a copy of "value" around, so AFAICT we can't lazily parse
|
||||||
|
auto retweet_count_val = tweet.get_value_of_key({ "retweet_count", strlen("retweet_count") });
|
||||||
|
if (retweet_count_val.get_type() != ::sajson::TYPE_INTEGER) { return false; }
|
||||||
|
int32_t retweet_count = retweet_count_val.get_integer_value();
|
||||||
|
if (retweet_count <= max_retweet_count && retweet_count >= result.retweet_count) {
|
||||||
|
result.retweet_count = retweet_count;
|
||||||
|
|
||||||
|
auto text = tweet.get_value_of_key({ "text", strlen("text") });
|
||||||
|
if (text.get_type() != ::sajson::TYPE_STRING) { return false; }
|
||||||
|
result.text = { text.as_cstring(), text.get_string_length() };
|
||||||
|
|
||||||
|
auto user = tweet.get_value_of_key({ "user", strlen("user") });
|
||||||
|
if (user.get_type() != ::sajson::TYPE_OBJECT) { return false; }
|
||||||
|
auto screen_name = user.get_value_of_key({ "screen_name", strlen("screen_name") });
|
||||||
|
if (screen_name.get_type() != ::sajson::TYPE_STRING) { return false; }
|
||||||
|
result.screen_name = { screen_name.as_cstring(), screen_name.get_string_length() };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result.retweet_count != -1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(top_tweet, sajson)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace top_tweet
|
||||||
|
|
||||||
|
#endif // SIMDJSON_COMPETITION_SAJSON
|
|
@ -0,0 +1,39 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#if SIMDJSON_EXCEPTIONS
|
||||||
|
|
||||||
|
#include "top_tweet.h"
|
||||||
|
|
||||||
|
namespace top_tweet {
|
||||||
|
|
||||||
|
using namespace simdjson;
|
||||||
|
|
||||||
|
struct simdjson_dom {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
|
dom::parser parser{};
|
||||||
|
|
||||||
|
bool run(simdjson::padded_string &json, int64_t max_retweet_count, top_tweet_result<StringType> &result) {
|
||||||
|
result.retweet_count = -1;
|
||||||
|
dom::element top_tweet{};
|
||||||
|
|
||||||
|
auto doc = parser.parse(json);
|
||||||
|
for (auto tweet : doc["statuses"]) {
|
||||||
|
int64_t retweet_count = tweet["retweet_count"];
|
||||||
|
if (retweet_count <= max_retweet_count && retweet_count >= result.retweet_count) {
|
||||||
|
result.retweet_count = retweet_count;
|
||||||
|
top_tweet = tweet;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result.text = top_tweet["text"];
|
||||||
|
result.screen_name = top_tweet["user"]["screen_name"];
|
||||||
|
return result.retweet_count != -1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(top_tweet, simdjson_dom)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace top_tweet
|
||||||
|
|
||||||
|
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,81 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#if SIMDJSON_EXCEPTIONS
|
||||||
|
|
||||||
|
#include "top_tweet.h"
|
||||||
|
|
||||||
|
namespace top_tweet {
|
||||||
|
|
||||||
|
using namespace simdjson;
|
||||||
|
using namespace simdjson::builtin;
|
||||||
|
|
||||||
|
struct simdjson_ondemand {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
|
ondemand::parser parser{};
|
||||||
|
|
||||||
|
bool run(simdjson::padded_string &json, int64_t max_retweet_count, top_tweet_result<StringType> &result) {
|
||||||
|
result.retweet_count = -1;
|
||||||
|
// We save these DOM values for later so we don't have to parse them
|
||||||
|
// into string_views until we're sure which ones we want to parse
|
||||||
|
// NOTE: simdjson does not presently support reuse of objects or arrays--just scalars. This is
|
||||||
|
// why we have to grab the text and screen_name fields instead of just saving the tweet object.
|
||||||
|
ondemand::value screen_name, text;
|
||||||
|
|
||||||
|
auto doc = parser.iterate(json);
|
||||||
|
for (auto tweet : doc["statuses"]) {
|
||||||
|
// Since text, user.screen_name, and retweet_count generally appear in order, it's nearly free
|
||||||
|
// for us to retrieve them here (and will cost a bit more if we do it in the if
|
||||||
|
// statement).
|
||||||
|
auto tweet_text = tweet["text"];
|
||||||
|
auto tweet_screen_name = tweet["user"]["screen_name"];
|
||||||
|
int64_t retweet_count = tweet["retweet_count"];
|
||||||
|
if (retweet_count <= max_retweet_count && retweet_count >= result.retweet_count) {
|
||||||
|
result.retweet_count = retweet_count;
|
||||||
|
// TODO std::move should not be necessary
|
||||||
|
text = std::move(tweet_text);
|
||||||
|
screen_name = std::move(tweet_screen_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now that we know which was the most retweeted, parse the values in it
|
||||||
|
result.screen_name = screen_name;
|
||||||
|
result.text = text;
|
||||||
|
return result.retweet_count != -1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(top_tweet, simdjson_ondemand)->UseManualTime();
|
||||||
|
|
||||||
|
struct simdjson_ondemand_forward_only {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
|
ondemand::parser parser{};
|
||||||
|
|
||||||
|
bool run(simdjson::padded_string &json, int64_t max_retweet_count, top_tweet_result<StringType> &result) {
|
||||||
|
result.retweet_count = -1;
|
||||||
|
|
||||||
|
auto doc = parser.iterate(json);
|
||||||
|
for (auto tweet : doc["statuses"]) {
|
||||||
|
// Since text, user.screen_name, and retweet_count generally appear in order, it's nearly free
|
||||||
|
// for us to retrieve them here (and will cost a bit more if we do it in the if
|
||||||
|
// statement).
|
||||||
|
auto tweet_text = tweet["text"];
|
||||||
|
auto tweet_screen_name = tweet["user"]["screen_name"];
|
||||||
|
int64_t retweet_count = tweet["retweet_count"];
|
||||||
|
if (retweet_count <= max_retweet_count && retweet_count >= result.retweet_count) {
|
||||||
|
result.retweet_count = retweet_count;
|
||||||
|
result.text = tweet_text;
|
||||||
|
result.screen_name = tweet_screen_name;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result.retweet_count != -1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BENCHMARK_TEMPLATE(top_tweet, simdjson_ondemand_forward_only)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace top_tweet
|
||||||
|
|
||||||
|
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,67 @@
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "json_benchmark/file_runner.h"
|
||||||
|
|
||||||
|
namespace top_tweet {
|
||||||
|
|
||||||
|
using namespace json_benchmark;
|
||||||
|
|
||||||
|
template<typename StringType>
|
||||||
|
struct top_tweet_result {
|
||||||
|
int64_t retweet_count{};
|
||||||
|
StringType screen_name{};
|
||||||
|
StringType text{};
|
||||||
|
template<typename OtherStringType>
|
||||||
|
simdjson_really_inline bool operator==(const top_tweet_result<OtherStringType> &other) const {
|
||||||
|
return retweet_count == other.retweet_count &&
|
||||||
|
screen_name == other.screen_name &&
|
||||||
|
text == other.text;
|
||||||
|
}
|
||||||
|
template<typename OtherStringType>
|
||||||
|
simdjson_really_inline bool operator!=(const top_tweet_result<OtherStringType> &other) const { return !(*this == other); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename StringType>
|
||||||
|
simdjson_unused static std::ostream &operator<<(std::ostream &o, const top_tweet_result<StringType> &t) {
|
||||||
|
o << "retweet_count: " << t.retweet_count << std::endl;
|
||||||
|
o << "screen_name: " << t.screen_name << std::endl;
|
||||||
|
o << "text: " << t.text << std::endl;
|
||||||
|
return o;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename I>
|
||||||
|
struct runner : public file_runner<I> {
|
||||||
|
top_tweet_result<typename I::StringType> result{};
|
||||||
|
|
||||||
|
bool setup(benchmark::State &state) {
|
||||||
|
return this->load_json(state, TWITTER_JSON);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool before_run(benchmark::State &state) {
|
||||||
|
if (!file_runner<I>::before_run(state)) { return false; }
|
||||||
|
result.retweet_count = -1;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool run(benchmark::State &) {
|
||||||
|
return this->implementation.run(this->json, 60, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename R>
|
||||||
|
bool diff(benchmark::State &state, runner<R> &reference) {
|
||||||
|
return diff_results(state, result, reference.result, diff_flags::NONE);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t items_per_iteration() {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct simdjson_dom;
|
||||||
|
|
||||||
|
/**
 * Benchmark entry point for implementation `I`: runs it through
 * run_json_benchmark with the simdjson_dom runner as the reference.
 */
template<typename I> simdjson_really_inline static void top_tweet(benchmark::State &state) {
  using candidate_runner = runner<I>;
  using reference_runner = runner<simdjson_dom>;
  json_benchmark::run_json_benchmark<candidate_runner, reference_runner>(state);
}
|
||||||
|
|
||||||
|
} // namespace top_tweet
|
|
@ -0,0 +1,68 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef SIMDJSON_COMPETITION_YYJSON
|
||||||
|
|
||||||
|
#include "top_tweet.h"
|
||||||
|
|
||||||
|
namespace top_tweet {
|
||||||
|
|
||||||
|
struct yyjson_base {
|
||||||
|
using StringType=std::string_view;
|
||||||
|
|
||||||
|
bool run(yyjson_doc *doc, int64_t max_retweet_count, top_tweet_result<StringType> &result) {
|
||||||
|
result.retweet_count = -1;
|
||||||
|
|
||||||
|
yyjson_val *top_tweet{};
|
||||||
|
|
||||||
|
if (!doc) { return false; }
|
||||||
|
yyjson_val *root = yyjson_doc_get_root(doc);
|
||||||
|
if (!yyjson_is_obj(root)) { return false; }
|
||||||
|
yyjson_val *statuses = yyjson_obj_get(root, "statuses");
|
||||||
|
if (!yyjson_is_arr(statuses)) { return false; }
|
||||||
|
|
||||||
|
// Walk the document, parsing the tweets as we go
|
||||||
|
size_t tweet_idx, tweets_max;
|
||||||
|
yyjson_val *tweet;
|
||||||
|
yyjson_arr_foreach(statuses, tweet_idx, tweets_max, tweet) {
|
||||||
|
if (!yyjson_is_obj(tweet)) { return false; }
|
||||||
|
|
||||||
|
auto retweet_count_val = yyjson_obj_get(tweet, "retweet_count");
|
||||||
|
if (!yyjson_is_uint(retweet_count_val)) { return false; }
|
||||||
|
int64_t retweet_count = yyjson_get_uint(retweet_count_val);
|
||||||
|
if (retweet_count <= max_retweet_count && retweet_count >= result.retweet_count) {
|
||||||
|
result.retweet_count = retweet_count;
|
||||||
|
top_tweet = tweet;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto text = yyjson_obj_get(top_tweet, "text");
|
||||||
|
if (!yyjson_is_str(text)) { return false; }
|
||||||
|
result.text = { yyjson_get_str(text), yyjson_get_len(text) };
|
||||||
|
|
||||||
|
auto user = yyjson_obj_get(top_tweet, "user");
|
||||||
|
if (!yyjson_is_obj(user)) { return false; }
|
||||||
|
auto screen_name = yyjson_obj_get(user, "screen_name");
|
||||||
|
if (!yyjson_is_str(screen_name)) { return false; }
|
||||||
|
result.screen_name = { yyjson_get_str(screen_name), yyjson_get_len(screen_name) };
|
||||||
|
|
||||||
|
return result.retweet_count != -1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct yyjson : yyjson_base {
|
||||||
|
bool run(simdjson::padded_string &json, int64_t max_retweet_count, top_tweet_result<StringType> &result) {
|
||||||
|
return yyjson_base::run(yyjson_read(json.data(), json.size(), 0), max_retweet_count, result);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
BENCHMARK_TEMPLATE(top_tweet, yyjson)->UseManualTime();
|
||||||
|
|
||||||
|
struct yyjson_insitu : yyjson_base {
|
||||||
|
bool run(simdjson::padded_string &json, int64_t max_retweet_count, top_tweet_result<StringType> &result) {
|
||||||
|
return yyjson_base::run(yyjson_read_opts(json.data(), json.size(), YYJSON_READ_INSITU, 0, 0), max_retweet_count, result);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
BENCHMARK_TEMPLATE(top_tweet, yyjson_insitu)->UseManualTime();
|
||||||
|
|
||||||
|
} // namespace top_tweet
|
||||||
|
|
||||||
|
#endif // SIMDJSON_COMPETITION_YYJSON
|
Loading…
Reference in New Issue