Add sajson benchmarks
This commit is contained in:
parent
45479558ba
commit
1b4d3bcbb6
|
@ -34,16 +34,22 @@ if (TARGET competition-all)
|
|||
target_compile_definitions(allparsingcompetition PRIVATE ALLPARSER)
|
||||
endif()
|
||||
|
||||
# Executables built on Google Benchmark. They are only configured when the
# benchmark::benchmark target is available.
if(TARGET benchmark::benchmark)
  # Directory-scoped: applies to every target created after this call.
  link_libraries(benchmark::benchmark)
  add_executable(bench_parse_call bench_parse_call.cpp)
  add_executable(bench_dom_api bench_dom_api.cpp)
  # bench_ondemand is only built when SIMDJSON_EXCEPTIONS is set.
  if(SIMDJSON_EXCEPTIONS)
    add_executable(bench_ondemand bench_ondemand.cpp)
    # Each competitor library is optional and linked independently, so a
    # missing one does not prevent the others from being linked.
    if(TARGET yyjson)
      target_link_libraries(bench_ondemand PRIVATE yyjson)
    endif()
    if(TARGET rapidjson)
      target_link_libraries(bench_ondemand PRIVATE rapidjson)
    endif()
    if(TARGET sajson)
      target_link_libraries(bench_ondemand PRIVATE sajson)
    endif()
  endif()
endif()
|
||||
|
||||
include(checkperf.cmake)
|
||||
|
|
|
@ -13,6 +13,10 @@ SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
|
|||
#include "rapidjson/writer.h"
|
||||
#endif
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_SAJSON
|
||||
#include "sajson.h"
|
||||
#endif
|
||||
|
||||
// This has to be last, for reasons I don't yet understand
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
|
@ -21,27 +25,32 @@ SIMDJSON_POP_DISABLE_WARNINGS
|
|||
#include "partial_tweets/simdjson_dom.h"
|
||||
#include "partial_tweets/simdjson_ondemand.h"
|
||||
#include "partial_tweets/yyjson.h"
|
||||
#include "partial_tweets/sajson.h"
|
||||
#include "partial_tweets/rapidjson.h"
|
||||
|
||||
#include "large_random/simdjson_dom.h"
|
||||
#include "large_random/simdjson_ondemand.h"
|
||||
#include "large_random/simdjson_ondemand_unordered.h"
|
||||
#include "large_random/yyjson.h"
|
||||
#include "large_random/sajson.h"
|
||||
#include "large_random/rapidjson.h"
|
||||
|
||||
#include "kostya/simdjson_dom.h"
|
||||
#include "kostya/simdjson_ondemand.h"
|
||||
#include "kostya/yyjson.h"
|
||||
#include "kostya/sajson.h"
|
||||
#include "kostya/rapidjson.h"
|
||||
|
||||
#include "distinct_user_id/simdjson_dom.h"
|
||||
#include "distinct_user_id/simdjson_ondemand.h"
|
||||
#include "distinct_user_id/yyjson.h"
|
||||
#include "distinct_user_id/sajson.h"
|
||||
#include "distinct_user_id/rapidjson.h"
|
||||
|
||||
#include "find_tweet/simdjson_dom.h"
|
||||
#include "find_tweet/simdjson_ondemand.h"
|
||||
#include "find_tweet/yyjson.h"
|
||||
#include "find_tweet/sajson.h"
|
||||
#include "find_tweet/rapidjson.h"
|
||||
|
||||
BENCHMARK_MAIN();
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_SAJSON
|
||||
|
||||
#include "distinct_user_id.h"
|
||||
|
||||
namespace distinct_user_id {
|
||||
|
||||
struct sajson {
|
||||
size_t ast_buffer_size{0};
|
||||
size_t *ast_buffer{nullptr};
|
||||
simdjson_really_inline std::string_view get_string_view(const ::sajson::value &obj, std::string_view key) {
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field is not a string"; }
|
||||
return { val.as_cstring(), val.get_string_length() };
|
||||
}
|
||||
simdjson_really_inline uint64_t get_str_uint64(const ::sajson::value &obj, std::string_view key) {
|
||||
// Since sajson only supports 53-bit numbers, and IDs in twitter.json can be > 53 bits, we read the corresponding id_str and parse that.
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field not a string"; }
|
||||
auto str = val.as_cstring();
|
||||
char *endptr;
|
||||
uint64_t result = strtoull(str, &endptr, 10);
|
||||
if (endptr != &str[val.get_string_length()]) { throw "field is a string, but not an integer string"; }
|
||||
return result;
|
||||
}
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<uint64_t> &result) {
|
||||
using namespace sajson;
|
||||
if (!ast_buffer) {
|
||||
ast_buffer_size = json.size();
|
||||
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t));
|
||||
}
|
||||
auto doc = parse(
|
||||
bounded_allocation(ast_buffer, ast_buffer_size),
|
||||
mutable_string_view(json.size(), json.data())
|
||||
);
|
||||
if (!doc.is_valid()) { return false; }
|
||||
|
||||
auto root = doc.get_root();
|
||||
if (root.get_type() != TYPE_OBJECT) { return false; }
|
||||
auto statuses = root.get_value_of_key({"statuses", strlen("statuses")});
|
||||
if (statuses.get_type() != TYPE_ARRAY) { return false; }
|
||||
|
||||
for (size_t i=0; i<statuses.get_length(); i++) {
|
||||
auto tweet = statuses.get_array_element(i);
|
||||
|
||||
// get tweet.user.id
|
||||
if (tweet.get_type() != TYPE_OBJECT) { return false; }
|
||||
auto user = tweet.get_value_of_key({"user", strlen("user")});
|
||||
if (user.get_type() != TYPE_OBJECT) { return false; }
|
||||
result.push_back(get_str_uint64(user, "id_str"));
|
||||
|
||||
// get tweet.retweeted_status.user.id
|
||||
auto retweet = tweet.get_value_of_key({"retweeted_status", strlen("retweeted_status")});
|
||||
switch (retweet.get_type()) {
|
||||
case TYPE_OBJECT: {
|
||||
auto retweet_user = retweet.get_value_of_key({"user", strlen("user")});
|
||||
if (retweet_user.get_type() != TYPE_OBJECT) { return false; }
|
||||
result.push_back(get_str_uint64(retweet_user, "id_str"));
|
||||
break;
|
||||
}
|
||||
// TODO distinguish null and missing. null is bad. missing is fine.
|
||||
case TYPE_NULL:
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(distinct_user_id, sajson)->UseManualTime();
|
||||
|
||||
} // namespace distinct_user_id
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_SAJSON
|
||||
|
|
@ -20,7 +20,7 @@ struct runner : public json_benchmark::file_runner<I> {
|
|||
}
|
||||
|
||||
/// Runs one iteration: asks the implementation to look up a fixed tweet ID in
/// the loaded JSON and store its output in `result`. Returns whatever the
/// implementation's run() returns.
bool run(benchmark::State &) {
  // Only one return belongs here. The earlier literal ...900ULL was replaced
  // by ...904ULL; with both present the second one was unreachable.
  // NOTE(review): presumably ...904 is the exact 64-bit tweet ID and ...900 a
  // rounded form of it - confirm against the benchmark data.
  return this->implementation.run(this->json, 505874901689851904ULL, result);
}
|
||||
|
||||
template<typename R>
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_SAJSON
|
||||
|
||||
#include "find_tweet.h"
|
||||
|
||||
namespace find_tweet {
|
||||
|
||||
struct sajson {
|
||||
size_t ast_buffer_size{0};
|
||||
size_t *ast_buffer{nullptr};
|
||||
simdjson_really_inline std::string_view get_string_view(const ::sajson::value &obj, std::string_view key) {
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field is not a string"; }
|
||||
return { val.as_cstring(), val.get_string_length() };
|
||||
}
|
||||
simdjson_really_inline uint64_t get_str_uint64(const ::sajson::value &obj, std::string_view key) {
|
||||
// Since sajson only supports 53-bit numbers, and IDs in twitter.json can be > 53 bits, we read the corresponding id_str and parse that.
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field not a string"; }
|
||||
auto str = val.as_cstring();
|
||||
char *endptr;
|
||||
uint64_t result = strtoull(str, &endptr, 10);
|
||||
if (endptr != &str[val.get_string_length()]) { throw "field is a string, but not an integer string"; }
|
||||
return result;
|
||||
}
|
||||
|
||||
bool run(simdjson::padded_string &json, uint64_t find_id, std::string_view &result) {
|
||||
if (!ast_buffer) {
|
||||
ast_buffer_size = json.size();
|
||||
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t));
|
||||
}
|
||||
auto doc = ::sajson::parse(
|
||||
::sajson::bounded_allocation(ast_buffer, ast_buffer_size),
|
||||
::sajson::mutable_string_view(json.size(), json.data())
|
||||
);
|
||||
if (!doc.is_valid()) { return false; }
|
||||
|
||||
auto root = doc.get_root();
|
||||
if (root.get_type() != ::sajson::TYPE_OBJECT) { printf("a\n"); return false; }
|
||||
auto statuses = root.get_value_of_key({"statuses", strlen("statuses")});
|
||||
if (statuses.get_type() != ::sajson::TYPE_ARRAY) { return false; }
|
||||
|
||||
for (size_t i=0; i<statuses.get_length(); i++) {
|
||||
auto tweet = statuses.get_array_element(i);
|
||||
if (tweet.get_type() != ::sajson::TYPE_OBJECT) { printf("b\n"); return false; }
|
||||
// TODO if there is a way to get the raw string, it might be faster to iota find_id and then
|
||||
// compare it to each id_str, instead of parsing each int and comparing to find_id.
|
||||
if (get_str_uint64(tweet, "id_str") == find_id) {
|
||||
result = get_string_view(tweet, "text");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(find_tweet, sajson)->UseManualTime();
|
||||
|
||||
} // namespace find_tweet
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_SAJSON
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_SAJSON
|
||||
|
||||
#include "kostya.h"
|
||||
|
||||
namespace kostya {
|
||||
|
||||
struct sajson {
|
||||
size_t ast_buffer_size{0};
|
||||
size_t *ast_buffer{nullptr};
|
||||
|
||||
simdjson_really_inline double get_double(const ::sajson::value &obj, std::string_view key) {
|
||||
using namespace sajson;
|
||||
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
switch (val.get_type()) {
|
||||
case TYPE_INTEGER:
|
||||
case TYPE_DOUBLE:
|
||||
return val.get_number_value();
|
||||
default:
|
||||
throw "field not double";
|
||||
}
|
||||
}
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||
using namespace sajson;
|
||||
|
||||
if (!ast_buffer) {
|
||||
ast_buffer_size = json.size();
|
||||
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t));
|
||||
}
|
||||
auto doc = parse(
|
||||
bounded_allocation(ast_buffer, ast_buffer_size),
|
||||
mutable_string_view(json.size(), json.data())
|
||||
);
|
||||
if (!doc.is_valid()) { return false; }
|
||||
|
||||
auto root = doc.get_root();
|
||||
if (root.get_type() != TYPE_OBJECT) { return false; }
|
||||
auto points = root.get_value_of_key({"coordinates", strlen("coordinates")});
|
||||
if (points.get_type() != TYPE_ARRAY) { return false; }
|
||||
|
||||
for (size_t i=0; i<points.get_length(); i++) {
|
||||
auto point = points.get_array_element(i);
|
||||
if (point.get_type() != TYPE_OBJECT) { return false; }
|
||||
result.emplace_back(kostya::point{
|
||||
get_double(point, "x"),
|
||||
get_double(point, "y"),
|
||||
get_double(point, "z")
|
||||
});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(kostya, sajson)->UseManualTime();
|
||||
|
||||
} // namespace kostya
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_SAJSON
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_SAJSON
|
||||
|
||||
#include "large_random.h"
|
||||
|
||||
namespace large_random {
|
||||
|
||||
struct sajson {
|
||||
size_t ast_buffer_size{0};
|
||||
size_t *ast_buffer{nullptr};
|
||||
|
||||
simdjson_really_inline double get_double(const ::sajson::value &obj, std::string_view key) {
|
||||
using namespace sajson;
|
||||
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
switch (val.get_type()) {
|
||||
case TYPE_INTEGER:
|
||||
case TYPE_DOUBLE:
|
||||
return val.get_number_value();
|
||||
default:
|
||||
throw "field not double";
|
||||
}
|
||||
}
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<point> &result) {
|
||||
using namespace sajson;
|
||||
|
||||
if (!ast_buffer) {
|
||||
ast_buffer_size = json.size();
|
||||
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t));
|
||||
}
|
||||
auto doc = parse(
|
||||
bounded_allocation(ast_buffer, ast_buffer_size),
|
||||
mutable_string_view(json.size(), json.data())
|
||||
);
|
||||
if (!doc.is_valid()) { return false; }
|
||||
|
||||
auto points = doc.get_root();
|
||||
if (points.get_type() != TYPE_ARRAY) { return false; }
|
||||
|
||||
for (size_t i=0; i<points.get_length(); i++) {
|
||||
auto point = points.get_array_element(i);
|
||||
if (point.get_type() != TYPE_OBJECT) { return false; }
|
||||
result.emplace_back(large_random::point{
|
||||
get_double(point, "x"),
|
||||
get_double(point, "y"),
|
||||
get_double(point, "z")
|
||||
});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(large_random, sajson)->UseManualTime();
|
||||
|
||||
} // namespace large_random
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_SAJSON
|
||||
|
|
@ -0,0 +1,94 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_SAJSON
|
||||
|
||||
#include "partial_tweets.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
struct sajson {
|
||||
size_t ast_buffer_size{0};
|
||||
size_t *ast_buffer{nullptr};
|
||||
simdjson_really_inline std::string_view get_string_view(const ::sajson::value &obj, std::string_view key) {
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field is not a string"; }
|
||||
return { val.as_cstring(), val.get_string_length() };
|
||||
}
|
||||
simdjson_really_inline uint64_t get_uint52(const ::sajson::value &obj, std::string_view key) {
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
switch (val.get_type()) {
|
||||
case ::sajson::TYPE_INTEGER: {
|
||||
int64_t result;
|
||||
if (!val.get_int53_value(&result) || result < 0) { throw "field is not uint52"; }
|
||||
return uint64_t(result);
|
||||
}
|
||||
default:
|
||||
throw "field not integer";
|
||||
}
|
||||
}
|
||||
simdjson_really_inline uint64_t get_str_uint64(const ::sajson::value &obj, std::string_view key) {
|
||||
// Since sajson only supports 53-bit numbers, and IDs in twitter.json can be > 53 bits, we read the corresponding id_str and parse that.
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field not a string"; }
|
||||
auto str = val.as_cstring();
|
||||
char *endptr;
|
||||
uint64_t result = strtoull(str, &endptr, 10);
|
||||
if (endptr != &str[val.get_string_length()]) { throw "field is a string, but not an integer string"; }
|
||||
return result;
|
||||
}
|
||||
simdjson_really_inline uint64_t get_nullable_str_uint64(const ::sajson::value &obj, std::string_view key) {
|
||||
auto val = obj.get_value_of_key({key.data(), key.length()});
|
||||
if (val.get_type() == ::sajson::TYPE_NULL) { return 0; }
|
||||
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field not a string"; }
|
||||
auto str = val.as_cstring();
|
||||
char *endptr;
|
||||
uint64_t result = strtoull(str, &endptr, 10);
|
||||
if (endptr != &str[val.get_string_length()]) { throw "field is a string, but not an integer string"; }
|
||||
return result;
|
||||
}
|
||||
simdjson_really_inline partial_tweets::twitter_user get_user(const ::sajson::value &obj, std::string_view key) {
|
||||
auto user = obj.get_value_of_key({key.data(), key.length()});
|
||||
if (user.get_type() != ::sajson::TYPE_OBJECT) { throw "user is not an object"; }
|
||||
return { get_str_uint64(user, "id_str"), get_string_view(user, "screen_name") };
|
||||
}
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<tweet> &result) {
|
||||
if (!ast_buffer) {
|
||||
ast_buffer_size = json.size();
|
||||
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t));
|
||||
}
|
||||
auto doc = ::sajson::parse(
|
||||
::sajson::bounded_allocation(ast_buffer, ast_buffer_size),
|
||||
::sajson::mutable_string_view(json.size(), json.data())
|
||||
);
|
||||
if (!doc.is_valid()) { return false; }
|
||||
|
||||
auto root = doc.get_root();
|
||||
if (root.get_type() != ::sajson::TYPE_OBJECT) { return false; }
|
||||
auto statuses = root.get_value_of_key({"statuses", strlen("statuses")});
|
||||
if (statuses.get_type() != ::sajson::TYPE_ARRAY) { return false; }
|
||||
|
||||
for (size_t i=0; i<statuses.get_length(); i++) {
|
||||
auto tweet = statuses.get_array_element(i);
|
||||
if (tweet.get_type() != ::sajson::TYPE_OBJECT) { return false; }
|
||||
result.emplace_back(partial_tweets::tweet{
|
||||
get_string_view(tweet, "created_at"),
|
||||
get_str_uint64 (tweet, "id_str"),
|
||||
get_string_view(tweet, "text"),
|
||||
get_nullable_str_uint64(tweet, "in_reply_to_status_id_str"),
|
||||
get_user (tweet, "user"),
|
||||
get_uint52 (tweet, "retweet_count"),
|
||||
get_uint52 (tweet, "favorite_count")
|
||||
});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
BENCHMARK_TEMPLATE(partial_tweets, sajson)->UseManualTime();
|
||||
|
||||
} // namespace partial_tweets
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_SAJSON
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue