Add RapidJSON and nlohmann_json SAX to distinct_user_id benchmark (#1593)
* Add rapidjson_sax for distinct_user_id * Add nlohmann_json_sax.h for distinct_user_id * Add flags for RapidJSON. * Fix revisions. * Fix revisions again. * Replace strcpy with memcpy. Increase performance fix.
This commit is contained in:
parent
5d2eca2363
commit
73b510225f
|
@ -55,7 +55,9 @@ SIMDJSON_POP_DISABLE_WARNINGS
|
|||
#include "distinct_user_id/yyjson.h"
|
||||
#include "distinct_user_id/sajson.h"
|
||||
#include "distinct_user_id/rapidjson.h"
|
||||
#include "distinct_user_id/rapidjson_sax.h"
|
||||
#include "distinct_user_id/nlohmann_json.h"
|
||||
#include "distinct_user_id/nlohmann_json_sax.h"
|
||||
|
||||
#include "find_tweet/simdjson_dom.h"
|
||||
#include "find_tweet/simdjson_ondemand.h"
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
|
||||
|
||||
#include "distinct_user_id.h"
|
||||
#include <string.h>
|
||||
namespace distinct_user_id {
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
struct nlohmann_json_sax {
|
||||
struct Handler : json::json_sax_t
|
||||
{
|
||||
std::vector<uint64_t>& result;
|
||||
bool user = false;
|
||||
bool user_id = false;
|
||||
Handler(std::vector<uint64_t> &r) : result(r) { }
|
||||
|
||||
bool key(string_t& val) override {
|
||||
// Assume that valid user/id pairs appear only once in main array of user objects
|
||||
if (user) { // If already found user object, find id key
|
||||
if (val.compare("id") == 0) { user_id = true; }
|
||||
}
|
||||
else if (val.compare("user") == 0) { user = true; } // Otherwise, find user object
|
||||
return true;
|
||||
}
|
||||
bool number_unsigned(number_unsigned_t val) override {
|
||||
if (user_id) {
|
||||
result.emplace_back(val);
|
||||
user = false;
|
||||
user_id = false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Irrelevant events
|
||||
bool null() override { return true; }
|
||||
bool boolean(bool val) override { return true; }
|
||||
bool number_float(number_float_t val, const string_t& s) override { return true; }
|
||||
bool number_integer(number_integer_t val) override { return true; }
|
||||
bool string(string_t& val) override { return true; }
|
||||
bool start_object(std::size_t elements) override { return true; }
|
||||
bool end_object() override { return true; }
|
||||
bool start_array(std::size_t elements) override { return true; }
|
||||
bool end_array() override { return true; }
|
||||
bool binary(json::binary_t& val) override { return true; }
|
||||
bool parse_error(std::size_t position, const std::string& last_token, const json::exception& ex) override { return false; }
|
||||
}; // Handler
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<uint64_t> &result) {
|
||||
Handler handler(result);
|
||||
json::sax_parse(json.data(), &handler);
|
||||
|
||||
return true;
|
||||
}
|
||||
}; // nlohmann_json_sax
|
||||
// Register the distinct_user_id benchmark instantiated with the nlohmann SAX
// implementation; UseManualTime() makes the benchmark report manually set timings.
BENCHMARK_TEMPLATE(distinct_user_id, nlohmann_json_sax)->UseManualTime();
|
||||
} // namespace distinct_user_id
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON
|
|
@ -0,0 +1,61 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef SIMDJSON_COMPETITION_RAPIDJSON
|
||||
|
||||
#include "distinct_user_id.h"
|
||||
#include <string.h>
|
||||
namespace distinct_user_id {
|
||||
|
||||
using namespace rapidjson;
|
||||
|
||||
struct rapidjson_sax {
|
||||
struct Handler {
|
||||
std::vector<uint64_t>& result;
|
||||
bool user = false;
|
||||
bool user_id = false;
|
||||
Handler(std::vector<uint64_t> &r) : result(r) { }
|
||||
|
||||
bool Key(const char* key, SizeType length, bool copy) {
|
||||
// Assume that valid user/id pairs appear only once in main array of user objects
|
||||
if (user) { // If already found user object, find id key
|
||||
if ((length == 2) && memcmp(key,"id",2) == 0) { user_id = true; }
|
||||
}
|
||||
else if ((length == 4) && memcmp(key,"user",4) == 0) { user = true; } // Otherwise, find user object
|
||||
return true;
|
||||
}
|
||||
bool Uint(unsigned i) { // id values are treated as Uint (not Uint64) by the reader
|
||||
if (user_id) { // Getting id if previous key was "id" for a user
|
||||
result.emplace_back(i);
|
||||
user_id = false;
|
||||
user = false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Irrelevant events
|
||||
bool Null() { return true; }
|
||||
bool Bool(bool b) { return true; }
|
||||
bool Double(double d) { return true; }
|
||||
bool Int(int i) { return true; }
|
||||
bool Int64(int64_t i) { return true; }
|
||||
bool Uint64(uint64_t i) { return true; }
|
||||
bool RawNumber(const char* str, SizeType length, bool copy) { return true; }
|
||||
bool String(const char* str, SizeType length, bool copy) { return true; }
|
||||
bool StartObject() { return true; }
|
||||
bool EndObject(SizeType memberCount) { return true; }
|
||||
bool StartArray() { return true; }
|
||||
bool EndArray(SizeType elementCount) { return true; }
|
||||
}; // handler
|
||||
|
||||
bool run(simdjson::padded_string &json, std::vector<uint64_t> &result) {
|
||||
Reader reader;
|
||||
Handler handler(result);
|
||||
InsituStringStream ss(json.data());
|
||||
reader.Parse<kParseInsituFlag | kParseValidateEncodingFlag | kParseFullPrecisionFlag>(ss,handler);
|
||||
return true;
|
||||
}
|
||||
|
||||
}; // rapid_jason_sax
|
||||
// Register the distinct_user_id benchmark instantiated with the RapidJSON SAX
// implementation; UseManualTime() makes the benchmark report manually set timings.
BENCHMARK_TEMPLATE(distinct_user_id, rapidjson_sax)->UseManualTime();
|
||||
} // namespace distinct_user_id
|
||||
|
||||
#endif // SIMDJSON_COMPETITION_RAPIDJSON
|
Loading…
Reference in New Issue