From 66db102c7061c14a87934cf235d46e3ace99ce59 Mon Sep 17 00:00:00 2001 From: John Keiser Date: Wed, 6 Jan 2021 19:58:38 -0800 Subject: [PATCH] Use imprecise double comparison for sajson --- benchmark/distinct_user_id/distinct_user_id.h | 14 ++-- benchmark/find_tweet/find_tweet.h | 12 ++-- benchmark/json_benchmark/diff_results.h | 66 ++++++++++++------- benchmark/json_benchmark/point.h | 26 ++++++++ benchmark/kostya/kostya.h | 32 +++------ benchmark/kostya/nlohmann_json.h | 4 +- benchmark/kostya/rapidjson.h | 4 +- benchmark/kostya/sajson.h | 4 +- benchmark/kostya/simdjson_dom.h | 4 +- benchmark/kostya/simdjson_ondemand.h | 4 +- benchmark/kostya/yyjson.h | 4 +- benchmark/large_random/large_random.h | 26 ++------ benchmark/large_random/nlohmann_json.h | 4 +- benchmark/large_random/rapidjson.h | 4 +- benchmark/large_random/sajson.h | 4 +- benchmark/large_random/simdjson_dom.h | 4 +- benchmark/large_random/simdjson_ondemand.h | 4 +- .../simdjson_ondemand_unordered.h | 4 +- benchmark/large_random/yyjson.h | 4 +- benchmark/partial_tweets/partial_tweets.h | 12 ++-- 20 files changed, 145 insertions(+), 95 deletions(-) create mode 100644 benchmark/json_benchmark/point.h diff --git a/benchmark/distinct_user_id/distinct_user_id.h b/benchmark/distinct_user_id/distinct_user_id.h index faacdf84..029e9978 100644 --- a/benchmark/distinct_user_id/distinct_user_id.h +++ b/benchmark/distinct_user_id/distinct_user_id.h @@ -6,16 +6,18 @@ namespace distinct_user_id { +using namespace json_benchmark; + template -struct runner : public json_benchmark::file_runner { +struct runner : public file_runner { std::vector result{}; bool setup(benchmark::State &state) { - return this->load_json(state, json_benchmark::TWITTER_JSON); + return this->load_json(state, TWITTER_JSON); } bool before_run(benchmark::State &state) { - if (!json_benchmark::file_runner::before_run(state)) { return false; } + if (!file_runner::before_run(state)) { return false; } result.clear(); return true; } @@ -25,7 +27,7 @@ struct runner : public json_benchmark::file_runner { } bool after_run(benchmark::State &state) { - if (!json_benchmark::file_runner::after_run(state)) { return false; } + if (!file_runner::after_run(state)) { return false; } std::sort(result.begin(), result.end()); auto last = std::unique(result.begin(), result.end()); result.erase(last, result.end()); @@ -34,7 +36,7 @@ struct runner : public json_benchmark::file_runner { template bool diff(benchmark::State &state, runner &reference) { - return json_benchmark::diff_results(state, result, reference.result); + return diff_results(state, result, reference.result, diff_flags::NONE); } size_t items_per_iteration() { @@ -45,7 +47,7 @@ struct runner : public json_benchmark::file_runner { struct simdjson_dom; template simdjson_really_inline static void distinct_user_id(benchmark::State &state) { - json_benchmark::run_json_benchmark, runner>(state); + run_json_benchmark, runner>(state); } } // namespace distinct_user_id diff --git a/benchmark/find_tweet/find_tweet.h b/benchmark/find_tweet/find_tweet.h index 0679a0ca..39590cb7 100644 --- a/benchmark/find_tweet/find_tweet.h +++ b/benchmark/find_tweet/find_tweet.h @@ -5,16 +5,18 @@ namespace find_tweet { +using namespace json_benchmark; + template -struct runner : public json_benchmark::file_runner { +struct runner : public file_runner { typename I::StringType result; bool setup(benchmark::State &state) { - return this->load_json(state, json_benchmark::TWITTER_JSON); + return this->load_json(state, TWITTER_JSON); } bool before_run(benchmark::State &state) { - if (!json_benchmark::file_runner::before_run(state)) { return false; } + if (!file_runner::before_run(state)) { return false; } result = ""; return true; } @@ -25,14 +27,14 @@ struct runner : public json_benchmark::file_runner { template bool diff(benchmark::State &state, runner &reference) { - return json_benchmark::diff_results(state, result, reference.result); + return diff_results(state, result, reference.result, diff_flags::NONE); } }; struct simdjson_dom; template simdjson_really_inline static void find_tweet(benchmark::State &state) { - json_benchmark::run_json_benchmark, runner>(state); + run_json_benchmark, runner>(state); } } // namespace find_tweet diff --git a/benchmark/json_benchmark/diff_results.h b/benchmark/json_benchmark/diff_results.h index 2bc02195..749bdfa2 100644 --- a/benchmark/json_benchmark/diff_results.h +++ b/benchmark/json_benchmark/diff_results.h @@ -6,12 +6,17 @@ namespace json_benchmark { +enum class diff_flags { + NONE = 0, + IMPRECISE_FLOATS = 1 +}; + template -static bool diff_results(benchmark::State &state, const T &result, const U &reference); +static bool diff_results(benchmark::State &state, const T &result, const U &reference, diff_flags flags); template struct result_differ { - static bool diff(benchmark::State &state, const T &result, const U &reference) { + static bool diff(benchmark::State &state, const T &result, const U &reference, diff_flags flags) { if (result != reference) { std::stringstream str; str << "result incorrect: " << result << " ... reference: " << reference; @@ -22,30 +27,13 @@ struct result_differ { } }; -template<> -bool result_differ::diff(benchmark::State &state, const double &result, const double &reference) { - if (result != reference) { - std::stringstream str; - // We print it out using full precision. - constexpr auto precision = std::numeric_limits::max_digits10; - str << std::setprecision(precision); - str << "incorrect double result: " << std::endl; - str << " result: " << std::left << std::setw(precision+2) << result << " (hexfloat " << std::hexfloat << result << ")" << std::defaultfloat << std::endl; - str << "reference: " << std::left << std::setw(precision+2) << reference << " (hexfloat " << std::hexfloat << reference << ")" << std::defaultfloat << std::endl; - state.SkipWithError(str.str().data()); - return false; - } - return true; -} - - template struct result_differ, std::vector> { - static bool diff(benchmark::State &state, const std::vector &result, const std::vector &reference) { + static bool diff(benchmark::State &state, const std::vector &result, const std::vector &reference, diff_flags flags) { auto result_iter = result.begin(); auto reference_iter = reference.begin(); while (result_iter != result.end() && reference_iter != reference.end()) { - if (!diff_results(state, *result_iter, *reference_iter)) { return false; } + if (!diff_results(state, *result_iter, *reference_iter, flags)) { return false; } result_iter++; reference_iter++; } @@ -64,9 +52,41 @@ struct result_differ, std::vector> { } }; +template<> +struct result_differ { + static bool diff(benchmark::State &state, const double &result, const double &reference, diff_flags flags) { + bool different; + if (int(flags) & int(diff_flags::IMPRECISE_FLOATS)) { + different = f64_ulp_dist(result, reference) > 1; + } else { + different = result != reference; + } + if (different) { + std::stringstream str; + // We print it out using full precision. + constexpr auto precision = std::numeric_limits::max_digits10; + str << std::setprecision(precision); + str << "incorrect double result: " << std::endl; + str << " result: " << std::left << std::setw(precision+2) << result << " (hexfloat " << std::hexfloat << result << ")" << std::defaultfloat << std::endl; + str << "reference: " << std::left << std::setw(precision+2) << reference << " (hexfloat " << std::hexfloat << reference << ")" << std::defaultfloat << std::endl; + state.SkipWithError(str.str().data()); + } + return true; + } + + static uint64_t f64_ulp_dist(double a, double b) { + uint64_t ua, ub; + std::memcpy(&ua, &a, sizeof(ua)); + std::memcpy(&ub, &b, sizeof(ub)); + if ((int64_t)(ub ^ ua) >= 0) + return (int64_t)(ua - ub) >= 0 ? (ua - ub) : (ub - ua); + return ua + ub + 0x80000000; + } +}; + template -static bool diff_results(benchmark::State &state, const T &result, const U &reference) { - return result_differ::diff(state, result, reference); +static bool diff_results(benchmark::State &state, const T &result, const U &reference, diff_flags flags) { + return result_differ::diff(state, result, reference, flags); } } // namespace json_benchmark diff --git a/benchmark/json_benchmark/point.h b/benchmark/json_benchmark/point.h new file mode 100644 index 00000000..579c6949 --- /dev/null +++ b/benchmark/json_benchmark/point.h @@ -0,0 +1,26 @@ +#pragma once + +#include "diff_results.h" + +namespace json_benchmark { + +struct point { + double x; + double y; + double z; +}; + +template<> +struct result_differ { + static bool diff(benchmark::State &state, const point &result, const point &reference, diff_flags flags) { + return diff_results(state, result.x, reference.x, flags) + && diff_results(state, result.y, reference.y, flags) + && diff_results(state, result.z, reference.z, flags); + } +}; + +static simdjson_unused std::ostream &operator<<(std::ostream &o, const point &p) { + return o << p.x << "," << p.y << "," << p.z << std::endl; +} + +} // namespace json_benchmark diff --git a/benchmark/kostya/kostya.h b/benchmark/kostya/kostya.h index c7c9de30..6776f6df 100644 --- a/benchmark/kostya/kostya.h +++ b/benchmark/kostya/kostya.h @@ -1,31 +1,24 @@ #pragma once #include "json_benchmark/string_runner.h" +#include "json_benchmark/point.h" #include #include namespace kostya { +using namespace json_benchmark; + static const simdjson::padded_string &get_built_json_array(); -struct point { - double x; - double y; - double z; -}; - -simdjson_unused static std::ostream &operator<<(std::ostream &o, const point &p) { - return o << p.x << "," << p.y << "," << p.z << std::endl; -} - template -struct runner : public json_benchmark::string_runner { +struct runner : public string_runner { std::vector result; - runner() : json_benchmark::string_runner(get_built_json_array()) {} + runner() : string_runner(get_built_json_array()) {} bool before_run(benchmark::State &state) { - if (!json_benchmark::string_runner::before_run(state)) { return false; } + if (!string_runner::before_run(state)) { return false; } result.clear(); return true; } @@ -36,7 +29,7 @@ struct runner : public json_benchmark::string_runner { template bool diff(benchmark::State &state, runner &reference) { - return json_benchmark::diff_results(state, result, reference.result); + return diff_results(state, result, reference.result, I::DiffFlags); } size_t items_per_iteration() { @@ -87,16 +80,7 @@ static const simdjson::padded_string &get_built_json_array() { struct simdjson_dom; template simdjson_really_inline static void kostya(benchmark::State &state) { - json_benchmark::run_json_benchmark, runner>(state); + run_json_benchmark, runner>(state); } } // namespace kostya - -namespace json_benchmark { - template<> - bool result_differ::diff(benchmark::State &state, const kostya::point &result, const kostya::point &reference) { - return diff_results(state, result.x, reference.x) - && diff_results(state, result.y, reference.y) - && diff_results(state, result.z, reference.z); - } -} diff --git a/benchmark/kostya/nlohmann_json.h b/benchmark/kostya/nlohmann_json.h index e9661d13..9552edca 100644 --- a/benchmark/kostya/nlohmann_json.h +++ b/benchmark/kostya/nlohmann_json.h @@ -7,10 +7,12 @@ namespace kostya { struct nlohmann_json { + static constexpr diff_flags DiffFlags = diff_flags::NONE; + bool run(simdjson::padded_string &json, std::vector &result) { auto root = nlohmann::json::parse(json.data(), json.data() + json.size()); for (auto point : root["coordinates"]) { - result.emplace_back(kostya::point{point["x"], point["y"], point["z"]}); + result.emplace_back(json_benchmark::point{point["x"], point["y"], point["z"]}); } return true; } diff --git a/benchmark/kostya/rapidjson.h b/benchmark/kostya/rapidjson.h index 34d4c7ba..09bec083 100644 --- a/benchmark/kostya/rapidjson.h +++ b/benchmark/kostya/rapidjson.h @@ -9,6 +9,8 @@ namespace kostya { using namespace rapidjson; struct rapidjson_base { + static constexpr diff_flags DiffFlags = diff_flags::NONE; + Document doc; simdjson_really_inline double get_double(Value &object, std::string_view key) { @@ -26,7 +28,7 @@ struct rapidjson_base { if (!coords->value.IsArray()) { return false; } for (auto &coord : coords->value.GetArray()) { if (!coord.IsObject()) { return false; } - result.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")}); + result.emplace_back(json_benchmark::point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")}); } return true; diff --git a/benchmark/kostya/sajson.h b/benchmark/kostya/sajson.h index 4773d6e4..3600badf 100644 --- a/benchmark/kostya/sajson.h +++ b/benchmark/kostya/sajson.h @@ -7,6 +7,8 @@ namespace kostya { struct sajson { + static constexpr diff_flags DiffFlags = diff_flags::IMPRECISE_FLOATS; + size_t ast_buffer_size{0}; size_t *ast_buffer{nullptr}; @@ -44,7 +46,7 @@ struct sajson { for (size_t i=0; i &result) { for (auto point : parser.parse(json)["coordinates"]) { - result.emplace_back(kostya::point{point["x"], point["y"], point["z"]}); + result.emplace_back(json_benchmark::point{point["x"], point["y"], point["z"]}); } return true; } diff --git a/benchmark/kostya/simdjson_ondemand.h b/benchmark/kostya/simdjson_ondemand.h index c7df0973..7f8c544e 100644 --- a/benchmark/kostya/simdjson_ondemand.h +++ b/benchmark/kostya/simdjson_ondemand.h @@ -10,12 +10,14 @@ using namespace simdjson; using namespace simdjson::builtin; struct simdjson_ondemand { + static constexpr diff_flags DiffFlags = diff_flags::NONE; + ondemand::parser parser{}; bool run(simdjson::padded_string &json, std::vector &result) { auto doc = parser.iterate(json); for (ondemand::object point : doc.find_field("coordinates")) { - result.emplace_back(kostya::point{point.find_field("x"), point.find_field("y"), point.find_field("z")}); + result.emplace_back(json_benchmark::point{point.find_field("x"), point.find_field("y"), point.find_field("z")}); } return true; } diff --git a/benchmark/kostya/yyjson.h b/benchmark/kostya/yyjson.h index 20dfb5cc..d98dd39d 100644 --- a/benchmark/kostya/yyjson.h +++ b/benchmark/kostya/yyjson.h @@ -7,6 +7,8 @@ namespace kostya { struct yyjson_base { + static constexpr diff_flags DiffFlags = diff_flags::NONE; + simdjson_really_inline double get_double(yyjson_val *obj, std::string_view key) { yyjson_val *val = yyjson_obj_getn(obj, key.data(), key.length()); if (!val) { throw "missing point field!"; } @@ -35,7 +37,7 @@ struct yyjson_base { yyjson_val *coord; yyjson_arr_foreach(coords, idx, max, coord) { if (!yyjson_is_obj(coord)) { return false; } - result.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")}); + result.emplace_back(json_benchmark::point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")}); } return true; diff --git a/benchmark/large_random/large_random.h b/benchmark/large_random/large_random.h index 99c2d26f..039884f4 100644 --- a/benchmark/large_random/large_random.h +++ b/benchmark/large_random/large_random.h @@ -1,30 +1,27 @@ #pragma once #include "json_benchmark/string_runner.h" +#include "json_benchmark/point.h" #include namespace large_random { static const simdjson::padded_string &get_built_json_array(); -struct point { - double x; - double y; - double z; -}; +using namespace json_benchmark; simdjson_unused static std::ostream &operator<<(std::ostream &o, const point &p) { return o << p.x << "," << p.y << "," << p.z << std::endl; } template -struct runner : public json_benchmark::string_runner { +struct runner : public string_runner { std::vector result; - runner() : json_benchmark::string_runner(get_built_json_array()) {} + runner() : string_runner(get_built_json_array()) {} bool before_run(benchmark::State &state) { - if (!json_benchmark::string_runner::before_run(state)) { return false; } + if (!string_runner::before_run(state)) { return false; } result.clear(); return true; } @@ -35,7 +32,7 @@ struct runner : public json_benchmark::string_runner { template bool diff(benchmark::State &state, runner &reference) { - return json_benchmark::diff_results(state, result, reference.result); + return diff_results(state, result, reference.result, I::DiffFlags); } size_t items_per_iteration() { @@ -70,16 +67,7 @@ static const simdjson::padded_string &get_built_json_array() { struct simdjson_dom; template static void large_random(benchmark::State &state) { - json_benchmark::run_json_benchmark, runner>(state); + run_json_benchmark, runner>(state); } } // namespace large_random - -namespace json_benchmark { - template<> - bool result_differ::diff(benchmark::State &state, const large_random::point &result, const large_random::point &reference) { - return diff_results(state, result.x, reference.x) - && diff_results(state, result.y, reference.y) - && diff_results(state, result.z, reference.z); - } -} diff --git a/benchmark/large_random/nlohmann_json.h b/benchmark/large_random/nlohmann_json.h index cf9fe030..754a53f6 100644 --- a/benchmark/large_random/nlohmann_json.h +++ b/benchmark/large_random/nlohmann_json.h @@ -7,9 +7,11 @@ namespace large_random { struct nlohmann_json { + static constexpr diff_flags DiffFlags = diff_flags::NONE; + bool run(simdjson::padded_string &json, std::vector &result) { for (auto point : nlohmann::json::parse(json.data(), json.data() + json.size())) { - result.emplace_back(large_random::point{point["x"], point["y"], point["z"]}); + result.emplace_back(json_benchmark::point{point["x"], point["y"], point["z"]}); } return true; } diff --git a/benchmark/large_random/rapidjson.h b/benchmark/large_random/rapidjson.h index 47fb5885..5f667bd3 100644 --- a/benchmark/large_random/rapidjson.h +++ b/benchmark/large_random/rapidjson.h @@ -9,6 +9,8 @@ namespace large_random { using namespace rapidjson; struct rapidjson_base { + static constexpr diff_flags DiffFlags = diff_flags::NONE; + Document doc; simdjson_really_inline double get_double(Value &object, std::string_view key) { @@ -23,7 +25,7 @@ struct rapidjson_base { if (!coords.IsArray()) { return false; } for (auto &coord : coords.GetArray()) { if (!coord.IsObject()) { return false; } - result.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")}); + result.emplace_back(json_benchmark::point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")}); } return true; diff --git a/benchmark/large_random/sajson.h b/benchmark/large_random/sajson.h index 94f7dfff..5aad5b8f 100644 --- a/benchmark/large_random/sajson.h +++ b/benchmark/large_random/sajson.h @@ -7,6 +7,8 @@ namespace large_random { struct sajson { + static constexpr diff_flags DiffFlags = diff_flags::IMPRECISE_FLOATS; + size_t ast_buffer_size{0}; size_t *ast_buffer{nullptr}; @@ -42,7 +44,7 @@ struct sajson { for (size_t i=0; i &result) { for (auto point : parser.parse(json)) { - result.emplace_back(large_random::point{point["x"], point["y"], point["z"]}); + result.emplace_back(json_benchmark::point{point["x"], point["y"], point["z"]}); } return true; } diff --git a/benchmark/large_random/simdjson_ondemand.h b/benchmark/large_random/simdjson_ondemand.h index 50d11cb9..acfafa01 100644 --- a/benchmark/large_random/simdjson_ondemand.h +++ b/benchmark/large_random/simdjson_ondemand.h @@ -10,12 +10,14 @@ using namespace simdjson; using namespace simdjson::builtin; struct simdjson_ondemand { + static constexpr diff_flags DiffFlags = diff_flags::NONE; + ondemand::parser parser{}; bool run(simdjson::padded_string &json, std::vector &result) { auto doc = parser.iterate(json); for (ondemand::object coord : doc) { - result.emplace_back(point{coord.find_field("x"), coord.find_field("y"), coord.find_field("z")}); + result.emplace_back(json_benchmark::point{coord.find_field("x"), coord.find_field("y"), coord.find_field("z")}); } return true; } diff --git a/benchmark/large_random/simdjson_ondemand_unordered.h b/benchmark/large_random/simdjson_ondemand_unordered.h index e7b72bba..fd0d9c97 100644 --- a/benchmark/large_random/simdjson_ondemand_unordered.h +++ b/benchmark/large_random/simdjson_ondemand_unordered.h @@ -10,12 +10,14 @@ using namespace simdjson; using namespace simdjson::builtin; struct simdjson_ondemand_unordered { + static constexpr diff_flags DiffFlags = diff_flags::NONE; + ondemand::parser parser{}; bool run(simdjson::padded_string &json, std::vector &result) { auto doc = parser.iterate(json); for (ondemand::object coord : doc) { - result.emplace_back(large_random::point{coord["x"], coord["y"], coord["z"]}); + result.emplace_back(json_benchmark::point{coord["x"], coord["y"], coord["z"]}); } return true; } diff --git a/benchmark/large_random/yyjson.h b/benchmark/large_random/yyjson.h index 72fadfdb..444104d4 100644 --- a/benchmark/large_random/yyjson.h +++ b/benchmark/large_random/yyjson.h @@ -7,6 +7,8 @@ namespace large_random { struct yyjson_base { + static constexpr diff_flags DiffFlags = diff_flags::NONE; + simdjson_really_inline double get_double(yyjson_val *obj, std::string_view key) { yyjson_val *val = yyjson_obj_getn(obj, key.data(), key.length()); if (!val) { throw "missing point field!"; } @@ -34,7 +36,7 @@ struct yyjson_base { yyjson_val *coord; yyjson_arr_foreach(coords, idx, max, coord) { if (!yyjson_is_obj(coord)) { return false; } - result.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")}); + result.emplace_back(json_benchmark::point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")}); } return true; diff --git a/benchmark/partial_tweets/partial_tweets.h b/benchmark/partial_tweets/partial_tweets.h index ebb63178..3912184f 100644 --- a/benchmark/partial_tweets/partial_tweets.h +++ b/benchmark/partial_tweets/partial_tweets.h @@ -7,16 +7,18 @@ namespace partial_tweets { +using namespace json_benchmark; + template -struct runner : public json_benchmark::file_runner { +struct runner : public file_runner { std::vector> result{}; bool setup(benchmark::State &state) { - return this->load_json(state, json_benchmark::TWITTER_JSON); + return this->load_json(state, TWITTER_JSON); } bool before_run(benchmark::State &state) { - if (!json_benchmark::file_runner::before_run(state)) { return false; } + if (!file_runner::before_run(state)) { return false; } result.clear(); return true; } @@ -27,7 +29,7 @@ struct runner : public json_benchmark::file_runner { template bool diff(benchmark::State &state, runner &reference) { - return json_benchmark::diff_results(state, result, reference.result); + return diff_results(state, result, reference.result, diff_flags::NONE); } size_t items_per_iteration() { @@ -38,7 +40,7 @@ struct runner : public json_benchmark::file_runner { struct simdjson_dom; template simdjson_really_inline static void partial_tweets(benchmark::State &state) { - json_benchmark::run_json_benchmark, runner>(state); + run_json_benchmark, runner>(state); } } // namespace partial_tweets