Use imprecise double comparison for sajson

This commit is contained in:
John Keiser 2021-01-06 19:58:38 -08:00
parent ab859f7952
commit 66db102c70
20 changed files with 145 additions and 95 deletions

View File

@ -6,16 +6,18 @@
namespace distinct_user_id {
using namespace json_benchmark;
template<typename I>
struct runner : public json_benchmark::file_runner<I> {
struct runner : public file_runner<I> {
std::vector<uint64_t> result{};
bool setup(benchmark::State &state) {
return this->load_json(state, json_benchmark::TWITTER_JSON);
return this->load_json(state, TWITTER_JSON);
}
bool before_run(benchmark::State &state) {
if (!json_benchmark::file_runner<I>::before_run(state)) { return false; }
if (!file_runner<I>::before_run(state)) { return false; }
result.clear();
return true;
}
@ -25,7 +27,7 @@ struct runner : public json_benchmark::file_runner<I> {
}
bool after_run(benchmark::State &state) {
if (!json_benchmark::file_runner<I>::after_run(state)) { return false; }
if (!file_runner<I>::after_run(state)) { return false; }
std::sort(result.begin(), result.end());
auto last = std::unique(result.begin(), result.end());
result.erase(last, result.end());
@ -34,7 +36,7 @@ struct runner : public json_benchmark::file_runner<I> {
template<typename R>
bool diff(benchmark::State &state, runner<R> &reference) {
return json_benchmark::diff_results(state, result, reference.result);
return diff_results(state, result, reference.result, diff_flags::NONE);
}
size_t items_per_iteration() {
@ -45,7 +47,7 @@ struct runner : public json_benchmark::file_runner<I> {
struct simdjson_dom;
template<typename I> simdjson_really_inline static void distinct_user_id(benchmark::State &state) {
json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
}
} // namespace distinct_user_id

View File

@ -5,16 +5,18 @@
namespace find_tweet {
using namespace json_benchmark;
template<typename I>
struct runner : public json_benchmark::file_runner<I> {
struct runner : public file_runner<I> {
typename I::StringType result;
bool setup(benchmark::State &state) {
return this->load_json(state, json_benchmark::TWITTER_JSON);
return this->load_json(state, TWITTER_JSON);
}
bool before_run(benchmark::State &state) {
if (!json_benchmark::file_runner<I>::before_run(state)) { return false; }
if (!file_runner<I>::before_run(state)) { return false; }
result = "";
return true;
}
@ -25,14 +27,14 @@ struct runner : public json_benchmark::file_runner<I> {
template<typename R>
bool diff(benchmark::State &state, runner<R> &reference) {
return json_benchmark::diff_results(state, result, reference.result);
return diff_results(state, result, reference.result, diff_flags::NONE);
}
};
struct simdjson_dom;
template<typename I> simdjson_really_inline static void find_tweet(benchmark::State &state) {
json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
}
} // namespace find_tweet

View File

@ -6,12 +6,17 @@
namespace json_benchmark {
/**
 * Bit flags that adjust how diff_results() compares a result to its reference.
 *
 * The values are tested with a bitwise AND after conversion to int, so every
 * enumerator other than NONE must occupy its own bit.
 */
enum class diff_flags {
  /** No relaxation: values must compare equal. */
  NONE = 0,
  /** Doubles are compared by ULP distance instead of with operator!=. */
  IMPRECISE_FLOATS = 1 << 0
};
template<typename T, typename U>
static bool diff_results(benchmark::State &state, const T &result, const U &reference);
static bool diff_results(benchmark::State &state, const T &result, const U &reference, diff_flags flags);
template<typename T, typename U>
struct result_differ {
static bool diff(benchmark::State &state, const T &result, const U &reference) {
static bool diff(benchmark::State &state, const T &result, const U &reference, diff_flags flags) {
if (result != reference) {
std::stringstream str;
str << "result incorrect: " << result << " ... reference: " << reference;
@ -22,30 +27,13 @@ struct result_differ {
}
};
template<>
bool result_differ<double, double>::diff(benchmark::State &state, const double &result, const double &reference) {
if (result != reference) {
std::stringstream str;
// We print it out using full precision.
constexpr auto precision = std::numeric_limits<double>::max_digits10;
str << std::setprecision(precision);
str << "incorrect double result: " << std::endl;
str << " result: " << std::left << std::setw(precision+2) << result << " (hexfloat " << std::hexfloat << result << ")" << std::defaultfloat << std::endl;
str << "reference: " << std::left << std::setw(precision+2) << reference << " (hexfloat " << std::hexfloat << reference << ")" << std::defaultfloat << std::endl;
state.SkipWithError(str.str().data());
return false;
}
return true;
}
template<typename T, typename U>
struct result_differ<std::vector<T>, std::vector<U>> {
static bool diff(benchmark::State &state, const std::vector<T> &result, const std::vector<U> &reference) {
static bool diff(benchmark::State &state, const std::vector<T> &result, const std::vector<U> &reference, diff_flags flags) {
auto result_iter = result.begin();
auto reference_iter = reference.begin();
while (result_iter != result.end() && reference_iter != reference.end()) {
if (!diff_results(state, *result_iter, *reference_iter)) { return false; }
if (!diff_results(state, *result_iter, *reference_iter, flags)) { return false; }
result_iter++;
reference_iter++;
}
@ -64,9 +52,41 @@ struct result_differ<std::vector<T>, std::vector<U>> {
}
};
template<>
struct result_differ<double, double> {
/**
 * Compares a double produced by the implementation under test with the
 * reference value and reports any mismatch to the benchmark.
 *
 * @param state     Benchmark state; receives the error text via SkipWithError()
 *                  when the values differ.
 * @param result    Value produced by the implementation under test.
 * @param reference Value produced by the reference implementation.
 * @param flags     When the IMPRECISE_FLOATS bit is set, the values may be up
 *                  to one ULP apart; otherwise they must compare equal.
 * @return true when the values match under the selected rule, false once a
 *         mismatch has been reported.
 */
static bool diff(benchmark::State &state, const double &result, const double &reference, diff_flags flags) {
  bool different;
  if (int(flags) & int(diff_flags::IMPRECISE_FLOATS)) {
    // Accept results that are at most one unit in the last place apart.
    different = f64_ulp_dist(result, reference) > 1;
  } else {
    different = result != reference;
  }
  if (different) {
    std::stringstream str;
    // We print it out using full precision.
    constexpr auto precision = std::numeric_limits<double>::max_digits10;
    str << std::setprecision(precision);
    str << "incorrect double result: " << std::endl;
    str << " result: " << std::left << std::setw(precision+2) << result << " (hexfloat " << std::hexfloat << result << ")" << std::defaultfloat << std::endl;
    str << "reference: " << std::left << std::setw(precision+2) << reference << " (hexfloat " << std::hexfloat << reference << ")" << std::defaultfloat << std::endl;
    state.SkipWithError(str.str().data());
    // Signal the failure to the caller; without this the mismatch was
    // reported to the benchmark but the comparison still counted as a pass.
    return false;
  }
  return true;
}
/**
 * Returns the distance between two doubles measured in units in the last
 * place, computed from their raw 64-bit representations.
 *
 * @param a First value.
 * @param b Second value.
 * @return Number of representable steps between a and b; +0.0 and -0.0 are
 *         zero steps apart.
 */
static uint64_t f64_ulp_dist(double a, double b) {
  uint64_t ua, ub;
  // memcpy reads the bit patterns without violating aliasing rules.
  std::memcpy(&ua, &a, sizeof(ua));
  std::memcpy(&ub, &b, sizeof(ub));
  // Same sign bit: the patterns are ordered like the magnitudes, so the
  // distance is simply the absolute difference.
  if (static_cast<int64_t>(ub ^ ua) >= 0) {
    return static_cast<int64_t>(ua - ub) >= 0 ? (ua - ub) : (ub - ua);
  }
  // Opposite signs: exactly one operand carries the sign bit (bit 63). Adding
  // 2^63 cancels it modulo 2^64, leaving the sum of both magnitudes, i.e. the
  // distance measured across zero. The bias must be the 64-bit sign mask; the
  // 32-bit mask 0x80000000 belongs to the float variant of this routine.
  return ua + ub + 0x8000000000000000ULL;
}
};
template<typename T, typename U>
static bool diff_results(benchmark::State &state, const T &result, const U &reference) {
return result_differ<T, U>::diff(state, result, reference);
static bool diff_results(benchmark::State &state, const T &result, const U &reference, diff_flags flags) {
return result_differ<T, U>::diff(state, result, reference, flags);
}
} // namespace json_benchmark

View File

@ -0,0 +1,26 @@
#pragma once
#include "diff_results.h"
namespace json_benchmark {
// A triple of double-precision coordinates. Kept as a plain aggregate so it
// can be brace-initialized as point{x, y, z}.
struct point {
double x;
double y;
double z;
};
/**
 * Compares two points one coordinate at a time, in x, y, z order, delegating
 * each coordinate to diff_results(). Comparison stops at the first coordinate
 * that differs.
 */
template<>
struct result_differ<point, point> {
  static bool diff(benchmark::State &state, const point &result, const point &reference, diff_flags flags) {
    if (!diff_results(state, result.x, reference.x, flags)) { return false; }
    if (!diff_results(state, result.y, reference.y, flags)) { return false; }
    return diff_results(state, result.z, reference.z, flags);
  }
};
/**
 * Writes a point as "x,y,z" followed by std::endl, which ends the line and
 * flushes the stream.
 */
static simdjson_unused std::ostream &operator<<(std::ostream &o, const point &p) {
  o << p.x << ",";
  o << p.y << ",";
  o << p.z << std::endl;
  return o;
}
} // namespace json_benchmark

View File

@ -1,31 +1,24 @@
#pragma once
#include "json_benchmark/string_runner.h"
#include "json_benchmark/point.h"
#include <vector>
#include <random>
namespace kostya {
using namespace json_benchmark;
static const simdjson::padded_string &get_built_json_array();
struct point {
double x;
double y;
double z;
};
simdjson_unused static std::ostream &operator<<(std::ostream &o, const point &p) {
return o << p.x << "," << p.y << "," << p.z << std::endl;
}
template<typename I>
struct runner : public json_benchmark::string_runner<I> {
struct runner : public string_runner<I> {
std::vector<point> result;
runner() : json_benchmark::string_runner<I>(get_built_json_array()) {}
runner() : string_runner<I>(get_built_json_array()) {}
bool before_run(benchmark::State &state) {
if (!json_benchmark::string_runner<I>::before_run(state)) { return false; }
if (!string_runner<I>::before_run(state)) { return false; }
result.clear();
return true;
}
@ -36,7 +29,7 @@ struct runner : public json_benchmark::string_runner<I> {
template<typename R>
bool diff(benchmark::State &state, runner<R> &reference) {
return json_benchmark::diff_results(state, result, reference.result);
return diff_results(state, result, reference.result, I::DiffFlags);
}
size_t items_per_iteration() {
@ -87,16 +80,7 @@ static const simdjson::padded_string &get_built_json_array() {
struct simdjson_dom;
template<typename I> simdjson_really_inline static void kostya(benchmark::State &state) {
json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
}
} // namespace kostya
namespace json_benchmark {
template<>
bool result_differ<kostya::point, kostya::point>::diff(benchmark::State &state, const kostya::point &result, const kostya::point &reference) {
return diff_results(state, result.x, reference.x)
&& diff_results(state, result.y, reference.y)
&& diff_results(state, result.z, reference.z);
}
}

View File

@ -7,10 +7,12 @@
namespace kostya {
struct nlohmann_json {
static constexpr diff_flags DiffFlags = diff_flags::NONE;
bool run(simdjson::padded_string &json, std::vector<point> &result) {
auto root = nlohmann::json::parse(json.data(), json.data() + json.size());
for (auto point : root["coordinates"]) {
result.emplace_back(kostya::point{point["x"], point["y"], point["z"]});
result.emplace_back(json_benchmark::point{point["x"], point["y"], point["z"]});
}
return true;
}

View File

@ -9,6 +9,8 @@ namespace kostya {
using namespace rapidjson;
struct rapidjson_base {
static constexpr diff_flags DiffFlags = diff_flags::NONE;
Document doc;
simdjson_really_inline double get_double(Value &object, std::string_view key) {
@ -26,7 +28,7 @@ struct rapidjson_base {
if (!coords->value.IsArray()) { return false; }
for (auto &coord : coords->value.GetArray()) {
if (!coord.IsObject()) { return false; }
result.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
result.emplace_back(json_benchmark::point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
}
return true;

View File

@ -7,6 +7,8 @@
namespace kostya {
struct sajson {
static constexpr diff_flags DiffFlags = diff_flags::IMPRECISE_FLOATS;
size_t ast_buffer_size{0};
size_t *ast_buffer{nullptr};
@ -44,7 +46,7 @@ struct sajson {
for (size_t i=0; i<points.get_length(); i++) {
auto point = points.get_array_element(i);
if (point.get_type() != TYPE_OBJECT) { return false; }
result.emplace_back(kostya::point{
result.emplace_back(json_benchmark::point{
get_double(point, "x"),
get_double(point, "y"),
get_double(point, "z")

View File

@ -9,11 +9,13 @@ namespace kostya {
using namespace simdjson;
struct simdjson_dom {
static constexpr diff_flags DiffFlags = diff_flags::NONE;
dom::parser parser{};
bool run(simdjson::padded_string &json, std::vector<point> &result) {
for (auto point : parser.parse(json)["coordinates"]) {
result.emplace_back(kostya::point{point["x"], point["y"], point["z"]});
result.emplace_back(json_benchmark::point{point["x"], point["y"], point["z"]});
}
return true;
}

View File

@ -10,12 +10,14 @@ using namespace simdjson;
using namespace simdjson::builtin;
struct simdjson_ondemand {
static constexpr diff_flags DiffFlags = diff_flags::NONE;
ondemand::parser parser{};
bool run(simdjson::padded_string &json, std::vector<point> &result) {
auto doc = parser.iterate(json);
for (ondemand::object point : doc.find_field("coordinates")) {
result.emplace_back(kostya::point{point.find_field("x"), point.find_field("y"), point.find_field("z")});
result.emplace_back(json_benchmark::point{point.find_field("x"), point.find_field("y"), point.find_field("z")});
}
return true;
}

View File

@ -7,6 +7,8 @@
namespace kostya {
struct yyjson_base {
static constexpr diff_flags DiffFlags = diff_flags::NONE;
simdjson_really_inline double get_double(yyjson_val *obj, std::string_view key) {
yyjson_val *val = yyjson_obj_getn(obj, key.data(), key.length());
if (!val) { throw "missing point field!"; }
@ -35,7 +37,7 @@ struct yyjson_base {
yyjson_val *coord;
yyjson_arr_foreach(coords, idx, max, coord) {
if (!yyjson_is_obj(coord)) { return false; }
result.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
result.emplace_back(json_benchmark::point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
}
return true;

View File

@ -1,30 +1,27 @@
#pragma once
#include "json_benchmark/string_runner.h"
#include "json_benchmark/point.h"
#include <random>
namespace large_random {
static const simdjson::padded_string &get_built_json_array();
struct point {
double x;
double y;
double z;
};
using namespace json_benchmark;
simdjson_unused static std::ostream &operator<<(std::ostream &o, const point &p) {
return o << p.x << "," << p.y << "," << p.z << std::endl;
}
template<typename I>
struct runner : public json_benchmark::string_runner<I> {
struct runner : public string_runner<I> {
std::vector<point> result;
runner() : json_benchmark::string_runner<I>(get_built_json_array()) {}
runner() : string_runner<I>(get_built_json_array()) {}
bool before_run(benchmark::State &state) {
if (!json_benchmark::string_runner<I>::before_run(state)) { return false; }
if (!string_runner<I>::before_run(state)) { return false; }
result.clear();
return true;
}
@ -35,7 +32,7 @@ struct runner : public json_benchmark::string_runner<I> {
template<typename R>
bool diff(benchmark::State &state, runner<R> &reference) {
return json_benchmark::diff_results(state, result, reference.result);
return diff_results(state, result, reference.result, I::DiffFlags);
}
size_t items_per_iteration() {
@ -70,16 +67,7 @@ static const simdjson::padded_string &get_built_json_array() {
struct simdjson_dom;
template<typename T> static void large_random(benchmark::State &state) {
json_benchmark::run_json_benchmark<runner<T>, runner<simdjson_dom>>(state);
run_json_benchmark<runner<T>, runner<simdjson_dom>>(state);
}
} // namespace large_random
namespace json_benchmark {
template<>
bool result_differ<large_random::point, large_random::point>::diff(benchmark::State &state, const large_random::point &result, const large_random::point &reference) {
return diff_results(state, result.x, reference.x)
&& diff_results(state, result.y, reference.y)
&& diff_results(state, result.z, reference.z);
}
}

View File

@ -7,9 +7,11 @@
namespace large_random {
struct nlohmann_json {
static constexpr diff_flags DiffFlags = diff_flags::NONE;
bool run(simdjson::padded_string &json, std::vector<point> &result) {
for (auto point : nlohmann::json::parse(json.data(), json.data() + json.size())) {
result.emplace_back(large_random::point{point["x"], point["y"], point["z"]});
result.emplace_back(json_benchmark::point{point["x"], point["y"], point["z"]});
}
return true;
}

View File

@ -9,6 +9,8 @@ namespace large_random {
using namespace rapidjson;
struct rapidjson_base {
static constexpr diff_flags DiffFlags = diff_flags::NONE;
Document doc;
simdjson_really_inline double get_double(Value &object, std::string_view key) {
@ -23,7 +25,7 @@ struct rapidjson_base {
if (!coords.IsArray()) { return false; }
for (auto &coord : coords.GetArray()) {
if (!coord.IsObject()) { return false; }
result.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
result.emplace_back(json_benchmark::point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
}
return true;

View File

@ -7,6 +7,8 @@
namespace large_random {
struct sajson {
static constexpr diff_flags DiffFlags = diff_flags::IMPRECISE_FLOATS;
size_t ast_buffer_size{0};
size_t *ast_buffer{nullptr};
@ -42,7 +44,7 @@ struct sajson {
for (size_t i=0; i<points.get_length(); i++) {
auto point = points.get_array_element(i);
if (point.get_type() != TYPE_OBJECT) { return false; }
result.emplace_back(large_random::point{
result.emplace_back(json_benchmark::point{
get_double(point, "x"),
get_double(point, "y"),
get_double(point, "z")

View File

@ -9,11 +9,13 @@ namespace large_random {
using namespace simdjson;
struct simdjson_dom {
static constexpr diff_flags DiffFlags = diff_flags::NONE;
dom::parser parser{};
bool run(simdjson::padded_string &json, std::vector<point> &result) {
for (auto point : parser.parse(json)) {
result.emplace_back(large_random::point{point["x"], point["y"], point["z"]});
result.emplace_back(json_benchmark::point{point["x"], point["y"], point["z"]});
}
return true;
}

View File

@ -10,12 +10,14 @@ using namespace simdjson;
using namespace simdjson::builtin;
struct simdjson_ondemand {
static constexpr diff_flags DiffFlags = diff_flags::NONE;
ondemand::parser parser{};
bool run(simdjson::padded_string &json, std::vector<point> &result) {
auto doc = parser.iterate(json);
for (ondemand::object coord : doc) {
result.emplace_back(point{coord.find_field("x"), coord.find_field("y"), coord.find_field("z")});
result.emplace_back(json_benchmark::point{coord.find_field("x"), coord.find_field("y"), coord.find_field("z")});
}
return true;
}

View File

@ -10,12 +10,14 @@ using namespace simdjson;
using namespace simdjson::builtin;
struct simdjson_ondemand_unordered {
static constexpr diff_flags DiffFlags = diff_flags::NONE;
ondemand::parser parser{};
bool run(simdjson::padded_string &json, std::vector<point> &result) {
auto doc = parser.iterate(json);
for (ondemand::object coord : doc) {
result.emplace_back(large_random::point{coord["x"], coord["y"], coord["z"]});
result.emplace_back(json_benchmark::point{coord["x"], coord["y"], coord["z"]});
}
return true;
}

View File

@ -7,6 +7,8 @@
namespace large_random {
struct yyjson_base {
static constexpr diff_flags DiffFlags = diff_flags::NONE;
simdjson_really_inline double get_double(yyjson_val *obj, std::string_view key) {
yyjson_val *val = yyjson_obj_getn(obj, key.data(), key.length());
if (!val) { throw "missing point field!"; }
@ -34,7 +36,7 @@ struct yyjson_base {
yyjson_val *coord;
yyjson_arr_foreach(coords, idx, max, coord) {
if (!yyjson_is_obj(coord)) { return false; }
result.emplace_back(point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
result.emplace_back(json_benchmark::point{get_double(coord, "x"), get_double(coord, "y"), get_double(coord, "z")});
}
return true;

View File

@ -7,16 +7,18 @@
namespace partial_tweets {
using namespace json_benchmark;
template<typename I>
struct runner : public json_benchmark::file_runner<I> {
struct runner : public file_runner<I> {
std::vector<tweet<typename I::StringType>> result{};
bool setup(benchmark::State &state) {
return this->load_json(state, json_benchmark::TWITTER_JSON);
return this->load_json(state, TWITTER_JSON);
}
bool before_run(benchmark::State &state) {
if (!json_benchmark::file_runner<I>::before_run(state)) { return false; }
if (!file_runner<I>::before_run(state)) { return false; }
result.clear();
return true;
}
@ -27,7 +29,7 @@ struct runner : public json_benchmark::file_runner<I> {
template<typename R>
bool diff(benchmark::State &state, runner<R> &reference) {
return json_benchmark::diff_results(state, result, reference.result);
return diff_results(state, result, reference.result, diff_flags::NONE);
}
size_t items_per_iteration() {
@ -38,7 +40,7 @@ struct runner : public json_benchmark::file_runner<I> {
struct simdjson_dom;
template<typename I> simdjson_really_inline static void partial_tweets(benchmark::State &state) {
json_benchmark::run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
run_json_benchmark<runner<I>, runner<simdjson_dom>>(state);
}
} // namespace partial_tweets