2020-02-15 07:20:17 +08:00
|
|
|
#include <benchmark/benchmark.h>
|
2020-03-03 06:23:19 +08:00
|
|
|
#include "simdjson.h"
|
2020-02-15 07:20:17 +08:00
|
|
|
using namespace simdjson;
|
|
|
|
using namespace benchmark;
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
// Smallest JSON document used by the benchmarks in this file: the two-byte text "[]".
const padded_string EMPTY_ARRAY("[]", 2);
|
2020-04-30 01:53:54 +08:00
|
|
|
// Path to twitter.json. SIMDJSON_BENCHMARK_DATA_DIR must expand to a string literal
// (it is concatenated with the file name); presumably supplied by the build — confirm.
const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
|
|
|
|
// Path to gsoc-2018.json. SIMDJSON_BENCHMARK_DATA_DIR must expand to a string literal
// (it is concatenated with the file name); presumably supplied by the build — confirm.
const char *GSOC_JSON = SIMDJSON_BENCHMARK_DATA_DIR "gsoc-2018.json";
|
|
|
|
|
|
|
|
|
|
|
|
|
2021-07-16 23:54:55 +08:00
|
|
|
|
|
|
|
static void fast_minify_twitter(State& state) {
|
|
|
|
dom::parser parser;
|
|
|
|
padded_string docdata;
|
|
|
|
auto error = padded_string::load(TWITTER_JSON).get(docdata);
|
|
|
|
if(error) {
|
|
|
|
cerr << "could not parse twitter.json" << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
std::unique_ptr<char[]> buffer{new char[docdata.size()]};
|
|
|
|
|
|
|
|
size_t bytes = 0;
|
|
|
|
for (simdjson_unused auto _ : state) {
|
|
|
|
size_t new_length{}; // It will receive the minified length.
|
|
|
|
auto error = simdjson::minify(docdata.data(), docdata.size(), buffer.get(), new_length);
|
|
|
|
bytes += docdata.size();
|
|
|
|
benchmark::DoNotOptimize(error);
|
|
|
|
}
|
|
|
|
// Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
|
|
|
|
state.counters["Gigabytes"] = benchmark::Counter(
|
|
|
|
double(bytes), benchmark::Counter::kIsRate,
|
|
|
|
benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
|
|
|
|
state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
|
|
|
|
}
|
|
|
|
BENCHMARK(fast_minify_twitter)->Repetitions(10)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
|
|
|
return *(std::max_element(std::begin(v), std::end(v)));
|
|
|
|
})->DisplayAggregatesOnly(true);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static void fast_minify_gsoc(State& state) {
|
|
|
|
dom::parser parser;
|
|
|
|
padded_string docdata;
|
|
|
|
auto error = padded_string::load(GSOC_JSON).get(docdata);
|
|
|
|
if(error) {
|
|
|
|
cerr << "could not parse gsoc-2018.json" << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
std::unique_ptr<char[]> buffer{new char[docdata.size()]};
|
|
|
|
|
|
|
|
size_t bytes = 0;
|
|
|
|
for (simdjson_unused auto _ : state) {
|
|
|
|
size_t new_length{}; // It will receive the minified length.
|
|
|
|
auto error = simdjson::minify(docdata.data(), docdata.size(), buffer.get(), new_length);
|
|
|
|
bytes += docdata.size();
|
|
|
|
benchmark::DoNotOptimize(error);
|
|
|
|
}
|
|
|
|
// Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
|
|
|
|
state.counters["Gigabytes"] = benchmark::Counter(
|
|
|
|
double(bytes), benchmark::Counter::kIsRate,
|
|
|
|
benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
|
|
|
|
state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
|
|
|
|
}
|
|
|
|
BENCHMARK(fast_minify_gsoc)->Repetitions(10)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
|
|
|
return *(std::max_element(std::begin(v), std::end(v)));
|
|
|
|
})->DisplayAggregatesOnly(true);
|
|
|
|
|
2020-07-21 06:56:39 +08:00
|
|
|
static void unicode_validate_twitter(State& state) {
|
|
|
|
dom::parser parser;
|
|
|
|
padded_string docdata;
|
|
|
|
auto error = padded_string::load(TWITTER_JSON).get(docdata);
|
|
|
|
if(error) {
|
|
|
|
cerr << "could not parse twitter.json" << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// we do not want mem. alloc. in the loop.
|
|
|
|
error = parser.allocate(docdata.size());
|
|
|
|
if(error) {
|
|
|
|
cout << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
size_t bytes = 0;
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-07-21 06:56:39 +08:00
|
|
|
bool is_ok = simdjson::validate_utf8(docdata.data(), docdata.size());
|
|
|
|
bytes += docdata.size();
|
|
|
|
benchmark::DoNotOptimize(is_ok);
|
|
|
|
}
|
|
|
|
// Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
|
|
|
|
state.counters["Gigabytes"] = benchmark::Counter(
|
|
|
|
double(bytes), benchmark::Counter::kIsRate,
|
|
|
|
benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
|
|
|
|
state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
|
|
|
|
}
|
|
|
|
BENCHMARK(unicode_validate_twitter)->Repetitions(10)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
|
|
|
return *(std::max_element(std::begin(v), std::end(v)));
|
|
|
|
})->DisplayAggregatesOnly(true);
|
|
|
|
|
2020-04-30 01:53:54 +08:00
|
|
|
static void parse_twitter(State& state) {
|
|
|
|
dom::parser parser;
|
|
|
|
padded_string docdata;
|
2020-06-21 03:04:23 +08:00
|
|
|
auto error = padded_string::load(TWITTER_JSON).get(docdata);
|
2020-04-30 01:53:54 +08:00
|
|
|
if(error) {
|
|
|
|
cerr << "could not parse twitter.json" << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// we do not want mem. alloc. in the loop.
|
|
|
|
error = parser.allocate(docdata.size());
|
|
|
|
if(error) {
|
|
|
|
cout << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
size_t bytes = 0;
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-04-30 01:53:54 +08:00
|
|
|
dom::element doc;
|
|
|
|
bytes += docdata.size();
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = parser.parse(docdata).get(doc))) {
|
2020-04-30 01:53:54 +08:00
|
|
|
cerr << "could not parse twitter.json" << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
benchmark::DoNotOptimize(doc);
|
|
|
|
}
|
2020-05-02 00:16:18 +08:00
|
|
|
// Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
|
|
|
|
state.counters["Gigabytes"] = benchmark::Counter(
|
2020-04-30 01:53:54 +08:00
|
|
|
double(bytes), benchmark::Counter::kIsRate,
|
2020-05-02 00:16:18 +08:00
|
|
|
benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
|
2020-04-30 01:53:54 +08:00
|
|
|
state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
|
|
|
|
}
|
|
|
|
BENCHMARK(parse_twitter)->Repetitions(10)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
|
|
|
return *(std::max_element(std::begin(v), std::end(v)));
|
|
|
|
})->DisplayAggregatesOnly(true);
|
|
|
|
|
|
|
|
|
|
|
|
static void parse_gsoc(State& state) {
|
|
|
|
dom::parser parser;
|
|
|
|
padded_string docdata;
|
2020-06-21 03:04:23 +08:00
|
|
|
auto error = padded_string::load(GSOC_JSON).get(docdata);
|
2020-04-30 01:53:54 +08:00
|
|
|
if(error) {
|
|
|
|
cerr << "could not parse gsoc-2018.json" << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// we do not want mem. alloc. in the loop.
|
|
|
|
error = parser.allocate(docdata.size());
|
|
|
|
if(error) {
|
|
|
|
cout << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
size_t bytes = 0;
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-04-30 01:53:54 +08:00
|
|
|
bytes += docdata.size();
|
2020-06-21 03:04:23 +08:00
|
|
|
dom::element doc;
|
|
|
|
if ((error = parser.parse(docdata).get(doc))) {
|
2020-04-30 01:53:54 +08:00
|
|
|
cerr << "could not parse gsoc-2018.json" << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
benchmark::DoNotOptimize(doc);
|
|
|
|
}
|
2020-05-02 00:16:18 +08:00
|
|
|
// Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
|
|
|
|
state.counters["Gigabytes"] = benchmark::Counter(
|
2020-04-30 01:53:54 +08:00
|
|
|
double(bytes), benchmark::Counter::kIsRate,
|
2020-05-02 00:16:18 +08:00
|
|
|
benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
|
2020-04-30 01:53:54 +08:00
|
|
|
state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
|
|
|
|
}
|
|
|
|
BENCHMARK(parse_gsoc)->Repetitions(10)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
|
|
|
return *(std::max_element(std::begin(v), std::end(v)));
|
|
|
|
})->DisplayAggregatesOnly(true);
|
|
|
|
|
|
|
|
|
2020-02-15 07:20:17 +08:00
|
|
|
|
2020-11-01 13:38:52 +08:00
|
|
|
#ifndef SIMDJSON_DISABLE_DEPRECATED_API
|
2020-03-29 05:48:43 +08:00
|
|
|
SIMDJSON_PUSH_DISABLE_WARNINGS
|
|
|
|
SIMDJSON_DISABLE_DEPRECATED_WARNING
|
2020-02-15 07:20:17 +08:00
|
|
|
static void json_parse(State& state) {
|
2020-03-31 02:54:01 +08:00
|
|
|
ParsedJson pj;
|
|
|
|
if (!pj.allocate_capacity(EMPTY_ARRAY.length())) { return; }
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-03-31 02:54:01 +08:00
|
|
|
auto error = json_parse(EMPTY_ARRAY, pj);
|
2020-02-15 07:20:17 +08:00
|
|
|
if (error) { return; }
|
|
|
|
}
|
|
|
|
}
|
2020-03-29 05:48:43 +08:00
|
|
|
SIMDJSON_POP_DISABLE_WARNINGS
|
2020-02-15 07:20:17 +08:00
|
|
|
BENCHMARK(json_parse);
|
2020-11-01 13:38:52 +08:00
|
|
|
#endif // SIMDJSON_DISABLE_DEPRECATED_API
|
|
|
|
|
2020-02-15 07:20:17 +08:00
|
|
|
static void parser_parse_error_code(State& state) {
|
2020-03-29 02:43:41 +08:00
|
|
|
dom::parser parser;
|
2020-03-31 02:54:01 +08:00
|
|
|
if (parser.allocate(EMPTY_ARRAY.length())) { return; }
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-04-15 08:26:26 +08:00
|
|
|
auto error = parser.parse(EMPTY_ARRAY).error();
|
2020-02-15 07:20:17 +08:00
|
|
|
if (error) { return; }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(parser_parse_error_code);
|
2020-04-30 03:59:52 +08:00
|
|
|
|
|
|
|
#if SIMDJSON_EXCEPTIONS

/**
 * Benchmarks dom::parser::parse on EMPTY_ARRAY using the exception-based API.
 *
 * One parser is created and given capacity before the timed loop, then
 * reused on every iteration. Failures mark the run as failed through
 * State::SkipWithError() instead of silently returning.
 */
static void parser_parse_exception(State& state) {
  dom::parser parser;
  auto alloc_error = parser.allocate(EMPTY_ARRAY.length());
  if (alloc_error) {
    // Google Benchmark requires SkipWithError() before an early return.
    state.SkipWithError(simdjson::error_message(alloc_error));
    return;
  }
  for (simdjson_unused auto _ : state) {
    try {
      simdjson_unused dom::element doc = parser.parse(EMPTY_ARRAY);
    } catch (const simdjson_error &j) {
      cerr << j.what() << endl;
      state.SkipWithError(j.what());
      return;
    }
  }
}
BENCHMARK(parser_parse_exception);

#endif // SIMDJSON_EXCEPTIONS
|
|
|
|
|
2020-11-01 13:38:52 +08:00
|
|
|
#ifndef SIMDJSON_DISABLE_DEPRECATED_API
|
2020-03-29 05:48:43 +08:00
|
|
|
SIMDJSON_PUSH_DISABLE_WARNINGS
|
|
|
|
SIMDJSON_DISABLE_DEPRECATED_WARNING
|
2020-02-15 07:20:17 +08:00
|
|
|
static void build_parsed_json(State& state) {
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-03-29 02:43:41 +08:00
|
|
|
dom::parser parser = simdjson::build_parsed_json(EMPTY_ARRAY);
|
2020-02-15 07:20:17 +08:00
|
|
|
if (!parser.valid) { return; }
|
|
|
|
}
|
|
|
|
}
|
2020-03-29 05:48:43 +08:00
|
|
|
SIMDJSON_POP_DISABLE_WARNINGS
|
2020-04-30 03:59:52 +08:00
|
|
|
|
2020-02-15 07:20:17 +08:00
|
|
|
BENCHMARK(build_parsed_json);
|
2020-11-01 13:38:52 +08:00
|
|
|
#endif
|
|
|
|
|
2020-02-15 07:20:17 +08:00
|
|
|
static void document_parse_error_code(State& state) {
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-03-29 02:43:41 +08:00
|
|
|
dom::parser parser;
|
2020-04-15 08:26:26 +08:00
|
|
|
auto error = parser.parse(EMPTY_ARRAY).error();
|
2020-02-15 07:20:17 +08:00
|
|
|
if (error) { return; }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(document_parse_error_code);
|
2020-04-30 03:59:52 +08:00
|
|
|
|
|
|
|
#if SIMDJSON_EXCEPTIONS

/**
 * Benchmarks parsing EMPTY_ARRAY with a freshly constructed parser, using
 * the exception-based API.
 *
 * A new dom::parser is constructed inside the timed loop on every iteration,
 * so parser construction is part of the measured work. A thrown
 * simdjson_error marks the run as failed through State::SkipWithError()
 * instead of silently returning.
 */
static void document_parse_exception(State& state) {
  for (simdjson_unused auto _ : state) {
    try {
      dom::parser parser;
      simdjson_unused dom::element doc = parser.parse(EMPTY_ARRAY);
    } catch (const simdjson_error &j) {
      cerr << j.what() << endl;
      // Google Benchmark requires SkipWithError() before an early return.
      state.SkipWithError(j.what());
      return;
    }
  }
}
BENCHMARK(document_parse_exception);

#endif // SIMDJSON_EXCEPTIONS
|
|
|
|
|
2020-05-02 00:16:18 +08:00
|
|
|
// Google Benchmark macro that provides the program's main() and runs the registered benchmarks.
BENCHMARK_MAIN();
|