Adding independent benchmarks using Google Benchmark (#826)

* Adding independent benchmarks using Google Benchmark
This commit is contained in:
Daniel Lemire 2020-04-29 10:53:54 -07:00 committed by GitHub
parent 4cd9de5c37
commit f0d5337818
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 122 additions and 30 deletions

View File

@ -43,8 +43,21 @@ Other important files and directories:
cardinal rule is don't regress performance without knowing exactly why, and what you're trading cardinal rule is don't regress performance without knowing exactly why, and what you're trading
for it. If you're not sure what else to do to check your performance, this is always a good start: for it. If you're not sure what else to do to check your performance, this is always a good start:
```bash ```bash
make parse && ./parse jsonexamples/twitter.json mkdir build
cd build
cmake ..
cmake --build . --config=Release
benchmark/parse ../jsonexamples/twitter.json
``` ```
The last line becomes `./benchmark/Release/parse.exe ../jsonexamples/twitter.json` under Windows. You may also use Google Benchmark:
```bash
mkdir build
cd build
cmake .. -DSIMDJSON_GOOGLE_BENCHMARKS=ON
cmake --build . --target bench_parse_call --config=Release
./benchmark/bench_parse_call
```
The last line becomes `./benchmark/Release/bench_parse_call.exe` under Windows. Under Windows, you can also build with the clang compiler by adding `-T ClangCL` to the call to `cmake ..`.
* **fuzz:** The source for fuzz testing. This lets us explore important edge and middle cases * **fuzz:** The source for fuzz testing. This lets us explore important edge and middle cases
automatically, and is run in CI. automatically, and is run in CI.
* **jsonchecker:** A set of JSON files used to check different functionality of the parser. * **jsonchecker:** A set of JSON files used to check different functionality of the parser.
@ -55,7 +68,6 @@ Other important files and directories:
* **singleheader:** Contains generated simdjson.h and simdjson.cpp that we release. * **singleheader:** Contains generated simdjson.h and simdjson.cpp that we release.
* **test:** The tests are here. basictests.cpp and errortests.cpp are the primary ones. * **test:** The tests are here. basictests.cpp and errortests.cpp are the primary ones.
* **tools:** Source for executables that can be distributed with simdjson * **tools:** Source for executables that can be distributed with simdjson
> **Don't modify the files in singleheader/ directly; these are automatically generated.** > **Don't modify the files in singleheader/ directly; these are automatically generated.**
> >
> While we distribute those files on release, we *maintain* the files under include/ and src/. > While we distribute those files on release, we *maintain* the files under include/ and src/.

View File

@ -19,6 +19,7 @@ if (SIMDJSON_GOOGLE_BENCHMARKS)
add_executable(bench_parse_call bench_parse_call.cpp) add_executable(bench_parse_call bench_parse_call.cpp)
add_executable(bench_dom_api bench_dom_api.cpp) add_executable(bench_dom_api bench_dom_api.cpp)
target_link_libraries(bench_dom_api test-data) target_link_libraries(bench_dom_api test-data)
target_link_libraries(bench_parse_call test-data)
endif() endif()
if (SIMDJSON_COMPETITION) if (SIMDJSON_COMPETITION)

View File

@ -23,7 +23,7 @@ static void numbers_scan(State& state) {
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl; cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
return; return;
} }
for (auto _ : state) { for (UNUSED auto _ : state) {
std::vector<double> container; std::vector<double> container;
for (auto e : arr) { for (auto e : arr) {
double x; double x;
@ -47,7 +47,7 @@ static void numbers_size_scan(State& state) {
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl; cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
return; return;
} }
for (auto _ : state) { for (UNUSED auto _ : state) {
std::vector<double> container; std::vector<double> container;
container.resize(arr.size()); container.resize(arr.size());
size_t pos = 0; size_t pos = 0;
@ -75,7 +75,7 @@ static void numbers_type_scan(State& state) {
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl; cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
return; return;
} }
for (auto _ : state) { for (UNUSED auto _ : state) {
std::vector<double> container; std::vector<double> container;
for (auto e : arr) { for (auto e : arr) {
dom::element_type actual_type = e.type(); dom::element_type actual_type = e.type();
@ -102,7 +102,7 @@ static void numbers_type_size_scan(State& state) {
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl; cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
return; return;
} }
for (auto _ : state) { for (UNUSED auto _ : state) {
std::vector<double> container; std::vector<double> container;
container.resize(arr.size()); container.resize(arr.size());
size_t pos = 0; size_t pos = 0;
@ -127,7 +127,7 @@ static void numbers_load_scan(State& state) {
dom::parser parser; dom::parser parser;
dom::array arr; dom::array arr;
simdjson::error_code error; simdjson::error_code error;
for (auto _ : state) { for (UNUSED auto _ : state) {
// this may hit the disk, but probably just once // this may hit the disk, but probably just once
parser.load(NUMBERS_JSON).get<dom::array>().tie(arr, error); parser.load(NUMBERS_JSON).get<dom::array>().tie(arr, error);
if(error) { if(error) {
@ -152,7 +152,7 @@ static void numbers_load_size_scan(State& state) {
dom::parser parser; dom::parser parser;
dom::array arr; dom::array arr;
simdjson::error_code error; simdjson::error_code error;
for (auto _ : state) { for (UNUSED auto _ : state) {
// this may hit the disk, but probably just once // this may hit the disk, but probably just once
parser.load(NUMBERS_JSON).get<dom::array>().tie(arr, error); parser.load(NUMBERS_JSON).get<dom::array>().tie(arr, error);
if(error) { if(error) {
@ -183,7 +183,7 @@ static void numbers_exceptions_scan(State& state) {
// Prints the number of results in twitter.json // Prints the number of results in twitter.json
dom::parser parser; dom::parser parser;
dom::array arr = parser.load(NUMBERS_JSON); dom::array arr = parser.load(NUMBERS_JSON);
for (auto _ : state) { for (UNUSED auto _ : state) {
std::vector<double> container; std::vector<double> container;
for (double x : arr) { for (double x : arr) {
container.push_back(x); container.push_back(x);
@ -198,7 +198,7 @@ static void numbers_exceptions_size_scan(State& state) {
// Prints the number of results in twitter.json // Prints the number of results in twitter.json
dom::parser parser; dom::parser parser;
dom::array arr = parser.load(NUMBERS_JSON); dom::array arr = parser.load(NUMBERS_JSON);
for (auto _ : state) { for (UNUSED auto _ : state) {
std::vector<double> container; std::vector<double> container;
container.resize(arr.size()); container.resize(arr.size());
size_t pos = 0; size_t pos = 0;
@ -218,7 +218,7 @@ static void numbers_type_exceptions_scan(State& state) {
// Prints the number of results in twitter.json // Prints the number of results in twitter.json
dom::parser parser; dom::parser parser;
dom::array arr = parser.load(NUMBERS_JSON); dom::array arr = parser.load(NUMBERS_JSON);
for (auto _ : state) { for (UNUSED auto _ : state) {
std::vector<double> container; std::vector<double> container;
for (auto e : arr) { for (auto e : arr) {
dom::element_type actual_type = e.type(); dom::element_type actual_type = e.type();
@ -237,7 +237,7 @@ static void numbers_type_exceptions_size_scan(State& state) {
// Prints the number of results in twitter.json // Prints the number of results in twitter.json
dom::parser parser; dom::parser parser;
dom::array arr = parser.load(NUMBERS_JSON); dom::array arr = parser.load(NUMBERS_JSON);
for (auto _ : state) { for (UNUSED auto _ : state) {
std::vector<double> container; std::vector<double> container;
container.resize(arr.size()); container.resize(arr.size());
size_t pos = 0; size_t pos = 0;
@ -258,7 +258,7 @@ BENCHMARK(numbers_type_exceptions_size_scan);
static void numbers_exceptions_load_scan(State& state) { static void numbers_exceptions_load_scan(State& state) {
// Prints the number of results in twitter.json // Prints the number of results in twitter.json
dom::parser parser; dom::parser parser;
for (auto _ : state) { for (UNUSED auto _ : state) {
// this may hit the disk, but probably just once // this may hit the disk, but probably just once
dom::array arr = parser.load(NUMBERS_JSON); dom::array arr = parser.load(NUMBERS_JSON);
std::vector<double> container; std::vector<double> container;
@ -274,7 +274,7 @@ BENCHMARK(numbers_exceptions_load_scan);
static void numbers_exceptions_load_size_scan(State& state) { static void numbers_exceptions_load_size_scan(State& state) {
// Prints the number of results in twitter.json // Prints the number of results in twitter.json
dom::parser parser; dom::parser parser;
for (auto _ : state) { for (UNUSED auto _ : state) {
// this may hit the disk, but probably just once // this may hit the disk, but probably just once
dom::array arr = parser.load(NUMBERS_JSON); dom::array arr = parser.load(NUMBERS_JSON);
std::vector<double> container; std::vector<double> container;
@ -295,7 +295,7 @@ static void twitter_count(State& state) {
// Prints the number of results in twitter.json // Prints the number of results in twitter.json
dom::parser parser; dom::parser parser;
dom::element doc = parser.load(TWITTER_JSON); dom::element doc = parser.load(TWITTER_JSON);
for (auto _ : state) { for (UNUSED auto _ : state) {
uint64_t result_count = doc["search_metadata"]["count"]; uint64_t result_count = doc["search_metadata"]["count"];
if (result_count != 100) { return; } if (result_count != 100) { return; }
} }
@ -308,7 +308,7 @@ static void iterator_twitter_count(State& state) {
// Prints the number of results in twitter.json // Prints the number of results in twitter.json
padded_string json = padded_string::load(TWITTER_JSON); padded_string json = padded_string::load(TWITTER_JSON);
ParsedJson pj = build_parsed_json(json); ParsedJson pj = build_parsed_json(json);
for (auto _ : state) { for (UNUSED auto _ : state) {
ParsedJson::Iterator iter(pj); ParsedJson::Iterator iter(pj);
// uint64_t result_count = doc["search_metadata"]["count"]; // uint64_t result_count = doc["search_metadata"]["count"];
if (!iter.move_to_key("search_metadata")) { return; } if (!iter.move_to_key("search_metadata")) { return; }
@ -326,7 +326,7 @@ static void twitter_default_profile(State& state) {
// Count unique users with a default profile. // Count unique users with a default profile.
dom::parser parser; dom::parser parser;
dom::element doc = parser.load(TWITTER_JSON); dom::element doc = parser.load(TWITTER_JSON);
for (auto _ : state) { for (UNUSED auto _ : state) {
set<string_view> default_users; set<string_view> default_users;
for (dom::object tweet : doc["statuses"].get<dom::array>()) { for (dom::object tweet : doc["statuses"].get<dom::array>()) {
dom::object user = tweet["user"]; dom::object user = tweet["user"];
@ -343,7 +343,7 @@ static void twitter_image_sizes(State& state) {
// Count unique image sizes // Count unique image sizes
dom::parser parser; dom::parser parser;
dom::element doc = parser.load(TWITTER_JSON); dom::element doc = parser.load(TWITTER_JSON);
for (auto _ : state) { for (UNUSED auto _ : state) {
set<tuple<uint64_t, uint64_t>> image_sizes; set<tuple<uint64_t, uint64_t>> image_sizes;
for (dom::object tweet : doc["statuses"].get<dom::array>()) { for (dom::object tweet : doc["statuses"].get<dom::array>()) {
auto [media, not_found] = tweet["entities"]["media"]; auto [media, not_found] = tweet["entities"]["media"];
@ -366,7 +366,7 @@ static void error_code_twitter_count(State& state) noexcept {
// Prints the number of results in twitter.json // Prints the number of results in twitter.json
dom::parser parser; dom::parser parser;
dom::element doc = parser.load(TWITTER_JSON); dom::element doc = parser.load(TWITTER_JSON);
for (auto _ : state) { for (UNUSED auto _ : state) {
auto [value, error] = doc["search_metadata"]["count"].get<uint64_t>(); auto [value, error] = doc["search_metadata"]["count"].get<uint64_t>();
if (error) { return; } if (error) { return; }
if (value != 100) { return; } if (value != 100) { return; }
@ -378,7 +378,7 @@ static void error_code_twitter_default_profile(State& state) noexcept {
// Count unique users with a default profile. // Count unique users with a default profile.
dom::parser parser; dom::parser parser;
dom::element doc = parser.load(TWITTER_JSON); dom::element doc = parser.load(TWITTER_JSON);
for (auto _ : state) { for (UNUSED auto _ : state) {
set<string_view> default_users; set<string_view> default_users;
auto [tweets, error] = doc["statuses"].get<dom::array>(); auto [tweets, error] = doc["statuses"].get<dom::array>();
@ -406,7 +406,7 @@ static void iterator_twitter_default_profile(State& state) {
// Count unique users with a default profile. // Count unique users with a default profile.
padded_string json = padded_string::load(TWITTER_JSON); padded_string json = padded_string::load(TWITTER_JSON);
ParsedJson pj = build_parsed_json(json); ParsedJson pj = build_parsed_json(json);
for (auto _ : state) { for (UNUSED auto _ : state) {
set<string_view> default_users; set<string_view> default_users;
ParsedJson::Iterator iter(pj); ParsedJson::Iterator iter(pj);
@ -445,7 +445,7 @@ static void error_code_twitter_image_sizes(State& state) noexcept {
// Count unique image sizes // Count unique image sizes
dom::parser parser; dom::parser parser;
dom::element doc = parser.load(TWITTER_JSON); dom::element doc = parser.load(TWITTER_JSON);
for (auto _ : state) { for (UNUSED auto _ : state) {
set<tuple<uint64_t, uint64_t>> image_sizes; set<tuple<uint64_t, uint64_t>> image_sizes;
auto [statuses, error] = doc["statuses"].get<dom::array>(); auto [statuses, error] = doc["statuses"].get<dom::array>();
if (error) { return; } if (error) { return; }
@ -475,7 +475,7 @@ static void iterator_twitter_image_sizes(State& state) {
// Count unique image sizes // Count unique image sizes
padded_string json = padded_string::load(TWITTER_JSON); padded_string json = padded_string::load(TWITTER_JSON);
ParsedJson pj = build_parsed_json(json); ParsedJson pj = build_parsed_json(json);
for (auto _ : state) { for (UNUSED auto _ : state) {
set<tuple<uint64_t, uint64_t>> image_sizes; set<tuple<uint64_t, uint64_t>> image_sizes;
ParsedJson::Iterator iter(pj); ParsedJson::Iterator iter(pj);
@ -534,7 +534,7 @@ static void print_json(State& state) noexcept {
padded_string json = get_corpus(TWITTER_JSON); padded_string json = get_corpus(TWITTER_JSON);
dom::parser parser; dom::parser parser;
if (int error = json_parse(json, parser); error != SUCCESS) { cerr << error_message(error) << endl; return; } if (int error = json_parse(json, parser); error != SUCCESS) { cerr << error_message(error) << endl; return; }
for (auto _ : state) { for (UNUSED auto _ : state) {
std::stringstream s; std::stringstream s;
if (!parser.print_json(s)) { cerr << "print_json failed" << endl; return; } if (!parser.print_json(s)) { cerr << "print_json failed" << endl; return; }
} }

View File

@ -5,13 +5,90 @@ using namespace benchmark;
using namespace std; using namespace std;
const padded_string EMPTY_ARRAY("[]", 2); const padded_string EMPTY_ARRAY("[]", 2);
const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
const char *GSOC_JSON = SIMDJSON_BENCHMARK_DATA_DIR "gsoc-2018.json";
/**
 * Benchmarks DOM parsing of twitter.json with a reusable parser.
 *
 * The file is loaded and the parser capacity is reserved once, before the
 * timed loop, so that neither file loading nor the up-front allocation is
 * part of the measured iterations.
 *
 * Two rate counters are reported:
 *  - "Bytes": input bytes processed per second (1024-based units),
 *  - "docs":  documents parsed per second.
 *
 * Every failure is written to cerr (stdout is left to the benchmark
 * reporter) and flagged with State::SkipWithError, so the framework records
 * the run as failed instead of just seeing the function return early.
 *
 * @param state Google Benchmark state driving the timed loop.
 */
static void parse_twitter(State& state) {
  dom::parser parser;
  padded_string docdata;
  simdjson::error_code error;
  padded_string::load(TWITTER_JSON).tie(docdata, error);
  if (error) {
    cerr << "could not load twitter.json: " << error << endl;
    state.SkipWithError("could not load twitter.json");
    return;
  }
  // we do not want mem. alloc. in the loop.
  error = parser.allocate(docdata.size());
  if (error) {
    cerr << "could not allocate parser capacity for twitter.json: " << error << endl;
    state.SkipWithError("could not allocate parser capacity for twitter.json");
    return;
  }
  size_t bytes = 0;
  for (UNUSED auto _ : state) {
    dom::element doc;
    bytes += docdata.size();
    parser.parse(docdata).tie(doc, error);
    if (error) {
      cerr << "could not parse twitter.json: " << error << endl;
      state.SkipWithError("could not parse twitter.json");
      return;
    }
    // Keep the parsed result observable so the parse is not optimized away.
    benchmark::DoNotOptimize(doc);
  }
  state.counters["Bytes"] = benchmark::Counter(
      double(bytes), benchmark::Counter::kIsRate,
      benchmark::Counter::OneK::kIs1024);
  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
}
// Run 10 repetitions, add a custom "max" statistic computed over the
// repetitions, and display only the aggregate rows.
BENCHMARK(parse_twitter)->Repetitions(10)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
    return *(std::max_element(std::begin(v), std::end(v)));
  })->DisplayAggregatesOnly(true);
/**
 * Benchmarks DOM parsing of gsoc-2018.json with a reusable parser.
 *
 * The file is loaded and the parser capacity is reserved once, before the
 * timed loop, so that neither file loading nor the up-front allocation is
 * part of the measured iterations.
 *
 * Two rate counters are reported:
 *  - "Bytes": input bytes processed per second (1024-based units),
 *  - "docs":  documents parsed per second.
 *
 * Every failure is written to cerr (stdout is left to the benchmark
 * reporter) and flagged with State::SkipWithError, so the framework records
 * the run as failed instead of just seeing the function return early.
 *
 * @param state Google Benchmark state driving the timed loop.
 */
static void parse_gsoc(State& state) {
  dom::parser parser;
  padded_string docdata;
  simdjson::error_code error;
  padded_string::load(GSOC_JSON).tie(docdata, error);
  if (error) {
    cerr << "could not load gsoc-2018.json: " << error << endl;
    state.SkipWithError("could not load gsoc-2018.json");
    return;
  }
  // we do not want mem. alloc. in the loop.
  error = parser.allocate(docdata.size());
  if (error) {
    cerr << "could not allocate parser capacity for gsoc-2018.json: " << error << endl;
    state.SkipWithError("could not allocate parser capacity for gsoc-2018.json");
    return;
  }
  size_t bytes = 0;
  for (UNUSED auto _ : state) {
    dom::element doc;
    bytes += docdata.size();
    parser.parse(docdata).tie(doc, error);
    if (error) {
      cerr << "could not parse gsoc-2018.json: " << error << endl;
      state.SkipWithError("could not parse gsoc-2018.json");
      return;
    }
    // Keep the parsed result observable so the parse is not optimized away.
    benchmark::DoNotOptimize(doc);
  }
  state.counters["Bytes"] = benchmark::Counter(
      double(bytes), benchmark::Counter::kIsRate,
      benchmark::Counter::OneK::kIs1024);
  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
}
// Run 10 repetitions, add a custom "max" statistic computed over the
// repetitions, and display only the aggregate rows.
BENCHMARK(parse_gsoc)->Repetitions(10)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
    return *(std::max_element(std::begin(v), std::end(v)));
  })->DisplayAggregatesOnly(true);
SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_DEPRECATED_WARNING
static void json_parse(State& state) { static void json_parse(State& state) {
ParsedJson pj; ParsedJson pj;
if (!pj.allocate_capacity(EMPTY_ARRAY.length())) { return; } if (!pj.allocate_capacity(EMPTY_ARRAY.length())) { return; }
for (auto _ : state) { for (UNUSED auto _ : state) {
auto error = json_parse(EMPTY_ARRAY, pj); auto error = json_parse(EMPTY_ARRAY, pj);
if (error) { return; } if (error) { return; }
} }
@ -21,7 +98,7 @@ BENCHMARK(json_parse);
static void parser_parse_error_code(State& state) { static void parser_parse_error_code(State& state) {
dom::parser parser; dom::parser parser;
if (parser.allocate(EMPTY_ARRAY.length())) { return; } if (parser.allocate(EMPTY_ARRAY.length())) { return; }
for (auto _ : state) { for (UNUSED auto _ : state) {
auto error = parser.parse(EMPTY_ARRAY).error(); auto error = parser.parse(EMPTY_ARRAY).error();
if (error) { return; } if (error) { return; }
} }
@ -30,10 +107,11 @@ BENCHMARK(parser_parse_error_code);
static void parser_parse_exception(State& state) { static void parser_parse_exception(State& state) {
dom::parser parser; dom::parser parser;
if (parser.allocate(EMPTY_ARRAY.length())) { return; } if (parser.allocate(EMPTY_ARRAY.length())) { return; }
for (auto _ : state) { for (UNUSED auto _ : state) {
try { try {
UNUSED dom::element doc = parser.parse(EMPTY_ARRAY); UNUSED dom::element doc = parser.parse(EMPTY_ARRAY);
} catch(simdjson_error &j) { } catch(simdjson_error &j) {
cout << j.what() << endl;
return; return;
} }
} }
@ -43,7 +121,7 @@ BENCHMARK(parser_parse_exception);
SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_DEPRECATED_WARNING
static void build_parsed_json(State& state) { static void build_parsed_json(State& state) {
for (auto _ : state) { for (UNUSED auto _ : state) {
dom::parser parser = simdjson::build_parsed_json(EMPTY_ARRAY); dom::parser parser = simdjson::build_parsed_json(EMPTY_ARRAY);
if (!parser.valid) { return; } if (!parser.valid) { return; }
} }
@ -51,7 +129,7 @@ static void build_parsed_json(State& state) {
SIMDJSON_POP_DISABLE_WARNINGS SIMDJSON_POP_DISABLE_WARNINGS
BENCHMARK(build_parsed_json); BENCHMARK(build_parsed_json);
static void document_parse_error_code(State& state) { static void document_parse_error_code(State& state) {
for (auto _ : state) { for (UNUSED auto _ : state) {
dom::parser parser; dom::parser parser;
auto error = parser.parse(EMPTY_ARRAY).error(); auto error = parser.parse(EMPTY_ARRAY).error();
if (error) { return; } if (error) { return; }
@ -59,11 +137,12 @@ static void document_parse_error_code(State& state) {
} }
BENCHMARK(document_parse_error_code); BENCHMARK(document_parse_error_code);
static void document_parse_exception(State& state) { static void document_parse_exception(State& state) {
for (auto _ : state) { for (UNUSED auto _ : state) {
try { try {
dom::parser parser; dom::parser parser;
UNUSED dom::element doc = parser.parse(EMPTY_ARRAY); UNUSED dom::element doc = parser.parse(EMPTY_ARRAY);
} catch(simdjson_error &j) { } catch(simdjson_error &j) {
cout << j.what() << endl;
return; return;
} }
} }