Adding independent benchmarks using Google Benchmark (#826)

* Adding independent benchmarks using Google Benchmark
This commit is contained in:
Daniel Lemire 2020-04-29 10:53:54 -07:00 committed by GitHub
parent 4cd9de5c37
commit f0d5337818
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 122 additions and 30 deletions

View File

@ -43,8 +43,21 @@ Other important files and directories:
cardinal rule is don't regress performance without knowing exactly why, and what you're trading
for it. If you're not sure what else to do to check your performance, this is always a good start:
```bash
make parse && ./parse jsonexamples/twitter.json
mkdir build
cd build
cmake ..
cmake --build . --config=Release
benchmark/parse ../jsonexamples/twitter.json
```
The last line becomes `./benchmark/Release/parse.exe ../jsonexamples/twitter.json` under Windows. You may also use Google Benchmark:
```bash
mkdir build
cd build
cmake .. -DSIMDJSON_GOOGLE_BENCHMARKS=ON
cmake --build . --target bench_parse_call --config=Release
./benchmark/bench_parse_call
```
The last line becomes `./benchmark/Release/bench_parse_call.exe` under Windows. Under Windows, you can also build with the clang compiler by adding `-T ClangCL` to the call to `cmake .. `.
* **fuzz:** The source for fuzz testing. This lets us explore important edge and middle cases
automatically, and is run in CI.
* **jsonchecker:** A set of JSON files used to check different functionality of the parser.
@ -55,7 +68,6 @@ Other important files and directories:
* **singleheader:** Contains generated simdjson.h and simdjson.cpp that we release.
* **test:** The tests are here. basictests.cpp and errortests.cpp are the primary ones.
* **tools:** Source for executables that can be distributed with simdjson
> **Don't modify the files in singleheader/ directly; these are automatically generated.**
>
> While we distribute those files on release, we *maintain* the files under include/ and src/.

View File

@ -19,6 +19,7 @@ if (SIMDJSON_GOOGLE_BENCHMARKS)
add_executable(bench_parse_call bench_parse_call.cpp)
add_executable(bench_dom_api bench_dom_api.cpp)
target_link_libraries(bench_dom_api test-data)
target_link_libraries(bench_parse_call test-data)
endif()
if (SIMDJSON_COMPETITION)

View File

@ -23,7 +23,7 @@ static void numbers_scan(State& state) {
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
return;
}
for (auto _ : state) {
for (UNUSED auto _ : state) {
std::vector<double> container;
for (auto e : arr) {
double x;
@ -47,7 +47,7 @@ static void numbers_size_scan(State& state) {
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
return;
}
for (auto _ : state) {
for (UNUSED auto _ : state) {
std::vector<double> container;
container.resize(arr.size());
size_t pos = 0;
@ -75,7 +75,7 @@ static void numbers_type_scan(State& state) {
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
return;
}
for (auto _ : state) {
for (UNUSED auto _ : state) {
std::vector<double> container;
for (auto e : arr) {
dom::element_type actual_type = e.type();
@ -102,7 +102,7 @@ static void numbers_type_size_scan(State& state) {
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
return;
}
for (auto _ : state) {
for (UNUSED auto _ : state) {
std::vector<double> container;
container.resize(arr.size());
size_t pos = 0;
@ -127,7 +127,7 @@ static void numbers_load_scan(State& state) {
dom::parser parser;
dom::array arr;
simdjson::error_code error;
for (auto _ : state) {
for (UNUSED auto _ : state) {
// this may hit the disk, but probably just once
parser.load(NUMBERS_JSON).get<dom::array>().tie(arr, error);
if(error) {
@ -152,7 +152,7 @@ static void numbers_load_size_scan(State& state) {
dom::parser parser;
dom::array arr;
simdjson::error_code error;
for (auto _ : state) {
for (UNUSED auto _ : state) {
// this may hit the disk, but probably just once
parser.load(NUMBERS_JSON).get<dom::array>().tie(arr, error);
if(error) {
@ -183,7 +183,7 @@ static void numbers_exceptions_scan(State& state) {
// Prints the number of results in twitter.json
dom::parser parser;
dom::array arr = parser.load(NUMBERS_JSON);
for (auto _ : state) {
for (UNUSED auto _ : state) {
std::vector<double> container;
for (double x : arr) {
container.push_back(x);
@ -198,7 +198,7 @@ static void numbers_exceptions_size_scan(State& state) {
// Prints the number of results in twitter.json
dom::parser parser;
dom::array arr = parser.load(NUMBERS_JSON);
for (auto _ : state) {
for (UNUSED auto _ : state) {
std::vector<double> container;
container.resize(arr.size());
size_t pos = 0;
@ -218,7 +218,7 @@ static void numbers_type_exceptions_scan(State& state) {
// Prints the number of results in twitter.json
dom::parser parser;
dom::array arr = parser.load(NUMBERS_JSON);
for (auto _ : state) {
for (UNUSED auto _ : state) {
std::vector<double> container;
for (auto e : arr) {
dom::element_type actual_type = e.type();
@ -237,7 +237,7 @@ static void numbers_type_exceptions_size_scan(State& state) {
// Prints the number of results in twitter.json
dom::parser parser;
dom::array arr = parser.load(NUMBERS_JSON);
for (auto _ : state) {
for (UNUSED auto _ : state) {
std::vector<double> container;
container.resize(arr.size());
size_t pos = 0;
@ -258,7 +258,7 @@ BENCHMARK(numbers_type_exceptions_size_scan);
static void numbers_exceptions_load_scan(State& state) {
// Prints the number of results in twitter.json
dom::parser parser;
for (auto _ : state) {
for (UNUSED auto _ : state) {
// this may hit the disk, but probably just once
dom::array arr = parser.load(NUMBERS_JSON);
std::vector<double> container;
@ -274,7 +274,7 @@ BENCHMARK(numbers_exceptions_load_scan);
static void numbers_exceptions_load_size_scan(State& state) {
// Prints the number of results in twitter.json
dom::parser parser;
for (auto _ : state) {
for (UNUSED auto _ : state) {
// this may hit the disk, but probably just once
dom::array arr = parser.load(NUMBERS_JSON);
std::vector<double> container;
@ -295,7 +295,7 @@ static void twitter_count(State& state) {
// Prints the number of results in twitter.json
dom::parser parser;
dom::element doc = parser.load(TWITTER_JSON);
for (auto _ : state) {
for (UNUSED auto _ : state) {
uint64_t result_count = doc["search_metadata"]["count"];
if (result_count != 100) { return; }
}
@ -308,7 +308,7 @@ static void iterator_twitter_count(State& state) {
// Prints the number of results in twitter.json
padded_string json = padded_string::load(TWITTER_JSON);
ParsedJson pj = build_parsed_json(json);
for (auto _ : state) {
for (UNUSED auto _ : state) {
ParsedJson::Iterator iter(pj);
// uint64_t result_count = doc["search_metadata"]["count"];
if (!iter.move_to_key("search_metadata")) { return; }
@ -326,7 +326,7 @@ static void twitter_default_profile(State& state) {
// Count unique users with a default profile.
dom::parser parser;
dom::element doc = parser.load(TWITTER_JSON);
for (auto _ : state) {
for (UNUSED auto _ : state) {
set<string_view> default_users;
for (dom::object tweet : doc["statuses"].get<dom::array>()) {
dom::object user = tweet["user"];
@ -343,7 +343,7 @@ static void twitter_image_sizes(State& state) {
// Count unique image sizes
dom::parser parser;
dom::element doc = parser.load(TWITTER_JSON);
for (auto _ : state) {
for (UNUSED auto _ : state) {
set<tuple<uint64_t, uint64_t>> image_sizes;
for (dom::object tweet : doc["statuses"].get<dom::array>()) {
auto [media, not_found] = tweet["entities"]["media"];
@ -366,7 +366,7 @@ static void error_code_twitter_count(State& state) noexcept {
// Prints the number of results in twitter.json
dom::parser parser;
dom::element doc = parser.load(TWITTER_JSON);
for (auto _ : state) {
for (UNUSED auto _ : state) {
auto [value, error] = doc["search_metadata"]["count"].get<uint64_t>();
if (error) { return; }
if (value != 100) { return; }
@ -378,7 +378,7 @@ static void error_code_twitter_default_profile(State& state) noexcept {
// Count unique users with a default profile.
dom::parser parser;
dom::element doc = parser.load(TWITTER_JSON);
for (auto _ : state) {
for (UNUSED auto _ : state) {
set<string_view> default_users;
auto [tweets, error] = doc["statuses"].get<dom::array>();
@ -406,7 +406,7 @@ static void iterator_twitter_default_profile(State& state) {
// Count unique users with a default profile.
padded_string json = padded_string::load(TWITTER_JSON);
ParsedJson pj = build_parsed_json(json);
for (auto _ : state) {
for (UNUSED auto _ : state) {
set<string_view> default_users;
ParsedJson::Iterator iter(pj);
@ -445,7 +445,7 @@ static void error_code_twitter_image_sizes(State& state) noexcept {
// Count unique image sizes
dom::parser parser;
dom::element doc = parser.load(TWITTER_JSON);
for (auto _ : state) {
for (UNUSED auto _ : state) {
set<tuple<uint64_t, uint64_t>> image_sizes;
auto [statuses, error] = doc["statuses"].get<dom::array>();
if (error) { return; }
@ -475,7 +475,7 @@ static void iterator_twitter_image_sizes(State& state) {
// Count unique image sizes
padded_string json = padded_string::load(TWITTER_JSON);
ParsedJson pj = build_parsed_json(json);
for (auto _ : state) {
for (UNUSED auto _ : state) {
set<tuple<uint64_t, uint64_t>> image_sizes;
ParsedJson::Iterator iter(pj);
@ -534,7 +534,7 @@ static void print_json(State& state) noexcept {
padded_string json = get_corpus(TWITTER_JSON);
dom::parser parser;
if (int error = json_parse(json, parser); error != SUCCESS) { cerr << error_message(error) << endl; return; }
for (auto _ : state) {
for (UNUSED auto _ : state) {
std::stringstream s;
if (!parser.print_json(s)) { cerr << "print_json failed" << endl; return; }
}

View File

@ -5,13 +5,90 @@ using namespace benchmark;
using namespace std;
const padded_string EMPTY_ARRAY("[]", 2);
const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
const char *GSOC_JSON = SIMDJSON_BENCHMARK_DATA_DIR "gsoc-2018.json";
/**
 * Benchmark: parse twitter.json over and over with a single dom::parser.
 *
 * The file is loaded and the parser's buffers are allocated once, before the
 * timed loop, so each iteration only measures parser.parse().
 *
 * Reported counters:
 *  - "Bytes": total input bytes parsed, as a rate (1024-based units).
 *  - "docs":  number of documents parsed, as a rate.
 *
 * Any failure is reported on stderr and through State::SkipWithError, so that
 * Google Benchmark records the run as failed rather than seeing the function
 * return before the run loop has completed.
 *
 * @param state Google Benchmark state driving the timed loop.
 */
static void parse_twitter(State& state) {
  dom::parser parser;
  padded_string docdata;
  simdjson::error_code error;
  padded_string::load(TWITTER_JSON).tie(docdata, error);
  if(error) {
    // This is a load failure, not a parse failure.
    cerr << "could not load twitter.json: " << error << endl;
    state.SkipWithError("could not load twitter.json");
    return;
  }
  // we do not want mem. alloc. in the loop.
  error = parser.allocate(docdata.size());
  if(error) {
    cerr << "could not allocate parser capacity for twitter.json: " << error << endl;
    state.SkipWithError("could not allocate parser capacity for twitter.json");
    return;
  }
  size_t bytes = 0;
  for (UNUSED auto _ : state) {
    dom::element doc;
    bytes += docdata.size();
    parser.parse(docdata).tie(doc,error);
    if(error) {
      cerr << "could not parse twitter.json: " << error << endl;
      state.SkipWithError("could not parse twitter.json");
      return;
    }
    // Keep the compiler from discarding the parse result.
    benchmark::DoNotOptimize(doc);
  }
  state.counters["Bytes"] = benchmark::Counter(
      double(bytes), benchmark::Counter::kIsRate,
      benchmark::Counter::OneK::kIs1024);
  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
}
// Register parse_twitter: run it 10 times, add a custom "max" statistic that
// reports the largest value observed across the repetitions, and display only
// the aggregate rows instead of every individual repetition.
BENCHMARK(parse_twitter)->Repetitions(10)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
return *(std::max_element(std::begin(v), std::end(v)));
})->DisplayAggregatesOnly(true);
/**
 * Benchmark: parse gsoc-2018.json over and over with a single dom::parser.
 *
 * The file is loaded and the parser's buffers are allocated once, before the
 * timed loop, so each iteration only measures parser.parse().
 *
 * Reported counters:
 *  - "Bytes": total input bytes parsed, as a rate (1024-based units).
 *  - "docs":  number of documents parsed, as a rate.
 *
 * Any failure is reported on stderr and through State::SkipWithError, so that
 * Google Benchmark records the run as failed rather than seeing the function
 * return before the run loop has completed.
 *
 * @param state Google Benchmark state driving the timed loop.
 */
static void parse_gsoc(State& state) {
  dom::parser parser;
  padded_string docdata;
  simdjson::error_code error;
  padded_string::load(GSOC_JSON).tie(docdata, error);
  if(error) {
    // This is a load failure, not a parse failure.
    cerr << "could not load gsoc-2018.json: " << error << endl;
    state.SkipWithError("could not load gsoc-2018.json");
    return;
  }
  // we do not want mem. alloc. in the loop.
  error = parser.allocate(docdata.size());
  if(error) {
    cerr << "could not allocate parser capacity for gsoc-2018.json: " << error << endl;
    state.SkipWithError("could not allocate parser capacity for gsoc-2018.json");
    return;
  }
  size_t bytes = 0;
  for (UNUSED auto _ : state) {
    dom::element doc;
    bytes += docdata.size();
    parser.parse(docdata).tie(doc,error);
    if(error) {
      cerr << "could not parse gsoc-2018.json: " << error << endl;
      state.SkipWithError("could not parse gsoc-2018.json");
      return;
    }
    // Keep the compiler from discarding the parse result.
    benchmark::DoNotOptimize(doc);
  }
  state.counters["Bytes"] = benchmark::Counter(
      double(bytes), benchmark::Counter::kIsRate,
      benchmark::Counter::OneK::kIs1024);
  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
}
// Register parse_gsoc: run it 10 times, add a custom "max" statistic that
// reports the largest value observed across the repetitions, and display only
// the aggregate rows instead of every individual repetition.
BENCHMARK(parse_gsoc)->Repetitions(10)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
return *(std::max_element(std::begin(v), std::end(v)));
})->DisplayAggregatesOnly(true);
SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_DEPRECATED_WARNING
static void json_parse(State& state) {
ParsedJson pj;
if (!pj.allocate_capacity(EMPTY_ARRAY.length())) { return; }
for (auto _ : state) {
for (UNUSED auto _ : state) {
auto error = json_parse(EMPTY_ARRAY, pj);
if (error) { return; }
}
@ -21,7 +98,7 @@ BENCHMARK(json_parse);
static void parser_parse_error_code(State& state) {
dom::parser parser;
if (parser.allocate(EMPTY_ARRAY.length())) { return; }
for (auto _ : state) {
for (UNUSED auto _ : state) {
auto error = parser.parse(EMPTY_ARRAY).error();
if (error) { return; }
}
@ -30,10 +107,11 @@ BENCHMARK(parser_parse_error_code);
static void parser_parse_exception(State& state) {
dom::parser parser;
if (parser.allocate(EMPTY_ARRAY.length())) { return; }
for (auto _ : state) {
for (UNUSED auto _ : state) {
try {
UNUSED dom::element doc = parser.parse(EMPTY_ARRAY);
} catch(simdjson_error &j) {
cout << j.what() << endl;
return;
}
}
@ -43,7 +121,7 @@ BENCHMARK(parser_parse_exception);
SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_DEPRECATED_WARNING
static void build_parsed_json(State& state) {
for (auto _ : state) {
for (UNUSED auto _ : state) {
dom::parser parser = simdjson::build_parsed_json(EMPTY_ARRAY);
if (!parser.valid) { return; }
}
@ -51,7 +129,7 @@ static void build_parsed_json(State& state) {
SIMDJSON_POP_DISABLE_WARNINGS
BENCHMARK(build_parsed_json);
static void document_parse_error_code(State& state) {
for (auto _ : state) {
for (UNUSED auto _ : state) {
dom::parser parser;
auto error = parser.parse(EMPTY_ARRAY).error();
if (error) { return; }
@ -59,11 +137,12 @@ static void document_parse_error_code(State& state) {
}
BENCHMARK(document_parse_error_code);
static void document_parse_exception(State& state) {
for (auto _ : state) {
for (UNUSED auto _ : state) {
try {
dom::parser parser;
UNUSED dom::element doc = parser.parse(EMPTY_ARRAY);
} catch(simdjson_error &j) {
cout << j.what() << endl;
return;
}
}