Adding independent benchmarks using Google Benchmark (#826)
* Adding independent benchmarks using Google Benchmark
This commit is contained in:
parent
4cd9de5c37
commit
f0d5337818
16
HACKING.md
16
HACKING.md
|
@ -43,8 +43,21 @@ Other important files and directories:
|
|||
cardinal rule is don't regress performance without knowing exactly why, and what you're trading
|
||||
for it. If you're not sure what else to do to check your performance, this is always a good start:
|
||||
```bash
|
||||
make parse && ./parse jsonexamples/twitter.json
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
cmake --build . --config=Release
|
||||
benchmark/parse ../jsonexamples/twitter.json
|
||||
```
|
||||
The last line becomes `./benchmark/Release/parse.exe ../jsonexamples/twitter.json` under Windows. You may also use Google Benchmark:
|
||||
```bash
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -DSIMDJSON_GOOGLE_BENCHMARKS=ON
|
||||
cmake --build . --target bench_parse_call --config=Release
|
||||
./benchmark/bench_parse_call
|
||||
```
|
||||
The last line becomes `./benchmark/Release/bench_parse_call.exe` under Windows. Under Windows, you can also build with the clang compiler by adding `-T ClangCL` to the call to `cmake ..`.
|
||||
* **fuzz:** The source for fuzz testing. This lets us explore important edge and middle cases
|
||||
automatically, and is run in CI.
|
||||
* **jsonchecker:** A set of JSON files used to check different functionality of the parser.
|
||||
|
@ -55,7 +68,6 @@ Other important files and directories:
|
|||
* **singleheader:** Contains generated simdjson.h and simdjson.cpp that we release.
|
||||
* **test:** The tests are here. basictests.cpp and errortests.cpp are the primary ones.
|
||||
* **tools:** Source for executables that can be distributed with simdjson
|
||||
|
||||
> **Don't modify the files in singleheader/ directly; these are automatically generated.**
|
||||
>
|
||||
> While we distribute those files on release, we *maintain* the files under include/ and src/.
|
||||
|
|
|
@ -19,6 +19,7 @@ if (SIMDJSON_GOOGLE_BENCHMARKS)
|
|||
add_executable(bench_parse_call bench_parse_call.cpp)
|
||||
add_executable(bench_dom_api bench_dom_api.cpp)
|
||||
target_link_libraries(bench_dom_api test-data)
|
||||
target_link_libraries(bench_parse_call test-data)
|
||||
endif()
|
||||
|
||||
if (SIMDJSON_COMPETITION)
|
||||
|
|
|
@ -23,7 +23,7 @@ static void numbers_scan(State& state) {
|
|||
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
|
||||
return;
|
||||
}
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
std::vector<double> container;
|
||||
for (auto e : arr) {
|
||||
double x;
|
||||
|
@ -47,7 +47,7 @@ static void numbers_size_scan(State& state) {
|
|||
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
|
||||
return;
|
||||
}
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
std::vector<double> container;
|
||||
container.resize(arr.size());
|
||||
size_t pos = 0;
|
||||
|
@ -75,7 +75,7 @@ static void numbers_type_scan(State& state) {
|
|||
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
|
||||
return;
|
||||
}
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
std::vector<double> container;
|
||||
for (auto e : arr) {
|
||||
dom::element_type actual_type = e.type();
|
||||
|
@ -102,7 +102,7 @@ static void numbers_type_size_scan(State& state) {
|
|||
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
|
||||
return;
|
||||
}
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
std::vector<double> container;
|
||||
container.resize(arr.size());
|
||||
size_t pos = 0;
|
||||
|
@ -127,7 +127,7 @@ static void numbers_load_scan(State& state) {
|
|||
dom::parser parser;
|
||||
dom::array arr;
|
||||
simdjson::error_code error;
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
// this may hit the disk, but probably just once
|
||||
parser.load(NUMBERS_JSON).get<dom::array>().tie(arr, error);
|
||||
if(error) {
|
||||
|
@ -152,7 +152,7 @@ static void numbers_load_size_scan(State& state) {
|
|||
dom::parser parser;
|
||||
dom::array arr;
|
||||
simdjson::error_code error;
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
// this may hit the disk, but probably just once
|
||||
parser.load(NUMBERS_JSON).get<dom::array>().tie(arr, error);
|
||||
if(error) {
|
||||
|
@ -183,7 +183,7 @@ static void numbers_exceptions_scan(State& state) {
|
|||
// Prints the number of results in twitter.json
|
||||
dom::parser parser;
|
||||
dom::array arr = parser.load(NUMBERS_JSON);
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
std::vector<double> container;
|
||||
for (double x : arr) {
|
||||
container.push_back(x);
|
||||
|
@ -198,7 +198,7 @@ static void numbers_exceptions_size_scan(State& state) {
|
|||
// Prints the number of results in twitter.json
|
||||
dom::parser parser;
|
||||
dom::array arr = parser.load(NUMBERS_JSON);
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
std::vector<double> container;
|
||||
container.resize(arr.size());
|
||||
size_t pos = 0;
|
||||
|
@ -218,7 +218,7 @@ static void numbers_type_exceptions_scan(State& state) {
|
|||
// Prints the number of results in twitter.json
|
||||
dom::parser parser;
|
||||
dom::array arr = parser.load(NUMBERS_JSON);
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
std::vector<double> container;
|
||||
for (auto e : arr) {
|
||||
dom::element_type actual_type = e.type();
|
||||
|
@ -237,7 +237,7 @@ static void numbers_type_exceptions_size_scan(State& state) {
|
|||
// Prints the number of results in twitter.json
|
||||
dom::parser parser;
|
||||
dom::array arr = parser.load(NUMBERS_JSON);
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
std::vector<double> container;
|
||||
container.resize(arr.size());
|
||||
size_t pos = 0;
|
||||
|
@ -258,7 +258,7 @@ BENCHMARK(numbers_type_exceptions_size_scan);
|
|||
static void numbers_exceptions_load_scan(State& state) {
|
||||
// Prints the number of results in twitter.json
|
||||
dom::parser parser;
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
// this may hit the disk, but probably just once
|
||||
dom::array arr = parser.load(NUMBERS_JSON);
|
||||
std::vector<double> container;
|
||||
|
@ -274,7 +274,7 @@ BENCHMARK(numbers_exceptions_load_scan);
|
|||
static void numbers_exceptions_load_size_scan(State& state) {
|
||||
// Prints the number of results in twitter.json
|
||||
dom::parser parser;
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
// this may hit the disk, but probably just once
|
||||
dom::array arr = parser.load(NUMBERS_JSON);
|
||||
std::vector<double> container;
|
||||
|
@ -295,7 +295,7 @@ static void twitter_count(State& state) {
|
|||
// Prints the number of results in twitter.json
|
||||
dom::parser parser;
|
||||
dom::element doc = parser.load(TWITTER_JSON);
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
uint64_t result_count = doc["search_metadata"]["count"];
|
||||
if (result_count != 100) { return; }
|
||||
}
|
||||
|
@ -308,7 +308,7 @@ static void iterator_twitter_count(State& state) {
|
|||
// Prints the number of results in twitter.json
|
||||
padded_string json = padded_string::load(TWITTER_JSON);
|
||||
ParsedJson pj = build_parsed_json(json);
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
ParsedJson::Iterator iter(pj);
|
||||
// uint64_t result_count = doc["search_metadata"]["count"];
|
||||
if (!iter.move_to_key("search_metadata")) { return; }
|
||||
|
@ -326,7 +326,7 @@ static void twitter_default_profile(State& state) {
|
|||
// Count unique users with a default profile.
|
||||
dom::parser parser;
|
||||
dom::element doc = parser.load(TWITTER_JSON);
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
set<string_view> default_users;
|
||||
for (dom::object tweet : doc["statuses"].get<dom::array>()) {
|
||||
dom::object user = tweet["user"];
|
||||
|
@ -343,7 +343,7 @@ static void twitter_image_sizes(State& state) {
|
|||
// Count unique image sizes
|
||||
dom::parser parser;
|
||||
dom::element doc = parser.load(TWITTER_JSON);
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
set<tuple<uint64_t, uint64_t>> image_sizes;
|
||||
for (dom::object tweet : doc["statuses"].get<dom::array>()) {
|
||||
auto [media, not_found] = tweet["entities"]["media"];
|
||||
|
@ -366,7 +366,7 @@ static void error_code_twitter_count(State& state) noexcept {
|
|||
// Prints the number of results in twitter.json
|
||||
dom::parser parser;
|
||||
dom::element doc = parser.load(TWITTER_JSON);
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
auto [value, error] = doc["search_metadata"]["count"].get<uint64_t>();
|
||||
if (error) { return; }
|
||||
if (value != 100) { return; }
|
||||
|
@ -378,7 +378,7 @@ static void error_code_twitter_default_profile(State& state) noexcept {
|
|||
// Count unique users with a default profile.
|
||||
dom::parser parser;
|
||||
dom::element doc = parser.load(TWITTER_JSON);
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
set<string_view> default_users;
|
||||
|
||||
auto [tweets, error] = doc["statuses"].get<dom::array>();
|
||||
|
@ -406,7 +406,7 @@ static void iterator_twitter_default_profile(State& state) {
|
|||
// Count unique users with a default profile.
|
||||
padded_string json = padded_string::load(TWITTER_JSON);
|
||||
ParsedJson pj = build_parsed_json(json);
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
set<string_view> default_users;
|
||||
ParsedJson::Iterator iter(pj);
|
||||
|
||||
|
@ -445,7 +445,7 @@ static void error_code_twitter_image_sizes(State& state) noexcept {
|
|||
// Count unique image sizes
|
||||
dom::parser parser;
|
||||
dom::element doc = parser.load(TWITTER_JSON);
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
set<tuple<uint64_t, uint64_t>> image_sizes;
|
||||
auto [statuses, error] = doc["statuses"].get<dom::array>();
|
||||
if (error) { return; }
|
||||
|
@ -475,7 +475,7 @@ static void iterator_twitter_image_sizes(State& state) {
|
|||
// Count unique image sizes
|
||||
padded_string json = padded_string::load(TWITTER_JSON);
|
||||
ParsedJson pj = build_parsed_json(json);
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
set<tuple<uint64_t, uint64_t>> image_sizes;
|
||||
ParsedJson::Iterator iter(pj);
|
||||
|
||||
|
@ -534,7 +534,7 @@ static void print_json(State& state) noexcept {
|
|||
padded_string json = get_corpus(TWITTER_JSON);
|
||||
dom::parser parser;
|
||||
if (int error = json_parse(json, parser); error != SUCCESS) { cerr << error_message(error) << endl; return; }
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
std::stringstream s;
|
||||
if (!parser.print_json(s)) { cerr << "print_json failed" << endl; return; }
|
||||
}
|
||||
|
|
|
@ -5,13 +5,90 @@ using namespace benchmark;
|
|||
using namespace std;
|
||||
|
||||
// Two-byte JSON document holding an empty array ("[]"). It is the input parsed
// by the json_parse, parser_parse_*, build_parsed_json and document_parse_*
// benchmarks in this file.
const padded_string EMPTY_ARRAY("[]", 2);
|
||||
// Paths of the JSON inputs used by parse_twitter and parse_gsoc. Each path is
// formed by string-literal concatenation of SIMDJSON_BENCHMARK_DATA_DIR with
// the file name.
const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
|
||||
const char *GSOC_JSON = SIMDJSON_BENCHMARK_DATA_DIR "gsoc-2018.json";
|
||||
|
||||
|
||||
|
||||
// Benchmarks dom::parser::parse() on the contents of TWITTER_JSON.
//
// The file is loaded and the parser is allocated once, before the timed loop,
// so each iteration consists of a single parse of the in-memory document.
// Two rate counters are reported:
//   "Bytes" - input bytes parsed (1024-based unit prefixes),
//   "docs"  - documents parsed.
//
// Every failure is printed to cerr and reported through State::SkipWithError()
// so that Google Benchmark marks the run as failed instead of recording a
// timing for work that never happened.
static void parse_twitter(State& state) {
  dom::parser parser;
  padded_string docdata;
  simdjson::error_code error;
  padded_string::load(TWITTER_JSON).tie(docdata, error);
  if (error) {
    cerr << "could not load twitter.json: " << error << endl;
    state.SkipWithError("could not load twitter.json");
    return;
  }
  // we do not want mem. alloc. in the loop.
  error = parser.allocate(docdata.size());
  if (error) {
    cerr << "could not allocate parser for twitter.json: " << error << endl;
    state.SkipWithError("could not allocate parser for twitter.json");
    return;
  }
  for (UNUSED auto _ : state) {
    dom::element doc;
    parser.parse(docdata).tie(doc, error);
    if (error) {
      cerr << "could not parse twitter.json: " << error << endl;
      state.SkipWithError("could not parse twitter.json");
      return;
    }
    // Keep the parse result observable so the call cannot be optimized away.
    benchmark::DoNotOptimize(doc);
  }
  // Every iteration parses the same buffer, so the byte total is computed once
  // here rather than being accumulated inside the timed loop.
  const double docs = double(state.iterations());
  state.counters["Bytes"] = benchmark::Counter(
      double(docdata.size()) * docs, benchmark::Counter::kIsRate,
      benchmark::Counter::OneK::kIs1024);
  state.counters["docs"] = Counter(docs, benchmark::Counter::kIsRate);
}
|
||||
// Register parse_twitter: 10 repetitions, only aggregate rows displayed, plus
// a custom "max" statistic that returns the largest element of the vector of
// values it is given.
BENCHMARK(parse_twitter)->Repetitions(10)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
||||
return *(std::max_element(std::begin(v), std::end(v)));
|
||||
})->DisplayAggregatesOnly(true);
|
||||
|
||||
|
||||
// Benchmarks dom::parser::parse() on the contents of GSOC_JSON.
//
// The file is loaded and the parser is allocated once, before the timed loop,
// so each iteration consists of a single parse of the in-memory document.
// Two rate counters are reported:
//   "Bytes" - input bytes parsed (1024-based unit prefixes),
//   "docs"  - documents parsed.
//
// Every failure is printed to cerr and reported through State::SkipWithError()
// so that Google Benchmark marks the run as failed instead of recording a
// timing for work that never happened.
static void parse_gsoc(State& state) {
  dom::parser parser;
  padded_string docdata;
  simdjson::error_code error;
  padded_string::load(GSOC_JSON).tie(docdata, error);
  if (error) {
    cerr << "could not load gsoc-2018.json: " << error << endl;
    state.SkipWithError("could not load gsoc-2018.json");
    return;
  }
  // we do not want mem. alloc. in the loop.
  error = parser.allocate(docdata.size());
  if (error) {
    cerr << "could not allocate parser for gsoc-2018.json: " << error << endl;
    state.SkipWithError("could not allocate parser for gsoc-2018.json");
    return;
  }
  for (UNUSED auto _ : state) {
    dom::element doc;
    parser.parse(docdata).tie(doc, error);
    if (error) {
      cerr << "could not parse gsoc-2018.json: " << error << endl;
      state.SkipWithError("could not parse gsoc-2018.json");
      return;
    }
    // Keep the parse result observable so the call cannot be optimized away.
    benchmark::DoNotOptimize(doc);
  }
  // Every iteration parses the same buffer, so the byte total is computed once
  // here rather than being accumulated inside the timed loop.
  const double docs = double(state.iterations());
  state.counters["Bytes"] = benchmark::Counter(
      double(docdata.size()) * docs, benchmark::Counter::kIsRate,
      benchmark::Counter::OneK::kIs1024);
  state.counters["docs"] = Counter(docs, benchmark::Counter::kIsRate);
}
|
||||
// Register parse_gsoc: 10 repetitions, only aggregate rows displayed, plus a
// custom "max" statistic that returns the largest element of the vector of
// values it is given.
BENCHMARK(parse_gsoc)->Repetitions(10)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
||||
return *(std::max_element(std::begin(v), std::end(v)));
|
||||
})->DisplayAggregatesOnly(true);
|
||||
|
||||
|
||||
|
||||
SIMDJSON_PUSH_DISABLE_WARNINGS
|
||||
SIMDJSON_DISABLE_DEPRECATED_WARNING
|
||||
static void json_parse(State& state) {
|
||||
ParsedJson pj;
|
||||
if (!pj.allocate_capacity(EMPTY_ARRAY.length())) { return; }
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
auto error = json_parse(EMPTY_ARRAY, pj);
|
||||
if (error) { return; }
|
||||
}
|
||||
|
@ -21,7 +98,7 @@ BENCHMARK(json_parse);
|
|||
static void parser_parse_error_code(State& state) {
|
||||
dom::parser parser;
|
||||
if (parser.allocate(EMPTY_ARRAY.length())) { return; }
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
auto error = parser.parse(EMPTY_ARRAY).error();
|
||||
if (error) { return; }
|
||||
}
|
||||
|
@ -30,10 +107,11 @@ BENCHMARK(parser_parse_error_code);
|
|||
static void parser_parse_exception(State& state) {
|
||||
dom::parser parser;
|
||||
if (parser.allocate(EMPTY_ARRAY.length())) { return; }
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
try {
|
||||
UNUSED dom::element doc = parser.parse(EMPTY_ARRAY);
|
||||
} catch(simdjson_error &j) {
|
||||
cout << j.what() << endl;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -43,7 +121,7 @@ BENCHMARK(parser_parse_exception);
|
|||
SIMDJSON_PUSH_DISABLE_WARNINGS
|
||||
SIMDJSON_DISABLE_DEPRECATED_WARNING
|
||||
static void build_parsed_json(State& state) {
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
dom::parser parser = simdjson::build_parsed_json(EMPTY_ARRAY);
|
||||
if (!parser.valid) { return; }
|
||||
}
|
||||
|
@ -51,7 +129,7 @@ static void build_parsed_json(State& state) {
|
|||
SIMDJSON_POP_DISABLE_WARNINGS
|
||||
BENCHMARK(build_parsed_json);
|
||||
static void document_parse_error_code(State& state) {
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
dom::parser parser;
|
||||
auto error = parser.parse(EMPTY_ARRAY).error();
|
||||
if (error) { return; }
|
||||
|
@ -59,11 +137,12 @@ static void document_parse_error_code(State& state) {
|
|||
}
|
||||
BENCHMARK(document_parse_error_code);
|
||||
static void document_parse_exception(State& state) {
|
||||
for (auto _ : state) {
|
||||
for (UNUSED auto _ : state) {
|
||||
try {
|
||||
dom::parser parser;
|
||||
UNUSED dom::element doc = parser.parse(EMPTY_ARRAY);
|
||||
} catch(simdjson_error &j) {
|
||||
cout << j.what() << endl;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue