Add option to make buffers hot and remove recent benchmarking changes (#443)

* This reverts the code back to how it was prior to the silly "run two stages" routine and instead
adds an option to benchmark the code over hot buffers. It turns out that allocating the pages
can be expensive when the files are large.
This commit is contained in:
Daniel Lemire 2020-01-15 19:48:00 -05:00 committed by GitHub
parent 27861f6358
commit f87e64f988
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 23 additions and 31 deletions

View File

@ -280,13 +280,19 @@ struct benchmarker {
return all_stages.iterations; return all_stages.iterations;
} }
really_inline void run_iteration(bool stage1_only, bool rerunbothstages) { really_inline void run_iteration(bool stage1_only, bool hotbuffers) {
// Allocate ParsedJson // Allocate ParsedJson
collector.start(); collector.start();
ParsedJson pj; ParsedJson pj;
bool allocok = pj.allocate_capacity(json.size()); bool allocok = pj.allocate_capacity(json.size());
event_count allocate_count = collector.end(); event_count allocate_count = collector.end();
allocate_stage << allocate_count; allocate_stage << allocate_count;
if(hotbuffers) {
int result = parser.parse((const uint8_t *)json.data(), json.size(), pj);
if (result != simdjson::SUCCESS) {
exit_error(string("Failed to parse ") + filename + string(":") + pj.get_error_message());
}
}
if (!allocok) { if (!allocok) {
exit_error(string("Unable to allocate_stage ") + to_string(json.size()) + " bytes for the JSON result."); exit_error(string("Unable to allocate_stage ") + to_string(json.size()) + " bytes for the JSON result.");
@ -316,21 +322,7 @@ struct benchmarker {
} }
stage2_count = collector.end(); stage2_count = collector.end();
stage2 << stage2_count; stage2 << stage2_count;
if(rerunbothstages) { all_stages << allocate_count + stage1_count + stage2_count;
// You would think that the entire processing is just stage 1 + stage 2, but
// empirically, that's not true! Not even close to be true in some instances.
event_count allstages_count;
collector.start();
result = parser.parse((const uint8_t *)json.data(), json.size(), pj);
if (result != simdjson::SUCCESS) {
exit_error(string("Failed to parse ") + filename + " during overall parsing " + pj.get_error_message());
}
allstages_count = collector.end();
all_stages << allstages_count;
} else {
// we are optimistic
all_stages << stage1_count + stage2_count;
}
} }
// Calculate stats the first time we parse // Calculate stats the first time we parse
if (stats == NULL) { if (stats == NULL) {
@ -344,9 +336,9 @@ struct benchmarker {
} }
} }
really_inline void run_iterations(size_t iterations, bool stage1_only, bool rerunbothstages) { really_inline void run_iterations(size_t iterations, bool stage1_only, bool hotbuffers) {
for (size_t i = 0; i<iterations; i++) { for (size_t i = 0; i<iterations; i++) {
run_iteration(stage1_only, rerunbothstages); run_iteration(stage1_only, hotbuffers);
} }
} }
@ -449,8 +441,11 @@ struct benchmarker {
printf("\n"); printf("\n");
printf("All Stages\n"); printf("All Stages\n");
print_aggregate("| " , all_stages.best); print_aggregate("| " , all_stages.best);
// printf("|- Allocation\n"); // frequently, allocation is a tiny fraction of the running time so we omit it
// print_aggregate("| ", allocate_stage.best); if(allocate_stage.best.elapsed_sec() > 0.01 * all_stages.best.elapsed_sec()) {
printf("|- Allocation\n");
print_aggregate("| ", allocate_stage.best);
}
printf("|- Stage 1\n"); printf("|- Stage 1\n");
print_aggregate("| ", stage1.best); print_aggregate("| ", stage1.best);
printf("|- Stage 2\n"); printf("|- Stage 2\n");

View File

@ -70,12 +70,10 @@ void print_usage(ostream& out) {
out << "-s STAGE - Stop after the given stage." << endl; out << "-s STAGE - Stop after the given stage." << endl;
out << " -s stage1 - Stop after find_structural_bits." << endl; out << " -s stage1 - Stop after find_structural_bits." << endl;
out << " -s all - Run all stages." << endl; out << " -s all - Run all stages." << endl;
out << " -s allfast - Run all stages." << endl; out << "-H - Make the buffers hot (reduce page allocation during parsing)" << endl;
out << "-a ARCH - Use the parser with the designated architecture (HASWELL, WESTMERE" << endl; out << "-a ARCH - Use the parser with the designated architecture (HASWELL, WESTMERE" << endl;
out << " or ARM64). By default, detects best supported architecture." << endl; out << " or ARM64). By default, detects best supported architecture." << endl;
out << "-o - Estimate the overall speed as stage 1 + stage 2 instead of a rerun of both" << endl;
} }
void exit_usage(string message) { void exit_usage(string message) {
@ -95,13 +93,13 @@ struct option_struct {
bool verbose = false; bool verbose = false;
bool tabbed_output = false; bool tabbed_output = false;
bool rerunbothstages = true; bool hotbuffers = false;
option_struct(int argc, char **argv) { option_struct(int argc, char **argv) {
#ifndef _MSC_VER #ifndef _MSC_VER
int c; int c;
while ((c = getopt(argc, argv, "vtn:i:a:s:")) != -1) { while ((c = getopt(argc, argv, "vtn:i:a:s:H")) != -1) {
switch (c) { switch (c) {
case 'n': case 'n':
iterations = atoi(optarg); iterations = atoi(optarg);
@ -121,15 +119,14 @@ struct option_struct {
exit_usage(string("Unsupported option value -a ") + optarg + ": expected -a HASWELL, WESTMERE or ARM64"); exit_usage(string("Unsupported option value -a ") + optarg + ": expected -a HASWELL, WESTMERE or ARM64");
} }
break; break;
case 'H':
hotbuffers = true;
break;
case 's': case 's':
if (!strcmp(optarg, "stage1")) { if (!strcmp(optarg, "stage1")) {
stage1_only = true; stage1_only = true;
} else if (!strcmp(optarg, "all")) { } else if (!strcmp(optarg, "all")) {
stage1_only = false; stage1_only = false;
rerunbothstages = true; // for safety
} else if (!strcmp(optarg, "allfast")) {
stage1_only = false;
rerunbothstages = false;
} else { } else {
exit_usage(string("Unsupported option value -s ") + optarg + ": expected -s stage1 or all"); exit_usage(string("Unsupported option value -s ") + optarg + ": expected -s stage1 or all");
} }
@ -204,7 +201,7 @@ int main(int argc, char *argv[]) {
// Benchmark each file once per iteration // Benchmark each file once per iteration
for (size_t f=0; f<options.files.size(); f++) { for (size_t f=0; f<options.files.size(); f++) {
verbose() << "[verbose] " << benchmarkers[f]->filename << " iterations #" << iteration << "-" << (iteration+options.iteration_step-1) << endl; verbose() << "[verbose] " << benchmarkers[f]->filename << " iterations #" << iteration << "-" << (iteration+options.iteration_step-1) << endl;
benchmarkers[f]->run_iterations(options.iteration_step, true, false); benchmarkers[f]->run_iterations(options.iteration_step, true, options.hotbuffers);
} }
} }
} else { } else {
@ -213,7 +210,7 @@ int main(int argc, char *argv[]) {
// Benchmark each file once per iteration // Benchmark each file once per iteration
for (size_t f=0; f<options.files.size(); f++) { for (size_t f=0; f<options.files.size(); f++) {
verbose() << "[verbose] " << benchmarkers[f]->filename << " iterations #" << iteration << "-" << (iteration+options.iteration_step-1) << endl; verbose() << "[verbose] " << benchmarkers[f]->filename << " iterations #" << iteration << "-" << (iteration+options.iteration_step-1) << endl;
benchmarkers[f]->run_iterations(options.iteration_step, false, options.rerunbothstages); benchmarkers[f]->run_iterations(options.iteration_step, false, options.hotbuffers);
} }
} }
} }