Add option to make buffers hot and remove recent benchmarking changes (#443)

* This reverts the code back to how it was prior to the silly "run two stages" routine and instead
adds an option to benchmark the code over hot buffers. It turns out that it can be expensive,
when the files are large, to allocate the pages.
This commit is contained in:
Daniel Lemire 2020-01-15 19:48:00 -05:00 committed by GitHub
parent 27861f6358
commit f87e64f988
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 23 additions and 31 deletions

View File

@ -280,13 +280,19 @@ struct benchmarker {
return all_stages.iterations;
}
really_inline void run_iteration(bool stage1_only, bool rerunbothstages) {
really_inline void run_iteration(bool stage1_only, bool hotbuffers) {
// Allocate ParsedJson
collector.start();
ParsedJson pj;
bool allocok = pj.allocate_capacity(json.size());
event_count allocate_count = collector.end();
allocate_stage << allocate_count;
if(hotbuffers) {
int result = parser.parse((const uint8_t *)json.data(), json.size(), pj);
if (result != simdjson::SUCCESS) {
exit_error(string("Failed to parse ") + filename + string(":") + pj.get_error_message());
}
}
if (!allocok) {
exit_error(string("Unable to allocate_stage ") + to_string(json.size()) + " bytes for the JSON result.");
@ -316,21 +322,7 @@ struct benchmarker {
}
stage2_count = collector.end();
stage2 << stage2_count;
if(rerunbothstages) {
// You would think that the entire processing is just stage 1 + stage 2, but
// empirically, that's not true! Not even close to be true in some instances.
event_count allstages_count;
collector.start();
result = parser.parse((const uint8_t *)json.data(), json.size(), pj);
if (result != simdjson::SUCCESS) {
exit_error(string("Failed to parse ") + filename + " during overall parsing " + pj.get_error_message());
}
allstages_count = collector.end();
all_stages << allstages_count;
} else {
// we are optimistic
all_stages << stage1_count + stage2_count;
}
all_stages << allocate_count + stage1_count + stage2_count;
}
// Calculate stats the first time we parse
if (stats == NULL) {
@ -344,9 +336,9 @@ struct benchmarker {
}
}
really_inline void run_iterations(size_t iterations, bool stage1_only, bool rerunbothstages) {
really_inline void run_iterations(size_t iterations, bool stage1_only, bool hotbuffers) {
for (size_t i = 0; i<iterations; i++) {
run_iteration(stage1_only, rerunbothstages);
run_iteration(stage1_only, hotbuffers);
}
}
@ -449,8 +441,11 @@ struct benchmarker {
printf("\n");
printf("All Stages\n");
print_aggregate("| " , all_stages.best);
// printf("|- Allocation\n");
// print_aggregate("| ", allocate_stage.best);
// frequently, allocation is a tiny fraction of the running time so we omit it
if(allocate_stage.best.elapsed_sec() > 0.01 * all_stages.best.elapsed_sec()) {
printf("|- Allocation\n");
print_aggregate("| ", allocate_stage.best);
}
printf("|- Stage 1\n");
print_aggregate("| ", stage1.best);
printf("|- Stage 2\n");

View File

@ -70,12 +70,10 @@ void print_usage(ostream& out) {
out << "-s STAGE - Stop after the given stage." << endl;
out << " -s stage1 - Stop after find_structural_bits." << endl;
out << " -s all - Run all stages." << endl;
out << " -s allfast - Run all stages." << endl;
out << "-H - Make the buffers hot (reduce page allocation during parsing)" << endl;
out << "-a ARCH - Use the parser with the designated architecture (HASWELL, WESTMERE" << endl;
out << " or ARM64). By default, detects best supported architecture." << endl;
out << "-o - Estimate the overall speed as stage 1 + stage 2 instead of a rerun of both" << endl;
}
void exit_usage(string message) {
@ -95,13 +93,13 @@ struct option_struct {
bool verbose = false;
bool tabbed_output = false;
bool rerunbothstages = true;
bool hotbuffers = false;
option_struct(int argc, char **argv) {
#ifndef _MSC_VER
int c;
while ((c = getopt(argc, argv, "vtn:i:a:s:")) != -1) {
while ((c = getopt(argc, argv, "vtn:i:a:s:H")) != -1) {
switch (c) {
case 'n':
iterations = atoi(optarg);
@ -121,15 +119,14 @@ struct option_struct {
exit_usage(string("Unsupported option value -a ") + optarg + ": expected -a HASWELL, WESTMERE or ARM64");
}
break;
case 'H':
hotbuffers = true;
break;
case 's':
if (!strcmp(optarg, "stage1")) {
stage1_only = true;
} else if (!strcmp(optarg, "all")) {
stage1_only = false;
rerunbothstages = true; // for safety
} else if (!strcmp(optarg, "allfast")) {
stage1_only = false;
rerunbothstages = false;
} else {
exit_usage(string("Unsupported option value -s ") + optarg + ": expected -s stage1 or all");
}
@ -204,7 +201,7 @@ int main(int argc, char *argv[]) {
// Benchmark each file once per iteration
for (size_t f=0; f<options.files.size(); f++) {
verbose() << "[verbose] " << benchmarkers[f]->filename << " iterations #" << iteration << "-" << (iteration+options.iteration_step-1) << endl;
benchmarkers[f]->run_iterations(options.iteration_step, true, false);
benchmarkers[f]->run_iterations(options.iteration_step, true, options.hotbuffers);
}
}
} else {
@ -213,7 +210,7 @@ int main(int argc, char *argv[]) {
// Benchmark each file once per iteration
for (size_t f=0; f<options.files.size(); f++) {
verbose() << "[verbose] " << benchmarkers[f]->filename << " iterations #" << iteration << "-" << (iteration+options.iteration_step-1) << endl;
benchmarkers[f]->run_iterations(options.iteration_step, false, options.rerunbothstages);
benchmarkers[f]->run_iterations(options.iteration_step, false, options.hotbuffers);
}
}
}