Benchmark escapes (#464)

* Add escapes as a feature we benchmark

* Don't print effectiveness metric unless verbose is on
This commit is contained in:
John Keiser 2020-01-27 06:58:14 -08:00 committed by Daniel Lemire
parent 6784530b8b
commit 6978a0b8d4
4 changed files with 104 additions and 44 deletions

View File

@ -77,7 +77,7 @@ LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/jsonstream.cpp src/simdjson.c
MINIFIERHEADERS=include/simdjson/jsonminifier.h MINIFIERHEADERS=include/simdjson/jsonminifier.h
MINIFIERLIBFILES=src/jsonminifier.cpp MINIFIERLIBFILES=src/jsonminifier.cpp
FEATURE_JSON_FILES=jsonexamples/generated/0-structurals-full.json jsonexamples/generated/15-structurals-miss.json jsonexamples/generated/7-structurals.json jsonexamples/generated/0-structurals.json jsonexamples/generated/23-structurals-full.json jsonexamples/generated/7-structurals-miss.json jsonexamples/generated/0-structurals-miss.json jsonexamples/generated/23-structurals.json jsonexamples/generated/utf-8-full.json jsonexamples/generated/15-structurals-full.json jsonexamples/generated/23-structurals-miss.json jsonexamples/generated/utf-8.json jsonexamples/generated/15-structurals.json jsonexamples/generated/7-structurals-full.json jsonexamples/generated/utf-8-miss.json FEATURE_JSON_FILES=jsonexamples/generated/0-structurals-full.json jsonexamples/generated/0-structurals-miss.json jsonexamples/generated/0-structurals.json jsonexamples/generated/15-structurals-full.json jsonexamples/generated/15-structurals-miss.json jsonexamples/generated/15-structurals.json jsonexamples/generated/23-structurals-full.json jsonexamples/generated/23-structurals-miss.json jsonexamples/generated/23-structurals.json jsonexamples/generated/7-structurals-full.json jsonexamples/generated/7-structurals-miss.json jsonexamples/generated/7-structurals.json jsonexamples/generated/escape-full.json jsonexamples/generated/escape-miss.json jsonexamples/generated/escape.json jsonexamples/generated/utf-8-full.json jsonexamples/generated/utf-8-miss.json jsonexamples/generated/utf-8.json
RAPIDJSON_INCLUDE:=dependencies/rapidjson/include RAPIDJSON_INCLUDE:=dependencies/rapidjson/include
SAJSON_INCLUDE:=dependencies/sajson/include SAJSON_INCLUDE:=dependencies/sajson/include

View File

@ -148,6 +148,8 @@ struct option_struct {
struct feature_benchmarker { struct feature_benchmarker {
benchmarker utf8; benchmarker utf8;
benchmarker utf8_miss; benchmarker utf8_miss;
benchmarker escape;
benchmarker escape_miss;
benchmarker empty; benchmarker empty;
benchmarker empty_miss; benchmarker empty_miss;
benchmarker struct7; benchmarker struct7;
@ -161,6 +163,8 @@ struct feature_benchmarker {
feature_benchmarker(json_parser& parser, event_collector& collector) : feature_benchmarker(json_parser& parser, event_collector& collector) :
utf8 ("jsonexamples/generated/utf-8.json", parser, collector), utf8 ("jsonexamples/generated/utf-8.json", parser, collector),
utf8_miss ("jsonexamples/generated/utf-8-miss.json", parser, collector), utf8_miss ("jsonexamples/generated/utf-8-miss.json", parser, collector),
escape ("jsonexamples/generated/escape.json", parser, collector),
escape_miss ("jsonexamples/generated/escape-miss.json", parser, collector),
empty ("jsonexamples/generated/0-structurals.json", parser, collector), empty ("jsonexamples/generated/0-structurals.json", parser, collector),
empty_miss ("jsonexamples/generated/0-structurals-miss.json", parser, collector), empty_miss ("jsonexamples/generated/0-structurals-miss.json", parser, collector),
struct7 ("jsonexamples/generated/7-structurals.json", parser, collector), struct7 ("jsonexamples/generated/7-structurals.json", parser, collector),
@ -180,6 +184,8 @@ struct feature_benchmarker {
struct7_full.run_iterations(iterations, stage1_only); struct7_full.run_iterations(iterations, stage1_only);
utf8.run_iterations(iterations, stage1_only); utf8.run_iterations(iterations, stage1_only);
utf8_miss.run_iterations(iterations, stage1_only); utf8_miss.run_iterations(iterations, stage1_only);
escape.run_iterations(iterations, stage1_only);
escape_miss.run_iterations(iterations, stage1_only);
empty.run_iterations(iterations, stage1_only); empty.run_iterations(iterations, stage1_only);
empty_miss.run_iterations(iterations, stage1_only); empty_miss.run_iterations(iterations, stage1_only);
struct15.run_iterations(iterations, stage1_only); struct15.run_iterations(iterations, stage1_only);
@ -258,12 +264,27 @@ struct feature_benchmarker {
return double(utf8_miss[stage].best.branch_misses() - utf8[stage].best.branch_misses()) / utf8_miss.stats->blocks_with_utf8_flipped; return double(utf8_miss[stage].best.branch_misses() - utf8[stage].best.branch_misses()) / utf8_miss.stats->blocks_with_utf8_flipped;
} }
// Extra cost of having escapes in a block.
// Forwards to cost_per_block() with the `escape` benchmark, the number of blocks
// in that file that contain escapes, and `struct7_full` as the last argument
// (presumably the baseline the escape file is compared against -- confirm in cost_per_block).
double escape_cost(BenchmarkStage stage) const {
return cost_per_block(stage, escape, escape.stats->blocks_with_escapes, struct7_full);
}
// Extra cost of an escape miss.
// Forwards to cost_per_block() with the `escape_miss` benchmark, the number of
// blocks in that file whose "has escapes" state flipped, and `escape` as the last
// argument (presumably the baseline without the flips -- confirm in cost_per_block).
double escape_miss_cost(BenchmarkStage stage) const {
return cost_per_block(stage, escape_miss, escape_miss.stats->blocks_with_escapes_flipped, escape);
}
// Rate of escape misses per escape flip:
// (branch misses in escape_miss - branch misses in escape) / flipped blocks in escape_miss.
double escape_miss_rate(BenchmarkStage stage) const {
  // When has_events() is false the rate is simply reported as 1
  if (!has_events()) {
    return 1;
  }
  const auto misses_with_flips = escape_miss[stage].best.branch_misses();
  const auto misses_without_flips = escape[stage].best.branch_misses();
  return double(misses_with_flips - misses_without_flips) / escape_miss.stats->blocks_with_escapes_flipped;
}
double calc_expected_feature_cost(BenchmarkStage stage, const benchmarker& file) const { double calc_expected_feature_cost(BenchmarkStage stage, const benchmarker& file) const {
// Expected base ns/block (empty) // Expected base ns/block (empty)
json_stats& stats = *file.stats; json_stats& stats = *file.stats;
double expected = base_cost(stage) * stats.blocks; double expected = base_cost(stage) * stats.blocks;
expected += struct1_7_cost(stage) * stats.blocks_with_1_structural; expected += struct1_7_cost(stage) * stats.blocks_with_1_structural;
expected += utf8_cost(stage) * stats.blocks_with_utf8; expected += utf8_cost(stage) * stats.blocks_with_utf8;
expected += escape_cost(stage) * stats.blocks_with_escapes;
expected += struct8_15_cost(stage) * stats.blocks_with_8_structurals; expected += struct8_15_cost(stage) * stats.blocks_with_8_structurals;
expected += struct16_cost(stage) * stats.blocks_with_16_structurals; expected += struct16_cost(stage) * stats.blocks_with_16_structurals;
return expected / stats.blocks; return expected / stats.blocks;
@ -274,6 +295,7 @@ struct feature_benchmarker {
json_stats& stats = *file.stats; json_stats& stats = *file.stats;
double expected = struct1_7_miss_cost(stage) * stats.blocks_with_1_structural_flipped * struct1_7_miss_rate(stage); double expected = struct1_7_miss_cost(stage) * stats.blocks_with_1_structural_flipped * struct1_7_miss_rate(stage);
expected += utf8_miss_cost(stage) * stats.blocks_with_utf8_flipped * utf8_miss_rate(stage); expected += utf8_miss_cost(stage) * stats.blocks_with_utf8_flipped * utf8_miss_rate(stage);
expected += escape_miss_cost(stage) * stats.blocks_with_escapes_flipped * escape_miss_rate(stage);
expected += struct8_15_miss_cost(stage) * stats.blocks_with_8_structurals_flipped * struct8_15_miss_rate(stage); expected += struct8_15_miss_cost(stage) * stats.blocks_with_8_structurals_flipped * struct8_15_miss_rate(stage);
expected += struct16_miss_cost(stage) * stats.blocks_with_16_structurals_flipped * struct16_miss_rate(stage); expected += struct16_miss_cost(stage) * stats.blocks_with_16_structurals_flipped * struct16_miss_rate(stage);
return expected / stats.blocks; return expected / stats.blocks;
@ -283,6 +305,7 @@ struct feature_benchmarker {
json_stats& stats = *file.stats; json_stats& stats = *file.stats;
double expected = stats.blocks_with_1_structural_flipped * struct1_7_miss_rate(stage); double expected = stats.blocks_with_1_structural_flipped * struct1_7_miss_rate(stage);
expected += stats.blocks_with_utf8_flipped * utf8_miss_rate(stage); expected += stats.blocks_with_utf8_flipped * utf8_miss_rate(stage);
expected += stats.blocks_with_escapes_flipped * escape_miss_rate(stage);
expected += stats.blocks_with_8_structurals_flipped * struct8_15_miss_rate(stage); expected += stats.blocks_with_8_structurals_flipped * struct8_15_miss_rate(stage);
expected += stats.blocks_with_16_structurals_flipped * struct16_miss_rate(stage); expected += stats.blocks_with_16_structurals_flipped * struct16_miss_rate(stage);
return expected; return expected;
@ -300,10 +323,12 @@ struct feature_benchmarker {
printf("| %8s ", "Base"); printf("| %8s ", "Base");
printf("| %8s ", "7 Struct"); printf("| %8s ", "7 Struct");
printf("| %8s ", "UTF-8"); printf("| %8s ", "UTF-8");
printf("| %8s ", "Escape");
printf("| %8s ", "15 Str."); printf("| %8s ", "15 Str.");
printf("| %8s ", "16+ Str."); printf("| %8s ", "16+ Str.");
printf("| %15s ", "7 Struct Miss"); printf("| %15s ", "7 Struct Miss");
printf("| %15s ", "UTF-8 Miss"); printf("| %15s ", "UTF-8 Miss");
printf("| %15s ", "Escape Miss");
printf("| %15s ", "15 Str. Miss"); printf("| %15s ", "15 Str. Miss");
printf("| %15s ", "16+ Str. Miss"); printf("| %15s ", "16+ Str. Miss");
printf("|\n"); printf("|\n");
@ -314,6 +339,8 @@ struct feature_benchmarker {
printf("|%.10s", "---------------------------------------"); printf("|%.10s", "---------------------------------------");
printf("|%.10s", "---------------------------------------"); printf("|%.10s", "---------------------------------------");
printf("|%.10s", "---------------------------------------"); printf("|%.10s", "---------------------------------------");
printf("|%.10s", "---------------------------------------");
printf("|%.17s", "---------------------------------------");
printf("|%.17s", "---------------------------------------"); printf("|%.17s", "---------------------------------------");
printf("|%.17s", "---------------------------------------"); printf("|%.17s", "---------------------------------------");
printf("|%.17s", "---------------------------------------"); printf("|%.17s", "---------------------------------------");
@ -325,16 +352,19 @@ struct feature_benchmarker {
printf("| %8.3g ", base_cost(stage)); printf("| %8.3g ", base_cost(stage));
printf("| %8.3g ", struct1_7_cost(stage)); printf("| %8.3g ", struct1_7_cost(stage));
printf("| %8.3g ", utf8_cost(stage)); printf("| %8.3g ", utf8_cost(stage));
printf("| %8.3g ", escape_cost(stage));
printf("| %8.3g ", struct8_15_cost(stage)); printf("| %8.3g ", struct8_15_cost(stage));
printf("| %8.3g ", struct16_cost(stage)); printf("| %8.3g ", struct16_cost(stage));
if (has_events()) { if (has_events()) {
printf("| %8.3g (%3d%%) ", struct1_7_miss_cost(stage), int(struct1_7_miss_rate(stage)*100)); printf("| %8.3g (%3d%%) ", struct1_7_miss_cost(stage), int(struct1_7_miss_rate(stage)*100));
printf("| %8.3g (%3d%%) ", utf8_miss_cost(stage), int(utf8_miss_rate(stage)*100)); printf("| %8.3g (%3d%%) ", utf8_miss_cost(stage), int(utf8_miss_rate(stage)*100));
printf("| %8.3g (%3d%%) ", escape_miss_cost(stage), int(escape_miss_rate(stage)*100));
printf("| %8.3g (%3d%%) ", struct8_15_miss_cost(stage), int(struct8_15_miss_rate(stage)*100)); printf("| %8.3g (%3d%%) ", struct8_15_miss_cost(stage), int(struct8_15_miss_rate(stage)*100));
printf("| %8.3g (%3d%%) ", struct16_miss_cost(stage), int(struct16_miss_rate(stage)*100)); printf("| %8.3g (%3d%%) ", struct16_miss_cost(stage), int(struct16_miss_rate(stage)*100));
} else { } else {
printf("| %8.3g ", struct1_7_miss_cost(stage)); printf("| %8.3g ", struct1_7_miss_cost(stage));
printf("| %8.3g ", utf8_miss_cost(stage)); printf("| %8.3g ", utf8_miss_cost(stage));
printf("| %8.3g ", escape_miss_cost(stage));
printf("| %8.3g ", struct8_15_miss_cost(stage)); printf("| %8.3g ", struct8_15_miss_cost(stage));
printf("| %8.3g ", struct16_miss_cost(stage)); printf("| %8.3g ", struct16_miss_cost(stage));
} }
@ -412,8 +442,9 @@ int main(int argc, char *argv[]) {
features.print(options); features.print(options);
// Gauge effectiveness // Gauge effectiveness
if (options.verbose) {
printf("\n"); printf("\n");
printf("Estimated vs. Actual ns/block for real files:\n"); printf(" Effectiveness Check: Estimated vs. Actual ns/block for real files:\n");
printf("\n"); printf("\n");
printf(" | %8s ", "Stage"); printf(" | %8s ", "Stage");
printf("| %-15s ", "File"); printf("| %-15s ", "File");
@ -451,6 +482,7 @@ int main(int argc, char *argv[]) {
print_file_effectiveness(stage, "twitter.json", twitter, features); print_file_effectiveness(stage, "twitter.json", twitter, features);
print_file_effectiveness(stage, "random.json", random, features); print_file_effectiveness(stage, "random.json", random, features);
}); });
}
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }

View File

@ -75,6 +75,8 @@ struct json_stats {
size_t structurals = 0; size_t structurals = 0;
size_t blocks_with_utf8 = 0; size_t blocks_with_utf8 = 0;
size_t blocks_with_utf8_flipped = 0; size_t blocks_with_utf8_flipped = 0;
size_t blocks_with_escapes = 0;
size_t blocks_with_escapes_flipped = 0;
size_t blocks_with_0_structurals = 0; size_t blocks_with_0_structurals = 0;
size_t blocks_with_0_structurals_flipped = 0; size_t blocks_with_0_structurals_flipped = 0;
size_t blocks_with_1_structural = 0; size_t blocks_with_1_structural = 0;
@ -113,6 +115,29 @@ struct json_stats {
last_block_has_utf8 = block_has_utf8; last_block_has_utf8 = block_has_utf8;
} }
// Calculate stats on blocks that will trigger escape if statements / mispredictions
bool last_block_has_escapes = false;
for (size_t block=0; block<blocks; block++) {
// Find utf-8 in the block
size_t block_start = block*BYTES_PER_BLOCK;
size_t block_end = block_start+BYTES_PER_BLOCK;
if (block_end > json.size()) { block_end = json.size(); }
bool block_has_escapes = false;
for (size_t i=block_start; i<block_end; i++) {
if (json.data()[i] == '\\') {
block_has_escapes = true;
break;
}
}
if (block_has_escapes) {
blocks_with_escapes++;
}
if (block > 0 && last_block_has_escapes != block_has_escapes) {
blocks_with_escapes_flipped++;
}
last_block_has_escapes = block_has_escapes;
}
// Calculate stats on blocks that will trigger structural count if statements / mispredictions // Calculate stats on blocks that will trigger structural count if statements / mispredictions
bool last_block_has_0_structurals = false; bool last_block_has_0_structurals = false;
bool last_block_has_1_structural = false; bool last_block_has_1_structural = false;
@ -280,7 +305,7 @@ struct benchmarker {
return all_stages.iterations; return all_stages.iterations;
} }
really_inline void run_iteration(bool stage1_only, bool hotbuffers) { really_inline void run_iteration(bool stage1_only, bool hotbuffers=false) {
// Allocate ParsedJson // Allocate ParsedJson
collector.start(); collector.start();
ParsedJson pj; ParsedJson pj;
@ -336,7 +361,7 @@ struct benchmarker {
} }
} }
really_inline void run_iterations(size_t iterations, bool stage1_only, bool hotbuffers) { really_inline void run_iterations(size_t iterations, bool stage1_only, bool hotbuffers=false) {
for (size_t i = 0; i<iterations; i++) { for (size_t i = 0; i<iterations; i++) {
run_iteration(stage1_only, hotbuffers); run_iteration(stage1_only, hotbuffers);
} }
@ -425,16 +450,18 @@ struct benchmarker {
printf("%s\n", string(strlen(filename), '=').c_str()); printf("%s\n", string(strlen(filename), '=').c_str());
printf("%9zu blocks - %10zu bytes - %5zu structurals (%5.1f %%)\n", stats->bytes / BYTES_PER_BLOCK, stats->bytes, stats->structurals, 100.0 * stats->structurals / stats->bytes); printf("%9zu blocks - %10zu bytes - %5zu structurals (%5.1f %%)\n", stats->bytes / BYTES_PER_BLOCK, stats->bytes, stats->structurals, 100.0 * stats->structurals / stats->bytes);
if (stats) { if (stats) {
printf("special blocks with: utf8 %9zu (%5.1f %%) - 0 structurals %9zu (%5.1f %%) - 1+ structurals %9zu (%5.1f %%) - 8+ structurals %9zu (%5.1f %%) - 16+ structurals %9zu (%5.1f %%)\n", printf("special blocks with: utf8 %9zu (%5.1f %%) - escape %9zu (%5.1f %%) - 0 structurals %9zu (%5.1f %%) - 1+ structurals %9zu (%5.1f %%) - 8+ structurals %9zu (%5.1f %%) - 16+ structurals %9zu (%5.1f %%)\n",
stats->blocks_with_utf8, 100.0 * stats->blocks_with_utf8 / stats->blocks, stats->blocks_with_utf8, 100.0 * stats->blocks_with_utf8 / stats->blocks,
stats->blocks_with_escapes, 100.0 * stats->blocks_with_escapes / stats->blocks,
stats->blocks_with_0_structurals, 100.0 * stats->blocks_with_0_structurals / stats->blocks, stats->blocks_with_0_structurals, 100.0 * stats->blocks_with_0_structurals / stats->blocks,
stats->blocks_with_1_structural, 100.0 * stats->blocks_with_1_structural / stats->blocks, stats->blocks_with_1_structural, 100.0 * stats->blocks_with_1_structural / stats->blocks,
stats->blocks_with_8_structurals, 100.0 * stats->blocks_with_8_structurals / stats->blocks, stats->blocks_with_8_structurals, 100.0 * stats->blocks_with_8_structurals / stats->blocks,
stats->blocks_with_16_structurals, 100.0 * stats->blocks_with_16_structurals / stats->blocks); stats->blocks_with_16_structurals, 100.0 * stats->blocks_with_16_structurals / stats->blocks);
printf("special block flips: utf8 %9zu (%5.1f %%) - 0 structurals %9zu (%5.1f %%) - 1+ structurals %9zu (%5.1f %%) - 8+ structurals %9zu (%5.1f %%) - 16+ structurals %9zu (%5.1f %%)\n", printf("special block flips: utf8 %9zu (%5.1f %%) - escape %9zu (%5.1f %%) - 0 structurals %9zu (%5.1f %%) - 1+ structurals %9zu (%5.1f %%) - 8+ structurals %9zu (%5.1f %%) - 16+ structurals %9zu (%5.1f %%)\n",
stats->blocks_with_utf8_flipped, 100.0 * stats->blocks_with_utf8_flipped / stats->blocks, stats->blocks_with_utf8_flipped, 100.0 * stats->blocks_with_utf8_flipped / stats->blocks,
stats->blocks_with_1_structural_flipped, 100.0 * stats->blocks_with_1_structural_flipped / stats->blocks, stats->blocks_with_escapes_flipped, 100.0 * stats->blocks_with_escapes_flipped / stats->blocks,
stats->blocks_with_0_structurals_flipped, 100.0 * stats->blocks_with_0_structurals_flipped / stats->blocks, stats->blocks_with_0_structurals_flipped, 100.0 * stats->blocks_with_0_structurals_flipped / stats->blocks,
stats->blocks_with_1_structural_flipped, 100.0 * stats->blocks_with_1_structural_flipped / stats->blocks,
stats->blocks_with_8_structurals_flipped, 100.0 * stats->blocks_with_8_structurals_flipped / stats->blocks, stats->blocks_with_8_structurals_flipped, 100.0 * stats->blocks_with_8_structurals_flipped / stats->blocks,
stats->blocks_with_16_structurals_flipped, 100.0 * stats->blocks_with_16_structurals_flipped / stats->blocks); stats->blocks_with_16_structurals_flipped, 100.0 * stats->blocks_with_16_structurals_flipped / stats->blocks);
} }

View File

@ -88,6 +88,7 @@ miss_templates = File.expand_path("miss-templates", File.dirname(__FILE__))
Dir.mkdir(output_dir) unless File.directory?(output_dir) Dir.mkdir(output_dir) unless File.directory?(output_dir)
w = ChunkWriter.new(output_dir, miss_templates) w = ChunkWriter.new(output_dir, miss_templates)
w.write_files "utf-8", '["֏","֏",{}', ',"֏","֏",{}', ',"֏","֏","֏"]', repeat2: ',"ab","ab",{}' w.write_files "utf-8", '["֏","֏",{}', ',"֏","֏",{}', ',"֏","֏","֏"]', repeat2: ',"ab","ab",{}'
w.write_files "escape", '["\\"","\\"",{}', ',"\\"","\\"",{}', ',"\\"","\\"","\\""]', repeat2: ',"ab","ab",{}'
w.write_files "0-structurals", '"ab"', '', '' w.write_files "0-structurals", '"ab"', '', ''
# w.write_files "1-structurals", [ '[', '"ab"' ], [ ',', '"ab"' ], [ ',', '{', '}', ']' ] # w.write_files "1-structurals", [ '[', '"ab"' ], [ ',', '"ab"' ], [ ',', '{', '}', ']' ]
# w.write_files "2-structurals", '["ab"', ',"ab"', [',{', '}]'] # w.write_files "2-structurals", '["ab"', ',"ab"', [',{', '}]']