Print stage 2 information in feature benchmarker

This commit is contained in:
John Keiser 2019-12-27 12:52:16 -07:00
parent a2d05b21ff
commit 3b9e6bff3c
2 changed files with 121 additions and 80 deletions

View File

@ -8,6 +8,7 @@
#include <unistd.h>
#endif
#include <cinttypes>
#include <initializer_list>
#include <cstdio>
#include <cstdlib>
@ -132,6 +133,16 @@ struct option_struct {
architecture = find_best_supported_architecture();
}
}
// Calls `f` once for every benchmark stage that should be reported.
//
// `f` is always invoked with BenchmarkStage::STAGE1. Unless `stage1_only` is
// set, it is then invoked with BenchmarkStage::STAGE2 and BenchmarkStage::ALL,
// in that order.
template<typename F>
void each_stage(const F& f) const {
  f(BenchmarkStage::STAGE1);
  // Only stage 1 was requested: nothing further to visit.
  if (this->stage1_only) { return; }
  f(BenchmarkStage::STAGE2);
  f(BenchmarkStage::ALL);
}
};
struct feature_benchmarker {
@ -177,8 +188,8 @@ struct feature_benchmarker {
struct23_miss.run_iterations(iterations, stage1_only);
}
double cost_per_block(const benchmarker& feature, size_t feature_blocks, const benchmarker& base) const {
return (feature.stage1.best.elapsed_ns() - base.stage1.best.elapsed_ns()) / feature_blocks;
double cost_per_block(BenchmarkStage stage, const benchmarker& feature, size_t feature_blocks, const benchmarker& base) const {
return (feature[stage].best.elapsed_ns() - base[stage].best.elapsed_ns()) / feature_blocks;
}
// Whether we're recording cache miss and branch miss events
@ -187,101 +198,101 @@ struct feature_benchmarker {
}
// Base cost of any block (including empty ones)
double base_cost() const {
return (empty.stage1.best.elapsed_ns() / empty.stats->blocks);
double base_cost(BenchmarkStage stage) const {
return (empty[stage].best.elapsed_ns() / empty.stats->blocks);
}
// Extra cost of a 1-7 structural block over an empty block
double struct1_7_cost() const {
return cost_per_block(struct7, struct7.stats->blocks_with_1_structural, empty);
double struct1_7_cost(BenchmarkStage stage) const {
return cost_per_block(stage, struct7, struct7.stats->blocks_with_1_structural, empty);
}
// Extra cost of a 1-7-structural miss
double struct1_7_miss_cost() const {
return cost_per_block(struct7_miss, struct7_miss.stats->blocks_with_1_structural, struct7);
double struct1_7_miss_cost(BenchmarkStage stage) const {
return cost_per_block(stage, struct7_miss, struct7_miss.stats->blocks_with_1_structural, struct7);
}
// Rate of 1-7-structural misses per 1-7-structural flip
double struct1_7_miss_rate() const {
double struct1_7_miss_rate(BenchmarkStage stage) const {
if (!has_events()) { return 1; }
return double(struct7_miss.stage1.best.branch_misses() - struct7.stage1.best.branch_misses()) / struct7_miss.stats->blocks_with_1_structural_flipped;
return double(struct7_miss[stage].best.branch_misses() - struct7[stage].best.branch_misses()) / struct7_miss.stats->blocks_with_1_structural_flipped;
}
// Extra cost of an 8-15 structural block over a 1-7 structural block
double struct8_15_cost() const {
return cost_per_block(struct15, struct15.stats->blocks_with_8_structurals, struct7);
double struct8_15_cost(BenchmarkStage stage) const {
return cost_per_block(stage, struct15, struct15.stats->blocks_with_8_structurals, struct7);
}
// Extra cost of an 8-15-structural miss over a 1-7 miss
double struct8_15_miss_cost() const {
return cost_per_block(struct15_miss, struct15_miss.stats->blocks_with_8_structurals_flipped, struct15);
double struct8_15_miss_cost(BenchmarkStage stage) const {
return cost_per_block(stage, struct15_miss, struct15_miss.stats->blocks_with_8_structurals_flipped, struct15);
}
// Rate of 8-15-structural misses per 8-structural flip
double struct8_15_miss_rate() const {
double struct8_15_miss_rate(BenchmarkStage stage) const {
if (!has_events()) { return 1; }
return double(struct15_miss.stage1.best.branch_misses() - struct15.stage1.best.branch_misses()) / struct15_miss.stats->blocks_with_8_structurals_flipped;
return double(struct15_miss[stage].best.branch_misses() - struct15[stage].best.branch_misses()) / struct15_miss.stats->blocks_with_8_structurals_flipped;
}
// Extra cost of a 16+-structural block over an 8-15 structural block (actual varies based on # of structurals!)
double struct16_cost() const {
return cost_per_block(struct23, struct23.stats->blocks_with_16_structurals, struct15);
double struct16_cost(BenchmarkStage stage) const {
return cost_per_block(stage, struct23, struct23.stats->blocks_with_16_structurals, struct15);
}
// Extra cost of a 16-structural miss over an 8-15 miss
double struct16_miss_cost() const {
return cost_per_block(struct23_miss, struct23_miss.stats->blocks_with_16_structurals_flipped, struct23);
double struct16_miss_cost(BenchmarkStage stage) const {
return cost_per_block(stage, struct23_miss, struct23_miss.stats->blocks_with_16_structurals_flipped, struct23);
}
// Rate of 16-structural misses per 16-structural flip
double struct16_miss_rate() const {
double struct16_miss_rate(BenchmarkStage stage) const {
if (!has_events()) { return 1; }
return double(struct23_miss.stage1.best.branch_misses() - struct23.stage1.best.branch_misses()) / struct23_miss.stats->blocks_with_16_structurals_flipped;
return double(struct23_miss[stage].best.branch_misses() - struct23[stage].best.branch_misses()) / struct23_miss.stats->blocks_with_16_structurals_flipped;
}
// Extra cost of having UTF-8 in a block
double utf8_cost() const {
return cost_per_block(utf8, utf8.stats->blocks_with_utf8, struct7_full);
double utf8_cost(BenchmarkStage stage) const {
return cost_per_block(stage, utf8, utf8.stats->blocks_with_utf8, struct7_full);
}
// Extra cost of a UTF-8 miss
double utf8_miss_cost() const {
return cost_per_block(utf8_miss, utf8_miss.stats->blocks_with_utf8_flipped, utf8);
double utf8_miss_cost(BenchmarkStage stage) const {
return cost_per_block(stage, utf8_miss, utf8_miss.stats->blocks_with_utf8_flipped, utf8);
}
// Rate of UTF-8 misses per UTF-8 flip
double utf8_miss_rate() const {
double utf8_miss_rate(BenchmarkStage stage) const {
if (!has_events()) { return 1; }
return double(utf8_miss.stage1.best.branch_misses() - utf8.stage1.best.branch_misses()) / utf8_miss.stats->blocks_with_utf8_flipped;
return double(utf8_miss[stage].best.branch_misses() - utf8[stage].best.branch_misses()) / utf8_miss.stats->blocks_with_utf8_flipped;
}
double calc_expected_feature_cost(const benchmarker& file) const {
double calc_expected_feature_cost(BenchmarkStage stage, const benchmarker& file) const {
// Expected base ns/block (empty)
json_stats& stats = *file.stats;
double expected = base_cost() * stats.blocks;
expected += struct1_7_cost() * stats.blocks_with_1_structural;
expected += utf8_cost() * stats.blocks_with_utf8;
expected += struct8_15_cost() * stats.blocks_with_8_structurals;
expected += struct16_cost() * stats.blocks_with_16_structurals;
double expected = base_cost(stage) * stats.blocks;
expected += struct1_7_cost(stage) * stats.blocks_with_1_structural;
expected += utf8_cost(stage) * stats.blocks_with_utf8;
expected += struct8_15_cost(stage) * stats.blocks_with_8_structurals;
expected += struct16_cost(stage) * stats.blocks_with_16_structurals;
return expected / stats.blocks;
}
double calc_expected_miss_cost(const benchmarker& file) const {
double calc_expected_miss_cost(BenchmarkStage stage, const benchmarker& file) const {
// Expected base ns/block (empty)
json_stats& stats = *file.stats;
double expected = struct1_7_miss_cost() * stats.blocks_with_1_structural_flipped * struct1_7_miss_rate();
expected += utf8_miss_cost() * stats.blocks_with_utf8_flipped * utf8_miss_rate();
expected += struct8_15_miss_cost() * stats.blocks_with_8_structurals_flipped * struct8_15_miss_rate();
expected += struct16_miss_cost() * stats.blocks_with_16_structurals_flipped * struct16_miss_rate();
double expected = struct1_7_miss_cost(stage) * stats.blocks_with_1_structural_flipped * struct1_7_miss_rate(stage);
expected += utf8_miss_cost(stage) * stats.blocks_with_utf8_flipped * utf8_miss_rate(stage);
expected += struct8_15_miss_cost(stage) * stats.blocks_with_8_structurals_flipped * struct8_15_miss_rate(stage);
expected += struct16_miss_cost(stage) * stats.blocks_with_16_structurals_flipped * struct16_miss_rate(stage);
return expected / stats.blocks;
}
double calc_expected_misses(const benchmarker& file) const {
double calc_expected_misses(BenchmarkStage stage, const benchmarker& file) const {
json_stats& stats = *file.stats;
double expected = stats.blocks_with_1_structural_flipped * struct1_7_miss_rate();
expected += stats.blocks_with_utf8_flipped * utf8_miss_rate();
expected += stats.blocks_with_8_structurals_flipped * struct8_15_miss_rate();
expected += stats.blocks_with_16_structurals_flipped * struct16_miss_rate();
double expected = stats.blocks_with_1_structural_flipped * struct1_7_miss_rate(stage);
expected += stats.blocks_with_utf8_flipped * utf8_miss_rate(stage);
expected += stats.blocks_with_8_structurals_flipped * struct8_15_miss_rate(stage);
expected += stats.blocks_with_16_structurals_flipped * struct16_miss_rate(stage);
return expected;
}
double calc_expected(const benchmarker& file) const {
return calc_expected_feature_cost(file) + calc_expected_miss_cost(file);
double calc_expected(BenchmarkStage stage, const benchmarker& file) const {
return calc_expected_feature_cost(stage, file) + calc_expected_miss_cost(stage, file);
}
void print() {
void print(const option_struct& options) {
printf("\n");
printf("Features in ns/block (64 bytes):\n");
printf("\n");
@ -309,35 +320,38 @@ struct feature_benchmarker {
printf("|%.17s", "---------------------------------------");
printf("|\n");
printf("| %-8s ", "Stage 1");
printf("| %8.3g ", base_cost());
printf("| %8.3g ", struct1_7_cost());
printf("| %8.3g ", utf8_cost());
printf("| %8.3g ", struct8_15_cost());
printf("| %8.3g ", struct16_cost());
if (has_events()) {
printf("| %8.3g (%3d%%) ", struct1_7_miss_cost(), int(struct1_7_miss_rate()*100));
printf("| %8.3g (%3d%%) ", utf8_miss_cost(), int(utf8_miss_rate()*100));
printf("| %8.3g (%3d%%) ", struct8_15_miss_cost(), int(struct8_15_miss_rate()*100));
printf("| %8.3g (%3d%%) ", struct16_miss_cost(), int(struct16_miss_rate()*100));
} else {
printf("| %8.3g ", struct1_7_miss_cost());
printf("| %8.3g ", utf8_miss_cost());
printf("| %8.3g ", struct8_15_miss_cost());
printf("| %8.3g ", struct16_miss_cost());
}
printf("|\n");
options.each_stage([&](auto stage) {
printf("| %-8s ", benchmark_stage_name(stage));
printf("| %8.3g ", base_cost(stage));
printf("| %8.3g ", struct1_7_cost(stage));
printf("| %8.3g ", utf8_cost(stage));
printf("| %8.3g ", struct8_15_cost(stage));
printf("| %8.3g ", struct16_cost(stage));
if (has_events()) {
printf("| %8.3g (%3d%%) ", struct1_7_miss_cost(stage), int(struct1_7_miss_rate(stage)*100));
printf("| %8.3g (%3d%%) ", utf8_miss_cost(stage), int(utf8_miss_rate(stage)*100));
printf("| %8.3g (%3d%%) ", struct8_15_miss_cost(stage), int(struct8_15_miss_rate(stage)*100));
printf("| %8.3g (%3d%%) ", struct16_miss_cost(stage), int(struct16_miss_rate(stage)*100));
} else {
printf("| %8.3g ", struct1_7_miss_cost(stage));
printf("| %8.3g ", utf8_miss_cost(stage));
printf("| %8.3g ", struct8_15_miss_cost(stage));
printf("| %8.3g ", struct16_miss_cost(stage));
}
printf("|\n");
});
}
};
void print_file_effectiveness(const char* filename, const benchmarker& results, const feature_benchmarker& features) {
double actual = results.stage1.best.elapsed_ns() / results.stats->blocks;
double calc = features.calc_expected(results);
uint64_t actual_misses = results.stage1.best.branch_misses();
uint64_t calc_misses = uint64_t(features.calc_expected_misses(results));
double calc_miss_cost = features.calc_expected_miss_cost(results);
void print_file_effectiveness(BenchmarkStage stage, const char* filename, const benchmarker& results, const feature_benchmarker& features) {
double actual = results[stage].best.elapsed_ns() / results.stats->blocks;
double calc = features.calc_expected(stage, results);
uint64_t actual_misses = results[stage].best.branch_misses();
uint64_t calc_misses = uint64_t(features.calc_expected_misses(stage, results));
double calc_miss_cost = features.calc_expected_miss_cost(stage, results);
printf("| %-8s ", benchmark_stage_name(stage));
printf("| %-15s ", filename);
printf("| %8.3g ", features.calc_expected_feature_cost(results));
printf("| %8.3g ", features.calc_expected_feature_cost(stage, results));
printf("| %8.3g ", calc_miss_cost);
printf("| %8.3g ", calc);
printf("| %8.3g ", actual);
@ -395,12 +409,13 @@ int main(int argc, char *argv[]) {
}
if (!options.verbose) { progress.erase(); }
features.print();
features.print(options);
// Gauge effectiveness
printf("\n");
printf("Estimated vs. Actual ns/block for real files:\n");
printf("\n");
printf("| %8s ", "Stage");
printf("| %-15s ", "File");
printf("| %11s ", "Est. (Base)");
printf("| %11s ", "Est. (Miss)");
@ -415,6 +430,7 @@ int main(int argc, char *argv[]) {
printf("| %13s ", "Adjusted Diff");
}
printf("|\n");
printf("|%.10s", "---------------------------------------");
printf("|%.17s", "---------------------------------------");
printf("|%.13s", "---------------------------------------");
printf("|%.13s", "---------------------------------------");
@ -430,9 +446,11 @@ int main(int argc, char *argv[]) {
}
printf("|\n");
print_file_effectiveness("gsoc-2018.json", gsoc_2018, features);
print_file_effectiveness("twitter.json", twitter, features);
print_file_effectiveness("random.json", random, features);
options.each_stage([&](auto stage) {
print_file_effectiveness(stage, "gsoc-2018.json", gsoc_2018, features);
print_file_effectiveness(stage, "twitter.json", twitter, features);
print_file_effectiveness(stage, "random.json", random, features);
});
return EXIT_SUCCESS;
}

View File

@ -218,6 +218,23 @@ struct progress_bar {
}
};
// Identifies which stage's results are being selected or printed.
// The enumerator order is left untouched so the underlying values stay the same.
enum class BenchmarkStage {
  ALL,
  ALLOCATE,
  STAGE1,
  STAGE2
};

// Returns the display label for `stage` ("All", "Allocate", "Stage 1" or
// "Stage 2"), or "Unknown" for a value that is not a named enumerator.
// The returned pointer refers to a string literal and must not be freed.
//
// Declared `inline` so that including this definition from more than one
// translation unit does not produce duplicate-symbol link errors.
inline const char* benchmark_stage_name(BenchmarkStage stage) {
  // No `default:` label on purpose: the compiler can then warn (-Wswitch)
  // if a new enumerator is added without a label here.
  switch (stage) {
    case BenchmarkStage::ALL:      return "All";
    case BenchmarkStage::ALLOCATE: return "Allocate";
    case BenchmarkStage::STAGE1:   return "Stage 1";
    case BenchmarkStage::STAGE2:   return "Stage 2";
  }
  // Reached only for values outside the named enumerators.
  return "Unknown";
}
struct benchmarker {
// JSON text from loading the file. Owns the memory.
const padded_string json;
@ -249,6 +266,16 @@ struct benchmarker {
}
}
// Returns the event aggregate that corresponds to `stage`:
// STAGE1 -> stage1, STAGE2 -> stage2, ALLOCATE -> allocate_stage,
// ALL -> all_stages.
//
// For any other value, exit_error("Unknown stage") is called; the trailing
// return of all_stages mirrors the original fallback so every path yields a
// reference.
const event_aggregate& operator[](BenchmarkStage stage) const {
  switch (stage) {
    case BenchmarkStage::STAGE1:   return stage1;
    case BenchmarkStage::STAGE2:   return stage2;
    case BenchmarkStage::ALLOCATE: return allocate_stage;
    case BenchmarkStage::ALL:      return all_stages;
  }
  // Not one of the named stages.
  exit_error("Unknown stage");
  return all_stages;
}
int iterations() const {
return all_stages.iterations;
}
@ -307,10 +334,6 @@ struct benchmarker {
}
}
double stage1_ns_per_block() {
return stage1.elapsed_ns() / stats->blocks;
}
template<typename T>
void print_aggregate(const char* prefix, const T& stage) const {
printf("%s%-13s: %8.4f ns per block (%6.2f%%) - %8.4f ns per byte - %8.4f ns per structural - %8.3f GB/s\n",