Measure impact of utf-8 blocks and structurals per block directly

This commit is contained in:
John Keiser 2019-09-11 08:38:13 -07:00
parent 102262c7ab
commit e2f349e7bd
14 changed files with 21366 additions and 384 deletions

4
.gitignore vendored
View File

@ -53,6 +53,7 @@ objs
# Build outputs (TODO build to a subdir so we can exclude that instead) # Build outputs (TODO build to a subdir so we can exclude that instead)
/allparserscheckfile /allparserscheckfile
/basictests /basictests
/benchfeatures
/benchmark/parse /benchmark/parse
/benchmark/perfdiff /benchmark/perfdiff
/benchmark/statisticalmodel /benchmark/statisticalmodel
@ -86,6 +87,9 @@ objs
/tools/jsonstats /tools/jsonstats
/tools/minify /tools/minify
# Don't check in generated examples
/jsonexamples/generated
# C++ ignore from https://github.com/github/gitignore/blob/master/C%2B%2B.gitignore # C++ ignore from https://github.com/github/gitignore/blob/master/C%2B%2B.gitignore
# Prerequisites # Prerequisites

View File

@ -126,6 +126,12 @@ run_issue150_sh: allparserscheckfile
run_testjson2json_sh: minify json2json run_testjson2json_sh: minify json2json
./scripts/testjson2json.sh ./scripts/testjson2json.sh
generate_featurejson:
ruby ./benchmark/genfeaturejson.rb
run_benchfeatures: benchfeatures generate_featurejson
./benchfeatures -n 1000
test: run_basictests run_jsoncheck run_numberparsingcheck run_integer_tests run_stringparsingcheck run_jsonstream_test run_pointercheck run_testjson2json_sh run_issue150_sh run_jsoncheck_noavx test: run_basictests run_jsoncheck run_numberparsingcheck run_integer_tests run_stringparsingcheck run_jsonstream_test run_pointercheck run_testjson2json_sh run_issue150_sh run_jsoncheck_noavx
@echo "It looks like the code is good!" @echo "It looks like the code is good!"
@ -145,9 +151,12 @@ submodules:
$(JSON_INCLUDE) $(SAJSON_INCLUDE) $(RAPIDJSON_INCLUDE) $(JSON11_INCLUDE) $(FASTJSON_INCLUDE) $(GASON_INCLUDE) $(UJSON4C_INCLUDE) $(CJSON_INCLUDE) $(JSMN_INCLUDE) : submodules $(JSON_INCLUDE) $(SAJSON_INCLUDE) $(RAPIDJSON_INCLUDE) $(JSON11_INCLUDE) $(FASTJSON_INCLUDE) $(GASON_INCLUDE) $(UJSON4C_INCLUDE) $(CJSON_INCLUDE) $(JSMN_INCLUDE) : submodules
parse: benchmark/parse.cpp $(HEADERS) $(LIBFILES) parse: benchmark/parse.cpp benchmark/json_parser.h benchmark/event_counter.h benchmark/benchmarker.h $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parse $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS) $(CXX) $(CXXFLAGS) -o parse $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
benchfeatures: benchmark/benchfeatures.cpp benchmark/json_parser.h benchmark/event_counter.h benchmark/benchmarker.h $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o benchfeatures $(LIBFILES) benchmark/benchfeatures.cpp $(LIBFLAGS)
perfdiff: benchmark/perfdiff.cpp perfdiff: benchmark/perfdiff.cpp
$(CXX) $(CXXFLAGS) -o perfdiff benchmark/perfdiff.cpp $(LIBFLAGS) $(CXX) $(CXXFLAGS) -o perfdiff benchmark/perfdiff.cpp $(LIBFLAGS)

326
benchmark/benchfeatures.cpp Normal file
View File

@ -0,0 +1,326 @@
#include "json_parser.h"
#include "event_counter.h"
#include <cassert>
#include <cctype>
#ifndef _MSC_VER
#include <dirent.h>
#include <unistd.h>
#endif
#include <cinttypes>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
#include <chrono>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>
#include "linux-perf-events.h"
#ifdef __linux__
#include <libgen.h>
#endif
//#define DEBUG
#include "simdjson/common_defs.h"
#include "simdjson/isadetection.h"
#include "simdjson/jsonioutil.h"
#include "simdjson/jsonparser.h"
#include "simdjson/parsedjson.h"
#include "simdjson/stage1_find_marks.h"
#include "simdjson/stage2_build_tape.h"
#include <functional>
#include "benchmarker.h"
using namespace simdjson;
using std::cerr;
using std::cout;
using std::endl;
using std::string;
using std::to_string;
using std::vector;
using std::ostream;
using std::ofstream;
using std::exception;
// Stash the exe_name in main() for functions to use
char* exe_name;
// Print command-line help for benchfeatures to the given stream.
void print_usage(ostream& out) {
  out << "Usage: " << exe_name << " [-v] [-n #] [-s STAGE] [-a ARCH]" << endl;
  out << endl;
  out << "Runs the parser against jsonexamples/generated json files in a loop, measuring speed and other statistics." << endl;
  out << endl;
  out << "Options:" << endl;
  out << endl;
  out << "-n # - Number of iterations per file. Default: 400" << endl;
  // BUG FIX: the help text claimed the default was 20, but option_struct
  // initializes iteration_step to 50.
  out << "-i # - Number of times to iterate a single file before moving to the next. Default: 50" << endl;
  out << "-v - Verbose output." << endl;
  out << "-s STAGE - Stop after the given stage." << endl;
  out << " -s stage1 - Stop after find_structural_bits." << endl;
  out << " -s all - Run all stages." << endl;
  out << "-a ARCH - Use the parser with the designated architecture (HASWELL, WESTMERE" << endl;
  out << " or ARM64). By default, detects best supported architecture." << endl;
}
// Report a usage error on stderr, print the help text, and terminate
// the process with a failure status.
void exit_usage(string message) {
  cerr << message << endl << endl;
  print_usage(cerr);
  exit(EXIT_FAILURE);
}
// Parsed command-line options for benchfeatures.
struct option_struct {
  // Parser architecture to use (-a); auto-detected when left UNSUPPORTED.
  Architecture architecture = Architecture::UNSUPPORTED;
  // When true (-s stage1), only stage 1 is run/timed.
  bool stage1_only = false;

  // Total iterations per file (-n).
  int32_t iterations = 400;
  // Iterations spent on a single file before moving to the next (-i).
  int32_t iteration_step = 50;

  // Verbose logging (-v).
  bool verbose = false;

  // Parse argv; exits the process (with usage text) on invalid options.
  option_struct(int argc, char **argv) {
#ifndef _MSC_VER
    int c;

    while ((c = getopt(argc, argv, "vtn:i:a:s:")) != -1) {
      switch (c) {
      case 'n':
        iterations = atoi(optarg);
        break;
      case 'i':
        iteration_step = atoi(optarg);
        break;
      case 'v':
        verbose = true;
        break;
      case 'a':
        architecture = parse_architecture(optarg);
        if (architecture == Architecture::UNSUPPORTED) {
          exit_usage(string("Unsupported option value -a ") + optarg + ": expected -a HASWELL, WESTMERE or ARM64");
        }
        break;
      case 's':
        if (!strcmp(optarg, "stage1")) {
          stage1_only = true;
        } else if (!strcmp(optarg, "all")) {
          stage1_only = false;
        } else {
          exit_usage(string("Unsupported option value -s ") + optarg + ": expected -s stage1 or all");
        }
        break;
      default:
        // BUG FIX: the original wrote ("Unexpected argument " + c), which
        // performs pointer arithmetic on the string literal instead of
        // appending the offending option character.
        exit_error(string("Unexpected argument ") + (char)c);
      }
    }
#else
    int optind = 1;
#endif

    // If architecture is not specified, pick the best supported architecture by default
    if (architecture == Architecture::UNSUPPORTED) {
      architecture = find_best_supported_architecture();
    }
  }
};
// Measured best-run stage 1 time, in nanoseconds per 64-byte block.
double actual(const benchmarker& feature) {
  const double stage1_ns = feature.stage1.best.elapsed_ns();
  return stage1_ns / feature.stats->blocks;
}
// Per-block stage 1 difference between `feature` and the `struct7` baseline.
// When the two files have the same number of blocks, the totals are
// subtracted before dividing; otherwise the per-block averages are compared.
double diff(const benchmarker& feature, const benchmarker& struct7) {
  const double feature_ns = feature.stage1.best.elapsed_ns();
  const double base_ns = struct7.stage1.best.elapsed_ns();
  if (feature.stats->blocks == struct7.stats->blocks) {
    return (feature_ns - base_ns) / struct7.stats->blocks;
  }
  return (feature_ns / feature.stats->blocks) - (base_ns / struct7.stats->blocks);
}
// Per-miss cost estimate for a "miss" file variant.
// There are roughly 2650 branch mispredicts, so we have to scale it so it represents a per block amount
double diff_miss(const benchmarker& feature, const benchmarker& struct7) {
  const double per_block = diff(feature, struct7);
  return per_block * 10000.0 / 2650.0;
}
// Benchmarks a family of generated JSON files (produced by
// benchmark/genfeaturejson.rb), each isolating one stage 1 feature:
// utf-8 content, empty blocks, and blocks with 1-7 / 8-15 / 16+ structurals.
// The *_miss variants alternate block content between iterations of the file
// to provoke branch mispredictions; *_full keeps every block "on".
struct feature_benchmarker {
  benchmarker utf8;
  benchmarker utf8_miss;
  benchmarker empty;
  benchmarker empty_miss;
  benchmarker struct7;
  benchmarker struct7_miss;
  benchmarker struct7_full;
  benchmarker struct15;
  benchmarker struct15_miss;
  benchmarker struct23;
  benchmarker struct23_miss;
  // Loads every generated file up front (benchmarker's constructor reads the
  // file from disk and exits on failure).
  feature_benchmarker(json_parser& parser, event_collector& collector) :
  utf8 ("jsonexamples/generated/utf-8.json", parser, collector),
  utf8_miss ("jsonexamples/generated/utf-8-miss.json", parser, collector),
  empty ("jsonexamples/generated/0-structurals.json", parser, collector),
  empty_miss ("jsonexamples/generated/0-structurals-miss.json", parser, collector),
  struct7 ("jsonexamples/generated/7-structurals.json", parser, collector),
  struct7_miss ("jsonexamples/generated/7-structurals-miss.json", parser, collector),
  struct7_full ("jsonexamples/generated/7-structurals-full.json", parser, collector),
  struct15 ("jsonexamples/generated/15-structurals.json", parser, collector),
  struct15_miss("jsonexamples/generated/15-structurals-miss.json", parser, collector),
  struct23 ("jsonexamples/generated/23-structurals.json", parser, collector),
  struct23_miss("jsonexamples/generated/23-structurals-miss.json", parser, collector)
  {
  }
  // Run `iterations` parses of every feature file.
  really_inline void run_iterations(size_t iterations, bool stage1_only=false) {
    struct7.run_iterations(iterations, stage1_only);
    struct7_miss.run_iterations(iterations, stage1_only);
    struct7_full.run_iterations(iterations, stage1_only);
    utf8.run_iterations(iterations, stage1_only);
    utf8_miss.run_iterations(iterations, stage1_only);
    empty.run_iterations(iterations, stage1_only);
    empty_miss.run_iterations(iterations, stage1_only);
    struct15.run_iterations(iterations, stage1_only);
    struct15_miss.run_iterations(iterations, stage1_only);
    struct23.run_iterations(iterations, stage1_only);
    struct23_miss.run_iterations(iterations, stage1_only);
  }
  // Print a two-row CSV: header row of feature names, then the measured
  // per-block ns cost of each feature relative to its baseline.
  void print() {
    printf("base (ns/block)");
    printf(",struct 1-7");
    printf(",struct 1-7 miss");
    printf(",utf-8");
    printf(",utf-8 miss");
    printf(",struct 8-15");
    printf(",struct 8-15 miss");
    printf(",struct 16+");
    printf(",struct 16+ miss");
    printf("\n");
    printf("%g", actual(empty));
    printf(",%+g", diff(struct7, empty));
    printf(",%+g", diff(struct7_miss, struct7));
    printf(",%+g", diff(utf8, struct7));
    printf(",%+g", diff(utf8_miss, utf8));
    printf(",%+g", diff(struct15, struct7));
    printf(",%+g", diff(struct15_miss, struct15));
    printf(",%+g", diff(struct23, struct15));
    printf(",%+g", diff(struct23_miss, struct23));
    printf("\n");
  }
  // Extra cost of `feature` over `base`, attributed to `feature_blocks`
  // (the number of blocks that actually exhibit the feature).
  double cost_per_block(benchmarker& feature, size_t feature_blocks, benchmarker& base) {
    return (feature.stage1.best.elapsed_ns() - base.stage1.best.elapsed_ns()) / feature_blocks;
  }
  // Base cost of any block (including empty ones)
  double base_cost() {
    return (empty.stage1.best.elapsed_ns() / empty.stats->blocks);
  }
  // Extra cost of a 1-7 structural block over an empty block
  double struct1_7_cost() {
    return cost_per_block(struct7, struct7.stats->blocks_with_1_structural, empty);
  }
  // Extra cost of an 1-7-structural miss
  double struct1_7_miss_cost() {
    return cost_per_block(struct7_miss, struct7_miss.stats->blocks_with_1_structural, struct7);
  }
  // Extra cost of an 8-15 structural block over a 1-7 structural block
  double struct8_15_cost() {
    return cost_per_block(struct15, struct15.stats->blocks_with_8_structurals, struct7);
  }
  // Extra cost of an 8-15-structural miss over a 1-7 miss
  double struct8_15_miss_cost() {
    return cost_per_block(struct15_miss, struct15_miss.stats->blocks_with_8_structurals_flipped, struct15);
  }
  // Extra cost of a 16+-structural block over an 8-15 structural block (actual varies based on # of structurals!)
  double struct16_cost() {
    return cost_per_block(struct23, struct23.stats->blocks_with_16_structurals, struct15);
  }
  // Extra cost of a 16-structural miss over an 8-15 miss
  double struct16_miss_cost() {
    return cost_per_block(struct23_miss, struct23_miss.stats->blocks_with_16_structurals_flipped, struct23);
  }
  // Extra cost of having UTF-8 in a block
  double utf8_cost() {
    return cost_per_block(utf8, utf8.stats->blocks_with_utf8, struct7_full);
  }
  // Extra cost of a UTF-8 miss
  double utf8_miss_cost() {
    return cost_per_block(utf8_miss, utf8_miss.stats->blocks_with_utf8_flipped, utf8);
  }
  // Predict ns/block for an arbitrary file by summing each feature's
  // per-block cost weighted by how often that feature occurs in the file.
  double calc_expected(benchmarker& file) {
    // Expected base ns/block (empty)
    json_stats& stats = *file.stats;
    double expected = base_cost() * stats.blocks;
    expected += struct1_7_cost() * stats.blocks_with_1_structural;
    expected += struct1_7_miss_cost() * stats.blocks_with_1_structural_flipped;
    expected += utf8_cost() * stats.blocks_with_utf8;
    expected += utf8_miss_cost() * stats.blocks_with_utf8_flipped;
    expected += struct8_15_cost() * stats.blocks_with_8_structurals;
    expected += struct8_15_miss_cost() * stats.blocks_with_8_structurals_flipped;
    expected += struct16_cost() * stats.blocks_with_16_structurals;
    expected += struct16_miss_cost() * stats.blocks_with_16_structurals_flipped;
    return expected / stats.blocks;
  }
};
// Entry point: parse options, load every benchmark file, run the measurement
// loop, then print the per-feature cost table and compare the cost model's
// predicted ns/block against measurements for three real-world files.
int main(int argc, char *argv[]) {
  // Read options
  exe_name = argv[0];
  option_struct options(argc, argv);
  if (options.verbose) {
    verbose_stream = &cout;
  }

  // Initialize the event collector. We put this early so if it prints an error message, it's the
  // first thing printed.
  event_collector collector;

  // Set up benchmarkers by reading all files
  json_parser parser(options.architecture);
  feature_benchmarker features(parser, collector);
  benchmarker gsoc_2018("jsonexamples/gsoc-2018.json", parser, collector);
  benchmarker twitter("jsonexamples/twitter.json", parser, collector);
  benchmarker random("jsonexamples/random.json", parser, collector);

  // Run the benchmarks
  progress_bar progress(options.iterations, 100);
  // Put the if (options.stage1_only) *outside* the loop so that run_iterations will be optimized
  if (options.stage1_only) {
    for (int iteration = 0; iteration < options.iterations; iteration += options.iteration_step) {
      if (!options.verbose) { progress.print(iteration); }
      features.run_iterations(options.iteration_step, true);
      gsoc_2018.run_iterations(options.iteration_step, true);
      twitter.run_iterations(options.iteration_step, true);
      random.run_iterations(options.iteration_step, true);
    }
  } else {
    for (int iteration = 0; iteration < options.iterations; iteration += options.iteration_step) {
      if (!options.verbose) { progress.print(iteration); }
      features.run_iterations(options.iteration_step, false);
      gsoc_2018.run_iterations(options.iteration_step, false);
      twitter.run_iterations(options.iteration_step, false);
      random.run_iterations(options.iteration_step, false);
    }
  }
  if (!options.verbose) { progress.erase(); }

  // Per-feature cost table (CSV).
  features.print();

  // Gauge effectiveness
  printf("gsoc-2018.json expected/actual: %g/%g\n", features.calc_expected(gsoc_2018), actual(gsoc_2018));
  printf("twitter.json expected/actual: %g/%g\n", features.calc_expected(twitter), actual(twitter));
  printf("random.json expected/actual: %g/%g\n", features.calc_expected(random), actual(random));

  return EXIT_SUCCESS;
}

424
benchmark/benchmarker.h Normal file
View File

@ -0,0 +1,424 @@
#ifndef __BENCHMARKER_H
#define __BENCHMARKER_H
#include "json_parser.h"
#include "event_counter.h"
#include <cassert>
#include <cctype>
#ifndef _MSC_VER
#include <dirent.h>
#include <unistd.h>
#endif
#include <cinttypes>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
#include <chrono>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>
#include "linux-perf-events.h"
#ifdef __linux__
#include <libgen.h>
#endif
//#define DEBUG
#include "simdjson/common_defs.h"
#include "simdjson/isadetection.h"
#include "simdjson/jsonioutil.h"
#include "simdjson/jsonparser.h"
#include "simdjson/parsedjson.h"
#include "simdjson/stage1_find_marks.h"
#include "simdjson/stage2_build_tape.h"
#include <functional>
using namespace simdjson;
using std::cerr;
using std::cout;
using std::endl;
using std::string;
using std::to_string;
using std::vector;
using std::ostream;
using std::ofstream;
using std::exception;
// Initialize "verbose" to go nowhere. We'll read options in main() and set to cout if verbose is true.
// (An unopened ofstream discards everything written to it.)
std::ofstream dev_null;
ostream *verbose_stream = &dev_null;

// Unit of measurement throughout: stage 1 processes input in 64-byte blocks.
const size_t BYTES_PER_BLOCK = 64;

// Accessor for the verbose logging stream (dev_null unless -v was given).
ostream& verbose() {
  return *verbose_stream;
}
// Print an error message to stderr and terminate the process with a failure
// status. Never returns.
void exit_error(string message) {
  cerr << message << endl;
  exit(EXIT_FAILURE);
  // BUG FIX: removed the unreachable abort() that followed exit() —
  // exit() is [[noreturn]], so that call was dead code.
}
// Static per-file statistics derived from the JSON text and the structural
// indexes produced by stage 1: how many 64-byte blocks contain utf-8 and how
// many contain 0 / 1+ / 8+ / 16+ structurals, plus how often each property
// "flips" between consecutive blocks (a proxy for branch mispredictions).
struct json_stats {
  size_t bytes = 0;
  size_t blocks = 0;
  size_t structurals = 0;
  // For each property: number of blocks that have it, and number of
  // consecutive-block transitions where the property changed ("flipped").
  size_t blocks_with_utf8 = 0;
  size_t blocks_with_utf8_flipped = 0;
  size_t blocks_with_0_structurals = 0;
  size_t blocks_with_0_structurals_flipped = 0;
  size_t blocks_with_1_structural = 0;
  size_t blocks_with_1_structural_flipped = 0;
  size_t blocks_with_8_structurals = 0;
  size_t blocks_with_8_structurals_flipped = 0;
  size_t blocks_with_16_structurals = 0;
  size_t blocks_with_16_structurals_flipped = 0;
  json_stats(const padded_string& json, const ParsedJson& pj) {
    bytes = json.size();
    blocks = bytes / BYTES_PER_BLOCK;
    if (bytes % BYTES_PER_BLOCK > 0) { blocks++; } // Account for remainder block
    // NOTE(review): the -1 assumes the last structural index is an
    // end-of-input marker rather than real content — confirm against
    // ParsedJson's stage 1 output.
    structurals = pj.n_structural_indexes-1;

    // Calculate stats on blocks that will trigger utf-8 if statements / mispredictions
    bool last_block_has_utf8 = false;
    for (size_t block=0; block<blocks; block++) {
      // Find utf-8 in the block (any byte with the high bit set)
      size_t block_start = block*BYTES_PER_BLOCK;
      size_t block_end = block_start+BYTES_PER_BLOCK;
      if (block_end > json.size()) { block_end = json.size(); }

      bool block_has_utf8 = false;
      for (size_t i=block_start; i<block_end; i++) {
        if (json.data()[i] & 0x80) {
          block_has_utf8 = true;
          break;
        }
      }
      if (block_has_utf8) {
        blocks_with_utf8++;
      }
      if (block > 0 && last_block_has_utf8 != block_has_utf8) {
        blocks_with_utf8_flipped++;
      }
      last_block_has_utf8 = block_has_utf8;
    }

    // Calculate stats on blocks that will trigger structural count if statements / mispredictions
    bool last_block_has_0_structurals = false;
    bool last_block_has_1_structural = false;
    bool last_block_has_8_structurals = false;
    bool last_block_has_16_structurals = false;
    size_t structural=0;
    for (size_t block=0; block<blocks; block++) {
      // Count structurals in the block (indexes are sorted, so a single
      // cursor walks them once across all blocks)
      int block_structurals=0;
      while (structural < pj.n_structural_indexes && pj.structural_indexes[structural] < (block+1)*BYTES_PER_BLOCK) {
        block_structurals++;
        structural++;
      }

      bool block_has_0_structurals = block_structurals == 0;
      if (block_has_0_structurals) {
        blocks_with_0_structurals++;
      }
      if (block > 0 && last_block_has_0_structurals != block_has_0_structurals) {
        blocks_with_0_structurals_flipped++;
      }
      last_block_has_0_structurals = block_has_0_structurals;

      bool block_has_1_structural = block_structurals >= 1;
      if (block_has_1_structural) {
        blocks_with_1_structural++;
      }
      if (block > 0 && last_block_has_1_structural != block_has_1_structural) {
        blocks_with_1_structural_flipped++;
      }
      last_block_has_1_structural = block_has_1_structural;

      bool block_has_8_structurals = block_structurals >= 8;
      if (block_has_8_structurals) {
        blocks_with_8_structurals++;
      }
      if (block > 0 && last_block_has_8_structurals != block_has_8_structurals) {
        blocks_with_8_structurals_flipped++;
      }
      last_block_has_8_structurals = block_has_8_structurals;

      bool block_has_16_structurals = block_structurals >= 16;
      if (block_has_16_structurals) {
        blocks_with_16_structurals++;
      }
      if (block > 0 && last_block_has_16_structurals != block_has_16_structurals) {
        blocks_with_16_structurals_flipped++;
      }
      last_block_has_16_structurals = block_has_16_structurals;
    }
  }
};
// Read the given file into a padded_string, terminating the process (via
// exit_error) if it cannot be loaded.
padded_string load_json(const char *filename) {
  try {
    verbose() << "[verbose] loading " << filename << endl;
    padded_string json = simdjson::get_corpus(filename);
    verbose() << "[verbose] loaded " << filename << " (" << json.size() << " bytes)" << endl;
    return json;
  } catch (const exception &) { // caught by reference to base
    exit_error(string("Could not load the file ") + filename);
    exit(EXIT_FAILURE); // This is not strictly necessary but removes the warning
  }
}
// Simple single-line text progress bar drawn on stderr.
struct progress_bar {
  int max_value;         // value that corresponds to a full bar
  int total_ticks;       // width of the bar in characters
  double ticks_per_value;
  int next_tick;         // first tick not yet drawn

  // Draw an empty bar ("[    ...    ]") and leave the cursor just inside
  // the opening bracket, ready for print() to fill in '=' characters.
  progress_bar(int _max_value, int _total_ticks)
      : max_value(_max_value),
        total_ticks(_total_ticks),
        ticks_per_value(double(_total_ticks)/_max_value),
        next_tick(0) {
    fputc('[', stderr);
    for (int col = 0; col < total_ticks; col++) {
      fputc(' ', stderr);
    }
    fputc(']', stderr);
    for (int col = 0; col < total_ticks+1; col++) {
      fputc('\b', stderr);
    }
  }

  // Advance the bar to reflect `value` (out of max_value), drawing any
  // '=' characters not yet printed. Never exceeds the bar width.
  void print(int value) {
    double target = value*ticks_per_value;
    if (target >= total_ticks) {
      target = total_ticks-1;
    }
    int tick = next_tick;
    while (tick <= target && tick <= total_ticks) {
      fputc('=', stderr);
      tick++;
    }
    next_tick = tick;
  }

  // Blank out the bar: backspace over what was drawn, overwrite with
  // spaces, then backspace again so later output starts at the left edge.
  void erase() {
    for (int col = 0; col < next_tick+1; col++) {
      fputc('\b', stderr);
    }
    for (int col = 0; col <= total_ticks+2; col++) {
      fputc(' ', stderr);
    }
    for (int col = 0; col <= total_ticks+2; col++) {
      fputc('\b', stderr);
    }
  }
};
struct benchmarker {
// JSON text from loading the file. Owns the memory.
const padded_string json;
// JSON filename
const char *filename;
// Parser that will parse the JSON file
const json_parser& parser;
// Event collector that can be turned on to measure cycles, missed branches, etc.
event_collector& collector;
// Statistics about the JSON file independent of its speed (amount of utf-8, structurals, etc.).
// Loaded on first parse.
json_stats* stats;
// Speed and event summary for full parse (not including allocation)
event_aggregate all_stages;
// Speed and event summary for stage 1
event_aggregate stage1;
// Speed and event summary for stage 2
event_aggregate stage2;
// Speed and event summary for allocation
event_aggregate allocate_stage;
benchmarker(const char *_filename, const json_parser& _parser, event_collector& _collector)
: json(load_json(_filename)), filename(_filename), parser(_parser), collector(_collector), stats(NULL) {}
~benchmarker() {
if (stats) {
delete stats;
}
}
int iterations() const {
return all_stages.iterations;
}
really_inline void run_iteration(bool stage1_only=false) {
// Allocate ParsedJson
collector.start();
ParsedJson pj;
bool allocok = pj.allocate_capacity(json.size());
event_count allocate_count = collector.end();
allocate_stage << allocate_count;
if (!allocok) {
exit_error(string("Unable to allocate_stage ") + to_string(json.size()) + " bytes for the JSON result.");
}
verbose() << "[verbose] allocated memory for parsed JSON " << endl;
// Stage 1 (find structurals)
collector.start();
int result = parser.stage1((const uint8_t *)json.data(), json.size(), pj);
event_count stage1_count = collector.end();
stage1 << stage1_count;
if (result != simdjson::SUCCESS) {
exit_error(string("Failed to parse ") + filename + " during stage 1: " + pj.get_error_message());
}
// Stage 2 (unified machine)
event_count stage2_count;
if (!stage1_only || stats == NULL) {
if (!stage1_only) {
collector.start();
}
result = parser.stage2((const uint8_t *)json.data(), json.size(), pj);
if (!stage1_only) {
stage2_count = collector.end();
stage2 << stage2_count;
}
if (result != simdjson::SUCCESS) {
exit_error(string("Failed to parse ") + filename + " during stage 2: " + pj.get_error_message());
}
}
all_stages << (stage1_count + stage2_count);
// Calculate stats the first time we parse
if (stats == NULL) {
stats = new json_stats(json, pj);
}
}
really_inline void run_iterations(size_t iterations, bool stage1_only=false) {
for (size_t i = 0; i<iterations; i++) {
run_iteration(stage1_only);
}
}
double stage1_ns_per_block() {
return stage1.elapsed_ns() / stats->blocks;
}
template<typename T>
void print_aggregate(const char* prefix, const T& stage) const {
printf("%s%-13s: %8.4f ns per block (%5.1f %%) - %8.4f ns per byte - %8.4f ns per structural - %8.3f GB/s\n",
prefix,
"Speed",
stage.elapsed_ns() / stats->blocks, // per block
100.0 * stage.elapsed_sec() / all_stages.elapsed_sec(), // %
stage.elapsed_ns() / stats->bytes, // per byte
stage.elapsed_ns() / stats->structurals, // per structural
(json.size() / 1000000000.0) / stage.elapsed_sec() // GB/s
);
if (collector.has_events()) {
printf("%s%-13s: %2.3f per block (%5.2f %%) - %2.3f per byte - %2.3f per structural - %2.3f GHz est. frequency\n",
prefix,
"Cycles",
stage.cycles() / stats->blocks,
100.0 * stage.cycles() / all_stages.cycles(),
stage.cycles() / stats->bytes,
stage.cycles() / stats->structurals,
(stage.cycles() / stage.elapsed_sec()) / 1000000000.0
);
printf("%s%-13s: %2.2f per block (%5.2f %%) - %2.2f per byte - %2.2f per structural - %2.2f per cycle\n",
prefix,
"Instructions",
stage.instructions() / stats->blocks,
100.0 * stage.instructions() / all_stages.instructions(),
stage.instructions() / stats->bytes,
stage.instructions() / stats->structurals,
stage.instructions() / stage.cycles()
);
// NOTE: removed cycles/miss because it is a somewhat misleading stat
printf("%s%-13s: %2.2f branch misses (%5.2f %%) - %2.2f cache misses (%5.2f %%) - %2.2f cache references\n",
prefix,
"Misses",
stage.branch_misses(),
100.0 * stage.branch_misses() / all_stages.branch_misses(),
stage.cache_misses(),
100.0 * stage.cache_misses() / all_stages.cache_misses(),
stage.cache_references()
);
}
}
void print(bool tabbed_output) const {
if (tabbed_output) {
char* filename_copy = (char*)malloc(strlen(filename)+1);
strcpy(filename_copy, filename);
#if defined(__linux__)
char* base = ::basename(filename_copy);
#else
char* base = filename_copy;
#endif
if (strlen(base) >= 5 && !strcmp(base+strlen(base)-5, ".json")) {
base[strlen(base)-5] = '\0';
}
double gb = json.size() / 1000000000.0;
if (collector.has_events()) {
printf("\"%s\"\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n",
base,
allocate_stage.best.cycles() / json.size(),
stage1.best.cycles() / json.size(),
stage2.best.cycles() / json.size(),
all_stages.best.cycles() / json.size(),
gb / all_stages.best.elapsed_sec(),
gb / stage1.best.elapsed_sec(),
gb / stage2.best.elapsed_sec());
} else {
printf("\"%s\"\t\t\t\t\t%f\t%f\t%f\n",
base,
gb / all_stages.best.elapsed_sec(),
gb / stage1.best.elapsed_sec(),
gb / stage2.best.elapsed_sec());
}
free(filename_copy);
} else {
printf("\n");
printf("%s\n", filename);
printf("%s\n", string(strlen(filename), '=').c_str());
printf("%9zu blocks - %10zu bytes - %5zu structurals (%5.1f %%)\n", stats->bytes / BYTES_PER_BLOCK, stats->bytes, stats->structurals, 100.0 * stats->structurals / stats->bytes);
if (stats) {
printf("special blocks with: utf8 %9zu (%5.1f %%) - 0 structurals %9zu (%5.1f %%) - 1+ structurals %9zu (%5.1f %%) - 8+ structurals %9zu (%5.1f %%) - 16+ structurals %9zu (%5.1f %%)\n",
stats->blocks_with_utf8, 100.0 * stats->blocks_with_utf8 / stats->blocks,
stats->blocks_with_0_structurals, 100.0 * stats->blocks_with_0_structurals / stats->blocks,
stats->blocks_with_1_structural, 100.0 * stats->blocks_with_1_structural / stats->blocks,
stats->blocks_with_8_structurals, 100.0 * stats->blocks_with_8_structurals / stats->blocks,
stats->blocks_with_16_structurals, 100.0 * stats->blocks_with_16_structurals / stats->blocks);
printf("special block flips: utf8 %9zu (%5.1f %%) - 0 structurals %9zu (%5.1f %%) - 1+ structurals %9zu (%5.1f %%) - 8+ structurals %9zu (%5.1f %%) - 16+ structurals %9zu (%5.1f %%)\n",
stats->blocks_with_utf8_flipped, 100.0 * stats->blocks_with_utf8_flipped / stats->blocks,
stats->blocks_with_1_structural_flipped, 100.0 * stats->blocks_with_1_structural_flipped / stats->blocks,
stats->blocks_with_0_structurals_flipped, 100.0 * stats->blocks_with_0_structurals_flipped / stats->blocks,
stats->blocks_with_8_structurals_flipped, 100.0 * stats->blocks_with_8_structurals_flipped / stats->blocks,
stats->blocks_with_16_structurals_flipped, 100.0 * stats->blocks_with_16_structurals_flipped / stats->blocks);
}
printf("\n");
printf("All Stages\n");
print_aggregate("| " , all_stages.best);
// printf("|- Allocation\n");
// print_aggregate("| ", allocate_stage.best);
printf("|- Stage 1\n");
print_aggregate("| ", stage1.best);
printf("|- Stage 2\n");
print_aggregate("| ", stage2.best);
}
}
};
#endif

152
benchmark/event_counter.h Normal file
View File

@ -0,0 +1,152 @@
#ifndef __EVENT_COUNTER_H
#define __EVENT_COUNTER_H
#include <cassert>
#include <cctype>
#ifndef _MSC_VER
#include <dirent.h>
#include <unistd.h>
#endif
#include <cinttypes>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
#include <chrono>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>
#include "linux-perf-events.h"
#ifdef __linux__
#include <libgen.h>
#endif
//#define DEBUG
#include "simdjson/common_defs.h"
#include "simdjson/isadetection.h"
using std::string;
using std::vector;
using std::chrono::steady_clock;
using std::chrono::time_point;
using std::chrono::duration;
// One measurement sample: wall-clock time plus the five hardware counter
// values read by event_collector (indexed by event_counter_types).
struct event_count {
  std::chrono::duration<double> elapsed;
  std::vector<unsigned long long> event_counts;

  event_count() : elapsed(0), event_counts{0,0,0,0,0} {}
  // PERF FIX: the counter vector is taken by const reference; the original
  // took it by value and copied it an extra time on every sample.
  event_count(const std::chrono::duration<double> _elapsed, const std::vector<unsigned long long>& _event_counts) : elapsed(_elapsed), event_counts(_event_counts) {}
  event_count(const event_count& other): elapsed(other.elapsed), event_counts(other.event_counts) { }

  // The types of counters (so we can read the getter more easily)
  enum event_counter_types {
    CPU_CYCLES,
    INSTRUCTIONS,
    BRANCH_MISSES,
    CACHE_REFERENCES,
    CACHE_MISSES
  };

  // Elapsed wall-clock time in seconds / nanoseconds.
  double elapsed_sec() const { return std::chrono::duration<double>(elapsed).count(); }
  double elapsed_ns() const { return std::chrono::duration<double, std::nano>(elapsed).count(); }
  // Counter accessors (returned as double for easy ratio arithmetic).
  double cycles() const { return event_counts[CPU_CYCLES]; }
  double instructions() const { return event_counts[INSTRUCTIONS]; }
  double branch_misses() const { return event_counts[BRANCH_MISSES]; }
  double cache_references() const { return event_counts[CACHE_REFERENCES]; }
  double cache_misses() const { return event_counts[CACHE_MISSES]; }

  // PERF FIX: operators take const& instead of by-value — each by-value call
  // copied the five-element counter vector in the benchmark hot loop.
  event_count& operator=(const event_count& other) {
    this->elapsed = other.elapsed;
    this->event_counts = other.event_counts;
    return *this;
  }
  event_count operator+(const event_count& other) const {
    return event_count(elapsed+other.elapsed, {
      event_counts[0]+other.event_counts[0],
      event_counts[1]+other.event_counts[1],
      event_counts[2]+other.event_counts[2],
      event_counts[3]+other.event_counts[3],
      event_counts[4]+other.event_counts[4],
    });
  }
  void operator+=(const event_count& other) {
    *this = *this + other;
  }
};
// Running aggregate over many event_count samples: keeps the sum plus the
// best (fastest) and worst (slowest) sample by elapsed time.
struct event_aggregate {
  int iterations = 0;   // number of samples recorded so far
  event_count total;    // sum of all samples
  event_count best;     // fastest sample
  event_count worst;    // slowest sample

  event_aggregate() {}

  // Record one sample. PERF FIX: takes const& — the original passed the
  // event_count (and its counter vector) by value on every call in the
  // benchmark loop.
  void operator<<(const event_count& other) {
    if (iterations == 0 || other.elapsed < best.elapsed) {
      best = other;
    }
    if (iterations == 0 || other.elapsed > worst.elapsed) {
      worst = other;
    }
    iterations++;
    total += other;
  }

  // Averages over all recorded samples (callers must record at least one
  // sample first; otherwise these divide by zero).
  double elapsed_sec() const { return total.elapsed_sec() / iterations; }
  double elapsed_ns() const { return total.elapsed_ns() / iterations; }
  double cycles() const { return total.cycles() / iterations; }
  double instructions() const { return total.instructions() / iterations; }
  double branch_misses() const { return total.branch_misses() / iterations; }
  double cache_references() const { return total.cache_references() / iterations; }
  double cache_misses() const { return total.cache_misses() / iterations; }
};
// Wraps Linux perf hardware counters (when available) plus a steady_clock
// timer behind a start()/end() pair. On non-Linux platforms only wall-clock
// time is collected.
struct event_collector {
  // Reused result buffer; end() returns a reference to it.
  event_count count;
  time_point<steady_clock> start_clock;

#if defined(__linux__)
  LinuxEvents<PERF_TYPE_HARDWARE> linux_events;
  event_collector() : linux_events(vector<int>{
    PERF_COUNT_HW_CPU_CYCLES,
    PERF_COUNT_HW_INSTRUCTIONS,
    PERF_COUNT_HW_BRANCH_MISSES,
    PERF_COUNT_HW_CACHE_REFERENCES,
    PERF_COUNT_HW_CACHE_MISSES
  }) {}
  // True when the perf counters opened successfully.
  bool has_events() {
    return linux_events.is_working();
  }
#else
  bool has_events() {
    return false;
  }
#endif

  // NOTE(review): the guards below test __linux while the member section
  // above tests __linux__ — both are defined by gcc/clang on Linux, but
  // they should be made consistent; verify on other compilers.
  really_inline void start() {
#if defined(__linux)
    linux_events.start();
#endif
    start_clock = steady_clock::now();
  }
  // Stop the measurement and return the counters/elapsed time. The clock is
  // read before the perf counters are so the timing excludes counter readout.
  really_inline event_count& end() {
    time_point<steady_clock> end_clock = steady_clock::now();
#if defined(__linux)
    linux_events.end(count.event_counts);
#endif
    count.elapsed = end_clock - start_clock;
    return count;
  }
};
#endif

114
benchmark/genfeaturejson.rb Normal file
View File

@ -0,0 +1,114 @@
# Generates fixed-size synthetic JSON files composed of 64-byte chunks, used
# by benchfeatures.cpp to measure the per-block cost of stage 1 features.
class ChunkWriter
  # BUG FIX: use instance variables instead of class variables (@@...) —
  # class variables are shared across every ChunkWriter instance, so a second
  # writer with different settings would clobber the first.
  def initialize(output_dir, miss_templates, file_size=640*1000, block_size=64)
    @output_dir = output_dir
    @miss_templates = miss_templates
    @file_size = file_size
    @block_size = block_size
  end

  # Pad each chunk to exactly @block_size bytes, terminated by a newline
  # (or a space when include_newline is false).
  def prepare_chunk(chunks, include_newline)
    Array(chunks).map do |chunk|
      "#{chunk}#{' '*(@block_size-chunk.bytesize-1)}#{include_newline ? "\n" : " "}"
    end.join("")
  end

  # Write the three variants for a feature: <name>-full.json (every block has
  # the feature), <name>.json (half/half), <name>-miss.json (alternating per
  # the miss template, to provoke branch mispredictions).
  # BUG FIX: restored the "#{filename}" interpolations, which were mangled
  # to "#(unknown)" in this copy of the file.
  def write_files(filename, start1, repeat1, end1, repeat2: '', include_newline: true)
    start1 = prepare_chunk(start1, include_newline)
    repeat1 = prepare_chunk(repeat1, include_newline)
    end1 = prepare_chunk(end1, include_newline)
    write_full(File.join(@output_dir, "#{filename}-full.json"), start1, repeat1, end1)
    repeat2 = prepare_chunk(repeat2, include_newline)
    repeat2 = repeat2 * (repeat1.bytesize/repeat2.bytesize)
    write_half(File.join(@output_dir, "#{filename}.json"), start1, repeat1, end1, repeat2)
    write_half_miss(File.join(@output_dir, "#{filename}-miss.json"), start1, repeat1, end1, repeat2)
  end

  # Every block between start1 and end1 is repeat1.
  def write_full(filename, start1, repeat1, end1)
    puts "Writing #{filename} ..."
    File.open(filename, "w") do |file|
      write_chunks(file, start1, repeat1, end1, @file_size)
    end
    raise "OMG wrong file size #{File.size(filename)} (should be #{@file_size})" if File.size(filename) != @file_size
  end

  # First half repeat1, second half repeat2.
  def write_half(filename, start1, repeat1, end1, repeat2)
    # repeat1 is already represented in start1 and end1, so it doesn't need quite
    # half the iterations.
    repeat1_len = (@file_size/2) - start1.bytesize - end1.bytesize
    halfway_point = start1.bytesize + repeat1_len + repeat2.bytesize
    puts "Writing #{filename} ..."
    File.open(filename, "w") do |file|
      write_chunks(file, start1, repeat1, repeat2, halfway_point)
      write_chunks(file, repeat2, repeat2, end1, @file_size-halfway_point)
    end
    raise "OMG wrong file size #{File.size(filename)} (should be #{@file_size})" if File.size(filename) != @file_size
  end

  # Alternate repeat1/repeat2 according to the precomputed 0/1 miss template
  # for this repeat size.
  def write_half_miss(filename, start1, repeat1, end1, repeat2)
    miss_template = Array(File.read(File.join(@miss_templates, "#{repeat1.bytesize}.txt")).chomp.split("\n"))
    # Take the start and end out of the template
    repeat_template = miss_template[(start1.bytesize/64)..(-end1.bytesize/64-1)]
    # If repeat is 128 bytes, each *pair* of elements is set. Use that.
    repeat_chunks = repeat1.bytesize/64
    repeat_template = (repeat_chunks - 1).step(repeat_template.size - 1, repeat_chunks).map { |i| repeat_template[i] }
    puts "Writing #{filename} ..."
    File.open(filename, "w") do |file|
      file.write(start1)
      repeat_template.each do |should_repeat|
        file.write(should_repeat == "1" ? repeat1 : repeat2)
      end
      file.write(end1)
    end
    raise "OMG wrong file size #{File.size(filename)} (should be #{@file_size})" if File.size(filename) != @file_size
  end

  # Write start1, then repeat1 until `size` minus end1 is filled, then end1.
  # Returns the number of bytes written.
  def write_chunks(file, start1, repeat1, end1, size)
    pos = 0
    file.write(start1)
    pos += start1.bytesize
    repeat_end = size-end1.bytesize
    loop do
      file.write(repeat1)
      pos += repeat1.bytesize
      break if pos >= repeat_end
    end
    file.write(end1)
    pos += end1.bytesize
    return pos
  end
end
# Generate the benchmark inputs under jsonexamples/generated/ using the
# precomputed miss templates that live next to this script.
output_dir = File.expand_path("../jsonexamples/generated", File.dirname(__FILE__))
miss_templates = File.expand_path("miss-templates", File.dirname(__FILE__))
Dir.mkdir(output_dir) unless File.directory?(output_dir)
w = ChunkWriter.new(output_dir, miss_templates)
# utf-8: same structural layout as 7-structurals, but with multi-byte
# characters inside the strings.
w.write_files "utf-8", '["֏","֏",{}', ',"֏","֏",{}', ',"֏","֏","֏"]', repeat2: ',"ab","ab",{}'
w.write_files "0-structurals", '"ab"', '', ''
# Only a sample of structural densities is generated; the commented-out
# lines document the rest of the series.
# w.write_files "1-structurals", [ '[', '"ab"' ], [ ',', '"ab"' ], [ ',', '{', '}', ']' ]
# w.write_files "2-structurals", '["ab"', ',"ab"', [',{', '}]']
# w.write_files "3-structurals", '[{}', ',{}', ',"ab"]'
# w.write_files "4-structurals", '["ab","ab"', ',"ab","ab"', ',{}]'
# w.write_files "5-structurals", '["ab",{}', ',"ab",{}', ',"ab","ab"]'
# w.write_files "6-structurals", '["ab","ab","ab"', ',"ab","ab","ab"', ',"ab",{}]'
w.write_files "7-structurals", '["ab","ab",{}', ',"ab","ab",{}', ',"ab","ab","ab"]'
# w.write_files "8-structurals", '["ab","ab","ab","ab"', ',"ab","ab","ab","ab"', ',"ab","ab",{}]'
# w.write_files "9-structurals", '["ab","ab","ab",{}', ',"ab","ab","ab",{}', ',"ab","ab","ab","ab"]'
# w.write_files "10-structurals", '["ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab"', ',"ab","ab","ab",{}]'
# w.write_files "11-structurals", '["ab","ab","ab","ab",{}', ',"ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab"]'
# w.write_files "12-structurals", '["ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab",{}]'
# w.write_files "13-structurals", '["ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab"]'
# w.write_files "14-structurals", '["ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab",{}]'
w.write_files "15-structurals", '["ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab","ab"]'
# w.write_files "16-structurals", '["ab","ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab",{}]'
# w.write_files "17-structurals", '["ab","ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab","ab","ab"]'
# w.write_files "18-structurals", '["ab","ab","ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab","ab",{}]'
# w.write_files "19-structurals", '["ab","ab","ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab","ab","ab","ab"]'
# w.write_files "20-structurals", '["ab","ab","ab","ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab","ab","ab",{}]'
# w.write_files "21-structurals", '["ab","ab","ab","ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab","ab","ab","ab","ab"]'
# w.write_files "22-structurals", '["ab","ab","ab","ab","ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab","ab","ab","ab",{}]'
w.write_files "23-structurals", '["ab","ab","ab","ab","ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab","ab","ab","ab","ab","ab"]'

View File

@ -0,0 +1,49 @@
# Searches seeds 1..1000000 for Random seeds that, when used to flip between
# two block types every repeat_blocks blocks, produce exactly half the file
# as "type 1" blocks with exactly a 25% flip rate. Matching seeds are
# printed as they are found, then again as a list at the end.
#
# start_blocks / end_blocks: blocks pinned to type 1 at the file's start and
# end; repeat_blocks: granularity at which the type may flip.
# (Presumably these seeds feed the miss-template generation — TODO confirm.)
#
# Cleanup vs. previous revision: removed the unused `closest_flips`
# variable and the dead `type1 = true` store after the loop; merged the
# nested target checks. Output is unchanged.
def gen_seeds(start_blocks, repeat_blocks, end_blocks)
  total_size = 640*1000
  total_blocks = total_size/64
  seed_space = 1..1000000
  # Targets: half the blocks are type 1, and a quarter of the block groups
  # flip type. Scale the per-group flip probability by the group size.
  target_blocks = total_blocks*0.5
  target_flips = total_blocks*0.25
  percent_flips = 0.25*repeat_blocks
  puts "Seeds for #{start_blocks} start blocks, #{end_blocks} end blocks and #{repeat_blocks} repeat blocks: #{percent_flips*100}% flips"
  closest_seeds = []
  seed_space.each do |seed|
    r = Random.new(seed)
    # First block is always type 1
    flips = 0
    type1 = true
    type1_blocks = start_blocks
    finished_blocks = start_blocks
    last_repeat = total_blocks-end_blocks
    while finished_blocks < last_repeat
      if r.rand < percent_flips
        flips += 1
        type1 = !type1
      end
      type1_blocks += repeat_blocks if type1
      finished_blocks += repeat_blocks
    end
    # Last one is always type 1, so count a forced flip back if we ended on
    # type 2.
    flips += 1 if !type1
    type1_blocks += end_blocks
    finished_blocks += end_blocks
    raise "simulated the wrong number of blocks #{finished_blocks}" if finished_blocks != total_blocks
    if type1_blocks == target_blocks && flips == target_flips
      puts seed
      closest_seeds << seed
    end
  end
  puts closest_seeds
end
# Search for usable seeds at each (start, repeat, end) block configuration.
gen_seeds(1,1,1)
gen_seeds(1,1,2)
gen_seeds(2,2,4)

113
benchmark/json_parser.h Normal file
View File

@ -0,0 +1,113 @@
#ifndef __JSON_PARSER_H
#define __JSON_PARSER_H
#include <cassert>
#include <cctype>
#ifndef _MSC_VER
#include <dirent.h>
#include <unistd.h>
#endif
#include <cinttypes>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
#include <chrono>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>
#include "linux-perf-events.h"
#ifdef __linux__
#include <libgen.h>
#endif
//#define DEBUG
#include "simdjson/common_defs.h"
#include "simdjson/isadetection.h"
#include "simdjson/jsonioutil.h"
#include "simdjson/jsonparser.h"
#include "simdjson/parsedjson.h"
#include "simdjson/stage1_find_marks.h"
#include "simdjson/stage2_build_tape.h"
// NOTE(review): `using namespace` / `using std::` in a header leaks into
// every includer — acceptable for a benchmark-only helper, but confirm.
using namespace simdjson;
using std::string;

// Common signature of the stage 1 (find_structural_bits) and stage 2
// (unified_machine) entry points dispatched below.
using stage2_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj);
using stage1_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj);
// Returns the stage 1 (find_structural_bits) implementation compiled for
// the requested architecture. Architectures not compiled into this binary
// are unsupported: print an error and terminate the process.
stage1_functype* get_stage1_func(const Architecture architecture) {
#ifdef IS_X86_64
  if (architecture == Architecture::HASWELL) {
    return &find_structural_bits<Architecture::HASWELL>;
  }
  if (architecture == Architecture::WESTMERE) {
    return &find_structural_bits<Architecture::WESTMERE>;
  }
#endif
#ifdef IS_ARM64
  if (architecture == Architecture::ARM64) {
    return &find_structural_bits<Architecture::ARM64>;
  }
#endif
  std::cerr << "The processor is not supported by simdjson." << std::endl;
  exit(EXIT_FAILURE);
}
// Returns the stage 2 (unified_machine) implementation compiled for the
// requested architecture. Architectures not compiled into this binary are
// unsupported: print an error and terminate the process.
// Fix: removed the unreachable `break;` statements after each `return`.
stage2_functype* get_stage2_func(const Architecture architecture) {
  switch (architecture) {
#ifdef IS_X86_64
  case Architecture::HASWELL:
    return &unified_machine<Architecture::HASWELL>;
  case Architecture::WESTMERE:
    return &unified_machine<Architecture::WESTMERE>;
#endif
#ifdef IS_ARM64
  case Architecture::ARM64:
    return &unified_machine<Architecture::ARM64>;
#endif
  default:
    std::cerr << "The processor is not supported by simdjson." << std::endl;
    exit(EXIT_FAILURE);
  }
}
struct json_parser {
const Architecture architecture;
const stage1_functype *stage1_func;
const stage2_functype *stage2_func;
json_parser(const Architecture _architecture) : architecture(_architecture) {
this->stage1_func = get_stage1_func(architecture);
this->stage2_func = get_stage2_func(architecture);
}
json_parser() : json_parser(find_best_supported_architecture()) {}
int stage1(const uint8_t *buf, const size_t len, ParsedJson &pj) const {
return this->stage1_func(buf, len, pj);
}
int stage2(const uint8_t *buf, const size_t len, ParsedJson &pj) const {
return this->stage2_func(buf, len, pj);
}
int parse(const uint8_t *buf, const size_t len, ParsedJson &pj) const {
int result = this->stage1(buf, len, pj);
if (result == SUCCESS) {
result = this->stage2(buf, len, pj);
}
return result;
}
};
#endif

View File

@ -83,6 +83,10 @@ public:
} }
} }
bool is_working() {
return working;
}
private: private:
void report_error(const std::string &context) { void report_error(const std::string &context) {
if (working) if (working)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,6 @@
#include "json_parser.h"
#include "event_counter.h"
#include <cassert> #include <cassert>
#include <cctype> #include <cctype>
#ifndef _MSC_VER #ifndef _MSC_VER
@ -35,405 +38,179 @@
#include "simdjson/stage1_find_marks.h" #include "simdjson/stage1_find_marks.h"
#include "simdjson/stage2_build_tape.h" #include "simdjson/stage2_build_tape.h"
// Global arguments #include <functional>
bool find_marks_only = false;
bool verbose = false;
bool dump = false;
bool json_output = false;
bool force_one_iteration = false;
bool just_data = false;
bool force_sse = false;
int32_t iterations = -1;
int32_t warmup_iterations = -1;
namespace simdjson { #include "benchmarker.h"
Architecture _find_best_supported_implementation() {
constexpr uint32_t haswell_flags =
instruction_set::AVX2 | instruction_set::PCLMULQDQ |
instruction_set::BMI1 | instruction_set::BMI2;
constexpr uint32_t westmere_flags =
instruction_set::SSE42 | instruction_set::PCLMULQDQ;
uint32_t supports = detect_supported_architectures();
// Order from best to worst (within architecture)
if ((haswell_flags & supports) == haswell_flags && !force_sse) {
return Architecture::HASWELL;
}
if ((westmere_flags & supports) == westmere_flags) {
return Architecture::WESTMERE;
}
if (instruction_set::NEON)
return Architecture::ARM64;
return Architecture::NONE; using namespace simdjson;
using std::cerr;
using std::cout;
using std::endl;
using std::string;
using std::to_string;
using std::vector;
using std::ostream;
using std::ofstream;
using std::exception;
// Stash the exe_name in main() for functions to use
char* exe_name;
void print_usage(ostream& out) {
out << "Usage: " << exe_name << " [-vt] [-n #] [-s STAGE] [-a ARCH] <jsonfile> ..." << endl;
out << endl;
out << "Runs the parser against the given json files in a loop, measuring speed and other statistics." << endl;
out << endl;
out << "Options:" << endl;
out << endl;
out << "-n # - Number of iterations per file. Default: 200" << endl;
out << "-i # - Number of times to iterate a single file before moving to the next. Default: 20" << endl;
out << "-t - Tabbed data output" << endl;
out << "-v - Verbose output." << endl;
out << "-s STAGE - Stop after the given stage." << endl;
out << " -s stage1 - Stop after find_structural_bits." << endl;
out << " -s all - Run all stages." << endl;
out << "-a ARCH - Use the parser with the designated architecture (HASWELL, WESTMERE" << endl;
out << " or ARM64). By default, detects best supported architecture." << endl;
} }
using unified_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj); void exit_usage(string message) {
using stage1_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj); cerr << message << endl;
cerr << endl;
extern unified_functype *unified_ptr; print_usage(cerr);
exit(EXIT_FAILURE);
extern stage1_functype *stage1_ptr;
int unified_machine_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj) {
if (find_marks_only) {
return simdjson::SUCCESS;
}
Architecture best_implementation = _find_best_supported_implementation();
// Selecting the best implementation
switch (best_implementation) {
#ifdef IS_X86_64
case Architecture::HASWELL:
unified_ptr = &unified_machine<Architecture::HASWELL>;
break;
case Architecture::WESTMERE:
unified_ptr = &unified_machine<Architecture::WESTMERE>;
break;
#endif
#ifdef IS_ARM64
case Architecture::ARM64:
unified_ptr = &unified_machine<Architecture::ARM64>;
break;
#endif
default:
std::cerr << "The processor is not supported by simdjson." << std::endl;
return simdjson::UNEXPECTED_ERROR;
}
return unified_ptr(buf, len, pj);
} }
// Responsible to select the best json_parse implementation struct option_struct {
int find_structural_bits_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj) { vector<char*> files;
Architecture best_implementation = _find_best_supported_implementation(); Architecture architecture = Architecture::UNSUPPORTED;
// Selecting the best implementation bool stage1_only = false;
switch (best_implementation) {
#ifdef IS_X86_64 int32_t iterations = 200;
case Architecture::HASWELL: int32_t iteration_step = 50;
stage1_ptr = &find_structural_bits<Architecture::HASWELL>;
break; bool verbose = false;
case Architecture::WESTMERE: bool tabbed_output = false;
stage1_ptr = &find_structural_bits<Architecture::WESTMERE>;
break; option_struct(int argc, char **argv) {
#endif #ifndef _MSC_VER
#ifdef IS_ARM64 int c;
case Architecture::ARM64:
stage1_ptr = &find_structural_bits<Architecture::ARM64>; while ((c = getopt(argc, argv, "vtn:i:a:s:")) != -1) {
break; switch (c) {
#endif case 'n':
default: iterations = atoi(optarg);
std::cerr << "The processor is not supported by simdjson." << std::endl; break;
return simdjson::UNEXPECTED_ERROR; case 'i':
iteration_step = atoi(optarg);
break;
case 't':
tabbed_output = true;
break;
case 'v':
verbose = true;
break;
case 'a':
architecture = parse_architecture(optarg);
if (architecture == Architecture::UNSUPPORTED) {
exit_usage(string("Unsupported option value -a ") + optarg + ": expected -a HASWELL, WESTMERE or ARM64");
}
break;
case 's':
if (!strcmp(optarg, "stage1")) {
stage1_only = true;
} else if (!strcmp(optarg, "all")) {
stage1_only = false;
} else {
exit_usage(string("Unsupported option value -s ") + optarg + ": expected -s stage1 or all");
}
break;
default:
exit_error("Unexpected argument " + c);
}
}
#else
int optind = 1;
#endif
// If architecture is not specified, pick the best supported architecture by default
if (architecture == Architecture::UNSUPPORTED) {
architecture = find_best_supported_architecture();
}
// All remaining arguments are considered to be files
for (int i=optind; i<argc; i++) {
files.push_back(argv[i]);
}
if (files.empty()) {
exit_usage("No files specified");
}
// Keeps the numbers the same for CI (old ./parse didn't have a two-stage loop)
if (files.size() == 1) {
iteration_step = iterations;
}
#if !defined(__linux__)
if (tabbed_output) {
exit_error("tabbed_output (-t) flag only works under linux.\n");
}
#endif
} }
};
return stage1_ptr(buf, len, pj);
}
stage1_functype *stage1_ptr = &find_structural_bits_dispatch;
unified_functype *unified_ptr = &unified_machine_dispatch;
} // namespace simdjson
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
// Read options
#ifndef _MSC_VER exe_name = argv[0];
int c; option_struct options(argc, argv);
if (options.verbose) {
while ((c = getopt(argc, argv, "1vdtn:w:fs")) != -1) { verbose_stream = &cout;
switch (c) {
case 'n':
iterations = atoi(optarg);
break;
case 'w':
warmup_iterations = atoi(optarg);
break;
case 's':
force_sse = true;
break;
case 't':
just_data = true;
break;
case 'v':
verbose = true;
break;
case 'd':
dump = true;
break;
case 'j':
json_output = true;
break;
case '1':
force_one_iteration = true;
break;
case 'f':
find_marks_only = true;
break;
default:
abort();
}
}
#else
int optind = 1;
#endif
if (optind >= argc) {
std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
exit(1);
}
const char *filename = argv[optind];
if (optind + 1 < argc) {
std::cerr << "warning: ignoring everything after " << argv[optind + 1]
<< std::endl;
}
if (verbose) {
std::cout << "[verbose] loading " << filename << std::endl;
}
simdjson::padded_string p;
try {
simdjson::get_corpus(filename).swap(p);
} catch (const std::exception &) { // caught by reference to base
std::cout << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE;
}
if (verbose) {
std::cout << "[verbose] loaded " << filename << " (" << p.size()
<< " bytes)" << std::endl;
}
if (iterations == -1) {
#if defined(DEBUG)
iterations = 1;
#else
iterations = force_one_iteration ? 1 : (p.size() < 1 * 1000 * 1000 ? 1000 : 10);
#endif
}
if (warmup_iterations == -1) {
#if defined(DEBUG)
warmup_iterations = 0;
#else
warmup_iterations = (p.size() < 1 * 1000 * 1000) ? 10 : 1;
#endif
} }
std::vector<double> res; // Start collecting events. We put this early so if it prints an error message, it's the
res.resize(iterations); // first thing printed.
if (!just_data) event_collector collector;
printf("number of iterations %u \n", iterations);
#if !defined(__linux__)
#define SQUASH_COUNTERS
if (just_data) {
printf("just_data (-t) flag only works under linux.\n");
}
#endif
{ // practice run
simdjson::ParsedJson pj;
bool allocok = pj.allocate_capacity(p.size());
if (allocok) {
simdjson::stage1_ptr((const uint8_t *)p.data(), p.size(), pj);
simdjson::unified_ptr((const uint8_t *)p.data(), p.size(), pj);
}
}
#ifndef SQUASH_COUNTERS
std::vector<int> evts;
evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
evts.push_back(PERF_COUNT_HW_BRANCH_MISSES);
evts.push_back(PERF_COUNT_HW_CACHE_REFERENCES);
evts.push_back(PERF_COUNT_HW_CACHE_MISSES);
LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
std::vector<unsigned long long> results;
results.resize(evts.size());
unsigned long cy0 = 0, cy1 = 0, cy2 = 0;
unsigned long cl0 = 0, cl1 = 0, cl2 = 0;
unsigned long mis0 = 0, mis1 = 0, mis2 = 0;
unsigned long cref0 = 0, cref1 = 0, cref2 = 0;
unsigned long cmis0 = 0, cmis1 = 0, cmis2 = 0;
#endif
// Do warmup iterations // Print preamble
bool isok = true; if (!options.tabbed_output) {
for (int32_t i = 0; i < warmup_iterations; i++) { printf("number of iterations %u \n", options.iterations);
if (verbose) {
std::cout << "[verbose] warmup iteration # " << i << std::endl;
}
simdjson::ParsedJson pj;
bool allocok = pj.allocate_capacity(p.size());
if (!allocok) {
std::cerr << "failed to allocate memory" << std::endl;
return EXIT_FAILURE;
}
isok = (simdjson::stage1_ptr((const uint8_t *)p.data(), p.size(), pj) ==
simdjson::SUCCESS);
isok = isok &&
(simdjson::SUCCESS ==
simdjson::unified_ptr((const uint8_t *)p.data(), p.size(), pj));
if (!isok) {
std::cerr << pj.get_error_message() << std::endl;
std::cerr << "Could not parse. " << std::endl;
return EXIT_FAILURE;
}
} }
#ifndef SQUASH_COUNTERS // Set up benchmarkers by reading all files
for (int32_t i = 0; i < iterations; i++) { json_parser parser(options.architecture);
if (verbose) { vector<benchmarker*> benchmarkers;
std::cout << "[verbose] iteration # " << i << std::endl; for (size_t i=0; i<options.files.size(); i++) {
} benchmarkers.push_back(new benchmarker(options.files[i], parser, collector));
unified.start();
simdjson::ParsedJson pj;
bool allocok = pj.allocate_capacity(p.size());
if (!allocok) {
std::cerr << "failed to allocate memory" << std::endl;
return EXIT_FAILURE;
}
unified.end(results);
cy0 += results[0];
cl0 += results[1];
mis0 += results[2];
cref0 += results[3];
cmis0 += results[4];
if (verbose) {
std::cout << "[verbose] allocated memory for parsed JSON " << std::endl;
}
unified.start();
isok = (simdjson::stage1_ptr((const uint8_t *)p.data(), p.size(), pj) ==
simdjson::SUCCESS);
unified.end(results);
cy1 += results[0];
cl1 += results[1];
mis1 += results[2];
cref1 += results[3];
cmis1 += results[4];
if (!isok) {
std::cout << "Failed during stage 1" << std::endl;
break;
}
unified.start();
isok = isok &&
(simdjson::SUCCESS ==
simdjson::unified_ptr((const uint8_t *)p.data(), p.size(), pj));
unified.end(results);
cy2 += results[0];
cl2 += results[1];
mis2 += results[2];
cref2 += results[3];
cmis2 += results[4];
if (!isok) {
std::cout << "Failed during stage 2" << std::endl;
break;
}
} }
#endif
// we do it again, this time just measuring the elapsed time // Run the benchmarks
for (int32_t i = 0; i < iterations; i++) { progress_bar progress(options.iterations, 50);
if (verbose) { // Put the if (options.stage1_only) *outside* the loop so that run_iterations will be optimized
std::cout << "[verbose] iteration # " << i << std::endl; if (options.stage1_only) {
} for (int iteration = 0; iteration < options.iterations; iteration += options.iteration_step) {
simdjson::ParsedJson pj; if (!options.verbose) { progress.print(iteration); }
bool allocok = pj.allocate_capacity(p.size()); // Benchmark each file once per iteration
if (!allocok) { for (size_t f=0; f<options.files.size(); f++) {
std::cerr << "failed to allocate memory" << std::endl; verbose() << "[verbose] " << benchmarkers[f]->filename << " iterations #" << iteration << "-" << (iteration+options.iteration_step-1) << endl;
return EXIT_FAILURE; benchmarkers[f]->run_iterations(options.iteration_step, true);
}
if (verbose) {
std::cout << "[verbose] allocated memory for parsed JSON " << std::endl;
}
auto start = std::chrono::steady_clock::now();
isok = (simdjson::stage1_ptr((const uint8_t *)p.data(), p.size(), pj) ==
simdjson::SUCCESS);
isok = isok &&
(simdjson::SUCCESS ==
simdjson::unified_ptr((const uint8_t *)p.data(), p.size(), pj));
auto end = std::chrono::steady_clock::now();
std::chrono::duration<double> secs = end - start;
res[i] = secs.count();
if (!isok) {
std::cerr << pj.get_error_message() << std::endl;
std::cerr << "Could not parse. " << std::endl;
return EXIT_FAILURE;
}
}
simdjson::ParsedJson pj =
build_parsed_json(p); // do the parsing again to get the stats
if (!pj.is_valid()) {
std::cerr << pj.get_error_message() << std::endl;
std::cerr << "Could not parse. " << std::endl;
return EXIT_FAILURE;
}
double min_result = *min_element(res.begin(), res.end());
double speedinGBs = (p.size()) / (min_result * 1000000000.0);
#ifndef SQUASH_COUNTERS
unsigned long total = cy0 + cy1 + cy2;
if (just_data) {
float cpb0 = (double)cy0 / (iterations * p.size());
float cpb1 = (double)cy1 / (iterations * p.size());
float cpb2 = (double)cy2 / (iterations * p.size());
float cpbtotal = (double)total / (iterations * p.size());
char *newfile = (char *)malloc(strlen(filename) + 1);
if (newfile == NULL) {
return EXIT_FAILURE;
}
::strcpy(newfile, filename);
char *snewfile = ::basename(newfile);
size_t nl = strlen(snewfile);
for (size_t j = nl - 1; j > 0; j--) {
if (snewfile[j] == '.') {
snewfile[j] = '\0';
break;
} }
} }
printf("\"%s\"\t%f\t%f\t%f\t%f\t%f\n", snewfile, cpb0, cpb1, cpb2, cpbtotal,
speedinGBs);
free(newfile);
} else { } else {
printf("number of bytes %ld number of structural chars %u ratio %.3f\n", for (int iteration = 0; iteration < options.iterations; iteration += options.iteration_step) {
p.size(), pj.n_structural_indexes, if (!options.verbose) { progress.print(iteration); }
(double)pj.n_structural_indexes / p.size()); // Benchmark each file once per iteration
printf("mem alloc instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: " for (size_t f=0; f<options.files.size(); f++) {
"%.2f mis. branches: %10lu (cycles/mis.branch %.2f) cache accesses: " verbose() << "[verbose] " << benchmarkers[f]->filename << " iterations #" << iteration << "-" << (iteration+options.iteration_step-1) << endl;
"%10lu (failure %10lu)\n", benchmarkers[f]->run_iterations(options.iteration_step, false);
cl0 / iterations, cy0 / iterations, 100. * cy0 / total, }
(double)cl0 / cy0, mis0 / iterations, (double)cy0 / mis0, }
cref1 / iterations, cmis0 / iterations); }
printf(" mem alloc runs at %.2f cycles per input byte.\n", if (!options.verbose) { progress.erase(); }
(double)cy0 / (iterations * p.size()));
printf("stage 1 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: "
"%.2f mis. branches: %10lu (cycles/mis.branch %.2f) cache accesses: "
"%10lu (failure %10lu)\n",
cl1 / iterations, cy1 / iterations, 100. * cy1 / total,
(double)cl1 / cy1, mis1 / iterations, (double)cy1 / mis1,
cref1 / iterations, cmis1 / iterations);
printf(" stage 1 runs at %.2f cycles per input byte.\n",
(double)cy1 / (iterations * p.size()));
printf("stage 2 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: " for (size_t i=0; i<options.files.size(); i++) {
"%.2f mis. branches: %10lu (cycles/mis.branch %.2f) cache " benchmarkers[i]->print(options.tabbed_output);
"accesses: %10lu (failure %10lu)\n", delete benchmarkers[i];
cl2 / iterations, cy2 / iterations, 100. * cy2 / total, }
(double)cl2 / cy2, mis2 / iterations, (double)cy2 / mis2,
cref2 / iterations, cmis2 / iterations);
printf(" stage 2 runs at %.2f cycles per input byte and ",
(double)cy2 / (iterations * p.size()));
printf("%.2f cycles per structural character.\n",
(double)cy2 / (iterations * pj.n_structural_indexes));
printf(" all stages: %.2f cycles per input byte.\n",
(double)total / (iterations * p.size()));
printf("Estimated average frequency: %.3f GHz.\n",
(double)total / (iterations * min_result * 1000000000.0));
}
#endif
if (!just_data) {
std::cout << "Min: " << min_result << " bytes read: " << p.size()
<< " Gigabytes/second: " << speedinGBs << std::endl;
}
if (json_output) {
isok = isok && pj.print_json(std::cout);
}
if (dump) {
isok = isok && pj.dump_raw_tape(std::cout);
}
if (!isok) {
fprintf(stderr, " Parsing failed. \n ");
return EXIT_FAILURE;
}
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }

View File

@ -6,10 +6,10 @@
namespace simdjson { namespace simdjson {
// Represents the minimal architecture that would support an implementation // Represents the minimal architecture that would support an implementation
enum class Architecture { enum class Architecture {
UNSUPPORTED,
WESTMERE, WESTMERE,
HASWELL, HASWELL,
ARM64, ARM64,
NONE,
// TODO remove 'native' in favor of runtime dispatch? // TODO remove 'native' in favor of runtime dispatch?
// the 'native' enum class value should point at a good default on the current // the 'native' enum class value should point at a good default on the current
// machine // machine
@ -20,6 +20,9 @@ enum class Architecture {
#endif #endif
}; };
Architecture find_best_supported_architecture();
Architecture parse_architecture(char *architecture);
enum ErrorValues { enum ErrorValues {
SUCCESS = 0, SUCCESS = 0,
SUCCESS_AND_HAS_MORE, //No errors and buffer still has more data SUCCESS_AND_HAS_MORE, //No errors and buffer still has more data

View File

@ -29,7 +29,7 @@ int json_parse(const char *buf, size_t len, ParsedJson &pj,
realloc); realloc);
} }
Architecture find_best_supported_implementation() { Architecture find_best_supported_architecture() {
constexpr uint32_t haswell_flags = constexpr uint32_t haswell_flags =
instruction_set::AVX2 | instruction_set::PCLMULQDQ | instruction_set::AVX2 | instruction_set::PCLMULQDQ |
instruction_set::BMI1 | instruction_set::BMI2; instruction_set::BMI1 | instruction_set::BMI2;
@ -45,13 +45,20 @@ Architecture find_best_supported_implementation() {
if (supports & instruction_set::NEON) if (supports & instruction_set::NEON)
return Architecture::ARM64; return Architecture::ARM64;
return Architecture::NONE; return Architecture::UNSUPPORTED;
}
Architecture parse_architecture(char *architecture) {
if (!strcmp(architecture, "HASWELL")) { return Architecture::HASWELL; }
if (!strcmp(architecture, "WESTMERE")) { return Architecture::WESTMERE; }
if (!strcmp(architecture, "ARM64")) { return Architecture::ARM64; }
return Architecture::UNSUPPORTED;
} }
// Responsible to select the best json_parse implementation // Responsible to select the best json_parse implementation
int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj, int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj,
bool realloc) { bool realloc) {
Architecture best_implementation = find_best_supported_implementation(); Architecture best_implementation = find_best_supported_architecture();
// Selecting the best implementation // Selecting the best implementation
switch (best_implementation) { switch (best_implementation) {
#ifdef IS_X86_64 #ifdef IS_X86_64