Measure impact of utf-8 blocks and structurals per block directly
This commit is contained in:
parent
102262c7ab
commit
e2f349e7bd
|
@ -53,6 +53,7 @@ objs
|
|||
# Build outputs (TODO build to a subdir so we can exclude that instead)
|
||||
/allparserscheckfile
|
||||
/basictests
|
||||
/benchfeatures
|
||||
/benchmark/parse
|
||||
/benchmark/perfdiff
|
||||
/benchmark/statisticalmodel
|
||||
|
@ -86,6 +87,9 @@ objs
|
|||
/tools/jsonstats
|
||||
/tools/minify
|
||||
|
||||
# Don't check in generated examples
|
||||
/jsonexamples/generated
|
||||
|
||||
# C++ ignore from https://github.com/github/gitignore/blob/master/C%2B%2B.gitignore
|
||||
|
||||
# Prerequisites
|
||||
|
|
11
Makefile
11
Makefile
|
@ -126,6 +126,12 @@ run_issue150_sh: allparserscheckfile
|
|||
run_testjson2json_sh: minify json2json
|
||||
./scripts/testjson2json.sh
|
||||
|
||||
generate_featurejson:
|
||||
ruby ./benchmark/genfeaturejson.rb
|
||||
|
||||
run_benchfeatures: benchfeatures generate_featurejson
|
||||
./benchfeatures -n 1000
|
||||
|
||||
test: run_basictests run_jsoncheck run_numberparsingcheck run_integer_tests run_stringparsingcheck run_jsonstream_test run_pointercheck run_testjson2json_sh run_issue150_sh run_jsoncheck_noavx
|
||||
@echo "It looks like the code is good!"
|
||||
|
||||
|
@ -145,9 +151,12 @@ submodules:
|
|||
|
||||
$(JSON_INCLUDE) $(SAJSON_INCLUDE) $(RAPIDJSON_INCLUDE) $(JSON11_INCLUDE) $(FASTJSON_INCLUDE) $(GASON_INCLUDE) $(UJSON4C_INCLUDE) $(CJSON_INCLUDE) $(JSMN_INCLUDE) : submodules
|
||||
|
||||
parse: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
|
||||
parse: benchmark/parse.cpp benchmark/json_parser.h benchmark/event_counter.h benchmark/benchmarker.h $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o parse $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
|
||||
|
||||
benchfeatures: benchmark/benchfeatures.cpp benchmark/json_parser.h benchmark/event_counter.h benchmark/benchmarker.h $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o benchfeatures $(LIBFILES) benchmark/benchfeatures.cpp $(LIBFLAGS)
|
||||
|
||||
perfdiff: benchmark/perfdiff.cpp
|
||||
$(CXX) $(CXXFLAGS) -o perfdiff benchmark/perfdiff.cpp $(LIBFLAGS)
|
||||
|
||||
|
|
|
@ -0,0 +1,326 @@
|
|||
#include "json_parser.h"
|
||||
#include "event_counter.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#ifndef _MSC_VER
|
||||
#include <dirent.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <cinttypes>
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "linux-perf-events.h"
|
||||
#ifdef __linux__
|
||||
#include <libgen.h>
|
||||
#endif
|
||||
//#define DEBUG
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/isadetection.h"
|
||||
#include "simdjson/jsonioutil.h"
|
||||
#include "simdjson/jsonparser.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
#include "simdjson/stage2_build_tape.h"
|
||||
|
||||
#include <functional>
|
||||
|
||||
#include "benchmarker.h"
|
||||
|
||||
using namespace simdjson;
|
||||
using std::cerr;
|
||||
using std::cout;
|
||||
using std::endl;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::vector;
|
||||
using std::ostream;
|
||||
using std::ofstream;
|
||||
using std::exception;
|
||||
|
||||
// Stash the exe_name in main() for functions to use
|
||||
char* exe_name;
|
||||
|
||||
// Write the command-line help text for this benchmark tool to `out`.
void print_usage(ostream& out) {
  // First line interpolates the executable name; the rest are fixed text.
  out << "Usage: " << exe_name << " [-v] [-n #] [-s STAGE] [-a ARCH]" << endl;
  static const char* const help_lines[] = {
    "",
    "Runs the parser against jsonexamples/generated json files in a loop, measuring speed and other statistics.",
    "",
    "Options:",
    "",
    "-n # - Number of iterations per file. Default: 400",
    "-i # - Number of times to iterate a single file before moving to the next. Default: 20",
    "-v - Verbose output.",
    "-s STAGE - Stop after the given stage.",
    "  -s stage1 - Stop after find_structural_bits.",
    "  -s all - Run all stages.",
    "-a ARCH - Use the parser with the designated architecture (HASWELL, WESTMERE",
    "  or ARM64). By default, detects best supported architecture.",
  };
  for (const char* line : help_lines) {
    out << line << endl;
  }
}
|
||||
|
||||
// Report a usage error on stderr, show the help text, and terminate
// the process with a failure status. Does not return.
void exit_usage(string message) {
  cerr << message << endl << endl;
  print_usage(cerr);
  exit(EXIT_FAILURE);
}
|
||||
|
||||
struct option_struct {
|
||||
Architecture architecture = Architecture::UNSUPPORTED;
|
||||
bool stage1_only = false;
|
||||
|
||||
int32_t iterations = 400;
|
||||
int32_t iteration_step = 50;
|
||||
|
||||
bool verbose = false;
|
||||
|
||||
option_struct(int argc, char **argv) {
|
||||
#ifndef _MSC_VER
|
||||
int c;
|
||||
|
||||
while ((c = getopt(argc, argv, "vtn:i:a:s:")) != -1) {
|
||||
switch (c) {
|
||||
case 'n':
|
||||
iterations = atoi(optarg);
|
||||
break;
|
||||
case 'i':
|
||||
iteration_step = atoi(optarg);
|
||||
break;
|
||||
case 'v':
|
||||
verbose = true;
|
||||
break;
|
||||
case 'a':
|
||||
architecture = parse_architecture(optarg);
|
||||
if (architecture == Architecture::UNSUPPORTED) {
|
||||
exit_usage(string("Unsupported option value -a ") + optarg + ": expected -a HASWELL, WESTMERE or ARM64");
|
||||
}
|
||||
break;
|
||||
case 's':
|
||||
if (!strcmp(optarg, "stage1")) {
|
||||
stage1_only = true;
|
||||
} else if (!strcmp(optarg, "all")) {
|
||||
stage1_only = false;
|
||||
} else {
|
||||
exit_usage(string("Unsupported option value -s ") + optarg + ": expected -s stage1 or all");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
exit_error("Unexpected argument " + c);
|
||||
}
|
||||
}
|
||||
#else
|
||||
int optind = 1;
|
||||
#endif
|
||||
|
||||
// If architecture is not specified, pick the best supported architecture by default
|
||||
if (architecture == Architecture::UNSUPPORTED) {
|
||||
architecture = find_best_supported_architecture();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
double actual(const benchmarker& feature) {
|
||||
return feature.stage1.best.elapsed_ns() / feature.stats->blocks;
|
||||
}
|
||||
double diff(const benchmarker& feature, const benchmarker& struct7) {
|
||||
if (feature.stats->blocks == struct7.stats->blocks) {
|
||||
return (feature.stage1.best.elapsed_ns() - struct7.stage1.best.elapsed_ns()) / struct7.stats->blocks;
|
||||
} else {
|
||||
return (feature.stage1.best.elapsed_ns() / feature.stats->blocks) - (struct7.stage1.best.elapsed_ns() / struct7.stats->blocks);
|
||||
}
|
||||
}
|
||||
double diff_miss(const benchmarker& feature, const benchmarker& struct7) {
|
||||
// There are roughly 2650 branch mispredicts, so we have to scale it so it represents a per block amount
|
||||
return diff(feature, struct7) * 10000.0 / 2650.0;
|
||||
}
|
||||
|
||||
struct feature_benchmarker {
|
||||
benchmarker utf8;
|
||||
benchmarker utf8_miss;
|
||||
benchmarker empty;
|
||||
benchmarker empty_miss;
|
||||
benchmarker struct7;
|
||||
benchmarker struct7_miss;
|
||||
benchmarker struct7_full;
|
||||
benchmarker struct15;
|
||||
benchmarker struct15_miss;
|
||||
benchmarker struct23;
|
||||
benchmarker struct23_miss;
|
||||
|
||||
feature_benchmarker(json_parser& parser, event_collector& collector) :
|
||||
utf8 ("jsonexamples/generated/utf-8.json", parser, collector),
|
||||
utf8_miss ("jsonexamples/generated/utf-8-miss.json", parser, collector),
|
||||
empty ("jsonexamples/generated/0-structurals.json", parser, collector),
|
||||
empty_miss ("jsonexamples/generated/0-structurals-miss.json", parser, collector),
|
||||
struct7 ("jsonexamples/generated/7-structurals.json", parser, collector),
|
||||
struct7_miss ("jsonexamples/generated/7-structurals-miss.json", parser, collector),
|
||||
struct7_full ("jsonexamples/generated/7-structurals-full.json", parser, collector),
|
||||
struct15 ("jsonexamples/generated/15-structurals.json", parser, collector),
|
||||
struct15_miss("jsonexamples/generated/15-structurals-miss.json", parser, collector),
|
||||
struct23 ("jsonexamples/generated/23-structurals.json", parser, collector),
|
||||
struct23_miss("jsonexamples/generated/23-structurals-miss.json", parser, collector)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
really_inline void run_iterations(size_t iterations, bool stage1_only=false) {
|
||||
struct7.run_iterations(iterations, stage1_only);
|
||||
struct7_miss.run_iterations(iterations, stage1_only);
|
||||
struct7_full.run_iterations(iterations, stage1_only);
|
||||
utf8.run_iterations(iterations, stage1_only);
|
||||
utf8_miss.run_iterations(iterations, stage1_only);
|
||||
empty.run_iterations(iterations, stage1_only);
|
||||
empty_miss.run_iterations(iterations, stage1_only);
|
||||
struct15.run_iterations(iterations, stage1_only);
|
||||
struct15_miss.run_iterations(iterations, stage1_only);
|
||||
struct23.run_iterations(iterations, stage1_only);
|
||||
struct23_miss.run_iterations(iterations, stage1_only);
|
||||
}
|
||||
|
||||
void print() {
|
||||
printf("base (ns/block)");
|
||||
printf(",struct 1-7");
|
||||
printf(",struct 1-7 miss");
|
||||
printf(",utf-8");
|
||||
printf(",utf-8 miss");
|
||||
printf(",struct 8-15");
|
||||
printf(",struct 8-15 miss");
|
||||
printf(",struct 16+");
|
||||
printf(",struct 16+ miss");
|
||||
printf("\n");
|
||||
|
||||
printf("%g", actual(empty));
|
||||
printf(",%+g", diff(struct7, empty));
|
||||
printf(",%+g", diff(struct7_miss, struct7));
|
||||
printf(",%+g", diff(utf8, struct7));
|
||||
printf(",%+g", diff(utf8_miss, utf8));
|
||||
printf(",%+g", diff(struct15, struct7));
|
||||
printf(",%+g", diff(struct15_miss, struct15));
|
||||
printf(",%+g", diff(struct23, struct15));
|
||||
printf(",%+g", diff(struct23_miss, struct23));
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
double cost_per_block(benchmarker& feature, size_t feature_blocks, benchmarker& base) {
|
||||
return (feature.stage1.best.elapsed_ns() - base.stage1.best.elapsed_ns()) / feature_blocks;
|
||||
}
|
||||
|
||||
// Base cost of any block (including empty ones)
|
||||
double base_cost() {
|
||||
return (empty.stage1.best.elapsed_ns() / empty.stats->blocks);
|
||||
}
|
||||
// Extra cost of a 1-7 structural block over an empty block
|
||||
double struct1_7_cost() {
|
||||
return cost_per_block(struct7, struct7.stats->blocks_with_1_structural, empty);
|
||||
}
|
||||
// Extra cost of an 1-7-structural miss
|
||||
double struct1_7_miss_cost() {
|
||||
return cost_per_block(struct7_miss, struct7_miss.stats->blocks_with_1_structural, struct7);
|
||||
}
|
||||
// Extra cost of an 8-15 structural block over a 1-7 structural block
|
||||
double struct8_15_cost() {
|
||||
return cost_per_block(struct15, struct15.stats->blocks_with_8_structurals, struct7);
|
||||
}
|
||||
// Extra cost of an 8-15-structural miss over a 1-7 miss
|
||||
double struct8_15_miss_cost() {
|
||||
return cost_per_block(struct15_miss, struct15_miss.stats->blocks_with_8_structurals_flipped, struct15);
|
||||
}
|
||||
// Extra cost of a 16+-structural block over an 8-15 structural block (actual varies based on # of structurals!)
|
||||
double struct16_cost() {
|
||||
return cost_per_block(struct23, struct23.stats->blocks_with_16_structurals, struct15);
|
||||
}
|
||||
// Extra cost of a 16-structural miss over an 8-15 miss
|
||||
double struct16_miss_cost() {
|
||||
return cost_per_block(struct23_miss, struct23_miss.stats->blocks_with_16_structurals_flipped, struct23);
|
||||
}
|
||||
// Extra cost of having UTF-8 in a block
|
||||
double utf8_cost() {
|
||||
return cost_per_block(utf8, utf8.stats->blocks_with_utf8, struct7_full);
|
||||
}
|
||||
// Extra cost of a UTF-8 miss
|
||||
double utf8_miss_cost() {
|
||||
return cost_per_block(utf8_miss, utf8_miss.stats->blocks_with_utf8_flipped, utf8);
|
||||
}
|
||||
|
||||
double calc_expected(benchmarker& file) {
|
||||
// Expected base ns/block (empty)
|
||||
json_stats& stats = *file.stats;
|
||||
double expected = base_cost() * stats.blocks;
|
||||
expected += struct1_7_cost() * stats.blocks_with_1_structural;
|
||||
expected += struct1_7_miss_cost() * stats.blocks_with_1_structural_flipped;
|
||||
expected += utf8_cost() * stats.blocks_with_utf8;
|
||||
expected += utf8_miss_cost() * stats.blocks_with_utf8_flipped;
|
||||
expected += struct8_15_cost() * stats.blocks_with_8_structurals;
|
||||
expected += struct8_15_miss_cost() * stats.blocks_with_8_structurals_flipped;
|
||||
expected += struct16_cost() * stats.blocks_with_16_structurals;
|
||||
expected += struct16_miss_cost() * stats.blocks_with_16_structurals_flipped;
|
||||
return expected / stats.blocks;
|
||||
}
|
||||
};
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
// Read options
|
||||
exe_name = argv[0];
|
||||
option_struct options(argc, argv);
|
||||
if (options.verbose) {
|
||||
verbose_stream = &cout;
|
||||
}
|
||||
|
||||
// Initialize the event collector. We put this early so if it prints an error message, it's the
|
||||
// first thing printed.
|
||||
event_collector collector;
|
||||
|
||||
// Set up benchmarkers by reading all files
|
||||
json_parser parser(options.architecture);
|
||||
|
||||
feature_benchmarker features(parser, collector);
|
||||
benchmarker gsoc_2018("jsonexamples/gsoc-2018.json", parser, collector);
|
||||
benchmarker twitter("jsonexamples/twitter.json", parser, collector);
|
||||
benchmarker random("jsonexamples/random.json", parser, collector);
|
||||
|
||||
// Run the benchmarks
|
||||
progress_bar progress(options.iterations, 100);
|
||||
// Put the if (options.stage1_only) *outside* the loop so that run_iterations will be optimized
|
||||
if (options.stage1_only) {
|
||||
for (int iteration = 0; iteration < options.iterations; iteration += options.iteration_step) {
|
||||
if (!options.verbose) { progress.print(iteration); }
|
||||
features.run_iterations(options.iteration_step, true);
|
||||
gsoc_2018.run_iterations(options.iteration_step, true);
|
||||
twitter.run_iterations(options.iteration_step, true);
|
||||
random.run_iterations(options.iteration_step, true);
|
||||
}
|
||||
} else {
|
||||
for (int iteration = 0; iteration < options.iterations; iteration += options.iteration_step) {
|
||||
if (!options.verbose) { progress.print(iteration); }
|
||||
features.run_iterations(options.iteration_step, false);
|
||||
gsoc_2018.run_iterations(options.iteration_step, false);
|
||||
twitter.run_iterations(options.iteration_step, false);
|
||||
random.run_iterations(options.iteration_step, false);
|
||||
}
|
||||
}
|
||||
if (!options.verbose) { progress.erase(); }
|
||||
|
||||
features.print();
|
||||
|
||||
// Gauge effectiveness
|
||||
printf("gsoc-2018.json expected/actual: %g/%g\n", features.calc_expected(gsoc_2018), actual(gsoc_2018));
|
||||
printf("twitter.json expected/actual: %g/%g\n", features.calc_expected(twitter), actual(twitter));
|
||||
printf("random.json expected/actual: %g/%g\n", features.calc_expected(random), actual(random));
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
|
@ -0,0 +1,424 @@
|
|||
#ifndef __BENCHMARKER_H
|
||||
#define __BENCHMARKER_H
|
||||
|
||||
#include "json_parser.h"
|
||||
#include "event_counter.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#ifndef _MSC_VER
|
||||
#include <dirent.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <cinttypes>
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "linux-perf-events.h"
|
||||
#ifdef __linux__
|
||||
#include <libgen.h>
|
||||
#endif
|
||||
//#define DEBUG
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/isadetection.h"
|
||||
#include "simdjson/jsonioutil.h"
|
||||
#include "simdjson/jsonparser.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
#include "simdjson/stage2_build_tape.h"
|
||||
|
||||
#include <functional>
|
||||
|
||||
using namespace simdjson;
|
||||
using std::cerr;
|
||||
using std::cout;
|
||||
using std::endl;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::vector;
|
||||
using std::ostream;
|
||||
using std::ofstream;
|
||||
using std::exception;
|
||||
|
||||
// Initialize "verbose" to go nowhere. We'll read options in main() and set to cout if verbose is true.
|
||||
// Initialize "verbose" to go nowhere. We'll read options in main() and set to cout if verbose is true.
// (An unopened ofstream silently discards everything written to it.)
std::ofstream dev_null;
std::ostream *verbose_stream = &dev_null;
// Stage 1 processes the input in 64-byte chunks.
const size_t BYTES_PER_BLOCK = 64;

// Accessor for the currently-selected verbose output stream.
std::ostream& verbose() {
  return *verbose_stream;
}
|
||||
|
||||
// Print an error message to stderr and terminate the process with a failure
// status. Does not return.
// NOTE: the original also called abort() after exit(); since exit() never
// returns, that call was unreachable dead code and has been removed.
void exit_error(string message) {
  cerr << message << endl;
  exit(EXIT_FAILURE);
}
|
||||
|
||||
struct json_stats {
|
||||
size_t bytes = 0;
|
||||
size_t blocks = 0;
|
||||
size_t structurals = 0;
|
||||
size_t blocks_with_utf8 = 0;
|
||||
size_t blocks_with_utf8_flipped = 0;
|
||||
size_t blocks_with_0_structurals = 0;
|
||||
size_t blocks_with_0_structurals_flipped = 0;
|
||||
size_t blocks_with_1_structural = 0;
|
||||
size_t blocks_with_1_structural_flipped = 0;
|
||||
size_t blocks_with_8_structurals = 0;
|
||||
size_t blocks_with_8_structurals_flipped = 0;
|
||||
size_t blocks_with_16_structurals = 0;
|
||||
size_t blocks_with_16_structurals_flipped = 0;
|
||||
|
||||
json_stats(const padded_string& json, const ParsedJson& pj) {
|
||||
bytes = json.size();
|
||||
blocks = bytes / BYTES_PER_BLOCK;
|
||||
if (bytes % BYTES_PER_BLOCK > 0) { blocks++; } // Account for remainder block
|
||||
structurals = pj.n_structural_indexes-1;
|
||||
|
||||
// Calculate stats on blocks that will trigger utf-8 if statements / mispredictions
|
||||
bool last_block_has_utf8 = false;
|
||||
for (size_t block=0; block<blocks; block++) {
|
||||
// Find utf-8 in the block
|
||||
size_t block_start = block*BYTES_PER_BLOCK;
|
||||
size_t block_end = block_start+BYTES_PER_BLOCK;
|
||||
if (block_end > json.size()) { block_end = json.size(); }
|
||||
bool block_has_utf8 = false;
|
||||
for (size_t i=block_start; i<block_end; i++) {
|
||||
if (json.data()[i] & 0x80) {
|
||||
block_has_utf8 = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (block_has_utf8) {
|
||||
blocks_with_utf8++;
|
||||
}
|
||||
if (block > 0 && last_block_has_utf8 != block_has_utf8) {
|
||||
blocks_with_utf8_flipped++;
|
||||
}
|
||||
last_block_has_utf8 = block_has_utf8;
|
||||
}
|
||||
|
||||
// Calculate stats on blocks that will trigger structural count if statements / mispredictions
|
||||
bool last_block_has_0_structurals = false;
|
||||
bool last_block_has_1_structural = false;
|
||||
bool last_block_has_8_structurals = false;
|
||||
bool last_block_has_16_structurals = false;
|
||||
size_t structural=0;
|
||||
for (size_t block=0; block<blocks; block++) {
|
||||
// Count structurals in the block
|
||||
int block_structurals=0;
|
||||
while (structural < pj.n_structural_indexes && pj.structural_indexes[structural] < (block+1)*BYTES_PER_BLOCK) {
|
||||
block_structurals++;
|
||||
structural++;
|
||||
}
|
||||
|
||||
bool block_has_0_structurals = block_structurals == 0;
|
||||
if (block_has_0_structurals) {
|
||||
blocks_with_0_structurals++;
|
||||
}
|
||||
if (block > 0 && last_block_has_0_structurals != block_has_0_structurals) {
|
||||
blocks_with_0_structurals_flipped++;
|
||||
}
|
||||
last_block_has_0_structurals = block_has_0_structurals;
|
||||
|
||||
bool block_has_1_structural = block_structurals >= 1;
|
||||
if (block_has_1_structural) {
|
||||
blocks_with_1_structural++;
|
||||
}
|
||||
if (block > 0 && last_block_has_1_structural != block_has_1_structural) {
|
||||
blocks_with_1_structural_flipped++;
|
||||
}
|
||||
last_block_has_1_structural = block_has_1_structural;
|
||||
|
||||
bool block_has_8_structurals = block_structurals >= 8;
|
||||
if (block_has_8_structurals) {
|
||||
blocks_with_8_structurals++;
|
||||
}
|
||||
if (block > 0 && last_block_has_8_structurals != block_has_8_structurals) {
|
||||
blocks_with_8_structurals_flipped++;
|
||||
}
|
||||
last_block_has_8_structurals = block_has_8_structurals;
|
||||
|
||||
bool block_has_16_structurals = block_structurals >= 16;
|
||||
if (block_has_16_structurals) {
|
||||
blocks_with_16_structurals++;
|
||||
}
|
||||
if (block > 0 && last_block_has_16_structurals != block_has_16_structurals) {
|
||||
blocks_with_16_structurals_flipped++;
|
||||
}
|
||||
last_block_has_16_structurals = block_has_16_structurals;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Load the named JSON file into a padded_string, exiting the process if the
// file cannot be read.
padded_string load_json(const char *filename) {
  try {
    verbose() << "[verbose] loading " << filename << endl;
    padded_string json = simdjson::get_corpus(filename);
    verbose() << "[verbose] loaded " << filename << " (" << json.size() << " bytes)" << endl;
    return json;
  } catch (const exception &) { // caught by reference to base
    exit_error(string("Could not load the file ") + filename);
    // Unreachable (exit_error terminates); kept to silence missing-return warnings.
    exit(EXIT_FAILURE);
  }
}
|
||||
|
||||
// Text progress bar drawn on stderr: the constructor draws "[    ...    ]" and
// backspaces inside the brackets; print() fills in '=' characters as the
// reported value grows; erase() wipes the whole bar from the terminal.
struct progress_bar {
  int max_value;
  int total_ticks;
  double ticks_per_value;
  int next_tick;

  progress_bar(int _max_value, int _total_ticks)
    : max_value(_max_value), total_ticks(_total_ticks),
      ticks_per_value(double(_total_ticks)/_max_value), next_tick(0) {
    fputc('[', stderr);
    for (int i = 0; i < total_ticks; i++) {
      fputc(' ', stderr);
    }
    fputc(']', stderr);
    // Back up past the ']' and the spaces, to just after the '['.
    for (int i = 0; i < total_ticks + 1; i++) {
      fputc('\b', stderr);
    }
  }

  // Advance the bar so it represents `value` out of max_value.
  void print(int value) {
    double ticks = value * ticks_per_value;
    // Never draw over the closing bracket.
    if (ticks >= total_ticks) {
      ticks = total_ticks - 1;
    }
    int tick = next_tick;
    while (tick <= ticks && tick <= total_ticks) {
      fputc('=', stderr);
      tick++;
    }
    next_tick = tick;
  }

  // Wipe the bar from the terminal: back up, overwrite with spaces, back up again.
  void erase() {
    for (int i = 0; i < next_tick + 1; i++) {
      fputc('\b', stderr);
    }
    for (int i = 0; i <= total_ticks + 2; i++) {
      fputc(' ', stderr);
    }
    for (int i = 0; i <= total_ticks + 2; i++) {
      fputc('\b', stderr);
    }
  }
};
|
||||
|
||||
struct benchmarker {
|
||||
// JSON text from loading the file. Owns the memory.
|
||||
const padded_string json;
|
||||
// JSON filename
|
||||
const char *filename;
|
||||
// Parser that will parse the JSON file
|
||||
const json_parser& parser;
|
||||
// Event collector that can be turned on to measure cycles, missed branches, etc.
|
||||
event_collector& collector;
|
||||
|
||||
// Statistics about the JSON file independent of its speed (amount of utf-8, structurals, etc.).
|
||||
// Loaded on first parse.
|
||||
json_stats* stats;
|
||||
// Speed and event summary for full parse (not including allocation)
|
||||
event_aggregate all_stages;
|
||||
// Speed and event summary for stage 1
|
||||
event_aggregate stage1;
|
||||
// Speed and event summary for stage 2
|
||||
event_aggregate stage2;
|
||||
// Speed and event summary for allocation
|
||||
event_aggregate allocate_stage;
|
||||
|
||||
benchmarker(const char *_filename, const json_parser& _parser, event_collector& _collector)
|
||||
: json(load_json(_filename)), filename(_filename), parser(_parser), collector(_collector), stats(NULL) {}
|
||||
|
||||
~benchmarker() {
|
||||
if (stats) {
|
||||
delete stats;
|
||||
}
|
||||
}
|
||||
|
||||
int iterations() const {
|
||||
return all_stages.iterations;
|
||||
}
|
||||
|
||||
really_inline void run_iteration(bool stage1_only=false) {
|
||||
// Allocate ParsedJson
|
||||
collector.start();
|
||||
ParsedJson pj;
|
||||
bool allocok = pj.allocate_capacity(json.size());
|
||||
event_count allocate_count = collector.end();
|
||||
allocate_stage << allocate_count;
|
||||
|
||||
if (!allocok) {
|
||||
exit_error(string("Unable to allocate_stage ") + to_string(json.size()) + " bytes for the JSON result.");
|
||||
}
|
||||
verbose() << "[verbose] allocated memory for parsed JSON " << endl;
|
||||
|
||||
// Stage 1 (find structurals)
|
||||
collector.start();
|
||||
int result = parser.stage1((const uint8_t *)json.data(), json.size(), pj);
|
||||
event_count stage1_count = collector.end();
|
||||
stage1 << stage1_count;
|
||||
|
||||
if (result != simdjson::SUCCESS) {
|
||||
exit_error(string("Failed to parse ") + filename + " during stage 1: " + pj.get_error_message());
|
||||
}
|
||||
|
||||
// Stage 2 (unified machine)
|
||||
event_count stage2_count;
|
||||
if (!stage1_only || stats == NULL) {
|
||||
if (!stage1_only) {
|
||||
collector.start();
|
||||
}
|
||||
result = parser.stage2((const uint8_t *)json.data(), json.size(), pj);
|
||||
if (!stage1_only) {
|
||||
stage2_count = collector.end();
|
||||
stage2 << stage2_count;
|
||||
}
|
||||
|
||||
if (result != simdjson::SUCCESS) {
|
||||
exit_error(string("Failed to parse ") + filename + " during stage 2: " + pj.get_error_message());
|
||||
}
|
||||
}
|
||||
|
||||
all_stages << (stage1_count + stage2_count);
|
||||
|
||||
// Calculate stats the first time we parse
|
||||
if (stats == NULL) {
|
||||
stats = new json_stats(json, pj);
|
||||
}
|
||||
}
|
||||
|
||||
really_inline void run_iterations(size_t iterations, bool stage1_only=false) {
|
||||
for (size_t i = 0; i<iterations; i++) {
|
||||
run_iteration(stage1_only);
|
||||
}
|
||||
}
|
||||
|
||||
double stage1_ns_per_block() {
|
||||
return stage1.elapsed_ns() / stats->blocks;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void print_aggregate(const char* prefix, const T& stage) const {
|
||||
printf("%s%-13s: %8.4f ns per block (%5.1f %%) - %8.4f ns per byte - %8.4f ns per structural - %8.3f GB/s\n",
|
||||
prefix,
|
||||
"Speed",
|
||||
stage.elapsed_ns() / stats->blocks, // per block
|
||||
100.0 * stage.elapsed_sec() / all_stages.elapsed_sec(), // %
|
||||
stage.elapsed_ns() / stats->bytes, // per byte
|
||||
stage.elapsed_ns() / stats->structurals, // per structural
|
||||
(json.size() / 1000000000.0) / stage.elapsed_sec() // GB/s
|
||||
);
|
||||
|
||||
if (collector.has_events()) {
|
||||
printf("%s%-13s: %2.3f per block (%5.2f %%) - %2.3f per byte - %2.3f per structural - %2.3f GHz est. frequency\n",
|
||||
prefix,
|
||||
"Cycles",
|
||||
stage.cycles() / stats->blocks,
|
||||
100.0 * stage.cycles() / all_stages.cycles(),
|
||||
stage.cycles() / stats->bytes,
|
||||
stage.cycles() / stats->structurals,
|
||||
(stage.cycles() / stage.elapsed_sec()) / 1000000000.0
|
||||
);
|
||||
|
||||
printf("%s%-13s: %2.2f per block (%5.2f %%) - %2.2f per byte - %2.2f per structural - %2.2f per cycle\n",
|
||||
prefix,
|
||||
"Instructions",
|
||||
stage.instructions() / stats->blocks,
|
||||
100.0 * stage.instructions() / all_stages.instructions(),
|
||||
stage.instructions() / stats->bytes,
|
||||
stage.instructions() / stats->structurals,
|
||||
stage.instructions() / stage.cycles()
|
||||
);
|
||||
|
||||
// NOTE: removed cycles/miss because it is a somewhat misleading stat
|
||||
printf("%s%-13s: %2.2f branch misses (%5.2f %%) - %2.2f cache misses (%5.2f %%) - %2.2f cache references\n",
|
||||
prefix,
|
||||
"Misses",
|
||||
stage.branch_misses(),
|
||||
100.0 * stage.branch_misses() / all_stages.branch_misses(),
|
||||
stage.cache_misses(),
|
||||
100.0 * stage.cache_misses() / all_stages.cache_misses(),
|
||||
stage.cache_references()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
void print(bool tabbed_output) const {
|
||||
if (tabbed_output) {
|
||||
char* filename_copy = (char*)malloc(strlen(filename)+1);
|
||||
strcpy(filename_copy, filename);
|
||||
#if defined(__linux__)
|
||||
char* base = ::basename(filename_copy);
|
||||
#else
|
||||
char* base = filename_copy;
|
||||
#endif
|
||||
if (strlen(base) >= 5 && !strcmp(base+strlen(base)-5, ".json")) {
|
||||
base[strlen(base)-5] = '\0';
|
||||
}
|
||||
|
||||
double gb = json.size() / 1000000000.0;
|
||||
if (collector.has_events()) {
|
||||
printf("\"%s\"\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n",
|
||||
base,
|
||||
allocate_stage.best.cycles() / json.size(),
|
||||
stage1.best.cycles() / json.size(),
|
||||
stage2.best.cycles() / json.size(),
|
||||
all_stages.best.cycles() / json.size(),
|
||||
gb / all_stages.best.elapsed_sec(),
|
||||
gb / stage1.best.elapsed_sec(),
|
||||
gb / stage2.best.elapsed_sec());
|
||||
} else {
|
||||
printf("\"%s\"\t\t\t\t\t%f\t%f\t%f\n",
|
||||
base,
|
||||
gb / all_stages.best.elapsed_sec(),
|
||||
gb / stage1.best.elapsed_sec(),
|
||||
gb / stage2.best.elapsed_sec());
|
||||
}
|
||||
free(filename_copy);
|
||||
} else {
|
||||
printf("\n");
|
||||
printf("%s\n", filename);
|
||||
printf("%s\n", string(strlen(filename), '=').c_str());
|
||||
printf("%9zu blocks - %10zu bytes - %5zu structurals (%5.1f %%)\n", stats->bytes / BYTES_PER_BLOCK, stats->bytes, stats->structurals, 100.0 * stats->structurals / stats->bytes);
|
||||
if (stats) {
|
||||
printf("special blocks with: utf8 %9zu (%5.1f %%) - 0 structurals %9zu (%5.1f %%) - 1+ structurals %9zu (%5.1f %%) - 8+ structurals %9zu (%5.1f %%) - 16+ structurals %9zu (%5.1f %%)\n",
|
||||
stats->blocks_with_utf8, 100.0 * stats->blocks_with_utf8 / stats->blocks,
|
||||
stats->blocks_with_0_structurals, 100.0 * stats->blocks_with_0_structurals / stats->blocks,
|
||||
stats->blocks_with_1_structural, 100.0 * stats->blocks_with_1_structural / stats->blocks,
|
||||
stats->blocks_with_8_structurals, 100.0 * stats->blocks_with_8_structurals / stats->blocks,
|
||||
stats->blocks_with_16_structurals, 100.0 * stats->blocks_with_16_structurals / stats->blocks);
|
||||
printf("special block flips: utf8 %9zu (%5.1f %%) - 0 structurals %9zu (%5.1f %%) - 1+ structurals %9zu (%5.1f %%) - 8+ structurals %9zu (%5.1f %%) - 16+ structurals %9zu (%5.1f %%)\n",
|
||||
stats->blocks_with_utf8_flipped, 100.0 * stats->blocks_with_utf8_flipped / stats->blocks,
|
||||
stats->blocks_with_1_structural_flipped, 100.0 * stats->blocks_with_1_structural_flipped / stats->blocks,
|
||||
stats->blocks_with_0_structurals_flipped, 100.0 * stats->blocks_with_0_structurals_flipped / stats->blocks,
|
||||
stats->blocks_with_8_structurals_flipped, 100.0 * stats->blocks_with_8_structurals_flipped / stats->blocks,
|
||||
stats->blocks_with_16_structurals_flipped, 100.0 * stats->blocks_with_16_structurals_flipped / stats->blocks);
|
||||
}
|
||||
printf("\n");
|
||||
printf("All Stages\n");
|
||||
print_aggregate("| " , all_stages.best);
|
||||
// printf("|- Allocation\n");
|
||||
// print_aggregate("| ", allocate_stage.best);
|
||||
printf("|- Stage 1\n");
|
||||
print_aggregate("| ", stage1.best);
|
||||
printf("|- Stage 2\n");
|
||||
print_aggregate("| ", stage2.best);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,152 @@
|
|||
#ifndef __EVENT_COUNTER_H
|
||||
#define __EVENT_COUNTER_H
|
||||
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#ifndef _MSC_VER
|
||||
#include <dirent.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <cinttypes>
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "linux-perf-events.h"
|
||||
#ifdef __linux__
|
||||
#include <libgen.h>
|
||||
#endif
|
||||
//#define DEBUG
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/isadetection.h"
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::chrono::steady_clock;
|
||||
using std::chrono::time_point;
|
||||
using std::chrono::duration;
|
||||
|
||||
// One sample of performance measurements for a timed section of code:
// wall-clock elapsed time plus five hardware event counters.
struct event_count {
  std::chrono::duration<double> elapsed;        // wall-clock time of the measured section
  std::vector<unsigned long long> event_counts; // raw counter values, indexed by event_counter_types

  event_count() : elapsed(0), event_counts{0,0,0,0,0} {}
  // Counters are taken by const reference: the original signature took the
  // vector by value, copying it on every construction.
  event_count(const std::chrono::duration<double> _elapsed, const std::vector<unsigned long long> &_event_counts) : elapsed(_elapsed), event_counts(_event_counts) {}
  event_count(const event_count& other) = default;

  // The types of counters (so we can read the getter more easily)
  enum event_counter_types {
    CPU_CYCLES,
    INSTRUCTIONS,
    BRANCH_MISSES,
    CACHE_REFERENCES,
    CACHE_MISSES
  };

  // Elapsed wall-clock time in seconds / nanoseconds.
  double elapsed_sec() const { return std::chrono::duration<double>(elapsed).count(); }
  double elapsed_ns() const { return std::chrono::duration<double, std::nano>(elapsed).count(); }
  // Per-counter accessors (returned as double for ratio arithmetic).
  double cycles() const { return event_counts[CPU_CYCLES]; }
  double instructions() const { return event_counts[INSTRUCTIONS]; }
  double branch_misses() const { return event_counts[BRANCH_MISSES]; }
  double cache_references() const { return event_counts[CACHE_REFERENCES]; }
  double cache_misses() const { return event_counts[CACHE_MISSES]; }

  // Pass by const reference to avoid copying the counter vector on every
  // assignment/addition (the original took these parameters by value).
  event_count& operator=(const event_count &other) {
    this->elapsed = other.elapsed;
    this->event_counts = other.event_counts;
    return *this;
  }
  // Element-wise sum of two samples.
  event_count operator+(const event_count &other) const {
    return event_count(elapsed+other.elapsed, {
      event_counts[0]+other.event_counts[0],
      event_counts[1]+other.event_counts[1],
      event_counts[2]+other.event_counts[2],
      event_counts[3]+other.event_counts[3],
      event_counts[4]+other.event_counts[4],
    });
  }

  void operator+=(const event_count &other) {
    *this = *this + other;
  }
};
|
||||
|
||||
struct event_aggregate {
|
||||
int iterations = 0;
|
||||
event_count total;
|
||||
event_count best;
|
||||
event_count worst;
|
||||
|
||||
event_aggregate() {}
|
||||
|
||||
void operator<<(const event_count other) {
|
||||
if (iterations == 0 || other.elapsed < best.elapsed) {
|
||||
best = other;
|
||||
}
|
||||
if (iterations == 0 || other.elapsed > worst.elapsed) {
|
||||
worst = other;
|
||||
}
|
||||
iterations++;
|
||||
total += other;
|
||||
}
|
||||
|
||||
double elapsed_sec() const { return total.elapsed_sec() / iterations; }
|
||||
double elapsed_ns() const { return total.elapsed_ns() / iterations; }
|
||||
double cycles() const { return total.cycles() / iterations; }
|
||||
double instructions() const { return total.instructions() / iterations; }
|
||||
double branch_misses() const { return total.branch_misses() / iterations; }
|
||||
double cache_references() const { return total.cache_references() / iterations; }
|
||||
double cache_misses() const { return total.cache_misses() / iterations; }
|
||||
};
|
||||
|
||||
struct event_collector {
|
||||
event_count count;
|
||||
time_point<steady_clock> start_clock;
|
||||
|
||||
#if defined(__linux__)
|
||||
LinuxEvents<PERF_TYPE_HARDWARE> linux_events;
|
||||
event_collector() : linux_events(vector<int>{
|
||||
PERF_COUNT_HW_CPU_CYCLES,
|
||||
PERF_COUNT_HW_INSTRUCTIONS,
|
||||
PERF_COUNT_HW_BRANCH_MISSES,
|
||||
PERF_COUNT_HW_CACHE_REFERENCES,
|
||||
PERF_COUNT_HW_CACHE_MISSES
|
||||
}) {}
|
||||
bool has_events() {
|
||||
return linux_events.is_working();
|
||||
}
|
||||
#else
|
||||
bool has_events() {
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
really_inline void start() {
|
||||
#if defined(__linux)
|
||||
linux_events.start();
|
||||
#endif
|
||||
start_clock = steady_clock::now();
|
||||
}
|
||||
really_inline event_count& end() {
|
||||
time_point<steady_clock> end_clock = steady_clock::now();
|
||||
#if defined(__linux)
|
||||
linux_events.end(count.event_counts);
|
||||
#endif
|
||||
count.elapsed = end_clock - start_clock;
|
||||
return count;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,114 @@
|
|||
# Writes fixed-size (640KB) JSON files built from padded 64-byte blocks, used
# by benchfeatures to measure the per-block cost of specific parser features.
class ChunkWriter
  def initialize(output_dir, miss_templates, file_size=640*1000, block_size=64)
    @@output_dir = output_dir
    @@miss_templates = miss_templates # directory of precomputed 0/1 block patterns
    @@file_size = file_size
    @@block_size = block_size
  end

  # Pad each chunk with spaces to exactly one block (block_size bytes),
  # terminated by a newline (or a space), and concatenate them.
  def prepare_chunk(chunks, include_newline)
    Array(chunks).map do |chunk|
      "#{chunk}#{' '*(@@block_size-chunk.bytesize-1)}#{include_newline ? "\n" : " "}"
    end.join("")
  end

  # Write the three variants for one scenario: <filename>-full.json (all
  # repeat1 blocks), <filename>.json (half repeat1 / half repeat2) and
  # <filename>-miss.json (branch-miss-inducing random interleaving).
  # BUG FIX: the interpolations below had been corrupted to "#(unknown)";
  # they must interpolate the filename argument.
  def write_files(filename, start1, repeat1, end1, repeat2: '', include_newline: true)
    start1 = prepare_chunk(start1, include_newline)
    repeat1 = prepare_chunk(repeat1, include_newline)
    end1 = prepare_chunk(end1, include_newline)
    write_full(File.join(@@output_dir, "#{filename}-full.json"), start1, repeat1, end1)

    repeat2 = prepare_chunk(repeat2, include_newline)
    repeat2 = repeat2 * (repeat1.bytesize/repeat2.bytesize)
    write_half(File.join(@@output_dir, "#{filename}.json"), start1, repeat1, end1, repeat2)
    write_half_miss(File.join(@@output_dir, "#{filename}-miss.json"), start1, repeat1, end1, repeat2)
  end

  # Fill the whole file with repeat1 blocks between start1 and end1.
  def write_full(filename, start1, repeat1, end1)
    puts "Writing #{filename} ..."
    File.open(filename, "w") do |file|
      write_chunks(file, start1, repeat1, end1, @@file_size)
    end
    raise "OMG wrong file size #{File.size(filename)} (should be #{@@file_size})" if File.size(filename) != @@file_size
  end

  # First half of the file uses repeat1 blocks, second half repeat2 blocks.
  def write_half(filename, start1, repeat1, end1, repeat2)
    # repeat1 is already represented in start1 and end1, so it doesn't need quite
    # half the iterations.
    repeat1_len = (@@file_size/2) - start1.bytesize - end1.bytesize
    halfway_point = start1.bytesize + repeat1_len + repeat2.bytesize

    puts "Writing #{filename} ..."
    File.open(filename, "w") do |file|
      write_chunks(file, start1, repeat1, repeat2, halfway_point)
      write_chunks(file, repeat2, repeat2, end1, @@file_size-halfway_point)
    end
    raise "OMG wrong file size #{File.size(filename)} (should be #{@@file_size})" if File.size(filename) != @@file_size
  end

  # Interleave repeat1/repeat2 following a precomputed random 0/1 template so
  # the block-type branch is unpredictable for the CPU's branch predictor.
  def write_half_miss(filename, start1, repeat1, end1, repeat2)
    miss_template = Array(File.read(File.join(@@miss_templates, "#{repeat1.bytesize}.txt")).chomp.split("\n"))
    # Take the start and end out of the template
    repeat_template = miss_template[(start1.bytesize/64)..(-end1.bytesize/64-1)]
    # If repeat is 128 bytes, each *pair* of elements is set. Use that.
    repeat_chunks = repeat1.bytesize/64
    repeat_template = (repeat_chunks - 1).step(repeat_template.size - 1, repeat_chunks).map { |i| repeat_template[i] }

    puts "Writing #{filename} ..."
    File.open(filename, "w") do |file|
      file.write(start1)
      repeat_template.each do |should_repeat|
        file.write(should_repeat == "1" ? repeat1 : repeat2)
      end
      file.write(end1)
    end
    raise "OMG wrong file size #{File.size(filename)} (should be #{@@file_size})" if File.size(filename) != @@file_size
  end

  # Write start1, then repeat1 blocks until size-end1.bytesize is reached,
  # then end1. Returns the number of bytes written.
  def write_chunks(file, start1, repeat1, end1, size)
    pos = 0
    file.write(start1)
    pos += start1.bytesize

    repeat_end = size-end1.bytesize
    loop do
      file.write(repeat1)
      pos += repeat1.bytesize
      break if pos >= repeat_end
    end

    file.write(end1)
    pos += end1.bytesize
    return pos
  end
end
|
||||
|
||||
# Generate the benchmark inputs for ./benchfeatures into jsonexamples/generated.
output_dir = File.expand_path("../jsonexamples/generated", File.dirname(__FILE__))
miss_templates = File.expand_path("miss-templates", File.dirname(__FILE__))
Dir.mkdir(output_dir) unless File.directory?(output_dir)
w = ChunkWriter.new(output_dir, miss_templates)
# Each call emits <name>-full.json, <name>.json and <name>-miss.json; the
# commented-out scenarios are kept for reference but not currently generated.
w.write_files "utf-8", '["֏","֏",{}', ',"֏","֏",{}', ',"֏","֏","֏"]', repeat2: ',"ab","ab",{}'
w.write_files "0-structurals", '"ab"', '', ''
# w.write_files "1-structurals", [ '[', '"ab"' ], [ ',', '"ab"' ], [ ',', '{', '}', ']' ]
# w.write_files "2-structurals", '["ab"', ',"ab"', [',{', '}]']
# w.write_files "3-structurals", '[{}', ',{}', ',"ab"]'
# w.write_files "4-structurals", '["ab","ab"', ',"ab","ab"', ',{}]'
# w.write_files "5-structurals", '["ab",{}', ',"ab",{}', ',"ab","ab"]'
# w.write_files "6-structurals", '["ab","ab","ab"', ',"ab","ab","ab"', ',"ab",{}]'
w.write_files "7-structurals", '["ab","ab",{}', ',"ab","ab",{}', ',"ab","ab","ab"]'
# w.write_files "8-structurals", '["ab","ab","ab","ab"', ',"ab","ab","ab","ab"', ',"ab","ab",{}]'
# w.write_files "9-structurals", '["ab","ab","ab",{}', ',"ab","ab","ab",{}', ',"ab","ab","ab","ab"]'
# w.write_files "10-structurals", '["ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab"', ',"ab","ab","ab",{}]'
# w.write_files "11-structurals", '["ab","ab","ab","ab",{}', ',"ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab"]'
# w.write_files "12-structurals", '["ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab",{}]'
# w.write_files "13-structurals", '["ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab"]'
# w.write_files "14-structurals", '["ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab",{}]'
w.write_files "15-structurals", '["ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab","ab"]'
# w.write_files "16-structurals", '["ab","ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab",{}]'
# w.write_files "17-structurals", '["ab","ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab","ab","ab"]'
# w.write_files "18-structurals", '["ab","ab","ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab","ab",{}]'
# w.write_files "19-structurals", '["ab","ab","ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab","ab","ab","ab"]'
# w.write_files "20-structurals", '["ab","ab","ab","ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab","ab","ab",{}]'
# w.write_files "21-structurals", '["ab","ab","ab","ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab","ab","ab","ab","ab"]'
# w.write_files "22-structurals", '["ab","ab","ab","ab","ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab","ab","ab","ab","ab","ab"', ',"ab","ab","ab","ab","ab","ab","ab","ab","ab",{}]'
w.write_files "23-structurals", '["ab","ab","ab","ab","ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab","ab","ab","ab","ab",{}', ',"ab","ab","ab","ab","ab","ab","ab","ab","ab","ab","ab"]'
|
|
@ -0,0 +1,49 @@
|
|||
# Search seeds 1..1,000,000 for Random seeds whose simulated block sequence
# hits exactly 50% type-1 blocks with 25% flip transitions, for a layout with
# the given number of start/repeat/end blocks. Prints each matching seed as it
# is found, then the full list at the end.
# FIX: removed the unused local `closest_flips` (assigned nil, never read).
def gen_seeds(start_blocks, repeat_blocks, end_blocks)
  total_size = 640*1000
  total_blocks = total_size/64
  seed_space = 1..1000000
  target_blocks = total_blocks*0.5   # want half of all blocks to be type 1
  target_flips = total_blocks*0.25   # want a quarter of blocks to flip type
  percent_flips = 0.25*repeat_blocks # per-repeat flip probability

  puts "Seeds for #{start_blocks} start blocks, #{end_blocks} end blocks and #{repeat_blocks} repeat blocks: #{percent_flips*100}% flips"
  closest_seeds = []
  seed_space.each do |seed|
    r = Random.new(seed)
    # First block is always type 1
    flips = 0
    type1 = true
    type1_blocks = start_blocks
    finished_blocks = start_blocks
    last_repeat = total_blocks-end_blocks
    while finished_blocks < last_repeat
      if r.rand < percent_flips
        flips += 1
        type1 = !type1
      end
      type1_blocks += repeat_blocks if type1
      finished_blocks += repeat_blocks
    end

    # Last one is always type 1
    flips += 1 if !type1
    type1 = true
    type1_blocks += end_blocks
    finished_blocks += end_blocks

    raise "simulated the wrong number of blocks #{finished_blocks}" if finished_blocks != total_blocks

    if type1_blocks == target_blocks
      if flips == target_flips
        puts seed
        closest_seeds << seed
      end
    end
  end
  puts closest_seeds
end
|
||||
|
||||
# Find seeds for the three block layouts used by the generated benchmarks.
gen_seeds(1,1,1)
gen_seeds(1,1,2)
gen_seeds(2,2,4)
|
|
@ -0,0 +1,113 @@
|
|||
#ifndef __JSON_PARSER_H
|
||||
#define __JSON_PARSER_H
|
||||
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#ifndef _MSC_VER
|
||||
#include <dirent.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <cinttypes>
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "linux-perf-events.h"
|
||||
#ifdef __linux__
|
||||
#include <libgen.h>
|
||||
#endif
|
||||
//#define DEBUG
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/isadetection.h"
|
||||
#include "simdjson/jsonioutil.h"
|
||||
#include "simdjson/jsonparser.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
#include "simdjson/stage2_build_tape.h"
|
||||
|
||||
using namespace simdjson;
|
||||
using std::string;
|
||||
|
||||
using stage2_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj);
|
||||
using stage1_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj);
|
||||
|
||||
// Resolve the stage 1 (find_structural_bits) implementation for the given
// architecture; prints an error and exits if the architecture has no
// implementation compiled in.
stage1_functype* get_stage1_func(const Architecture architecture) {
#ifdef IS_X86_64
  if (architecture == Architecture::HASWELL) {
    return &find_structural_bits<Architecture::HASWELL>;
  }
  if (architecture == Architecture::WESTMERE) {
    return &find_structural_bits<Architecture::WESTMERE>;
  }
#endif
#ifdef IS_ARM64
  if (architecture == Architecture::ARM64) {
    return &find_structural_bits<Architecture::ARM64>;
  }
#endif
  // Unsupported architecture: same fatal-error path as before.
  std::cerr << "The processor is not supported by simdjson." << std::endl;
  exit(EXIT_FAILURE);
}
|
||||
|
||||
// Resolve the stage 2 (unified_machine) implementation for the given
// architecture; prints an error and exits if the architecture has no
// implementation compiled in.
// FIX: removed the unreachable `break;` statements that followed each
// `return` (dead code), matching the style of get_stage1_func.
stage2_functype* get_stage2_func(const Architecture architecture) {
  switch (architecture) {
#ifdef IS_X86_64
  case Architecture::HASWELL:
    return &unified_machine<Architecture::HASWELL>;
  case Architecture::WESTMERE:
    return &unified_machine<Architecture::WESTMERE>;
#endif
#ifdef IS_ARM64
  case Architecture::ARM64:
    return &unified_machine<Architecture::ARM64>;
#endif
  default:
    std::cerr << "The processor is not supported by simdjson." << std::endl;
    exit(EXIT_FAILURE);
  }
}
|
||||
|
||||
struct json_parser {
|
||||
const Architecture architecture;
|
||||
const stage1_functype *stage1_func;
|
||||
const stage2_functype *stage2_func;
|
||||
|
||||
json_parser(const Architecture _architecture) : architecture(_architecture) {
|
||||
this->stage1_func = get_stage1_func(architecture);
|
||||
this->stage2_func = get_stage2_func(architecture);
|
||||
}
|
||||
json_parser() : json_parser(find_best_supported_architecture()) {}
|
||||
|
||||
int stage1(const uint8_t *buf, const size_t len, ParsedJson &pj) const {
|
||||
return this->stage1_func(buf, len, pj);
|
||||
}
|
||||
int stage2(const uint8_t *buf, const size_t len, ParsedJson &pj) const {
|
||||
return this->stage2_func(buf, len, pj);
|
||||
}
|
||||
|
||||
int parse(const uint8_t *buf, const size_t len, ParsedJson &pj) const {
|
||||
int result = this->stage1(buf, len, pj);
|
||||
if (result == SUCCESS) {
|
||||
result = this->stage2(buf, len, pj);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
|
@ -83,6 +83,10 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
// True when the perf counter file descriptors opened successfully and the
// counters can actually be read (i.e. no error was reported during setup).
bool is_working() {
return working;
}
|
||||
|
||||
private:
|
||||
void report_error(const std::string &context) {
|
||||
if (working)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,3 +1,6 @@
|
|||
#include "json_parser.h"
|
||||
#include "event_counter.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#ifndef _MSC_VER
|
||||
|
@ -35,405 +38,179 @@
|
|||
#include "simdjson/stage1_find_marks.h"
|
||||
#include "simdjson/stage2_build_tape.h"
|
||||
|
||||
// Global arguments
|
||||
bool find_marks_only = false;
|
||||
bool verbose = false;
|
||||
bool dump = false;
|
||||
bool json_output = false;
|
||||
bool force_one_iteration = false;
|
||||
bool just_data = false;
|
||||
bool force_sse = false;
|
||||
int32_t iterations = -1;
|
||||
int32_t warmup_iterations = -1;
|
||||
#include <functional>
|
||||
|
||||
namespace simdjson {
|
||||
Architecture _find_best_supported_implementation() {
|
||||
constexpr uint32_t haswell_flags =
|
||||
instruction_set::AVX2 | instruction_set::PCLMULQDQ |
|
||||
instruction_set::BMI1 | instruction_set::BMI2;
|
||||
constexpr uint32_t westmere_flags =
|
||||
instruction_set::SSE42 | instruction_set::PCLMULQDQ;
|
||||
uint32_t supports = detect_supported_architectures();
|
||||
// Order from best to worst (within architecture)
|
||||
if ((haswell_flags & supports) == haswell_flags && !force_sse) {
|
||||
return Architecture::HASWELL;
|
||||
}
|
||||
if ((westmere_flags & supports) == westmere_flags) {
|
||||
return Architecture::WESTMERE;
|
||||
}
|
||||
if (instruction_set::NEON)
|
||||
return Architecture::ARM64;
|
||||
#include "benchmarker.h"
|
||||
|
||||
return Architecture::NONE;
|
||||
using namespace simdjson;
|
||||
using std::cerr;
|
||||
using std::cout;
|
||||
using std::endl;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::vector;
|
||||
using std::ostream;
|
||||
using std::ofstream;
|
||||
using std::exception;
|
||||
|
||||
// Stash the exe_name in main() for functions to use
|
||||
char* exe_name;
|
||||
|
||||
// Print the command-line help text for this tool to the given stream.
void print_usage(ostream& out) {
  out << "Usage: " << exe_name << " [-vt] [-n #] [-s STAGE] [-a ARCH] <jsonfile> ..." << endl
      << endl
      << "Runs the parser against the given json files in a loop, measuring speed and other statistics." << endl
      << endl
      << "Options:" << endl
      << endl
      << "-n # - Number of iterations per file. Default: 200" << endl
      << "-i # - Number of times to iterate a single file before moving to the next. Default: 20" << endl
      << "-t - Tabbed data output" << endl
      << "-v - Verbose output." << endl
      << "-s STAGE - Stop after the given stage." << endl
      << " -s stage1 - Stop after find_structural_bits." << endl
      << " -s all - Run all stages." << endl
      << "-a ARCH - Use the parser with the designated architecture (HASWELL, WESTMERE" << endl
      << " or ARM64). By default, detects best supported architecture." << endl;
}
|
||||
|
||||
using unified_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj);
|
||||
using stage1_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj);
|
||||
|
||||
extern unified_functype *unified_ptr;
|
||||
|
||||
extern stage1_functype *stage1_ptr;
|
||||
|
||||
int unified_machine_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||
if (find_marks_only) {
|
||||
return simdjson::SUCCESS;
|
||||
}
|
||||
Architecture best_implementation = _find_best_supported_implementation();
|
||||
// Selecting the best implementation
|
||||
switch (best_implementation) {
|
||||
#ifdef IS_X86_64
|
||||
case Architecture::HASWELL:
|
||||
unified_ptr = &unified_machine<Architecture::HASWELL>;
|
||||
break;
|
||||
case Architecture::WESTMERE:
|
||||
unified_ptr = &unified_machine<Architecture::WESTMERE>;
|
||||
break;
|
||||
#endif
|
||||
#ifdef IS_ARM64
|
||||
case Architecture::ARM64:
|
||||
unified_ptr = &unified_machine<Architecture::ARM64>;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
std::cerr << "The processor is not supported by simdjson." << std::endl;
|
||||
return simdjson::UNEXPECTED_ERROR;
|
||||
}
|
||||
|
||||
return unified_ptr(buf, len, pj);
|
||||
// Print an error message followed by the usage help, then terminate with a
// failure exit code.
void exit_usage(string message) {
  cerr << message << endl << endl;
  print_usage(cerr);
  exit(EXIT_FAILURE);
}
|
||||
|
||||
// Responsible to select the best json_parse implementation
|
||||
int find_structural_bits_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||
Architecture best_implementation = _find_best_supported_implementation();
|
||||
// Selecting the best implementation
|
||||
switch (best_implementation) {
|
||||
#ifdef IS_X86_64
|
||||
case Architecture::HASWELL:
|
||||
stage1_ptr = &find_structural_bits<Architecture::HASWELL>;
|
||||
break;
|
||||
case Architecture::WESTMERE:
|
||||
stage1_ptr = &find_structural_bits<Architecture::WESTMERE>;
|
||||
break;
|
||||
#endif
|
||||
#ifdef IS_ARM64
|
||||
case Architecture::ARM64:
|
||||
stage1_ptr = &find_structural_bits<Architecture::ARM64>;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
std::cerr << "The processor is not supported by simdjson." << std::endl;
|
||||
return simdjson::UNEXPECTED_ERROR;
|
||||
struct option_struct {
|
||||
vector<char*> files;
|
||||
Architecture architecture = Architecture::UNSUPPORTED;
|
||||
bool stage1_only = false;
|
||||
|
||||
int32_t iterations = 200;
|
||||
int32_t iteration_step = 50;
|
||||
|
||||
bool verbose = false;
|
||||
bool tabbed_output = false;
|
||||
|
||||
option_struct(int argc, char **argv) {
|
||||
#ifndef _MSC_VER
|
||||
int c;
|
||||
|
||||
while ((c = getopt(argc, argv, "vtn:i:a:s:")) != -1) {
|
||||
switch (c) {
|
||||
case 'n':
|
||||
iterations = atoi(optarg);
|
||||
break;
|
||||
case 'i':
|
||||
iteration_step = atoi(optarg);
|
||||
break;
|
||||
case 't':
|
||||
tabbed_output = true;
|
||||
break;
|
||||
case 'v':
|
||||
verbose = true;
|
||||
break;
|
||||
case 'a':
|
||||
architecture = parse_architecture(optarg);
|
||||
if (architecture == Architecture::UNSUPPORTED) {
|
||||
exit_usage(string("Unsupported option value -a ") + optarg + ": expected -a HASWELL, WESTMERE or ARM64");
|
||||
}
|
||||
break;
|
||||
case 's':
|
||||
if (!strcmp(optarg, "stage1")) {
|
||||
stage1_only = true;
|
||||
} else if (!strcmp(optarg, "all")) {
|
||||
stage1_only = false;
|
||||
} else {
|
||||
exit_usage(string("Unsupported option value -s ") + optarg + ": expected -s stage1 or all");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
exit_error("Unexpected argument " + c);
|
||||
}
|
||||
}
|
||||
#else
|
||||
int optind = 1;
|
||||
#endif
|
||||
|
||||
// If architecture is not specified, pick the best supported architecture by default
|
||||
if (architecture == Architecture::UNSUPPORTED) {
|
||||
architecture = find_best_supported_architecture();
|
||||
}
|
||||
|
||||
// All remaining arguments are considered to be files
|
||||
for (int i=optind; i<argc; i++) {
|
||||
files.push_back(argv[i]);
|
||||
}
|
||||
if (files.empty()) {
|
||||
exit_usage("No files specified");
|
||||
}
|
||||
|
||||
// Keeps the numbers the same for CI (old ./parse didn't have a two-stage loop)
|
||||
if (files.size() == 1) {
|
||||
iteration_step = iterations;
|
||||
}
|
||||
|
||||
#if !defined(__linux__)
|
||||
if (tabbed_output) {
|
||||
exit_error("tabbed_output (-t) flag only works under linux.\n");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return stage1_ptr(buf, len, pj);
|
||||
}
|
||||
|
||||
stage1_functype *stage1_ptr = &find_structural_bits_dispatch;
|
||||
unified_functype *unified_ptr = &unified_machine_dispatch;
|
||||
} // namespace simdjson
|
||||
};
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
|
||||
#ifndef _MSC_VER
|
||||
int c;
|
||||
|
||||
while ((c = getopt(argc, argv, "1vdtn:w:fs")) != -1) {
|
||||
switch (c) {
|
||||
case 'n':
|
||||
iterations = atoi(optarg);
|
||||
break;
|
||||
case 'w':
|
||||
warmup_iterations = atoi(optarg);
|
||||
break;
|
||||
case 's':
|
||||
force_sse = true;
|
||||
break;
|
||||
case 't':
|
||||
just_data = true;
|
||||
break;
|
||||
case 'v':
|
||||
verbose = true;
|
||||
break;
|
||||
case 'd':
|
||||
dump = true;
|
||||
break;
|
||||
case 'j':
|
||||
json_output = true;
|
||||
break;
|
||||
case '1':
|
||||
force_one_iteration = true;
|
||||
break;
|
||||
case 'f':
|
||||
find_marks_only = true;
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
}
|
||||
#else
|
||||
int optind = 1;
|
||||
#endif
|
||||
if (optind >= argc) {
|
||||
std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
const char *filename = argv[optind];
|
||||
if (optind + 1 < argc) {
|
||||
std::cerr << "warning: ignoring everything after " << argv[optind + 1]
|
||||
<< std::endl;
|
||||
}
|
||||
if (verbose) {
|
||||
std::cout << "[verbose] loading " << filename << std::endl;
|
||||
}
|
||||
simdjson::padded_string p;
|
||||
try {
|
||||
simdjson::get_corpus(filename).swap(p);
|
||||
} catch (const std::exception &) { // caught by reference to base
|
||||
std::cout << "Could not load the file " << filename << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
if (verbose) {
|
||||
std::cout << "[verbose] loaded " << filename << " (" << p.size()
|
||||
<< " bytes)" << std::endl;
|
||||
}
|
||||
if (iterations == -1) {
|
||||
#if defined(DEBUG)
|
||||
iterations = 1;
|
||||
#else
|
||||
iterations = force_one_iteration ? 1 : (p.size() < 1 * 1000 * 1000 ? 1000 : 10);
|
||||
#endif
|
||||
}
|
||||
if (warmup_iterations == -1) {
|
||||
#if defined(DEBUG)
|
||||
warmup_iterations = 0;
|
||||
#else
|
||||
warmup_iterations = (p.size() < 1 * 1000 * 1000) ? 10 : 1;
|
||||
#endif
|
||||
// Read options
|
||||
exe_name = argv[0];
|
||||
option_struct options(argc, argv);
|
||||
if (options.verbose) {
|
||||
verbose_stream = &cout;
|
||||
}
|
||||
|
||||
std::vector<double> res;
|
||||
res.resize(iterations);
|
||||
if (!just_data)
|
||||
printf("number of iterations %u \n", iterations);
|
||||
#if !defined(__linux__)
|
||||
#define SQUASH_COUNTERS
|
||||
if (just_data) {
|
||||
printf("just_data (-t) flag only works under linux.\n");
|
||||
}
|
||||
#endif
|
||||
{ // practice run
|
||||
simdjson::ParsedJson pj;
|
||||
bool allocok = pj.allocate_capacity(p.size());
|
||||
if (allocok) {
|
||||
simdjson::stage1_ptr((const uint8_t *)p.data(), p.size(), pj);
|
||||
simdjson::unified_ptr((const uint8_t *)p.data(), p.size(), pj);
|
||||
}
|
||||
}
|
||||
#ifndef SQUASH_COUNTERS
|
||||
std::vector<int> evts;
|
||||
evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
|
||||
evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
|
||||
evts.push_back(PERF_COUNT_HW_BRANCH_MISSES);
|
||||
evts.push_back(PERF_COUNT_HW_CACHE_REFERENCES);
|
||||
evts.push_back(PERF_COUNT_HW_CACHE_MISSES);
|
||||
LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
|
||||
std::vector<unsigned long long> results;
|
||||
results.resize(evts.size());
|
||||
unsigned long cy0 = 0, cy1 = 0, cy2 = 0;
|
||||
unsigned long cl0 = 0, cl1 = 0, cl2 = 0;
|
||||
unsigned long mis0 = 0, mis1 = 0, mis2 = 0;
|
||||
unsigned long cref0 = 0, cref1 = 0, cref2 = 0;
|
||||
unsigned long cmis0 = 0, cmis1 = 0, cmis2 = 0;
|
||||
#endif
|
||||
// Start collecting events. We put this early so if it prints an error message, it's the
|
||||
// first thing printed.
|
||||
event_collector collector;
|
||||
|
||||
// Do warmup iterations
|
||||
bool isok = true;
|
||||
for (int32_t i = 0; i < warmup_iterations; i++) {
|
||||
if (verbose) {
|
||||
std::cout << "[verbose] warmup iteration # " << i << std::endl;
|
||||
}
|
||||
simdjson::ParsedJson pj;
|
||||
bool allocok = pj.allocate_capacity(p.size());
|
||||
if (!allocok) {
|
||||
std::cerr << "failed to allocate memory" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
isok = (simdjson::stage1_ptr((const uint8_t *)p.data(), p.size(), pj) ==
|
||||
simdjson::SUCCESS);
|
||||
isok = isok &&
|
||||
(simdjson::SUCCESS ==
|
||||
simdjson::unified_ptr((const uint8_t *)p.data(), p.size(), pj));
|
||||
if (!isok) {
|
||||
std::cerr << pj.get_error_message() << std::endl;
|
||||
std::cerr << "Could not parse. " << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
// Print preamble
|
||||
if (!options.tabbed_output) {
|
||||
printf("number of iterations %u \n", options.iterations);
|
||||
}
|
||||
|
||||
#ifndef SQUASH_COUNTERS
|
||||
for (int32_t i = 0; i < iterations; i++) {
|
||||
if (verbose) {
|
||||
std::cout << "[verbose] iteration # " << i << std::endl;
|
||||
}
|
||||
unified.start();
|
||||
simdjson::ParsedJson pj;
|
||||
bool allocok = pj.allocate_capacity(p.size());
|
||||
if (!allocok) {
|
||||
std::cerr << "failed to allocate memory" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
unified.end(results);
|
||||
cy0 += results[0];
|
||||
cl0 += results[1];
|
||||
mis0 += results[2];
|
||||
cref0 += results[3];
|
||||
cmis0 += results[4];
|
||||
if (verbose) {
|
||||
std::cout << "[verbose] allocated memory for parsed JSON " << std::endl;
|
||||
}
|
||||
unified.start();
|
||||
isok = (simdjson::stage1_ptr((const uint8_t *)p.data(), p.size(), pj) ==
|
||||
simdjson::SUCCESS);
|
||||
unified.end(results);
|
||||
cy1 += results[0];
|
||||
cl1 += results[1];
|
||||
mis1 += results[2];
|
||||
cref1 += results[3];
|
||||
cmis1 += results[4];
|
||||
if (!isok) {
|
||||
std::cout << "Failed during stage 1" << std::endl;
|
||||
break;
|
||||
}
|
||||
unified.start();
|
||||
isok = isok &&
|
||||
(simdjson::SUCCESS ==
|
||||
simdjson::unified_ptr((const uint8_t *)p.data(), p.size(), pj));
|
||||
unified.end(results);
|
||||
cy2 += results[0];
|
||||
cl2 += results[1];
|
||||
mis2 += results[2];
|
||||
cref2 += results[3];
|
||||
cmis2 += results[4];
|
||||
if (!isok) {
|
||||
std::cout << "Failed during stage 2" << std::endl;
|
||||
break;
|
||||
}
|
||||
// Set up benchmarkers by reading all files
|
||||
json_parser parser(options.architecture);
|
||||
vector<benchmarker*> benchmarkers;
|
||||
for (size_t i=0; i<options.files.size(); i++) {
|
||||
benchmarkers.push_back(new benchmarker(options.files[i], parser, collector));
|
||||
}
|
||||
#endif
|
||||
|
||||
// we do it again, this time just measuring the elapsed time
|
||||
for (int32_t i = 0; i < iterations; i++) {
|
||||
if (verbose) {
|
||||
std::cout << "[verbose] iteration # " << i << std::endl;
|
||||
}
|
||||
simdjson::ParsedJson pj;
|
||||
bool allocok = pj.allocate_capacity(p.size());
|
||||
if (!allocok) {
|
||||
std::cerr << "failed to allocate memory" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
if (verbose) {
|
||||
std::cout << "[verbose] allocated memory for parsed JSON " << std::endl;
|
||||
}
|
||||
|
||||
auto start = std::chrono::steady_clock::now();
|
||||
isok = (simdjson::stage1_ptr((const uint8_t *)p.data(), p.size(), pj) ==
|
||||
simdjson::SUCCESS);
|
||||
isok = isok &&
|
||||
(simdjson::SUCCESS ==
|
||||
simdjson::unified_ptr((const uint8_t *)p.data(), p.size(), pj));
|
||||
auto end = std::chrono::steady_clock::now();
|
||||
std::chrono::duration<double> secs = end - start;
|
||||
res[i] = secs.count();
|
||||
if (!isok) {
|
||||
std::cerr << pj.get_error_message() << std::endl;
|
||||
std::cerr << "Could not parse. " << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
simdjson::ParsedJson pj =
|
||||
build_parsed_json(p); // do the parsing again to get the stats
|
||||
if (!pj.is_valid()) {
|
||||
std::cerr << pj.get_error_message() << std::endl;
|
||||
std::cerr << "Could not parse. " << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
double min_result = *min_element(res.begin(), res.end());
|
||||
double speedinGBs = (p.size()) / (min_result * 1000000000.0);
|
||||
#ifndef SQUASH_COUNTERS
|
||||
unsigned long total = cy0 + cy1 + cy2;
|
||||
if (just_data) {
|
||||
float cpb0 = (double)cy0 / (iterations * p.size());
|
||||
float cpb1 = (double)cy1 / (iterations * p.size());
|
||||
float cpb2 = (double)cy2 / (iterations * p.size());
|
||||
float cpbtotal = (double)total / (iterations * p.size());
|
||||
char *newfile = (char *)malloc(strlen(filename) + 1);
|
||||
if (newfile == NULL) {
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
::strcpy(newfile, filename);
|
||||
char *snewfile = ::basename(newfile);
|
||||
size_t nl = strlen(snewfile);
|
||||
for (size_t j = nl - 1; j > 0; j--) {
|
||||
if (snewfile[j] == '.') {
|
||||
snewfile[j] = '\0';
|
||||
break;
|
||||
// Run the benchmarks
|
||||
progress_bar progress(options.iterations, 50);
|
||||
// Put the if (options.stage1_only) *outside* the loop so that run_iterations will be optimized
|
||||
if (options.stage1_only) {
|
||||
for (int iteration = 0; iteration < options.iterations; iteration += options.iteration_step) {
|
||||
if (!options.verbose) { progress.print(iteration); }
|
||||
// Benchmark each file once per iteration
|
||||
for (size_t f=0; f<options.files.size(); f++) {
|
||||
verbose() << "[verbose] " << benchmarkers[f]->filename << " iterations #" << iteration << "-" << (iteration+options.iteration_step-1) << endl;
|
||||
benchmarkers[f]->run_iterations(options.iteration_step, true);
|
||||
}
|
||||
}
|
||||
printf("\"%s\"\t%f\t%f\t%f\t%f\t%f\n", snewfile, cpb0, cpb1, cpb2, cpbtotal,
|
||||
speedinGBs);
|
||||
free(newfile);
|
||||
} else {
|
||||
printf("number of bytes %ld number of structural chars %u ratio %.3f\n",
|
||||
p.size(), pj.n_structural_indexes,
|
||||
(double)pj.n_structural_indexes / p.size());
|
||||
printf("mem alloc instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: "
|
||||
"%.2f mis. branches: %10lu (cycles/mis.branch %.2f) cache accesses: "
|
||||
"%10lu (failure %10lu)\n",
|
||||
cl0 / iterations, cy0 / iterations, 100. * cy0 / total,
|
||||
(double)cl0 / cy0, mis0 / iterations, (double)cy0 / mis0,
|
||||
cref1 / iterations, cmis0 / iterations);
|
||||
printf(" mem alloc runs at %.2f cycles per input byte.\n",
|
||||
(double)cy0 / (iterations * p.size()));
|
||||
printf("stage 1 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: "
|
||||
"%.2f mis. branches: %10lu (cycles/mis.branch %.2f) cache accesses: "
|
||||
"%10lu (failure %10lu)\n",
|
||||
cl1 / iterations, cy1 / iterations, 100. * cy1 / total,
|
||||
(double)cl1 / cy1, mis1 / iterations, (double)cy1 / mis1,
|
||||
cref1 / iterations, cmis1 / iterations);
|
||||
printf(" stage 1 runs at %.2f cycles per input byte.\n",
|
||||
(double)cy1 / (iterations * p.size()));
|
||||
for (int iteration = 0; iteration < options.iterations; iteration += options.iteration_step) {
|
||||
if (!options.verbose) { progress.print(iteration); }
|
||||
// Benchmark each file once per iteration
|
||||
for (size_t f=0; f<options.files.size(); f++) {
|
||||
verbose() << "[verbose] " << benchmarkers[f]->filename << " iterations #" << iteration << "-" << (iteration+options.iteration_step-1) << endl;
|
||||
benchmarkers[f]->run_iterations(options.iteration_step, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!options.verbose) { progress.erase(); }
|
||||
|
||||
printf("stage 2 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: "
|
||||
"%.2f mis. branches: %10lu (cycles/mis.branch %.2f) cache "
|
||||
"accesses: %10lu (failure %10lu)\n",
|
||||
cl2 / iterations, cy2 / iterations, 100. * cy2 / total,
|
||||
(double)cl2 / cy2, mis2 / iterations, (double)cy2 / mis2,
|
||||
cref2 / iterations, cmis2 / iterations);
|
||||
printf(" stage 2 runs at %.2f cycles per input byte and ",
|
||||
(double)cy2 / (iterations * p.size()));
|
||||
printf("%.2f cycles per structural character.\n",
|
||||
(double)cy2 / (iterations * pj.n_structural_indexes));
|
||||
for (size_t i=0; i<options.files.size(); i++) {
|
||||
benchmarkers[i]->print(options.tabbed_output);
|
||||
delete benchmarkers[i];
|
||||
}
|
||||
|
||||
printf(" all stages: %.2f cycles per input byte.\n",
|
||||
(double)total / (iterations * p.size()));
|
||||
printf("Estimated average frequency: %.3f GHz.\n",
|
||||
(double)total / (iterations * min_result * 1000000000.0));
|
||||
}
|
||||
#endif
|
||||
if (!just_data) {
|
||||
std::cout << "Min: " << min_result << " bytes read: " << p.size()
|
||||
<< " Gigabytes/second: " << speedinGBs << std::endl;
|
||||
}
|
||||
if (json_output) {
|
||||
isok = isok && pj.print_json(std::cout);
|
||||
}
|
||||
if (dump) {
|
||||
isok = isok && pj.dump_raw_tape(std::cout);
|
||||
}
|
||||
if (!isok) {
|
||||
fprintf(stderr, " Parsing failed. \n ");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
|
|
@ -6,10 +6,10 @@
|
|||
namespace simdjson {
|
||||
// Represents the minimal architecture that would support an implementation
|
||||
enum class Architecture {
|
||||
UNSUPPORTED,
|
||||
WESTMERE,
|
||||
HASWELL,
|
||||
ARM64,
|
||||
NONE,
|
||||
// TODO remove 'native' in favor of runtime dispatch?
|
||||
// the 'native' enum class value should point at a good default on the current
|
||||
// machine
|
||||
|
@ -20,6 +20,9 @@ enum class Architecture {
|
|||
#endif
|
||||
};
|
||||
|
||||
Architecture find_best_supported_architecture();
|
||||
Architecture parse_architecture(char *architecture);
|
||||
|
||||
enum ErrorValues {
|
||||
SUCCESS = 0,
|
||||
SUCCESS_AND_HAS_MORE, //No errors and buffer still has more data
|
||||
|
|
|
@ -29,7 +29,7 @@ int json_parse(const char *buf, size_t len, ParsedJson &pj,
|
|||
realloc);
|
||||
}
|
||||
|
||||
Architecture find_best_supported_implementation() {
|
||||
Architecture find_best_supported_architecture() {
|
||||
constexpr uint32_t haswell_flags =
|
||||
instruction_set::AVX2 | instruction_set::PCLMULQDQ |
|
||||
instruction_set::BMI1 | instruction_set::BMI2;
|
||||
|
@ -45,13 +45,20 @@ Architecture find_best_supported_implementation() {
|
|||
if (supports & instruction_set::NEON)
|
||||
return Architecture::ARM64;
|
||||
|
||||
return Architecture::NONE;
|
||||
return Architecture::UNSUPPORTED;
|
||||
}
|
||||
|
||||
Architecture parse_architecture(char *architecture) {
|
||||
if (!strcmp(architecture, "HASWELL")) { return Architecture::HASWELL; }
|
||||
if (!strcmp(architecture, "WESTMERE")) { return Architecture::WESTMERE; }
|
||||
if (!strcmp(architecture, "ARM64")) { return Architecture::ARM64; }
|
||||
return Architecture::UNSUPPORTED;
|
||||
}
|
||||
|
||||
// Responsible to select the best json_parse implementation
|
||||
int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj,
|
||||
bool realloc) {
|
||||
Architecture best_implementation = find_best_supported_implementation();
|
||||
Architecture best_implementation = find_best_supported_architecture();
|
||||
// Selecting the best implementation
|
||||
switch (best_implementation) {
|
||||
#ifdef IS_X86_64
|
||||
|
|
Loading…
Reference in New Issue