From 14b55ab77f6136f1cb78ff2ff9a2d3fe097e1508 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 18 Dec 2018 22:18:23 -0500 Subject: [PATCH] Preparing new version with plotting. --- benchmark/benchmark.h | 13 ++--- benchmark/distinctuseridcompetition.cpp | 13 +++-- benchmark/minifiercompetition.cpp | 25 ++++---- benchmark/parse.cpp | 24 +++++++- benchmark/parseandstatcompetition.cpp | 15 +++-- benchmark/parsingcompetition.cpp | 56 +++++++++--------- scripts/bar.gnuplot | 27 +++++++++ scripts/plotparse.sh | 77 +++++++++++++++++++++++++ 8 files changed, 194 insertions(+), 56 deletions(-) create mode 100644 scripts/bar.gnuplot create mode 100755 scripts/plotparse.sh diff --git a/benchmark/benchmark.h b/benchmark/benchmark.h index 8b77c421..2ada3bec 100644 --- a/benchmark/benchmark.h +++ b/benchmark/benchmark.h @@ -84,6 +84,7 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX; } \ if (verbose) \ printf("%-40s\t: ", name); \ + else printf("\"%s\"\t", name); \ fflush(NULL); \ uint64_t cycles_start, cycles_final, cycles_diff; \ uint64_t min_diff = (uint64_t)-1; \ @@ -105,14 +106,10 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX; uint64_t S = size; \ float cycle_per_op = (min_diff) / (double)S; \ float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \ - if (verbose) \ - printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \ - if (verbose) \ - printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \ - if (verbose) \ - printf("\n"); \ - if (!verbose) \ - printf(" %.3f ", cycle_per_op); \ + if (verbose) printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \ + if (verbose) printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \ + if (!verbose) printf(" %.3f ", cycle_per_op); \ + printf("\n"); \ fflush(NULL); \ } while (0) diff --git a/benchmark/distinctuseridcompetition.cpp b/benchmark/distinctuseridcompetition.cpp index b82a9fcb..1a463a28 100644 --- a/benchmark/distinctuseridcompetition.cpp 
+++ b/benchmark/distinctuseridcompetition.cpp @@ -205,9 +205,14 @@ std::vector rapid_computestats(const std::string_view &p) { int main(int argc, char *argv[]) { bool verbose = false; + bool justdata = false; + int c; - while ((c = getopt(argc, argv, "v")) != -1) + while ((c = getopt(argc, argv, "vt")) != -1) switch (c) { + case 't': + justdata = true; + break; case 'v': verbose = true; break; @@ -265,11 +270,11 @@ int main(int argc, char *argv[]) { int repeat = 10; int volume = p.size(); BEST_TIME("simdjson ", simdjson_computestats(p).size(), size, , repeat, - volume, true); + volume, !justdata); BEST_TIME("rapid ", rapid_computestats(p).size(), size, , repeat, volume, - true); + !justdata); BEST_TIME("sasjon ", sasjon_computestats(p).size(), size, , repeat, volume, - true); + !justdata); free((void*)p.data()); } diff --git a/benchmark/minifiercompetition.cpp b/benchmark/minifiercompetition.cpp index 15240ca3..8cefe74a 100644 --- a/benchmark/minifiercompetition.cpp +++ b/benchmark/minifiercompetition.cpp @@ -47,9 +47,14 @@ std::string rapidstringme(char *json) { int main(int argc, char *argv[]) { int c; bool verbose = false; - while ((c = getopt (argc, argv, "v")) != -1) + bool justdata = false; + + while ((c = getopt (argc, argv, "vt")) != -1) switch (c) { + case 't': + justdata = true; + break; case 'v': verbose = true; break; @@ -89,9 +94,9 @@ int main(int argc, char *argv[]) { if (verbose) std::cout << "input length is " << p.size() << " stringified length is " << strlength << std::endl; - BEST_TIME_NOCHECK("despacing with RapidJSON", rapidstringme((char *)p.data()), , repeat, volume, true); + BEST_TIME_NOCHECK("despacing with RapidJSON", rapidstringme((char *)p.data()), , repeat, volume, !justdata); BEST_TIME_NOCHECK("despacing with RapidJSON Insitu", rapidstringmeInsitu((char *)buffer), - memcpy(buffer, p.data(), p.size()), repeat, volume, true); + memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata); memcpy(buffer, p.data(), p.size()); size_t 
outlength = @@ -101,7 +106,7 @@ int main(int argc, char *argv[]) { uint8_t *cbuffer = (uint8_t *)buffer; BEST_TIME("jsonminify", jsonminify(cbuffer, p.size(), cbuffer), outlength, - memcpy(buffer, p.data(), p.size()), repeat, volume, true); + memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata); printf("minisize = %zu, original size = %zu (minified down to %.2f percent of original) \n", outlength, p.size(), outlength * 100.0 / p.size()); /*** @@ -109,7 +114,7 @@ int main(int argc, char *argv[]) { ***/ rapidjson::Document d; BEST_TIME("RapidJSON Insitu orig", d.ParseInsitu(buffer).HasParseError(), false, - memcpy(buffer, p.data(), p.size()), repeat, volume, true); + memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata); char *minibuffer = allocate_padded_buffer(p.size() + 1); size_t minisize = jsonminify((const uint8_t *)p.data(), p.size(), (uint8_t*) minibuffer); @@ -117,15 +122,15 @@ int main(int argc, char *argv[]) { BEST_TIME("RapidJSON Insitu despaced", d.ParseInsitu(buffer).HasParseError(), false, memcpy(buffer, minibuffer, p.size()), - repeat, volume, true); + repeat, volume, !justdata); size_t astbuffersize = p.size() * 2; size_t * ast_buffer = (size_t *) malloc(astbuffersize * sizeof(size_t)); - BEST_TIME("sajson orig", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true); + BEST_TIME("sajson orig", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata); - BEST_TIME("sajson despaced", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(minisize, buffer)).is_valid(), true, memcpy(buffer, minibuffer, p.size()), repeat, volume, true); + BEST_TIME("sajson despaced", sajson::parse(sajson::bounded_allocation(ast_buffer, 
astbuffersize), sajson::mutable_string_view(minisize, buffer)).is_valid(), true, memcpy(buffer, minibuffer, p.size()), repeat, volume, !justdata); ParsedJson pj; bool isallocok = pj.allocateCapacity(p.size(), 1024); @@ -133,7 +138,7 @@ int main(int argc, char *argv[]) { printf("failed to allocate memory\n"); return EXIT_FAILURE; } - BEST_TIME("json_parse orig", json_parse((const u8*)buffer, p.size(), pj), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true); + BEST_TIME("json_parse orig", json_parse((const u8*)buffer, p.size(), pj), true, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata); ParsedJson pj2; bool isallocok2 = pj2.allocateCapacity(p.size(), 1024); @@ -142,7 +147,7 @@ int main(int argc, char *argv[]) { return EXIT_FAILURE; } - BEST_TIME("json_parse despaced", json_parse((const u8*)buffer, minisize, pj2), true, memcpy(buffer, minibuffer, p.size()), repeat, volume, true); + BEST_TIME("json_parse despaced", json_parse((const u8*)buffer, minisize, pj2), true, memcpy(buffer, minibuffer, p.size()), repeat, volume, !justdata); free((void*)p.data()); free(buffer); free(ast_buffer); diff --git a/benchmark/parse.cpp b/benchmark/parse.cpp index 8ed56de8..9e3a212e 100644 --- a/benchmark/parse.cpp +++ b/benchmark/parse.cpp @@ -23,7 +23,9 @@ #include "linux-perf-events.h" - +#ifdef __linux__ +#include <libgen.h> +#endif //#define DEBUG #include "simdjson/common_defs.h" #include "simdjson/jsonparser.h" @@ -39,12 +41,16 @@ int main(int argc, char *argv[]) { bool dump = false; bool jsonoutput = false; bool forceoneiteration = false; + bool justdata = false; int c; - while ((c = getopt (argc, argv, "1vd")) != -1) + while ((c = getopt (argc, argv, "1vdt")) != -1) switch (c) { + case 't': + justdata = true; + break; case 'v': verbose = true; break; @@ -87,6 +93,9 @@ int main(int argc, char *argv[]) { #if !defined(__linux__) #define SQUASH_COUNTERS + if(justdata) { + printf("justdata (-t) flag only works under linux.\n"); + } #endif #ifndef SQUASH_COUNTERS @@ 
-185,6 +194,14 @@ int main(int argc, char *argv[]) { return EXIT_FAILURE; } #ifndef SQUASH_COUNTERS + if(justdata) { + float cpb0 = (double)cy0 / (iterations * p.size()); + float cpb1 = (double)cy1 / (iterations * p.size()); + float cpb2 = (double)cy2 / (iterations * p.size()); + float cpb3 = (double)cy3 / (iterations * p.size()); + float cpbtotal = (double)total / (iterations * p.size()); + printf("\"%s\"\t%f\t%f\t%f\t%f\t%f\n", basename(filename), cpb0,cpb1,cpb2,cpb3,cpbtotal); + } else { printf("number of bytes %ld number of structural chars %u ratio %.3f\n", p.size(), pj.n_structural_indexes, (double)pj.n_structural_indexes / p.size()); @@ -218,9 +235,10 @@ int main(int argc, char *argv[]) { printf(" all stages: %.2f cycles per input byte.\n", (double)total / (iterations * p.size())); + } #endif double min_result = *min_element(res.begin(), res.end()); - cout << "Min: " << min_result << " bytes read: " << p.size() + if(!justdata) cout << "Min: " << min_result << " bytes read: " << p.size() << " Gigabytes/second: " << (p.size()) / (min_result * 1000000000.0) << "\n"; if(jsonoutput) { diff --git a/benchmark/parseandstatcompetition.cpp b/benchmark/parseandstatcompetition.cpp index 9477331b..bd40be4a 100644 --- a/benchmark/parseandstatcompetition.cpp +++ b/benchmark/parseandstatcompetition.cpp @@ -226,9 +226,14 @@ stat_t rapid_computestats(const std::string_view &p) { int main(int argc, char *argv[]) { bool verbose = false; + bool justdata = false; + int c; - while ((c = getopt(argc, argv, "v")) != -1) + while ((c = getopt(argc, argv, "vt")) != -1) switch (c) { + case 't': + justdata = true; + break; case 'v': verbose = true; break; @@ -284,10 +289,10 @@ int main(int argc, char *argv[]) { int repeat = 10; int volume = p.size(); BEST_TIME("simdjson ", simdjson_computestats(p).valid, true, , repeat, - volume, true); - BEST_TIME("rapid ", rapid_computestats(p).valid, true, , repeat, volume, - true); + volume, !justdata); + BEST_TIME("RapidJSON ", 
rapid_computestats(p).valid, true, , repeat, volume, + !justdata); BEST_TIME("sasjon ", sasjon_computestats(p).valid, true, , repeat, volume, - true); + !justdata); free((void*)p.data()); } diff --git a/benchmark/parsingcompetition.cpp b/benchmark/parsingcompetition.cpp index 7f8e01e7..eca35424 100644 --- a/benchmark/parsingcompetition.cpp +++ b/benchmark/parsingcompetition.cpp @@ -36,10 +36,14 @@ bool fastjson_parse(const char *input) { int main(int argc, char *argv[]) { bool verbose = false; + bool justdata = false; bool all = false; int c; - while ((c = getopt(argc, argv, "va")) != -1) + while ((c = getopt(argc, argv, "vat")) != -1) switch (c) { + case 't': + justdata = true; + break; case 'v': verbose = true; break; @@ -87,11 +91,11 @@ int main(int argc, char *argv[]) { } int repeat = 10; int volume = p.size(); - BEST_TIME("simdjson (dynamic mem) ", build_parsed_json(p).isValid(), true, , - repeat, volume, true); - - BEST_TIME("simdjson (static alloc) ", json_parse(p, pj), true, , repeat, - volume, true); + if(!justdata) BEST_TIME("simdjson (dynamic mem) ", build_parsed_json(p).isValid(), true, , + repeat, volume, !justdata); + // (static alloc) + BEST_TIME("simdjson ", json_parse(p, pj), true, , repeat, + volume, !justdata); rapidjson::Document d; @@ -99,13 +103,13 @@ int main(int argc, char *argv[]) { memcpy(buffer, p.data(), p.size()); buffer[p.size()] = '\0'; - BEST_TIME( + if(!justdata) BEST_TIME( "RapidJSON", d.Parse((const char *)buffer).HasParseError(), - false, memcpy(buffer, p.data(), p.size()), repeat, volume, true); - BEST_TIME("RapidJSON (insitu)", + false, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata); + if(!justdata) BEST_TIME("RapidJSON (insitu)", d.ParseInsitu(buffer).HasParseError(), - false, memcpy(buffer, p.data(), p.size()), repeat, volume, true); + false, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata); typedef rapidjson::GenericDocument<rapidjson::UTF8<>, rapidjson::MemoryPoolAllocator<>, rapidjson::MemoryPoolAllocator<>> 
RapidDocumentType; @@ -121,60 +125,60 @@ int main(int argc, char *argv[]) { RapidDocumentType preallocedd(&valueAllocator, rapidvaallocsize, &parseAllocator); - BEST_TIME( + if(!justdata) BEST_TIME( "RapidJSON (static alloc)", preallocedd.Parse((const char *)buffer) .HasParseError(), - false, memcpy(buffer, p.data(), p.size()), repeat, volume, true); - BEST_TIME("RapidJSON (static alloc, insitu)", + false, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata); + // (static alloc, insitu) + BEST_TIME("RapidJSON", preallocedd.ParseInsitu(buffer) .HasParseError(), - false, memcpy(buffer, p.data(), p.size()), repeat, volume, true); + false, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata); assert(valueAllocator.Size() <= rapidvaallocsize); assert(parseAllocator.Size() <= rapidallocsize); } free(rapidvalueBuffer); free(rapidparseBuffer); - - BEST_TIME("sajson (dynamic mem, insitu)", + if(!justdata) BEST_TIME("sajson (dynamic mem, insitu)", sajson::parse(sajson::dynamic_allocation(), sajson::mutable_string_view(p.size(), buffer)) .is_valid(), - true, memcpy(buffer, p.data(), p.size()), repeat, volume, true); + true, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata); size_t astbuffersize = p.size(); size_t *ast_buffer = (size_t *)malloc(astbuffersize * sizeof(size_t)); - - BEST_TIME("sajson (static alloc, insitu)", + // (static alloc, insitu) + BEST_TIME("sajson", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.size(), buffer)) .is_valid(), - true, memcpy(buffer, p.data(), p.size()), repeat, volume, true); + true, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata); std::string json11err; if (all) BEST_TIME("dropbox (json11) ", ((json11::Json::parse(buffer, json11err).is_null()) || (!json11err.empty())), - false, memcpy(buffer, p.data(), p.size()), repeat, volume, true); + false, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata); if (all) BEST_TIME("fastjson ", 
fastjson_parse(buffer), true, - memcpy(buffer, p.data(), p.size()), repeat, volume, true); + memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata); JsonValue value; JsonAllocator allocator; char *endptr; if (all) BEST_TIME("gason ", jsonParse(buffer, &endptr, &value, allocator), JSON_OK, - memcpy(buffer, p.data(), p.size()), repeat, volume, true); + memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata); void *state; if (all) BEST_TIME("ultrajson ", (UJDecode(buffer, p.size(), NULL, &state) == NULL), false, - memcpy(buffer, p.data(), p.size()), repeat, volume, true); - BEST_TIME("memcpy ", + memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata); + if(!justdata) BEST_TIME("memcpy ", (memcpy(buffer, p.data(), p.size()) == buffer), true, , repeat, - volume, true); + volume, !justdata); free((void *)p.data()); free(ast_buffer); free(buffer); diff --git a/scripts/bar.gnuplot b/scripts/bar.gnuplot new file mode 100644 index 00000000..132a461e --- /dev/null +++ b/scripts/bar.gnuplot @@ -0,0 +1,27 @@ +set term pdfcairo fontscale 1 +set output name +set boxwidth 0.8 +set style fill solid +set ylabel "cycles per input byte" + + +set style line 80 lt rgb "#000000" + +# Line style for grid +set style line 81 lt 0 # dashed +set style line 81 lt rgb "#808080" # grey + +set grid back linestyle 81 +set border 3 back linestyle 80 # Remove border on top and right. These + # borders are useless and make it harder + # to see plotted lines near the border. + # Also, put it in grey; no need for so much emphasis on a border. 
+set xtics nomirror +set ytics nomirror + +set yrange [0:] +set format y "%0.1f"; + +set style line 1 lt rgb "#A0A0A0" lw 1 pt 1 ps 1 + +plot filename using 0:2:xtic(1) with boxes notitle ls 1, '' using 0:(1):(sprintf("%.1f", $2)) with labels notitle \ No newline at end of file diff --git a/scripts/plotparse.sh b/scripts/plotparse.sh new file mode 100755 index 00000000..759127eb --- /dev/null +++ b/scripts/plotparse.sh @@ -0,0 +1,77 @@ +#!/bin/bash + +SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" +cd $SCRIPTPATH/.. +plotdirectory=$SCRIPTPATH/plots/$(uname -n) +mkdir -p $plotdirectory + +os=$(uname) + +if [ "$os" = "Linux" ]; then + echo "You are using linux." + echo "We are going to just parse using simdjson, and collect perf stats." + + make parse + for i in $SCRIPTPATH/../jsonexamples/*.json; do + [ -f "$i" ] || break + echo $i + shortname=$(basename $SCRIPTPATH/$i"justparse.table") + corename=$(basename ${shortname%.*})".pdf" + $SCRIPTPATH/../parse -t $i > $plotdirectory/$shortname + gnuplot -e "filename='$plotdirectory/$shortname';name='$plotdirectory/$corename'" $SCRIPTPATH/bar.gnuplot + rm $plotdirectory/$shortname + echo + done +fi + +make parsingcompetition +echo "parsing (with competition)" +echo +for i in $SCRIPTPATH/../jsonexamples/*.json; do + [ -f "$i" ] || break + echo $i + shortname=$(basename $SCRIPTPATH/$i.table) + corename=$(basename ${shortname%.*})".pdf" + $SCRIPTPATH/../parsingcompetition -t $i > $plotdirectory/$shortname + sort $plotdirectory/$shortname > $plotdirectory/$shortname.table.sorted + gnuplot -e "filename='$plotdirectory/$shortname.table.sorted';name='$plotdirectory/$corename'" $SCRIPTPATH/bar.gnuplot + rm $plotdirectory/$shortname + rm $plotdirectory/$shortname.table.sorted + echo +done + + +make parseandstatcompetition +echo "parsing and collecting basic stats on json documents as quickly as possible" +echo +for i in $SCRIPTPATH/../jsonexamples/*.json; do + [ -f "$i" ] || break + echo $i + shortname=$(basename 
$SCRIPTPATH/$i"parseandstat.table") + corename=$(basename ${shortname%.*})".pdf" + $SCRIPTPATH/../parseandstatcompetition -t $i> $plotdirectory/$shortname + sort $plotdirectory/$shortname > $plotdirectory/$shortname.table.sorted + gnuplot -e "filename='$plotdirectory/$shortname.table.sorted';name='$plotdirectory/$corename'" $SCRIPTPATH/bar.gnuplot + rm $plotdirectory/$shortname + rm $plotdirectory/$shortname.table.sorted + echo +done + +make distinctuseridcompetition +echo "parsing and finding all user.id" +echo + +for i in $SCRIPTPATH/../jsonexamples/twitter.json; do + [ -f "$i" ] || break + echo $i + shortname=$(basename $SCRIPTPATH/$i"distinctuserid.table") + corename=$(basename ${shortname%.*})".pdf" + $SCRIPTPATH/../distinctuseridcompetition -t jsonexamples/twitter.json> $plotdirectory/$shortname + sort $plotdirectory/$shortname > $plotdirectory/$shortname.table.sorted + gnuplot -e "filename='$plotdirectory/$shortname.table.sorted';name='$plotdirectory/$corename'" $SCRIPTPATH/bar.gnuplot + rm $plotdirectory/$shortname + rm $plotdirectory/$shortname.table.sorted + echo +done + +echo "see results in "$plotdirectory \ No newline at end of file