Preparing new version with plotting.

This commit is contained in:
Daniel Lemire 2018-12-18 22:18:23 -05:00
parent 0a109508de
commit 14b55ab77f
8 changed files with 194 additions and 56 deletions

View File

@ -84,6 +84,7 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
} \
if (verbose) \
printf("%-40s\t: ", name); \
else printf("\"%s\"\t", name); \
fflush(NULL); \
uint64_t cycles_start, cycles_final, cycles_diff; \
uint64_t min_diff = (uint64_t)-1; \
@ -105,14 +106,10 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
uint64_t S = size; \
float cycle_per_op = (min_diff) / (double)S; \
float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \
if (verbose) \
printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
if (verbose) \
printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
if (verbose) \
if (verbose) printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
if (verbose) printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
if (!verbose) printf(" %.3f ", cycle_per_op); \
printf("\n"); \
if (!verbose) \
printf(" %.3f ", cycle_per_op); \
fflush(NULL); \
} while (0)

View File

@ -205,9 +205,14 @@ std::vector<int64_t> rapid_computestats(const std::string_view &p) {
int main(int argc, char *argv[]) {
bool verbose = false;
bool justdata = false;
int c;
while ((c = getopt(argc, argv, "v")) != -1)
while ((c = getopt(argc, argv, "vt")) != -1)
switch (c) {
case 't':
justdata = true;
break;
case 'v':
verbose = true;
break;
@ -265,11 +270,11 @@ int main(int argc, char *argv[]) {
int repeat = 10;
int volume = p.size();
BEST_TIME("simdjson ", simdjson_computestats(p).size(), size, , repeat,
volume, true);
volume, !justdata);
BEST_TIME("rapid ", rapid_computestats(p).size(), size, , repeat, volume,
true);
!justdata);
BEST_TIME("sasjon ", sasjon_computestats(p).size(), size, , repeat, volume,
true);
!justdata);
free((void*)p.data());
}

View File

@ -47,9 +47,14 @@ std::string rapidstringme(char *json) {
int main(int argc, char *argv[]) {
int c;
bool verbose = false;
while ((c = getopt (argc, argv, "v")) != -1)
bool justdata = false;
while ((c = getopt (argc, argv, "vt")) != -1)
switch (c)
{
case 't':
justdata = true;
break;
case 'v':
verbose = true;
break;
@ -89,9 +94,9 @@ int main(int argc, char *argv[]) {
if (verbose)
std::cout << "input length is " << p.size() << " stringified length is "
<< strlength << std::endl;
BEST_TIME_NOCHECK("despacing with RapidJSON", rapidstringme((char *)p.data()), , repeat, volume, true);
BEST_TIME_NOCHECK("despacing with RapidJSON", rapidstringme((char *)p.data()), , repeat, volume, !justdata);
BEST_TIME_NOCHECK("despacing with RapidJSON Insitu", rapidstringmeInsitu((char *)buffer),
memcpy(buffer, p.data(), p.size()), repeat, volume, true);
memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
memcpy(buffer, p.data(), p.size());
size_t outlength =
@ -101,7 +106,7 @@ int main(int argc, char *argv[]) {
uint8_t *cbuffer = (uint8_t *)buffer;
BEST_TIME("jsonminify", jsonminify(cbuffer, p.size(), cbuffer), outlength,
memcpy(buffer, p.data(), p.size()), repeat, volume, true);
memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
printf("minisize = %zu, original size = %zu (minified down to %.2f percent of original) \n", outlength, p.size(), outlength * 100.0 / p.size());
/***
@ -109,7 +114,7 @@ int main(int argc, char *argv[]) {
***/
rapidjson::Document d;
BEST_TIME("RapidJSON Insitu orig", d.ParseInsitu(buffer).HasParseError(), false,
memcpy(buffer, p.data(), p.size()), repeat, volume, true);
memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
char *minibuffer = allocate_padded_buffer(p.size() + 1);
size_t minisize = jsonminify((const uint8_t *)p.data(), p.size(), (uint8_t*) minibuffer);
@ -117,15 +122,15 @@ int main(int argc, char *argv[]) {
BEST_TIME("RapidJSON Insitu despaced", d.ParseInsitu(buffer).HasParseError(), false,
memcpy(buffer, minibuffer, p.size()),
repeat, volume, true);
repeat, volume, !justdata);
size_t astbuffersize = p.size() * 2;
size_t * ast_buffer = (size_t *) malloc(astbuffersize * sizeof(size_t));
BEST_TIME("sajson orig", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
BEST_TIME("sajson orig", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
BEST_TIME("sajson despaced", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(minisize, buffer)).is_valid(), true, memcpy(buffer, minibuffer, p.size()), repeat, volume, true);
BEST_TIME("sajson despaced", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(minisize, buffer)).is_valid(), true, memcpy(buffer, minibuffer, p.size()), repeat, volume, !justdata);
ParsedJson pj;
bool isallocok = pj.allocateCapacity(p.size(), 1024);
@ -133,7 +138,7 @@ int main(int argc, char *argv[]) {
printf("failed to allocate memory\n");
return EXIT_FAILURE;
}
BEST_TIME("json_parse orig", json_parse((const u8*)buffer, p.size(), pj), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
BEST_TIME("json_parse orig", json_parse((const u8*)buffer, p.size(), pj), true, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
ParsedJson pj2;
bool isallocok2 = pj2.allocateCapacity(p.size(), 1024);
@ -142,7 +147,7 @@ int main(int argc, char *argv[]) {
return EXIT_FAILURE;
}
BEST_TIME("json_parse despaced", json_parse((const u8*)buffer, minisize, pj2), true, memcpy(buffer, minibuffer, p.size()), repeat, volume, true);
BEST_TIME("json_parse despaced", json_parse((const u8*)buffer, minisize, pj2), true, memcpy(buffer, minibuffer, p.size()), repeat, volume, !justdata);
free((void*)p.data());
free(buffer);
free(ast_buffer);

View File

@ -23,7 +23,9 @@
#include "linux-perf-events.h"
#ifdef __linux__
#include <libgen.h>
#endif
//#define DEBUG
#include "simdjson/common_defs.h"
#include "simdjson/jsonparser.h"
@ -39,12 +41,16 @@ int main(int argc, char *argv[]) {
bool dump = false;
bool jsonoutput = false;
bool forceoneiteration = false;
bool justdata = false;
int c;
while ((c = getopt (argc, argv, "1vd")) != -1)
while ((c = getopt (argc, argv, "1vdt")) != -1)
switch (c)
{
case 't':
justdata = true;
break;
case 'v':
verbose = true;
break;
@ -87,6 +93,9 @@ int main(int argc, char *argv[]) {
#if !defined(__linux__)
#define SQUASH_COUNTERS
if(justdata) {
printf("justdata (-t) flag only works under linux.\n");
}
#endif
#ifndef SQUASH_COUNTERS
@ -185,6 +194,14 @@ int main(int argc, char *argv[]) {
return EXIT_FAILURE;
}
#ifndef SQUASH_COUNTERS
if(justdata) {
float cpb0 = (double)cy0 / (iterations * p.size());
float cpb1 = (double)cy1 / (iterations * p.size());
float cpb2 = (double)cy2 / (iterations * p.size());
float cpb3 = (double)cy3 / (iterations * p.size());
float cpbtotal = (double)total / (iterations * p.size());
printf("\"%s\"\t%f\t%f\t%f\t%f\t%f\n", basename(filename), cpb0,cpb1,cpb2,cpb3,cpbtotal);
} else {
printf("number of bytes %ld number of structural chars %u ratio %.3f\n",
p.size(), pj.n_structural_indexes,
(double)pj.n_structural_indexes / p.size());
@ -218,9 +235,10 @@ int main(int argc, char *argv[]) {
printf(" all stages: %.2f cycles per input byte.\n",
(double)total / (iterations * p.size()));
}
#endif
double min_result = *min_element(res.begin(), res.end());
cout << "Min: " << min_result << " bytes read: " << p.size()
if(!justdata) cout << "Min: " << min_result << " bytes read: " << p.size()
<< " Gigabytes/second: " << (p.size()) / (min_result * 1000000000.0)
<< "\n";
if(jsonoutput) {

View File

@ -226,9 +226,14 @@ stat_t rapid_computestats(const std::string_view &p) {
int main(int argc, char *argv[]) {
bool verbose = false;
bool justdata = false;
int c;
while ((c = getopt(argc, argv, "v")) != -1)
while ((c = getopt(argc, argv, "vt")) != -1)
switch (c) {
case 't':
justdata = true;
break;
case 'v':
verbose = true;
break;
@ -284,10 +289,10 @@ int main(int argc, char *argv[]) {
int repeat = 10;
int volume = p.size();
BEST_TIME("simdjson ", simdjson_computestats(p).valid, true, , repeat,
volume, true);
BEST_TIME("rapid ", rapid_computestats(p).valid, true, , repeat, volume,
true);
volume, !justdata);
BEST_TIME("RapidJSON ", rapid_computestats(p).valid, true, , repeat, volume,
!justdata);
BEST_TIME("sasjon ", sasjon_computestats(p).valid, true, , repeat, volume,
true);
!justdata);
free((void*)p.data());
}

View File

@ -36,10 +36,14 @@ bool fastjson_parse(const char *input) {
int main(int argc, char *argv[]) {
bool verbose = false;
bool justdata = false;
bool all = false;
int c;
while ((c = getopt(argc, argv, "va")) != -1)
while ((c = getopt(argc, argv, "vat")) != -1)
switch (c) {
case 't':
justdata = true;
break;
case 'v':
verbose = true;
break;
@ -87,11 +91,11 @@ int main(int argc, char *argv[]) {
}
int repeat = 10;
int volume = p.size();
BEST_TIME("simdjson (dynamic mem) ", build_parsed_json(p).isValid(), true, ,
repeat, volume, true);
BEST_TIME("simdjson (static alloc) ", json_parse(p, pj), true, , repeat,
volume, true);
if(!justdata) BEST_TIME("simdjson (dynamic mem) ", build_parsed_json(p).isValid(), true, ,
repeat, volume, !justdata);
// (static alloc)
BEST_TIME("simdjson ", json_parse(p, pj), true, , repeat,
volume, !justdata);
rapidjson::Document d;
@ -99,13 +103,13 @@ int main(int argc, char *argv[]) {
memcpy(buffer, p.data(), p.size());
buffer[p.size()] = '\0';
BEST_TIME(
if(!justdata) BEST_TIME(
"RapidJSON",
d.Parse<kParseValidateEncodingFlag>((const char *)buffer).HasParseError(),
false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
BEST_TIME("RapidJSON (insitu)",
false, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
if(!justdata) BEST_TIME("RapidJSON (insitu)",
d.ParseInsitu<kParseValidateEncodingFlag>(buffer).HasParseError(),
false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
false, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
typedef rapidjson::GenericDocument<UTF8<>, rapidjson::MemoryPoolAllocator<>,
rapidjson::MemoryPoolAllocator<>>
RapidDocumentType;
@ -121,60 +125,60 @@ int main(int argc, char *argv[]) {
RapidDocumentType preallocedd(&valueAllocator, rapidvaallocsize,
&parseAllocator);
BEST_TIME(
if(!justdata) BEST_TIME(
"RapidJSON (static alloc)",
preallocedd.Parse<kParseValidateEncodingFlag>((const char *)buffer)
.HasParseError(),
false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
BEST_TIME("RapidJSON (static alloc, insitu)",
false, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
// (static alloc, insitu)
BEST_TIME("RapidJSON",
preallocedd.ParseInsitu<kParseValidateEncodingFlag>(buffer)
.HasParseError(),
false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
false, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
assert(valueAllocator.Size() <= rapidvaallocsize);
assert(parseAllocator.Size() <= rapidallocsize);
}
free(rapidvalueBuffer);
free(rapidparseBuffer);
BEST_TIME("sajson (dynamic mem, insitu)",
if(!justdata) BEST_TIME("sajson (dynamic mem, insitu)",
sajson::parse(sajson::dynamic_allocation(),
sajson::mutable_string_view(p.size(), buffer))
.is_valid(),
true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
true, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
size_t astbuffersize = p.size();
size_t *ast_buffer = (size_t *)malloc(astbuffersize * sizeof(size_t));
BEST_TIME("sajson (static alloc, insitu)",
// (static alloc, insitu)
BEST_TIME("sajson",
sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize),
sajson::mutable_string_view(p.size(), buffer))
.is_valid(),
true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
true, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
std::string json11err;
if (all)
BEST_TIME("dropbox (json11) ",
((json11::Json::parse(buffer, json11err).is_null()) ||
(!json11err.empty())),
false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
false, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
if (all)
BEST_TIME("fastjson ", fastjson_parse(buffer), true,
memcpy(buffer, p.data(), p.size()), repeat, volume, true);
memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
JsonValue value;
JsonAllocator allocator;
char *endptr;
if (all)
BEST_TIME("gason ",
jsonParse(buffer, &endptr, &value, allocator), JSON_OK,
memcpy(buffer, p.data(), p.size()), repeat, volume, true);
memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
void *state;
if (all)
BEST_TIME("ultrajson ",
(UJDecode(buffer, p.size(), NULL, &state) == NULL), false,
memcpy(buffer, p.data(), p.size()), repeat, volume, true);
BEST_TIME("memcpy ",
memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
if(!justdata) BEST_TIME("memcpy ",
(memcpy(buffer, p.data(), p.size()) == buffer), true, , repeat,
volume, true);
volume, !justdata);
free((void *)p.data());
free(ast_buffer);
free(buffer);

27
scripts/bar.gnuplot Normal file
View File

@ -0,0 +1,27 @@
# Draws a bar chart from a data file and writes it out as a PDF.
# Two variables must be defined by the caller (e.g. with `gnuplot -e`):
#   filename - data file; column 1 is used as the bar label, column 2 as the bar height
#   name     - path of the PDF file to write
set term pdfcairo fontscale 1
set output name
set boxwidth 0.8
set style fill solid
set ylabel "cycles per input byte"
# Line style for the border (black)
set style line 80 lt rgb "#000000"
# Line style for grid
set style line 81 lt 0 # dashed
set style line 81 lt rgb "#808080" # grey
set grid back linestyle 81
set border 3 back linestyle 80 # Remove border on top and right. These
# borders are useless and make it harder
# to see plotted lines near the border.
# The remaining border uses line style 80 defined above.
set xtics nomirror
set ytics nomirror
# Start the y axis at zero; no upper bound is given here.
set yrange [0:]
set format y "%0.1f";
# Line style for the bars (light grey)
set style line 1 lt rgb "#A0A0A0" lw 1 pt 1 ps 1
# First clause: one box per data row (x = row number, height = column 2), with column 1 as the x tick label.
# Second clause: reads the same file again ('') and prints column 2, formatted to one decimal, as a text label at y = 1.
plot filename using 0:2:xtic(1) with boxes notitle ls 1, '' using 0:(1):(sprintf("%.1f", $2)) with labels notitle

77
scripts/plotparse.sh Executable file
View File

@ -0,0 +1,77 @@
#!/bin/bash
# Builds the benchmark programs and renders their results as bar charts.
#
# Each benchmark is run with -t (data-only output) on the JSON example files,
# and the resulting table is plotted with gnuplot through bar.gnuplot, which
# sits next to this script. PDFs are written to <script dir>/plots/<hostname>/.

SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
# The make targets and the benchmark binaries are resolved from the parent
# directory; without it nothing below can work, so give up early.
cd "$SCRIPTPATH/.." || exit 1
plotdirectory="$SCRIPTPATH/plots/$(uname -n)"
mkdir -p "$plotdirectory" || exit 1

# plot_benchmark TARGET TAG SORTMODE FILE...
#   TARGET    make target; also the name of the binary that gets run
#   TAG       text inserted between the input file name and ".table"/".pdf"
#   SORTMODE  "sort" to sort the table lines before plotting; anything else
#             plots the table as produced
#   FILE...   JSON inputs; one PDF is produced per input
# Returns non-zero without plotting when the build fails, so that one broken
# target does not stop the remaining benchmarks from being plotted.
plot_benchmark() {
  local target=$1 tag=$2 sortmode=$3
  shift 3
  if ! make "$target"; then
    echo "make $target failed, skipping its plots" >&2
    return 1
  fi
  local json table data pdf
  for json in "$@"; do
    # An unmatched glob is passed through literally; stop in that case.
    [ -f "$json" ] || break
    echo "$json"
    table="$plotdirectory/$(basename "$json")$tag.table"
    # Same name as the table, with ".table" replaced by ".pdf".
    pdf="${table%.*}.pdf"
    "$SCRIPTPATH/../$target" -t "$json" > "$table"
    data="$table"
    if [ "$sortmode" = "sort" ]; then
      data="$table.sorted"
      sort "$table" > "$data"
    fi
    gnuplot -e "filename='$data';name='$pdf'" "$SCRIPTPATH/bar.gnuplot"
    # The tables are only intermediate files; keep just the PDF.
    rm "$table"
    [ "$data" = "$table" ] || rm "$data"
    echo
  done
}

os=$(uname)
if [ "$os" = "Linux" ]; then
  echo "You are using linux."
  echo "We are going to just parse using simdjson, and collect perf stats."
  plot_benchmark parse justparse nosort "$SCRIPTPATH"/../jsonexamples/*.json
fi

echo "parsing (with competition)"
echo
plot_benchmark parsingcompetition "" sort "$SCRIPTPATH"/../jsonexamples/*.json

echo "parsing and collecting basic stats on json documents as quickly as possible"
echo
plot_benchmark parseandstatcompetition parseandstat sort "$SCRIPTPATH"/../jsonexamples/*.json

echo "parsing and finding all user.id"
echo
plot_benchmark distinctuseridcompetition distinctuserid sort "$SCRIPTPATH/../jsonexamples/twitter.json"

echo "see results in $plotdirectory"