Displaying the number of documents parsed per second (#652)
* Some users are interested, as a metric, in the number of documents parsed per second. Obviously, this means reusing the same parser again and again.
* Adding a sentence.
* This updates the parsingcompetition benchmark so that it displays the number of documents parsed per second.
This commit is contained in:
parent
56bc8a778d
commit
21dce6cca9
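
The metric is easy to reproduce outside the benchmark harness. The following is a minimal sketch of the idea, not part of the commit: it reuses a single dom::parser (mirroring the run_loop() added below) and divides the iteration count by the elapsed wall-clock time. The file name and iteration count are placeholders, and error handling is reduced to an early return.

    #include <chrono>
    #include <cstdio>
    #include <fstream>
    #include <sstream>
    #include <string>
    #include "simdjson.h"

    int main() {
      std::ifstream file("twitter.json"); // placeholder input document
      std::stringstream ss;
      ss << file.rdbuf();
      const std::string json = ss.str();

      simdjson::dom::parser parser; // reused for every iteration
      const size_t iterations = 1000;
      auto start = std::chrono::steady_clock::now();
      for (size_t i = 0; i < iterations; i++) {
        // same call pattern as run_loop() in this commit
        auto result = parser.parse((const uint8_t *)json.data(), json.size());
        if (result.error()) { return 1; }
      }
      std::chrono::duration<double> elapsed =
          std::chrono::steady_clock::now() - start;
      printf("%.1f documents parsed per second\n", iterations / elapsed.count());
      return 0;
    }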
README.md
@@ -74,7 +74,8 @@ Performance results
 
 The simdjson library uses three-quarters less instructions than state-of-the-art parser RapidJSON and
 fifty percent less than sajson. To our knowledge, simdjson is the first fully-validating JSON parser
-to run at gigabytes per second on commodity processors.
+to run at gigabytes per second on commodity processors. It can parse millions of JSON documents
+per second on a single core.
 
 The following figure represents parsing speed in GB/s for parsing various files
 on an Intel Skylake processor (3.4 GHz) using the GNU GCC 9 compiler (with the -O3 flag).
benchmark/benchmark.h
@@ -130,12 +130,16 @@ double diff(timespec start, timespec end) {
     if (verbose) \
       printf(" %7.3f %s per input byte (best) ", cycle_per_op, unitname); \
     if (verbose) \
-      printf(" %7.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
+      printf(" %7.3f %s (avg) ", avg_cycle_per_op, unitname); \
     if (verbose) \
-      printf(" %7.3f GB/s (error margin: %.3f GB/s)", max_gb_per_s, \
+      printf(" %7.3f GB/s (error margin: %5.3f GB/s)", max_gb_per_s, \
             -avg_gb_per_s + max_gb_per_s); \
+    if (verbose) \
+      printf(" %13.0f documents/s (best)", 1.0/min_sumclockdiff); \
+    if (verbose) \
+      printf(" %13.0f documents/s (avg)", 1.0/(sumclockdiff/repeat)); \
     if (!verbose) \
-      printf(" %20.3f %20.3f %20.3f %20.3f ", cycle_per_op, \
+      printf(" %20.3f %20.3f %20.3f %20.3f", cycle_per_op, \
             avg_cycle_per_op - cycle_per_op, max_gb_per_s, \
             -avg_gb_per_s + max_gb_per_s); \
     printf("\n"); \
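
In the BEST_TIME macro, min_sumclockdiff is the smallest wall-clock time (in seconds) observed for a single evaluation across the repeat runs, and sumclockdiff is the total over all runs, so the two new lines report best-case and average throughput as reciprocals of per-document time. A small illustration of the arithmetic with made-up values:

    #include <cstdio>

    int main() {
      // Hypothetical measurements: the best single parse took 2 microseconds,
      // and 1000 repeats took 2.5 milliseconds in total.
      double min_sumclockdiff = 2.0e-6;
      double sumclockdiff = 2.5e-3;
      int repeat = 1000;
      printf("%13.0f documents/s (best)\n", 1.0 / min_sumclockdiff);       // 500000
      printf("%13.0f documents/s (avg)\n", 1.0 / (sumclockdiff / repeat)); // 400000
      return 0;
    }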
benchmark/benchmarker.h
@@ -258,6 +258,8 @@ struct benchmarker {
   event_aggregate stage2;
   // Speed and event summary for allocation
   event_aggregate allocate_stage;
+  // Speed and event summary for the repeatedly-parsing mode
+  event_aggregate loop;
 
   benchmarker(const char *_filename, event_collector& _collector)
     : filename(_filename), collector(_collector), stats(NULL) {
@@ -346,10 +348,30 @@ struct benchmarker {
     }
   }
 
+  void run_loop(size_t iterations) {
+    dom::parser parser;
+    auto firstresult = parser.parse((const uint8_t *)json.data(), json.size());
+    if (firstresult.error()) {
+      exit_error(string("Failed to parse ") + filename + string(":") + error_message(firstresult.error()));
+    }
+
+    collector.start();
+    // some users want something closer to "number of documents per second"
+    for(size_t i = 0; i < iterations; i++) {
+      auto result = parser.parse((const uint8_t *)json.data(), json.size());
+      if (result.error()) {
+        exit_error(string("Failed to parse ") + filename + string(":") + error_message(result.error()));
+      }
+    }
+    event_count all_loop_count = collector.end();
+    loop << all_loop_count;
+  }
+
   really_inline void run_iterations(size_t iterations, bool stage1_only, bool hotbuffers=false) {
     for (size_t i = 0; i<iterations; i++) {
       run_iteration(stage1_only, hotbuffers);
     }
+    run_loop(iterations);
   }
 
   template<typename T>
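
Note the design of run_loop(): the first parse happens outside the timed region, so parser buffer allocation and first-touch costs are excluded, and only steady-state re-parsing lands in the loop aggregate. With run_iterations() now calling it, a typical driver (a hypothetical fragment assuming the benchmark's own headers; parse.cpp does the real equivalent) looks like:

    event_collector collector;
    benchmarker bench("twitter.json", collector); // constructor shown above
    bench.run_iterations(200, /*stage1_only=*/false);
    bench.print(/*tabbed_output=*/false, /*iterations=*/200);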
@@ -397,7 +419,7 @@ struct benchmarker {
     }
   }
 
-  void print(bool tabbed_output) const {
+  void print(bool tabbed_output, size_t iterations) const {
     if (tabbed_output) {
       char* filename_copy = (char*)malloc(strlen(filename)+1);
       strcpy(filename_copy, filename);
@@ -458,9 +480,9 @@ struct benchmarker {
       printf("|- Allocation\n");
       print_aggregate("|    ", allocate_stage.best);
     }
-    printf("|- Stage 1\n");
+    printf("|- Stage 1\n");
     print_aggregate("|    ", stage1.best);
-    printf("|- Stage 2\n");
+    printf("|- Stage 2\n");
     print_aggregate("|    ", stage2.best);
     if (collector.has_events()) {
       double freq1 = (stage1.best.cycles() / stage1.best.elapsed_sec()) / 1000000000.0;
@@ -475,6 +497,7 @@ struct benchmarker {
                freqmin, freqmax, freqall);
       }
     }
+    printf("\n%.1f documents parsed per second\n", iterations/loop.best.elapsed_sec());
     }
   }
 };
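
Here loop.best is the fastest of the collected whole-loop measurements, so iterations/loop.best.elapsed_sec() reports a best-case throughput. For example, 200 iterations completing in 100 microseconds would print as 2000000.0 documents parsed per second.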
benchmark/parse.cpp
@@ -212,7 +212,7 @@ int main(int argc, char *argv[]) {
   if (!options.verbose) { progress.erase(); }
 
   for (size_t i=0; i<options.files.size(); i++) {
-    benchmarkers[i]->print(options.tabbed_output);
+    benchmarkers[i]->print(options.tabbed_output, options.iterations);
     delete benchmarkers[i];
   }
 
benchmark/parsingcompetition.cpp
@@ -65,7 +65,7 @@ bool fastjson_parse(const char *input) {
 // end of fastjson stuff
 #endif
 
-size_t sum_line_lengths(char * data, size_t length) {
+never_inline size_t sum_line_lengths(char * data, size_t length) {
   std::stringstream is;
   is.rdbuf()->pubsetbuf(data, length);
   std::string line;
@@ -124,19 +124,25 @@ bool bench(const char *filename, bool verbose, bool just_data, int repeat_multip
 #ifndef ALLPARSER
   if (!just_data)
 #endif
+  {
+    memcpy(buffer, p.data(), p.size());
     BEST_TIME("RapidJSON  ",
               d.Parse<kParseValidateEncodingFlag>((const char *)buffer)
                   .HasParseError(),
-              false, memcpy(buffer, p.data(), p.size()), repeat, volume,
+              false, , repeat, volume,
               !just_data);
+  }
 #ifndef ALLPARSER
   if (!just_data)
 #endif
+  {
+    memcpy(buffer, p.data(), p.size());
     BEST_TIME("RapidJSON (accurate number parsing)  ",
               d.Parse<kParseValidateEncodingFlag|kParseFullPrecisionFlag>((const char *)buffer)
                   .HasParseError(),
-              false, memcpy(buffer, p.data(), p.size()), repeat, volume,
+              false, , repeat, volume,
               !just_data);
+  }
   BEST_TIME("RapidJSON (insitu)",
             d.ParseInsitu<kParseValidateEncodingFlag>(buffer).HasParseError(),
             false,
@@ -167,10 +173,10 @@ bool bench(const char *filename, bool verbose, bool just_data, int repeat_multip
               .is_valid(),
       true, memcpy(buffer, p.data(), p.size()), repeat, volume, !just_data);
 
-
+  memcpy(buffer, p.data(), p.size());
   size_t expected = json::parse(p.data(), p.data() + p.size()).size();
   BEST_TIME("nlohmann-json", json::parse(buffer, buffer + p.size()).size(),
-            expected, memcpy(buffer, p.data(), p.size()), repeat, volume,
+            expected, , repeat, volume,
             !just_data);
 
 #ifdef ALLPARSER
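
A note on the parsingcompetition.cpp changes: for parsers that leave the input buffer untouched (RapidJSON without in-situ mode, nlohmann-json), the buffer copy is hoisted out of the timed region and the BEST_TIME pre-operation argument is left empty, so the copy no longer inflates the measured times; the in-situ RapidJSON run, which mutates the buffer, keeps its per-iteration copy. A tiny illustration of the empty-argument pattern, using a hypothetical macro and parse() function:

    // Hypothetical macro: `pre` runs before each timed evaluation of `expr`.
    #define TIME_IT(pre, expr) do { pre; (void)(expr); } while (0)

    // Empty pre-operation: nothing is re-executed per iteration.
    TIME_IT(, parse(buffer));
    // Per-iteration pre-operation: refresh the buffer before every parse.
    TIME_IT(memcpy(buffer, p.data(), p.size()), parse(buffer));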