Adding a "get_corpus" benchmark. (#456)

* Adding a "get_corpus" benchmark.

* Improving portability.
This commit is contained in:
Daniel Lemire 2020-01-20 17:27:25 -05:00 committed by GitHub
parent 80b4dd2e8a
commit aea79912ec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 59 additions and 0 deletions

View File

@ -160,6 +160,10 @@ $(JSON_INCLUDE) $(SAJSON_INCLUDE) $(RAPIDJSON_INCLUDE) $(JSON11_INCLUDE) $(FASTJ
# Build the `parse` benchmark executable. It is rebuilt whenever its source,
# the benchmark helper headers, $(HEADERS) or $(LIBFILES) change.
parse: benchmark/parse.cpp benchmark/json_parser.h benchmark/event_counter.h benchmark/benchmarker.h $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parse $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
# Build the `get_corpus_benchmark` executable from its single source file,
# compiled together with $(LIBFILES); it depends only on that source,
# $(HEADERS) and $(LIBFILES).
get_corpus_benchmark: benchmark/get_corpus_benchmark.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o get_corpus_benchmark $(LIBFILES) benchmark/get_corpus_benchmark.cpp $(LIBFLAGS)
# Build the `parse_stream` benchmark executable. It has the same helper-header
# prerequisites as `parse`.
parse_stream: benchmark/parse_stream.cpp benchmark/json_parser.h benchmark/event_counter.h benchmark/benchmarker.h $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parse_stream $(LIBFILES) benchmark/parse_stream.cpp $(LIBFLAGS)

View File

@ -7,5 +7,6 @@ target_include_directories(${SIMDJSON_LIB_NAME}
# Register each benchmark through the project's add_cpp_benchmark() helper.
# NOTE(review): the helper is defined outside this fragment; presumably it
# creates one executable per name from <name>.cpp - confirm against its definition.
add_cpp_benchmark(parse)
add_cpp_benchmark(statisticalmodel)
add_cpp_benchmark(parse_stream)
add_cpp_benchmark(get_corpus_benchmark)
# perfdiff is declared directly with add_executable() rather than the helper.
add_executable(perfdiff perfdiff.cpp)

View File

@ -0,0 +1,54 @@
#include "simdjson/common_defs.h"
#include "simdjson/jsonioutil.h"
#include "simdjson/jsonparser.h"
#include <chrono>
#include <cstring>
#include <iostream>
/**
 * Times one call to simdjson::get_corpus() for the given file.
 *
 * The result of get_corpus() is swapped into `p`, so on return `p` holds the
 * newly loaded content. The swap (and the destruction of the temporary that
 * receives the previous content of `p`) happens between the two clock
 * readings and is therefore part of the measured interval.
 *
 * @param filename path handed to simdjson::get_corpus().
 * @param p        receives the loaded content; its size is used to compute
 *                 the throughput.
 * @return p.size() expressed in units of 1024^3 bytes, divided by the elapsed
 *         time in seconds as measured with std::chrono::steady_clock.
 */
never_inline
double bench(std::string filename, simdjson::padded_string& p) {
  using timer = std::chrono::steady_clock;
  const auto before = timer::now();
  // Keep load + swap in a single full-expression so the temporary dies
  // inside the timed region.
  simdjson::get_corpus(filename).swap(p);
  const auto after = timer::now();
  const std::chrono::duration<double> seconds = after - before;
  const double gigabytes = p.size() / (1024. * 1024 * 1024.);
  return gigabytes / seconds.count();
}
/**
 * Entry point: measures how fast simdjson::get_corpus() loads a file.
 *
 * Usage: <program> <jsonfile>
 *
 * The file is loaded once to learn its size, then loaded repeatedly through
 * bench() (5 times when it is larger than 1024^3 bytes, 50 times otherwise).
 * The file size and the average, minimum and maximum of the values returned
 * by bench() are printed to standard output.
 *
 * @return EXIT_SUCCESS when every load completed; EXIT_FAILURE when no file
 *         name was given or when any load threw a std::exception.
 */
int main(int argc, char *argv[]) {
  int optind = 1;
  if (optind >= argc) {
    std::cerr << "Reads document as far as possible. " << std::endl;
    std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
    return EXIT_FAILURE;
  }
  const char *filename = argv[optind];
  if (optind + 1 < argc) {
    std::cerr << "warning: ignoring everything after " << argv[optind + 1]
              << std::endl;
  }
  simdjson::padded_string p;
  double meanval = 0;
  double maxval = 0;
  double minval = 10000; // starting value for the running minimum
  size_t times = 0;
  try {
    // The first load must also be guarded: it is the call most likely to fail
    // (e.g. a bad path), and an exception escaping here would terminate the
    // program without the error message below. Its timing is discarded.
    bench(filename, p);
    std::cout << "file size: " << (p.size() / (1024. * 1024 * 1024.)) << " GB"
              << std::endl;
    // Fewer repetitions for very large inputs to keep the run time bounded.
    times = p.size() > static_cast<size_t>(1024) * 1024 * 1024 ? 5 : 50;
    for (size_t i = 0; i < times; i++) {
      const double tval = bench(filename, p);
      if (maxval < tval) maxval = tval;
      if (minval > tval) minval = tval;
      meanval += tval;
    }
  } catch (const std::exception &) { // caught by reference to base
    std::cerr << "Could not load the file " << filename << std::endl;
    return EXIT_FAILURE;
  }
  std::cout << "average speed: " << meanval / times << " GB/s" << std::endl;
  std::cout << "min speed : " << minval << " GB/s" << std::endl;
  std::cout << "max speed : " << maxval << " GB/s" << std::endl;
  return EXIT_SUCCESS;
}