2020-02-25 12:59:38 +08:00
|
|
|
#include <benchmark/benchmark.h>
|
2020-03-13 05:48:30 +08:00
|
|
|
#include "simdjson.h"
|
2020-03-10 00:08:06 +08:00
|
|
|
#include <sstream>
|
|
|
|
|
2020-02-25 12:59:38 +08:00
|
|
|
using namespace simdjson;
|
|
|
|
using namespace benchmark;
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
// Padded copy of the two-byte JSON document "[]".
// NOTE(review): not referenced by any benchmark in the visible part of this file.
const padded_string EMPTY_ARRAY("[]", 2);
|
|
|
|
|
2020-04-15 22:15:48 +08:00
|
|
|
// Path of the twitter.json sample, built by string-literal concatenation with
// SIMDJSON_BENCHMARK_DATA_DIR (a macro not defined in this file; presumably
// supplied by the build system and ending with a path separator - confirm).
const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
|
|
|
|
// Path of the numbers.json sample, built by string-literal concatenation with
// SIMDJSON_BENCHMARK_DATA_DIR (a macro not defined in this file; presumably
// supplied by the build system and ending with a path separator - confirm).
const char *NUMBERS_JSON = SIMDJSON_BENCHMARK_DATA_DIR "numbers.json";
|
|
|
|
|
2020-08-01 03:29:10 +08:00
|
|
|
static void recover_one_string(State& state) {
|
|
|
|
dom::parser parser;
|
|
|
|
const std::string_view data = "\"one string\"";
|
|
|
|
padded_string docdata{data};
|
|
|
|
// we do not want mem. alloc. in the loop.
|
|
|
|
auto error = parser.allocate(docdata.size());
|
|
|
|
if(error) {
|
|
|
|
cout << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
dom::element doc;
|
2020-08-09 10:24:09 +08:00
|
|
|
if ((error = parser.parse(docdata).get(doc))) {
|
2020-08-01 03:29:10 +08:00
|
|
|
cerr << "could not parse string" << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-08-01 03:29:10 +08:00
|
|
|
std::string_view v;
|
|
|
|
error = doc.get(v);
|
|
|
|
if (error) {
|
|
|
|
cerr << "could not get string" << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
benchmark::DoNotOptimize(v);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(recover_one_string);
|
|
|
|
|
2020-04-15 22:15:48 +08:00
|
|
|
|
2020-08-01 03:29:10 +08:00
|
|
|
static void serialize_twitter(State& state) {
|
|
|
|
dom::parser parser;
|
|
|
|
padded_string docdata;
|
|
|
|
auto error = padded_string::load(TWITTER_JSON).get(docdata);
|
|
|
|
if(error) {
|
|
|
|
cerr << "could not parse twitter.json" << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// we do not want mem. alloc. in the loop.
|
2020-08-09 10:24:09 +08:00
|
|
|
if((error = parser.allocate(docdata.size()))) {
|
2020-08-01 03:29:10 +08:00
|
|
|
cout << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
dom::element doc;
|
|
|
|
if ((error = parser.parse(docdata).get(doc))) {
|
|
|
|
cerr << "could not parse twitter.json" << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
size_t bytes = 0;
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-08-01 03:29:10 +08:00
|
|
|
std::string serial = simdjson::minify(doc);
|
|
|
|
bytes += serial.size();
|
|
|
|
benchmark::DoNotOptimize(serial);
|
|
|
|
}
|
2020-09-23 22:00:39 +08:00
|
|
|
// we validate the result
|
|
|
|
{
|
|
|
|
auto serial = simdjson::minify(doc);
|
|
|
|
dom::element doc2; // we parse the minified output
|
|
|
|
if ((error = parser.parse(serial).get(doc2))) { throw std::runtime_error("serialization error"); }
|
|
|
|
auto serial2 = simdjson::minify(doc2); // we minify a second time
|
|
|
|
if(serial != serial2) { throw std::runtime_error("serialization mismatch"); }
|
|
|
|
}
|
|
|
|
// Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
|
|
|
|
state.counters["Gigabytes"] = benchmark::Counter(
|
|
|
|
double(bytes), benchmark::Counter::kIsRate,
|
|
|
|
benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
|
|
|
|
state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
|
|
|
|
}
|
|
|
|
BENCHMARK(serialize_twitter)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
|
|
|
return *(std::max_element(std::begin(v), std::end(v)));
|
|
|
|
})->DisplayAggregatesOnly(true);
|
|
|
|
|
|
|
|
|
|
|
|
static void serialize_big_string_to_string(State& state) {
|
|
|
|
dom::parser parser;
|
|
|
|
std::vector<char> content;
|
|
|
|
content.push_back('\"');
|
|
|
|
for(size_t i = 0 ; i < 100000; i ++) {
|
|
|
|
content.push_back('0' + char(i%10)); // we add what looks like a long list of digits
|
|
|
|
}
|
|
|
|
content.push_back('\"');
|
|
|
|
dom::element doc;
|
|
|
|
simdjson::error_code error;
|
|
|
|
if ((error = parser.parse(content.data(), content.size()).get(doc))) {
|
|
|
|
cerr << "could not parse big string" << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
size_t bytes = 0;
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-09-23 22:00:39 +08:00
|
|
|
auto serial = simdjson::to_string(doc);
|
|
|
|
bytes += serial.size();
|
|
|
|
benchmark::DoNotOptimize(serial);
|
|
|
|
}
|
|
|
|
// Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
|
|
|
|
state.counters["Gigabytes"] = benchmark::Counter(
|
|
|
|
double(bytes), benchmark::Counter::kIsRate,
|
|
|
|
benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
|
|
|
|
state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
|
|
|
|
}
|
|
|
|
BENCHMARK(serialize_big_string_to_string)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
|
|
|
return *(std::max_element(std::begin(v), std::end(v)));
|
|
|
|
})->DisplayAggregatesOnly(true);
|
|
|
|
|
|
|
|
|
|
|
|
static void serialize_twitter_to_string(State& state) {
|
|
|
|
dom::parser parser;
|
|
|
|
padded_string docdata;
|
|
|
|
auto error = padded_string::load(TWITTER_JSON).get(docdata);
|
|
|
|
if(error) {
|
|
|
|
cerr << "could not parse twitter.json" << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// we do not want mem. alloc. in the loop.
|
|
|
|
if((error = parser.allocate(docdata.size()))) {
|
|
|
|
cout << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
dom::element doc;
|
|
|
|
if ((error = parser.parse(docdata).get(doc))) {
|
|
|
|
cerr << "could not parse twitter.json" << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
size_t bytes = 0;
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-09-23 22:00:39 +08:00
|
|
|
auto serial = simdjson::to_string(doc);
|
|
|
|
bytes += serial.size();
|
|
|
|
benchmark::DoNotOptimize(serial);
|
|
|
|
}
|
|
|
|
// we validate the result
|
|
|
|
{
|
|
|
|
auto serial = simdjson::to_string(doc);
|
|
|
|
dom::element doc2; // we parse the stringify output
|
|
|
|
if ((error = parser.parse(serial).get(doc2))) { throw std::runtime_error("serialization error"); }
|
|
|
|
auto serial2 = simdjson::to_string(doc2); // we stringify again
|
|
|
|
if(serial != serial2) { throw std::runtime_error("serialization mismatch"); }
|
|
|
|
}
|
2020-08-01 03:29:10 +08:00
|
|
|
// Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
|
|
|
|
state.counters["Gigabytes"] = benchmark::Counter(
|
|
|
|
double(bytes), benchmark::Counter::kIsRate,
|
|
|
|
benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
|
|
|
|
state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
|
|
|
|
}
|
2020-09-23 22:00:39 +08:00
|
|
|
BENCHMARK(serialize_twitter_to_string)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
2020-08-01 03:29:10 +08:00
|
|
|
return *(std::max_element(std::begin(v), std::end(v)));
|
|
|
|
})->DisplayAggregatesOnly(true);
|
2020-04-15 22:15:48 +08:00
|
|
|
|
2020-09-23 22:00:39 +08:00
|
|
|
static void serialize_twitter_string_builder(State& state) {
|
|
|
|
dom::parser parser;
|
|
|
|
padded_string docdata;
|
|
|
|
auto error = padded_string::load(TWITTER_JSON).get(docdata);
|
|
|
|
if(error) {
|
|
|
|
cerr << "could not parse twitter.json" << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// we do not want mem. alloc. in the loop.
|
|
|
|
if((error = parser.allocate(docdata.size()))) {
|
|
|
|
cout << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
dom::element doc;
|
|
|
|
if ((error = parser.parse(docdata).get(doc))) {
|
|
|
|
cerr << "could not parse twitter.json" << error << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
size_t bytes = 0;
|
|
|
|
simdjson::internal::string_builder<> sb;// not part of our public API, for internal use
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-09-23 22:00:39 +08:00
|
|
|
sb.clear();
|
|
|
|
sb.append(doc);
|
|
|
|
std::string_view serial = sb.str();
|
|
|
|
bytes += serial.size();
|
|
|
|
benchmark::DoNotOptimize(serial);
|
|
|
|
}
|
|
|
|
// Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
|
|
|
|
state.counters["Gigabytes"] = benchmark::Counter(
|
|
|
|
double(bytes), benchmark::Counter::kIsRate,
|
|
|
|
benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
|
|
|
|
state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
|
|
|
|
}
|
|
|
|
BENCHMARK(serialize_twitter_string_builder)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
|
|
|
return *(std::max_element(std::begin(v), std::end(v)));
|
|
|
|
})->DisplayAggregatesOnly(true);
|
|
|
|
|
|
|
|
|
2020-04-15 22:15:48 +08:00
|
|
|
static void numbers_scan(State& state) {
|
|
|
|
// Prints the number of results in twitter.json
|
|
|
|
dom::parser parser;
|
|
|
|
dom::array arr;
|
|
|
|
simdjson::error_code error;
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = parser.load(NUMBERS_JSON).get(arr))) {
|
2020-06-20 07:00:11 +08:00
|
|
|
cerr << "could not read " << NUMBERS_JSON << " as an array: " << error << endl;
|
2020-04-15 22:15:48 +08:00
|
|
|
return;
|
|
|
|
}
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-04-15 22:15:48 +08:00
|
|
|
std::vector<double> container;
|
|
|
|
for (auto e : arr) {
|
|
|
|
double x;
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = e.get(x))) { cerr << "found a node that is not an number: " << error << endl; break;}
|
2020-04-15 22:15:48 +08:00
|
|
|
container.push_back(x);
|
|
|
|
}
|
|
|
|
benchmark::DoNotOptimize(container.data());
|
|
|
|
benchmark::ClobberMemory();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(numbers_scan);
|
|
|
|
|
|
|
|
static void numbers_size_scan(State& state) {
|
|
|
|
// Prints the number of results in twitter.json
|
|
|
|
dom::parser parser;
|
|
|
|
dom::array arr;
|
|
|
|
simdjson::error_code error;
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = parser.load(NUMBERS_JSON).get(arr))) {
|
2020-06-20 07:00:11 +08:00
|
|
|
cerr << "could not read " << NUMBERS_JSON << " as an array: " << error << endl;
|
2020-04-15 22:15:48 +08:00
|
|
|
return;
|
|
|
|
}
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-04-15 22:15:48 +08:00
|
|
|
std::vector<double> container;
|
|
|
|
container.resize(arr.size());
|
|
|
|
size_t pos = 0;
|
|
|
|
for (auto e : arr) {
|
|
|
|
double x;
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = e.get(x))) { cerr << "found a node that is not an number: " << error << endl; break;}
|
2020-04-15 22:15:48 +08:00
|
|
|
container[pos++] = x;
|
|
|
|
}
|
|
|
|
if(pos != container.size()) { cerr << "bad count" << endl; }
|
|
|
|
benchmark::DoNotOptimize(container.data());
|
|
|
|
benchmark::ClobberMemory();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(numbers_size_scan);
|
|
|
|
|
|
|
|
|
2020-04-16 08:09:45 +08:00
|
|
|
static void numbers_type_scan(State& state) {
|
|
|
|
// Prints the number of results in twitter.json
|
|
|
|
dom::parser parser;
|
|
|
|
dom::array arr;
|
|
|
|
simdjson::error_code error;
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = parser.load(NUMBERS_JSON).get(arr))) {
|
2020-04-16 08:09:45 +08:00
|
|
|
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
|
|
|
|
return;
|
|
|
|
}
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-04-16 08:09:45 +08:00
|
|
|
std::vector<double> container;
|
|
|
|
for (auto e : arr) {
|
|
|
|
dom::element_type actual_type = e.type();
|
|
|
|
if(actual_type != dom::element_type::DOUBLE) {
|
|
|
|
cerr << "found a node that is not an number?" << endl; break;
|
|
|
|
}
|
|
|
|
double x;
|
2020-06-21 03:04:23 +08:00
|
|
|
error = e.get(x);
|
2020-04-16 08:09:45 +08:00
|
|
|
container.push_back(x);
|
|
|
|
}
|
|
|
|
benchmark::DoNotOptimize(container.data());
|
|
|
|
benchmark::ClobberMemory();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(numbers_type_scan);
|
|
|
|
|
|
|
|
static void numbers_type_size_scan(State& state) {
|
|
|
|
// Prints the number of results in twitter.json
|
|
|
|
dom::parser parser;
|
|
|
|
dom::array arr;
|
|
|
|
simdjson::error_code error;
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = parser.load(NUMBERS_JSON).get(arr))) {
|
2020-06-20 07:00:11 +08:00
|
|
|
cerr << "could not read " << NUMBERS_JSON << " as an array: " << error << endl;
|
2020-04-16 08:09:45 +08:00
|
|
|
return;
|
|
|
|
}
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-04-16 08:09:45 +08:00
|
|
|
std::vector<double> container;
|
|
|
|
container.resize(arr.size());
|
|
|
|
size_t pos = 0;
|
|
|
|
for (auto e : arr) {
|
|
|
|
dom::element_type actual_type = e.type();
|
|
|
|
if(actual_type != dom::element_type::DOUBLE) {
|
|
|
|
cerr << "found a node that is not an number?" << endl; break;
|
|
|
|
}
|
|
|
|
double x;
|
2020-06-21 03:04:23 +08:00
|
|
|
error = e.get(x);
|
2020-04-16 08:09:45 +08:00
|
|
|
container[pos++] = x;
|
|
|
|
}
|
|
|
|
if(pos != container.size()) { cerr << "bad count" << endl; }
|
|
|
|
benchmark::DoNotOptimize(container.data());
|
|
|
|
benchmark::ClobberMemory();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(numbers_type_size_scan);
|
|
|
|
|
2020-04-15 22:15:48 +08:00
|
|
|
static void numbers_load_scan(State& state) {
|
|
|
|
// Prints the number of results in twitter.json
|
|
|
|
dom::parser parser;
|
|
|
|
dom::array arr;
|
|
|
|
simdjson::error_code error;
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-04-15 22:15:48 +08:00
|
|
|
// this may hit the disk, but probably just once
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = parser.load(NUMBERS_JSON).get(arr))) {
|
2020-06-20 07:00:11 +08:00
|
|
|
cerr << "could not read " << NUMBERS_JSON << " as an array: " << error << endl;
|
2020-04-15 22:15:48 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
std::vector<double> container;
|
|
|
|
for (auto e : arr) {
|
|
|
|
double x;
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = e.get(x))) { cerr << "found a node that is not an number: " << error << endl; break;}
|
2020-04-15 22:15:48 +08:00
|
|
|
container.push_back(x);
|
|
|
|
}
|
|
|
|
benchmark::DoNotOptimize(container.data());
|
|
|
|
benchmark::ClobberMemory();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(numbers_load_scan);
|
|
|
|
|
|
|
|
static void numbers_load_size_scan(State& state) {
|
|
|
|
// Prints the number of results in twitter.json
|
|
|
|
dom::parser parser;
|
|
|
|
dom::array arr;
|
|
|
|
simdjson::error_code error;
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-04-15 22:15:48 +08:00
|
|
|
// this may hit the disk, but probably just once
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = parser.load(NUMBERS_JSON).get(arr))) {
|
2020-04-15 22:15:48 +08:00
|
|
|
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
std::vector<double> container;
|
|
|
|
container.resize(arr.size());
|
|
|
|
size_t pos = 0;
|
|
|
|
for (auto e : arr) {
|
|
|
|
double x;
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = e.get(x))) { cerr << "found a node that is not an number?" << endl; break;}
|
2020-04-15 22:15:48 +08:00
|
|
|
container[pos++] = x;
|
|
|
|
}
|
|
|
|
if(pos != container.size()) { cerr << "bad count" << endl; }
|
|
|
|
benchmark::DoNotOptimize(container.data());
|
|
|
|
benchmark::ClobberMemory();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(numbers_load_size_scan);
|
|
|
|
|
2020-04-07 00:45:45 +08:00
|
|
|
|
2020-03-14 08:43:24 +08:00
|
|
|
#if SIMDJSON_EXCEPTIONS
|
|
|
|
|
2020-04-15 22:15:48 +08:00
|
|
|
|
|
|
|
static void numbers_exceptions_scan(State& state) {
|
|
|
|
// Prints the number of results in twitter.json
|
|
|
|
dom::parser parser;
|
|
|
|
dom::array arr = parser.load(NUMBERS_JSON);
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-04-15 22:15:48 +08:00
|
|
|
std::vector<double> container;
|
|
|
|
for (double x : arr) {
|
|
|
|
container.push_back(x);
|
|
|
|
}
|
|
|
|
benchmark::DoNotOptimize(container.data());
|
|
|
|
benchmark::ClobberMemory();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(numbers_exceptions_scan);
|
|
|
|
|
|
|
|
static void numbers_exceptions_size_scan(State& state) {
|
|
|
|
// Prints the number of results in twitter.json
|
|
|
|
dom::parser parser;
|
|
|
|
dom::array arr = parser.load(NUMBERS_JSON);
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-04-15 22:15:48 +08:00
|
|
|
std::vector<double> container;
|
|
|
|
container.resize(arr.size());
|
|
|
|
size_t pos = 0;
|
2020-04-16 08:45:40 +08:00
|
|
|
for (auto e : arr) {
|
|
|
|
container[pos++] = double(e);
|
2020-04-15 22:15:48 +08:00
|
|
|
}
|
|
|
|
if(pos != container.size()) { cerr << "bad count" << endl; }
|
|
|
|
benchmark::DoNotOptimize(container.data());
|
|
|
|
benchmark::ClobberMemory();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(numbers_exceptions_size_scan);
|
|
|
|
|
|
|
|
|
2020-04-16 08:45:40 +08:00
|
|
|
|
|
|
|
static void numbers_type_exceptions_scan(State& state) {
|
|
|
|
// Prints the number of results in twitter.json
|
|
|
|
dom::parser parser;
|
|
|
|
dom::array arr = parser.load(NUMBERS_JSON);
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-04-16 08:45:40 +08:00
|
|
|
std::vector<double> container;
|
|
|
|
for (auto e : arr) {
|
|
|
|
dom::element_type actual_type = e.type();
|
|
|
|
if(actual_type != dom::element_type::DOUBLE) {
|
|
|
|
cerr << "found a node that is not an number?" << endl; break;
|
|
|
|
}
|
|
|
|
container.push_back(double(e));
|
|
|
|
}
|
|
|
|
benchmark::DoNotOptimize(container.data());
|
|
|
|
benchmark::ClobberMemory();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(numbers_type_exceptions_scan);
|
|
|
|
|
|
|
|
static void numbers_type_exceptions_size_scan(State& state) {
|
|
|
|
// Prints the number of results in twitter.json
|
|
|
|
dom::parser parser;
|
|
|
|
dom::array arr = parser.load(NUMBERS_JSON);
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-04-16 08:45:40 +08:00
|
|
|
std::vector<double> container;
|
|
|
|
container.resize(arr.size());
|
|
|
|
size_t pos = 0;
|
|
|
|
for (auto e : arr) {
|
|
|
|
dom::element_type actual_type = e.type();
|
|
|
|
if(actual_type != dom::element_type::DOUBLE) {
|
|
|
|
cerr << "found a node that is not an number?" << endl; break;
|
|
|
|
}
|
|
|
|
container[pos++] = double(e);
|
|
|
|
}
|
|
|
|
if(pos != container.size()) { cerr << "bad count" << endl; }
|
|
|
|
benchmark::DoNotOptimize(container.data());
|
|
|
|
benchmark::ClobberMemory();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(numbers_type_exceptions_size_scan);
|
|
|
|
|
2020-04-15 22:15:48 +08:00
|
|
|
static void numbers_exceptions_load_scan(State& state) {
|
|
|
|
// Prints the number of results in twitter.json
|
|
|
|
dom::parser parser;
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-04-15 22:15:48 +08:00
|
|
|
// this may hit the disk, but probably just once
|
|
|
|
dom::array arr = parser.load(NUMBERS_JSON);
|
|
|
|
std::vector<double> container;
|
|
|
|
for (double x : arr) {
|
|
|
|
container.push_back(x);
|
|
|
|
}
|
|
|
|
benchmark::DoNotOptimize(container.data());
|
|
|
|
benchmark::ClobberMemory();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(numbers_exceptions_load_scan);
|
|
|
|
|
|
|
|
static void numbers_exceptions_load_size_scan(State& state) {
|
|
|
|
// Prints the number of results in twitter.json
|
|
|
|
dom::parser parser;
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-04-15 22:15:48 +08:00
|
|
|
// this may hit the disk, but probably just once
|
|
|
|
dom::array arr = parser.load(NUMBERS_JSON);
|
|
|
|
std::vector<double> container;
|
|
|
|
container.resize(arr.size());
|
|
|
|
size_t pos = 0;
|
|
|
|
for (double x : arr) {
|
|
|
|
container[pos++] = x;
|
|
|
|
}
|
|
|
|
if(pos != container.size()) { cerr << "bad count" << endl; }
|
|
|
|
benchmark::DoNotOptimize(container.data());
|
|
|
|
benchmark::ClobberMemory();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(numbers_exceptions_load_size_scan);
|
|
|
|
|
|
|
|
|
2020-02-25 12:59:38 +08:00
|
|
|
static void twitter_count(State& state) {
|
|
|
|
// Prints the number of results in twitter.json
|
2020-03-29 02:43:41 +08:00
|
|
|
dom::parser parser;
|
2020-04-07 00:45:45 +08:00
|
|
|
dom::element doc = parser.load(TWITTER_JSON);
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-02-25 12:59:38 +08:00
|
|
|
uint64_t result_count = doc["search_metadata"]["count"];
|
|
|
|
if (result_count != 100) { return; }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(twitter_count);
|
|
|
|
|
2020-03-29 05:48:43 +08:00
|
|
|
SIMDJSON_PUSH_DISABLE_WARNINGS
|
|
|
|
SIMDJSON_DISABLE_DEPRECATED_WARNING
|
2020-02-25 12:59:38 +08:00
|
|
|
static void iterator_twitter_count(State& state) {
|
|
|
|
// Prints the number of results in twitter.json
|
2020-04-07 00:45:45 +08:00
|
|
|
padded_string json = padded_string::load(TWITTER_JSON);
|
2020-03-26 03:03:03 +08:00
|
|
|
ParsedJson pj = build_parsed_json(json);
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-03-26 03:03:03 +08:00
|
|
|
ParsedJson::Iterator iter(pj);
|
2020-02-25 12:59:38 +08:00
|
|
|
// uint64_t result_count = doc["search_metadata"]["count"];
|
|
|
|
if (!iter.move_to_key("search_metadata")) { return; }
|
|
|
|
if (!iter.move_to_key("count")) { return; }
|
|
|
|
if (!iter.is_integer()) { return; }
|
|
|
|
int64_t result_count = iter.get_integer();
|
|
|
|
|
|
|
|
if (result_count != 100) { return; }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(iterator_twitter_count);
|
2020-03-29 05:48:43 +08:00
|
|
|
SIMDJSON_POP_DISABLE_WARNINGS
|
2020-02-25 12:59:38 +08:00
|
|
|
|
|
|
|
static void twitter_default_profile(State& state) {
|
|
|
|
// Count unique users with a default profile.
|
2020-03-29 02:43:41 +08:00
|
|
|
dom::parser parser;
|
2020-04-07 00:45:45 +08:00
|
|
|
dom::element doc = parser.load(TWITTER_JSON);
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-02-25 12:59:38 +08:00
|
|
|
set<string_view> default_users;
|
2020-06-20 07:00:11 +08:00
|
|
|
for (dom::object tweet : doc["statuses"]) {
|
2020-03-29 02:43:41 +08:00
|
|
|
dom::object user = tweet["user"];
|
2020-02-25 12:59:38 +08:00
|
|
|
if (user["default_profile"]) {
|
|
|
|
default_users.insert(user["screen_name"]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (default_users.size() != 86) { return; }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(twitter_default_profile);
|
|
|
|
|
2020-03-14 08:43:24 +08:00
|
|
|
static void twitter_image_sizes(State& state) {
|
|
|
|
// Count unique image sizes
|
2020-03-29 02:43:41 +08:00
|
|
|
dom::parser parser;
|
2020-04-07 00:45:45 +08:00
|
|
|
dom::element doc = parser.load(TWITTER_JSON);
|
2020-06-20 07:00:11 +08:00
|
|
|
simdjson::error_code error;
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-03-14 08:43:24 +08:00
|
|
|
set<tuple<uint64_t, uint64_t>> image_sizes;
|
2020-06-20 07:00:11 +08:00
|
|
|
for (dom::object tweet : doc["statuses"]) {
|
|
|
|
dom::array media;
|
2020-06-21 03:04:23 +08:00
|
|
|
if (not (error = tweet["entities"]["media"].get(media))) {
|
2020-06-20 07:00:11 +08:00
|
|
|
for (dom::object image : media) {
|
2020-04-15 08:26:26 +08:00
|
|
|
for (auto size : image["sizes"].get<dom::object>()) {
|
|
|
|
image_sizes.insert({ size.value["w"], size.value["h"] });
|
2020-03-14 08:43:24 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (image_sizes.size() != 15) { return; };
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(twitter_image_sizes);
|
|
|
|
|
|
|
|
#endif // SIMDJSON_EXCEPTIONS
|
|
|
|
|
|
|
|
static void error_code_twitter_count(State& state) noexcept {
|
|
|
|
// Prints the number of results in twitter.json
|
2020-03-29 02:43:41 +08:00
|
|
|
dom::parser parser;
|
2020-06-20 07:00:11 +08:00
|
|
|
simdjson::error_code error;
|
|
|
|
dom::element doc;
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = parser.load(TWITTER_JSON).get(doc))) { return; }
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-06-20 07:00:11 +08:00
|
|
|
uint64_t value;
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = doc["search_metadata"]["count"].get(value))) { return; }
|
2020-03-14 08:43:24 +08:00
|
|
|
if (value != 100) { return; }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(error_code_twitter_count);
|
|
|
|
|
2020-02-25 12:59:38 +08:00
|
|
|
static void error_code_twitter_default_profile(State& state) noexcept {
|
|
|
|
// Count unique users with a default profile.
|
2020-03-29 02:43:41 +08:00
|
|
|
dom::parser parser;
|
2020-06-20 07:00:11 +08:00
|
|
|
simdjson::error_code error;
|
|
|
|
dom::element doc;
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = parser.load(TWITTER_JSON).get(doc))) { std::cerr << error << std::endl; return; }
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-02-25 12:59:38 +08:00
|
|
|
set<string_view> default_users;
|
|
|
|
|
2020-06-20 07:00:11 +08:00
|
|
|
dom::array tweets;
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = doc["statuses"].get(tweets))) { return; }
|
2020-03-29 02:43:41 +08:00
|
|
|
for (dom::element tweet : tweets) {
|
2020-06-20 07:00:11 +08:00
|
|
|
dom::object user;
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = tweet["user"].get(user))) { return; }
|
2020-06-20 07:00:11 +08:00
|
|
|
bool default_profile;
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = user["default_profile"].get(default_profile))) { return; }
|
2020-02-25 12:59:38 +08:00
|
|
|
if (default_profile) {
|
2020-06-20 07:00:11 +08:00
|
|
|
std::string_view screen_name;
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = user["screen_name"].get(screen_name))) { return; }
|
2020-02-25 12:59:38 +08:00
|
|
|
default_users.insert(screen_name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (default_users.size() != 86) { return; }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(error_code_twitter_default_profile);
|
|
|
|
|
2020-03-29 05:48:43 +08:00
|
|
|
SIMDJSON_PUSH_DISABLE_WARNINGS
|
|
|
|
SIMDJSON_DISABLE_DEPRECATED_WARNING
|
2020-02-25 12:59:38 +08:00
|
|
|
// Benchmark (ParsedJson::Iterator interface; the surrounding macros silence
// deprecation warnings): count unique users with a default profile.
// The iterator walks the "statuses" array; for each status it moves to
// "user", and when "default_profile" is true it inserts "screen_name" into a
// set. The commented-out DOM statements show the equivalent DOM-API code.
// The file is loaded and parsed once, before the timed loop. The set is
// expected to hold 86 names. Any failed navigation step or unexpected count
// is reported through State::SkipWithError.
static void iterator_twitter_default_profile(State& state) {
  // Message reported whenever a navigation step fails.
  const char *const bad_layout = "unexpected structure in twitter.json";
  padded_string json;
  auto error = padded_string::load(TWITTER_JSON).get(json);
  if (error) {
    std::cerr << error << std::endl;
    state.SkipWithError("could not load twitter.json");
    return;
  }
  ParsedJson pj = build_parsed_json(json);
  for (simdjson_unused auto _ : state) {
    set<string_view> default_users;
    ParsedJson::Iterator iter(pj);

    // for (dom::object tweet : doc["statuses"]) {
    if (!(iter.move_to_key("statuses") && iter.is_array())) { state.SkipWithError(bad_layout); return; }
    if (iter.down()) { // first status
      do {

        // dom::object user = tweet["user"];
        if (!(iter.move_to_key("user") && iter.is_object())) { state.SkipWithError(bad_layout); return; }

        // if (user["default_profile"]) {
        if (iter.move_to_key("default_profile")) {
          if (iter.is_true()) {
            if (!iter.up()) { state.SkipWithError(bad_layout); return; } // back to user

            // default_users.insert(user["screen_name"]);
            if (!(iter.move_to_key("screen_name") && iter.is_string())) { state.SkipWithError(bad_layout); return; }
            default_users.insert(string_view(iter.get_string(), iter.get_string_length()));
          }
          if (!iter.up()) { state.SkipWithError(bad_layout); return; } // back to user
        }

        if (!iter.up()) { state.SkipWithError(bad_layout); return; } // back to status

      } while (iter.next()); // next status
    }

    if (default_users.size() != 86) {
      state.SkipWithError("unexpected number of default-profile users in twitter.json");
      return;
    }
  }
}
|
2020-03-29 05:48:43 +08:00
|
|
|
SIMDJSON_POP_DISABLE_WARNINGS
|
2020-02-25 12:59:38 +08:00
|
|
|
// Registers iterator_twitter_default_profile with Google Benchmark.
BENCHMARK(iterator_twitter_default_profile);
|
|
|
|
|
|
|
|
static void error_code_twitter_image_sizes(State& state) noexcept {
|
|
|
|
// Count unique image sizes
|
2020-03-29 02:43:41 +08:00
|
|
|
dom::parser parser;
|
2020-06-20 07:00:11 +08:00
|
|
|
simdjson::error_code error;
|
|
|
|
dom::element doc;
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = parser.load(TWITTER_JSON).get(doc))) { std::cerr << error << std::endl; return; }
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-02-25 12:59:38 +08:00
|
|
|
set<tuple<uint64_t, uint64_t>> image_sizes;
|
2020-06-20 07:00:11 +08:00
|
|
|
dom::array statuses;
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = doc["statuses"].get(statuses))) { return; }
|
2020-03-29 02:43:41 +08:00
|
|
|
for (dom::element tweet : statuses) {
|
2020-06-20 07:00:11 +08:00
|
|
|
dom::array images;
|
2020-06-21 03:04:23 +08:00
|
|
|
if (not (error = tweet["entities"]["media"].get(images))) {
|
2020-03-29 02:43:41 +08:00
|
|
|
for (dom::element image : images) {
|
2020-06-20 07:00:11 +08:00
|
|
|
dom::object sizes;
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = image["sizes"].get(sizes))) { return; }
|
2020-04-15 08:26:26 +08:00
|
|
|
for (auto size : sizes) {
|
2020-06-20 07:00:11 +08:00
|
|
|
uint64_t width, height;
|
2020-06-21 03:04:23 +08:00
|
|
|
if ((error = size.value["w"].get(width))) { return; }
|
|
|
|
if ((error = size.value["h"].get(height))) { return; }
|
2020-02-25 12:59:38 +08:00
|
|
|
image_sizes.insert({ width, height });
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (image_sizes.size() != 15) { return; };
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(error_code_twitter_image_sizes);
|
|
|
|
|
2020-03-29 05:48:43 +08:00
|
|
|
SIMDJSON_PUSH_DISABLE_WARNINGS
|
|
|
|
SIMDJSON_DISABLE_DEPRECATED_WARNING
|
2020-02-25 12:59:38 +08:00
|
|
|
static void iterator_twitter_image_sizes(State& state) {
|
|
|
|
// Count unique image sizes
|
2020-06-20 07:00:11 +08:00
|
|
|
padded_string json;
|
2020-06-21 03:04:23 +08:00
|
|
|
auto error = padded_string::load(TWITTER_JSON).get(json);
|
|
|
|
if (error) { std::cerr << error << std::endl; return; }
|
2020-03-26 03:03:03 +08:00
|
|
|
ParsedJson pj = build_parsed_json(json);
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-02-25 12:59:38 +08:00
|
|
|
set<tuple<uint64_t, uint64_t>> image_sizes;
|
2020-03-26 03:03:03 +08:00
|
|
|
ParsedJson::Iterator iter(pj);
|
2020-02-25 12:59:38 +08:00
|
|
|
|
2020-06-22 05:36:38 +08:00
|
|
|
// for (dom::object tweet : doc["statuses"]) {
|
2020-02-25 12:59:38 +08:00
|
|
|
if (!(iter.move_to_key("statuses") && iter.is_array())) { return; }
|
|
|
|
if (iter.down()) { // first status
|
|
|
|
do {
|
|
|
|
|
2020-06-22 06:26:44 +08:00
|
|
|
// dom::object media;
|
|
|
|
// not_found = tweet["entities"]["media"].get(media);
|
2020-02-25 12:59:38 +08:00
|
|
|
// if (!not_found) {
|
|
|
|
if (iter.move_to_key("entities")) {
|
|
|
|
if (!iter.is_object()) { return; }
|
|
|
|
if (iter.move_to_key("media")) {
|
|
|
|
if (!iter.is_array()) { return; }
|
|
|
|
|
2020-06-22 05:36:38 +08:00
|
|
|
// for (dom::object image : media) {
|
2020-02-25 12:59:38 +08:00
|
|
|
if (iter.down()) { // first media
|
|
|
|
do {
|
|
|
|
|
2020-06-22 06:26:44 +08:00
|
|
|
// for (auto [key, size] : dom::object(image["sizes"])) {
|
2020-02-25 12:59:38 +08:00
|
|
|
if (!(iter.move_to_key("sizes") && iter.is_object())) { return; }
|
|
|
|
if (iter.down()) { // first size
|
|
|
|
do {
|
|
|
|
iter.move_to_value();
|
|
|
|
|
|
|
|
// image_sizes.insert({ size["w"], size["h"] });
|
|
|
|
if (!(iter.move_to_key("w")) && !iter.is_integer()) { return; }
|
|
|
|
uint64_t width = iter.get_integer();
|
|
|
|
if (!iter.up()) { return; } // back to size
|
|
|
|
if (!(iter.move_to_key("h")) && !iter.is_integer()) { return; }
|
|
|
|
uint64_t height = iter.get_integer();
|
|
|
|
if (!iter.up()) { return; } // back to size
|
|
|
|
image_sizes.insert({ width, height });
|
|
|
|
|
|
|
|
} while (iter.next()); // next size
|
|
|
|
if (!iter.up()) { return; } // back to sizes
|
|
|
|
}
|
|
|
|
if (!iter.up()) { return; } // back to image
|
|
|
|
} while (iter.next()); // next image
|
|
|
|
if (!iter.up()) { return; } // back to media
|
|
|
|
}
|
|
|
|
if (!iter.up()) { return; } // back to entities
|
|
|
|
}
|
|
|
|
if (!iter.up()) { return; } // back to status
|
|
|
|
}
|
|
|
|
} while (iter.next()); // next status
|
|
|
|
}
|
|
|
|
|
|
|
|
if (image_sizes.size() != 15) { return; };
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(iterator_twitter_image_sizes);
|
|
|
|
|
2020-03-10 00:08:06 +08:00
|
|
|
static void print_json(State& state) noexcept {
|
|
|
|
// Prints the number of results in twitter.json
|
2020-03-29 02:43:41 +08:00
|
|
|
dom::parser parser;
|
2020-06-21 03:04:23 +08:00
|
|
|
|
|
|
|
padded_string json;
|
|
|
|
auto error = padded_string::load(TWITTER_JSON).get(json);
|
|
|
|
if (error) { std::cerr << error << std::endl; return; }
|
|
|
|
|
2020-06-20 07:00:11 +08:00
|
|
|
int code = json_parse(json, parser);
|
|
|
|
if (code) { cerr << error_message(code) << endl; return; }
|
2020-10-13 21:18:54 +08:00
|
|
|
for (simdjson_unused auto _ : state) {
|
2020-03-10 00:08:06 +08:00
|
|
|
std::stringstream s;
|
|
|
|
if (!parser.print_json(s)) { cerr << "print_json failed" << endl; return; }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(print_json);
|
2020-03-29 05:48:43 +08:00
|
|
|
SIMDJSON_POP_DISABLE_WARNINGS
|
2020-03-10 00:08:06 +08:00
|
|
|
|
2020-02-25 12:59:38 +08:00
|
|
|
// Google Benchmark macro that provides the program's main() for the
// benchmarks registered above.
BENCHMARK_MAIN();
|