Fix SAX benchmarks to actually push to vector

This commit is contained in:
John Keiser 2020-08-21 13:46:56 -07:00
parent 2657e5e226
commit 98be2c91df
3 changed files with 37 additions and 37 deletions

View File

@ -22,7 +22,6 @@ using std::cerr;
using std::endl;
const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
const int REPETITIONS = 10;
#if SIMDJSON_IMPLEMENTATION_HASWELL
@ -188,20 +187,21 @@ static void sax_tweets(State &state) {
padded_string json;
if (auto error = padded_string::load(TWITTER_JSON).get(json)) { cerr << error << endl; return; }
// Allocate
twitter::sax_tweet_reader reader;
if (auto error = reader.set_capacity(json.size())) { cerr << error << endl; return; }
// Warm the vector
if (auto error = reader.read_tweets(json)) { throw error; }
// Read tweets
size_t bytes = 0;
size_t tweets = 0;
for (SIMDJSON_UNUSED auto _ : state) {
{
// Yes, we leak this. Destructor issues. TODO fix that
twitter::sax_tweet_reader reader;
// Warm the vector and allocate capacity
if (auto error = reader.read_tweets(json)) { throw error; }
bytes += json.size();
tweets += reader.tweets.size();
// Read tweets
for (SIMDJSON_UNUSED auto _ : state) {
if (auto error = reader.read_tweets(json)) { throw error; }
bytes += json.size();
tweets += reader.tweets.size();
}
}
// Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
state.counters["bytes"] = benchmark::Counter(

View File

@ -2,8 +2,8 @@
#define TWITTER_SAX_TWEET_READER_H
#include "simdjson.h"
#include "sax_tweet_reader_visitor.h"
#include "tweet.h"
#include "sax_tweet_reader_visitor.h"
#include <vector>
SIMDJSON_TARGET_HASWELL
@ -16,20 +16,16 @@ using namespace haswell;
using namespace haswell::stage2;
// Holds the state needed to read tweets from a JSON document: the output
// vector, a byte buffer, the capacity that buffer was sized for, and the
// parser implementation.
// NOTE(review): this text is a diff rendering without +/- markers, so several
// members and methods below appear twice (before-change and after-change forms).
struct sax_tweet_reader {
// Output of read_tweets(); read_tweets() clears it before parsing.
std::vector<tweet> tweets;
// Byte buffer (re)allocated by set_capacity()/allocate().
// Presumably the destination for parsed string contents -- confirm in read_tweets().
std::unique_ptr<uint8_t[]> string_buf;
// read_tweets() compares this against json.size() to decide whether to allocate.
size_t capacity;
// Used by read_tweets() to run stage 1 and to construct the json_iterator.
dom_parser_implementation dom_parser;
// Same four members with default member initializers (the other side of the change).
std::vector<tweet> tweets{};
std::unique_ptr<uint8_t[]> string_buf{};
size_t capacity{};
dom_parser_implementation dom_parser{};
// Explicit constructor; its definition initializes the members to empty/zero.
sax_tweet_reader();
// Allocates string_buf sized from new_capacity; returns an error_code.
error_code set_capacity(size_t new_capacity);
// Same operation under the name used by the other side of the change.
error_code allocate(size_t new_capacity);
// Clears `tweets`, allocates if `capacity` is smaller than the document,
// runs stage 1, then walks the document. Returns the first error encountered.
error_code read_tweets(padded_string &json);
}; // struct tweet_reader
sax_tweet_reader::sax_tweet_reader() : tweets{}, string_buf{}, capacity{0}, dom_parser() {
}
error_code sax_tweet_reader::set_capacity(size_t new_capacity) {
error_code sax_tweet_reader::allocate(size_t new_capacity) {
// string_capacity copied from document::allocate
size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + 32, 64);
string_buf.reset(new (std::nothrow) uint8_t[string_capacity]);
@ -46,11 +42,11 @@ error_code sax_tweet_reader::read_tweets(padded_string &json) {
// Allocate capacity if needed
tweets.clear();
if (capacity < json.size()) {
if (auto error = set_capacity(capacity)) { return error; }
if (auto error = allocate(json.size())) { return error; }
}
// Run stage 1 first.
if (auto error = dom_parser.stage1((uint8_t *)json.data(), json.size(), false)) { return error; }
if (auto error = dom_parser.stage1((uint8_t *)json.data(), json.size(), false)) { std::cout << error << std::endl; return error; }
// Then walk the document, parsing the tweets as we go
json_iterator iter(dom_parser, 0);

View File

@ -8,6 +8,7 @@
SIMDJSON_TARGET_HASWELL
namespace twitter {
namespace {
using namespace simdjson;
using namespace haswell;
@ -15,7 +16,7 @@ using namespace haswell::stage2;
struct sax_tweet_reader_visitor {
public:
sax_tweet_reader_visitor(std::vector<tweet> &_tweets, uint8_t *string_buf);
simdjson_really_inline sax_tweet_reader_visitor(std::vector<tweet> &tweets, uint8_t *string_buf);
simdjson_really_inline error_code visit_document_start(json_iterator &iter);
simdjson_really_inline error_code visit_object_start(json_iterator &iter);
@ -68,8 +69,8 @@ private:
field_type type{field_type::any};
};
containers container{containers::document};
std::vector<tweet> &tweets;
containers container{containers::document};
uint8_t *current_string_buf_loc;
const uint8_t *current_key{};
@ -94,9 +95,9 @@ private:
static field_lookup fields;
}; // sax_tweet_reader_visitor
// Constructs a visitor bound to the caller's tweet vector (stored as a
// reference) and sets current_string_buf_loc to the supplied buffer pointer.
// NOTE(review): both forms of the signature and of the second initializer
// appear here because this is a diff rendering; they differ in the
// simdjson_really_inline qualifier and the parameter name
// (string_buf vs. _string_buf).
sax_tweet_reader_visitor::sax_tweet_reader_visitor(std::vector<tweet> &_tweets, uint8_t *string_buf)
simdjson_really_inline sax_tweet_reader_visitor::sax_tweet_reader_visitor(std::vector<tweet> &_tweets, uint8_t *_string_buf)
: tweets{_tweets},
current_string_buf_loc{string_buf} {
current_string_buf_loc{_string_buf} {
}
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_document_start(json_iterator &iter) {
@ -112,6 +113,7 @@ simdjson_really_inline error_code sax_tweet_reader_visitor::visit_array_start(js
switch (fields.get(current_key, container).type) {
case field_type::array: // { "statuses": [
start_container(iter);
current_key = nullptr;
return SUCCESS;
case field_type::any:
return SUCCESS;
@ -190,6 +192,7 @@ simdjson_really_inline error_code sax_tweet_reader_visitor::visit_primitive(json
iter.log_error("unexpected primitive");
return INCORRECT_TYPE;
}
current_key = nullptr;
}
// If it's not a field, it's a child of an array.
@ -202,16 +205,17 @@ simdjson_really_inline error_code sax_tweet_reader_visitor::visit_array_end(json
return SUCCESS;
}
// Called when an object closes. Always returns SUCCESS.
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_object_end(json_iterator &iter) {
// Drop the pending key so it is not applied to whatever value comes next.
current_key = nullptr;
// Only step the container state back when in_container(iter) reports that
// this object is one of the tracked containers.
if (in_container(iter)) { end_container(iter); }
return SUCCESS;
}
// Called at the end of the document. Returns SUCCESS.
// NOTE(review): two signature lines appear because this is a diff rendering.
// One form names the iterator and logs the end of the "document" value; the
// other leaves the parameter unnamed and does no logging.
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_document_end(json_iterator &iter) {
iter.log_end_value("document");
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_document_end(json_iterator &) {
return SUCCESS;
}
// Called for an empty array. The iterator is unused; the only effect is
// resetting the pending key. Always returns SUCCESS.
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_empty_array(json_iterator &) {
// Clear the pending key so it does not carry over to the next value.
current_key = nullptr;
return SUCCESS;
}
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_empty_object(json_iterator &) {
@ -233,16 +237,15 @@ simdjson_really_inline bool sax_tweet_reader_visitor::in_container_child(json_it
// Enters the container that corresponds to the iterator's current depth.
// NOTE(review): the two logging lines (log_value vs. log_start_value) are the
// before/after forms of the same statement in this diff rendering.
simdjson_really_inline void sax_tweet_reader_visitor::start_container(json_iterator &iter) {
// The depth is used directly as the containers enum value and as an index
// into STATE_NAMES, so it must not exceed MAX_SUPPORTED_DEPTH.
SIMDJSON_ASSUME(iter.depth <= MAX_SUPPORTED_DEPTH); // Asserts in debug mode
container = containers(iter.depth);
if (logger::LOG_ENABLED) { iter.log_value(STATE_NAMES[iter.depth]); }
// Entering a tweet container appends a default-constructed tweet; the
// parse_* helpers write into tweets.back().
if (container == containers::tweet) { tweets.push_back({}); }
if (logger::LOG_ENABLED) { iter.log_start_value(STATE_NAMES[iter.depth]); }
}
// Leaves the current container by moving `container` to the enum value one
// below its current value.
// NOTE(review): two signature lines appear because this is a diff rendering.
// One form names the iterator and logs the end of the container when logging
// is enabled; the other leaves the parameter unnamed and does no logging.
simdjson_really_inline void sax_tweet_reader_visitor::end_container(json_iterator &iter) {
if (logger::LOG_ENABLED) { iter.log_end_value(STATE_NAMES[int(container)]); }
simdjson_really_inline void sax_tweet_reader_visitor::end_container(json_iterator &) {
container = containers(int(container) - 1);
}
simdjson_really_inline error_code sax_tweet_reader_visitor::parse_nullable_unsigned(json_iterator &iter, const uint8_t *value, const field &f) {
iter.log_value(f.key);
auto i = reinterpret_cast<uint64_t *>(reinterpret_cast<char *>(&tweets.back() + f.offset));
auto i = reinterpret_cast<uint64_t *>(reinterpret_cast<char *>(&tweets.back()) + f.offset);
if (auto error = numberparsing::parse_unsigned(value).get(*i)) {
// If number parsing failed, check if it's null before returning the error
if (!atomparsing::is_valid_null_atom(value)) { iter.log_error("expected number or null"); return error; }
@ -252,12 +255,12 @@ simdjson_really_inline error_code sax_tweet_reader_visitor::parse_nullable_unsig
}
// Parses `value` as an unsigned integer and stores it in the field of the
// last tweet in `tweets` described by `f`. Returns the error_code produced by
// the number parser.
// Calls tweets.back(), so `tweets` must not be empty -- presumably guaranteed
// by start_container() having pushed a tweet; confirm against callers.
simdjson_really_inline error_code sax_tweet_reader_visitor::parse_unsigned(json_iterator &iter, const uint8_t *value, const field &f) {
iter.log_value(f.key);
// NOTE(review): the two `auto i` lines are the before/after forms in this
// diff rendering. In the first, `+ f.offset` is applied to a tweet* and so
// advances by f.offset whole tweet objects. In the second, the cast to char*
// happens first, so the pointer advances by f.offset bytes within the tweet.
auto i = reinterpret_cast<uint64_t *>(reinterpret_cast<char *>(&tweets.back() + f.offset));
auto i = reinterpret_cast<uint64_t *>(reinterpret_cast<char *>(&tweets.back()) + f.offset);
return numberparsing::parse_unsigned(value).get(*i);
}
// Parses the string at `value` into the buffer at current_string_buf_loc and
// points the std::string_view field of the last tweet, described by `f`, at
// the result. Returns the error_code produced by the string parser.
// Calls tweets.back(), so `tweets` must not be empty -- presumably guaranteed
// by start_container() having pushed a tweet; confirm against callers.
simdjson_really_inline error_code sax_tweet_reader_visitor::parse_string(json_iterator &iter, const uint8_t *value, const field &f) {
iter.log_value(f.key);
// NOTE(review): the two `auto s` lines are the before/after forms in this
// diff rendering. In the first, `+ f.offset` is applied to a tweet* and so
// advances by f.offset whole tweet objects. In the second, the cast to char*
// happens first, so the pointer advances by f.offset bytes within the tweet.
auto s = reinterpret_cast<std::string_view *>(reinterpret_cast<char *>(&tweets.back() + f.offset));
auto s = reinterpret_cast<std::string_view *>(reinterpret_cast<char *>(&tweets.back()) + f.offset);
return stringparsing::parse_string_to_buffer(value, current_string_buf_loc, *s);
}
@ -513,6 +516,7 @@ sax_tweet_reader_visitor::field_lookup::field_lookup() {
// }
// }
} // unnamed namespace
} // namespace twitter
SIMDJSON_UNTARGET_REGION