Recommend simdjson::ondemand over simdjson::builtin::ondemand (#1380)

Co-authored-by: Daniel Lemire <lemire@gmail.com>
This commit is contained in:
John Keiser 2021-01-14 14:33:49 -08:00 committed by GitHub
parent 92372412d9
commit 55faf4c5bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
33 changed files with 288 additions and 33 deletions

View File

@ -79,7 +79,6 @@ alpha right now. More information can be found in the [On Demand Guide](doc/onde
```c++
#include "simdjson.h"
using namespace simdjson;
using namespace simdjson::builtin; // for ondemand
int main(void) {
ondemand::parser parser;
padded_string json = padded_string::load("twitter.json");

View File

@ -7,7 +7,6 @@
namespace distinct_user_id {
using namespace simdjson;
using namespace simdjson::builtin;
struct simdjson_ondemand {
ondemand::parser parser{};

View File

@ -7,7 +7,6 @@
namespace find_tweet {
using namespace simdjson;
using namespace simdjson::builtin;
struct simdjson_ondemand {
using StringType=std::string_view;

View File

@ -7,7 +7,6 @@
namespace kostya {
using namespace simdjson;
using namespace simdjson::builtin;
struct simdjson_ondemand {
static constexpr diff_flags DiffFlags = diff_flags::NONE;

View File

@ -7,7 +7,6 @@
namespace large_random {
using namespace simdjson;
using namespace simdjson::builtin;
struct simdjson_ondemand {
static constexpr diff_flags DiffFlags = diff_flags::NONE;

View File

@ -7,7 +7,6 @@
namespace large_random {
using namespace simdjson;
using namespace simdjson::builtin;
struct simdjson_ondemand_unordered {
static constexpr diff_flags DiffFlags = diff_flags::NONE;

View File

@ -0,0 +1,52 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "largerandom.h"
namespace largerandom {
using namespace simdjson;
/**
 * Large-random benchmark variant that reads the document through an
 * ondemand::json_iterator obtained from the parser.
 */
class Iter {
public:
  /** Parses `json` and refills the point container (defined out of line). */
  simdjson_really_inline bool Run(const padded_string &json);

  /** Returns the container of points filled by Run(). */
  simdjson_really_inline const std::vector<my_point> &Result() { return container; }

  /** Returns the number of points currently stored in the container. */
  simdjson_really_inline size_t ItemCount() { return container.size(); }

private:
  ondemand::parser parser{};
  std::vector<my_point> container{};

  /**
   * Calls start_object(), field_key() and field_value() on `iter`, in that
   * order, and throws the string literal "Invalid field" as soon as one of
   * them reports an error. Otherwise returns iter.consume_double().
   */
  simdjson_really_inline double first_double(ondemand::json_iterator &iter) {
    if (iter.start_object().error()) { throw "Invalid field"; }
    if (iter.field_key().error()) { throw "Invalid field"; }
    if (iter.field_value()) { throw "Invalid field"; }
    return iter.consume_double();
  }

  /**
   * Same as first_double(), except that it checks has_next_field() instead of
   * calling start_object() before stepping to the field's key and value.
   */
  simdjson_really_inline double next_double(ondemand::json_iterator &iter) {
    if (!iter.has_next_field()) { throw "Invalid field"; }
    if (iter.field_key().error()) { throw "Invalid field"; }
    if (iter.field_value()) { throw "Invalid field"; }
    return iter.consume_double();
  }
};
/**
 * Clears the container, then walks `json` with a raw iterator and appends one
 * my_point per array element.
 *
 * Throws the string literal "Too many fields" when an element reports another
 * field after the third double has been read. Always returns true otherwise.
 */
simdjson_really_inline bool Iter::Run(const padded_string &json) {
  container.clear();
  auto iter = parser.iterate_raw(json).value();

  // Nothing to collect when start_array() reports false.
  if (!iter.start_array()) { return true; }

  do {
    // The three doubles must be read strictly in this order.
    const double x = first_double(iter);
    const double y = next_double(iter);
    const double z = next_double(iter);
    container.emplace_back(my_point{x, y, z});
    if (iter.has_next_field()) { throw "Too many fields"; }
  } while (iter.has_next_element());

  return true;
}
BENCHMARK_TEMPLATE(LargeRandom, Iter);
} // namespace largerandom
#endif // SIMDJSON_EXCEPTIONS

View File

@ -0,0 +1,37 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "largerandom.h"
namespace largerandom {
using namespace simdjson;
/**
 * Large-random benchmark variant built on ondemand::parser.
 */
class OnDemand {
public:
  /** Parses `json` and refills the point container (defined out of line). */
  simdjson_really_inline bool Run(const padded_string &json);

  /** Returns the container of points filled by Run(). */
  simdjson_really_inline const std::vector<my_point> &Result() { return container; }

  /** Returns the number of points currently stored in the container. */
  simdjson_really_inline size_t ItemCount() { return container.size(); }

private:
  ondemand::parser parser{};
  std::vector<my_point> container{};
};
// Clears `container`, iterates over `json` with the On Demand parser and
// appends one my_point per element of the document, built from that element's
// "x", "y" and "z" fields. Always returns true.
// NOTE(review): failures presumably surface as exceptions -- this file is
// guarded by SIMDJSON_EXCEPTIONS; confirm against the simdjson_result API.
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
container.clear();
auto doc = parser.iterate(json);
for (ondemand::object coord : doc) {
// The braced initializer evaluates its clauses left to right, so the fields
// are looked up in the order x, y, z. Keep that order when editing.
container.emplace_back(my_point{coord.find_field("x"), coord.find_field("y"), coord.find_field("z")});
}
return true;
}
BENCHMARK_TEMPLATE(LargeRandom, OnDemand);
} // namespace largerandom
#endif // SIMDJSON_EXCEPTIONS

120
benchmark/largerandom/sax.h Normal file
View File

@ -0,0 +1,120 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "largerandom.h"
namespace largerandom {
using namespace simdjson;
using namespace simdjson::builtin::stage2;
/**
 * Large-random benchmark variant that runs stage 1 of the DOM parser
 * implementation and then walks the document with a visitor.
 */
class Sax {
public:
  /** Parses `json`; returns false after printing the error when parsing fails. */
  simdjson_really_inline bool Run(const padded_string &json) noexcept;

  /** Returns the container of points filled by Run(). */
  simdjson_really_inline const std::vector<my_point> &Result() { return container; }

  /** Returns the number of points currently stored in the container. */
  simdjson_really_inline size_t ItemCount() { return container.size(); }

private:
  /** Error-code flavour of Run(): does the actual parsing work. */
  simdjson_really_inline error_code RunNoExcept(const padded_string &json) noexcept;

  /** Sizes the string buffer and the DOM parser for `new_capacity` bytes of JSON. */
  error_code Allocate(size_t new_capacity);

  std::unique_ptr<uint8_t[]> string_buf{};
  size_t capacity{};  // last capacity successfully set by Allocate(); 0 until then
  dom_parser_implementation dom_parser{};
  std::vector<my_point> container{};
};
/**
 * Visitor passed to json_iterator::walk_document() that collects one my_point
 * per object it sees.
 *
 * visit_key() selects a slot of `buffer` from the key, visit_primitive() parses
 * the value into that slot, and visit_object_end() appends the three buffered
 * doubles to `points`.
 */
struct sax_point_reader_visitor {
public:
  std::vector<my_point> &points;
  enum {GOT_X=0, GOT_Y=1, GOT_Z=2, GOT_SOMETHING_ELSE=4};
  size_t idx{GOT_SOMETHING_ELSE};
  double buffer[3]={};

  explicit sax_point_reader_visitor(std::vector<my_point> &_points) : points(_points) {}

  // --- Callbacks that do real work -----------------------------------------

  /** Resets the current slot to 0 at the start of every object. */
  simdjson_really_inline error_code visit_object_start(json_iterator &) {
    idx = 0;
    return SUCCESS;
  }

  /** Maps the key to the buffer slot that the next primitive is stored in. */
  simdjson_really_inline error_code visit_key(json_iterator &, const uint8_t * key) {
    // Technically, we should check the other characters
    // in the key, but we are cheating to go as fast
    // as possible.
    // NOTE(review): key[0] is presumably the opening quote, making key[1] the
    // first character of the key -- confirm against the stage 2 walker.
    const uint8_t selector = key[1];
    if (selector == 'x') {
      idx = GOT_X;
    } else if (selector == 'y') {
      idx = GOT_Y;
    } else if (selector == 'z') {
      idx = GOT_Z;
    } else {
      idx = GOT_SOMETHING_ELSE;
    }
    return SUCCESS;
  }

  /** Parses `value` as a double into the selected slot; ignores unknown keys. */
  simdjson_really_inline error_code visit_primitive(json_iterator &, const uint8_t *value) {
    if (idx != GOT_SOMETHING_ELSE) {
      return numberparsing::parse_double(value).get(buffer[idx]);
    }
    return SUCCESS;
  }

  /** Appends the buffered x, y, z values as a new point. */
  simdjson_really_inline error_code visit_object_end(json_iterator &) {
    points.emplace_back(my_point{buffer[0], buffer[1], buffer[2]});
    return SUCCESS;
  }

  // --- Callbacks that are intentionally no-ops ------------------------------

  simdjson_really_inline error_code visit_document_start(json_iterator &) { return SUCCESS; }
  simdjson_really_inline error_code visit_document_end(json_iterator &) { return SUCCESS; }
  simdjson_really_inline error_code visit_array_start(json_iterator &) { return SUCCESS; }
  simdjson_really_inline error_code visit_array_end(json_iterator &) { return SUCCESS; }
  simdjson_really_inline error_code visit_empty_array(json_iterator &) { return SUCCESS; }
  simdjson_really_inline error_code visit_empty_object(json_iterator &) { return SUCCESS; }
  simdjson_really_inline error_code visit_root_primitive(json_iterator &, const uint8_t *) { return SUCCESS; }
  simdjson_really_inline error_code increment_count(json_iterator &) { return SUCCESS; }
};
// NOTE: this assumes the dom_parser is already allocated
bool Sax::Run(const padded_string &json) noexcept {
auto error = RunNoExcept(json);
if (error) { std::cerr << error << std::endl; return false; }
return true;
}
// Clears the container, grows the parser capacity when `json` is larger than
// the current capacity, runs stage 1 and then walks the document with a
// sax_point_reader_visitor that fills the container.
// Returns the first error reported by any step, or SUCCESS.
error_code Sax::RunNoExcept(const padded_string &json) noexcept {
  container.clear();

  const size_t json_len = json.size();

  // Allocate capacity if needed
  if (json_len > capacity) {
    SIMDJSON_TRY( Allocate(json_len) );
  }

  // Run stage 1 first.
  SIMDJSON_TRY( dom_parser.stage1(json.u8data(), json_len, false) );

  // Then walk the document, collecting the points as we go
  json_iterator walker(dom_parser, 0);
  sax_point_reader_visitor point_reader(container);
  SIMDJSON_TRY( walker.walk_document<false>(point_reader) );

  return SUCCESS;
}
// Sizes the string buffer and the DOM parser for documents of up to
// `new_capacity` bytes.
//
// Returns MEMALLOC when the string buffer cannot be allocated, the error
// reported by dom_parser.set_capacity()/set_max_depth() when one of them
// fails, and SUCCESS otherwise. `string_buf` and `capacity` are only updated
// once every step has succeeded, so a failed call leaves both untouched.
//
// Marked inline because this definition lives in a header.
inline error_code Sax::Allocate(size_t new_capacity) {
  // string_capacity copied from document::allocate
  const size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64);

  // new (std::nothrow) yields nullptr on failure instead of throwing, so the
  // result has to be checked explicitly.
  std::unique_ptr<uint8_t[]> fresh_buf(new (std::nothrow) uint8_t[string_capacity]);
  if (!fresh_buf) { return MEMALLOC; }

  if (auto error = dom_parser.set_capacity(new_capacity)) { return error; }
  if (capacity == 0) { // set max depth the first time only
    if (auto error = dom_parser.set_max_depth(DEFAULT_MAX_DEPTH)) { return error; }
  }

  // Commit: the previous buffer is released when fresh_buf goes out of scope.
  string_buf.swap(fresh_buf);
  capacity = new_capacity;
  return SUCCESS;
}
BENCHMARK_TEMPLATE(LargeRandom, Sax);
} // namespace largerandom
#endif // SIMDJSON_EXCEPTIONS

View File

@ -0,0 +1,63 @@
#pragma once
#if SIMDJSON_EXCEPTIONS
#include "partial_tweets.h"
namespace partial_tweets {
using namespace simdjson;
/**
 * Partial-tweets benchmark variant built on ondemand::parser.
 */
class OnDemand {
public:
  /**
   * Prints the name of the builtin implementation to std::cout the first time
   * an instance is constructed; later constructions print nothing.
   */
  OnDemand() {
    if (displayed_implementation) { return; }
    std::cout << "On Demand implementation: " << builtin_implementation()->name() << std::endl;
    displayed_implementation = true;
  }

  /** Parses `json` and refills the tweet container (defined out of line). */
  simdjson_really_inline bool Run(const padded_string &json);

  /** Returns the container of tweets filled by Run(). */
  simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }

  /** Returns the number of tweets currently stored in the container. */
  simdjson_really_inline size_t ItemCount() { return tweets.size(); }

private:
  ondemand::parser parser{};
  std::vector<tweet> tweets{};

  /** Returns 0 when `value` is null, otherwise `value` converted to uint64_t. */
  simdjson_really_inline uint64_t nullable_int(ondemand::value value) {
    if (value.is_null()) { return 0; }
    return value;
  }

  /** Builds a twitter_user from the "id" and "screen_name" fields of `user`. */
  simdjson_really_inline twitter_user read_user(ondemand::object user) {
    return { user.find_field("id"), user.find_field("screen_name") };
  }

  // Shared by all instances so the implementation name is printed only once.
  static inline bool displayed_implementation = false;
};
// Clears `tweets`, iterates over the "statuses" field of `json` with the
// On Demand parser and appends one partial_tweets::tweet per element.
// Always returns true.
// NOTE(review): failures presumably surface as exceptions -- this file is
// guarded by SIMDJSON_EXCEPTIONS; confirm against the simdjson_result API.
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
tweets.clear();
// Walk the document, parsing the tweets as we go
auto doc = parser.iterate(json);
for (ondemand::object tweet : doc.find_field("statuses")) {
// The braced initializer evaluates its clauses left to right, so the fields
// are looked up in exactly the order listed below. Keep that order when
// editing.
tweets.emplace_back(partial_tweets::tweet{
tweet.find_field("created_at"),
tweet.find_field("id"),
tweet.find_field("text"),
// in_reply_to_status_id may be null; nullable_int() maps null to 0.
nullable_int(tweet.find_field("in_reply_to_status_id")),
read_user(tweet.find_field("user")),
tweet.find_field("retweet_count"),
tweet.find_field("favorite_count")
});
}
return true;
}
BENCHMARK_TEMPLATE(PartialTweets, OnDemand);
} // namespace partial_tweets
#endif // SIMDJSON_EXCEPTIONS

View File

@ -7,7 +7,6 @@
namespace partial_tweets {
using namespace simdjson;
using namespace simdjson::builtin;
struct simdjson_ondemand {
using StringType=std::string_view;

View File

@ -37,7 +37,6 @@ into your project. Then include it in your project with:
```c++
#include "simdjson.h"
using namespace simdjson; // optional
using namespace simdjson::builtin; // optional, for On Demand
```
You can generally compile with:

View File

@ -1,6 +1,5 @@
#include "simdjson.h"
using namespace simdjson;
using namespace simdjson::builtin; // for ondemand
int main(void) {
ondemand::parser parser;
padded_string json = padded_string::load("twitter.json");

View File

@ -1,6 +1,5 @@
#include "simdjson.h"
using namespace simdjson;
using namespace simdjson::builtin;
int main(void) {
padded_string json;
auto error = padded_string::load("twitter.json").get(json);

View File

@ -16,7 +16,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
#if SIMDJSON_EXCEPTIONS
try {
#endif
simdjson::builtin::ondemand::parser parser;
simdjson::ondemand::parser parser;
simdjson::padded_string padded(strings[0]);
auto doc = parser.iterate(padded);
if (doc.error()) {

View File

@ -31,6 +31,10 @@ namespace simdjson {
* code that uses it) will use westmere.
*/
namespace builtin = SIMDJSON_BUILTIN_IMPLEMENTATION;
/**
* @copydoc simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand
*/
namespace ondemand = SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand;
/**
* Function which returns a pointer to an implementation matching the "builtin" implementation.
* The builtin implementation is the best statically linked simdjson implementation that can be used by the compiling

View File

@ -212,7 +212,7 @@ public:
* JSON `{ "x": 1, "y": 2, "z": 3 }`:
*
* ```c++
* simdjson::builtin::ondemand::parser parser;
* simdjson::ondemand::parser parser;
* auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded);
* double z = obj.find_field("z");
* double y = obj.find_field("y");

View File

@ -26,7 +26,7 @@ public:
* JSON `{ "x": 1, "y": 2, "z": 3 }`:
*
* ```c++
* simdjson::builtin::ondemand::parser parser;
* simdjson::ondemand::parser parser;
* auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded);
* double z = obj.find_field("z");
* double y = obj.find_field("y");

View File

@ -221,7 +221,7 @@ public:
* JSON `{ "x": 1, "y": 2, "z": 3 }`:
*
* ```c++
* simdjson::builtin::ondemand::parser parser;
* simdjson::ondemand::parser parser;
* auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded);
* double z = obj.find_field("z");
* double y = obj.find_field("y");
@ -377,7 +377,7 @@ public:
* JSON `{ "x": 1, "y": 2, "z": 3 }`:
*
* ```c++
* simdjson::builtin::ondemand::parser parser;
* simdjson::ondemand::parser parser;
* auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded);
* double z = obj.find_field("z");
* double y = obj.find_field("y");

View File

@ -1,7 +1,7 @@
#include <iostream>
#include "simdjson.h"
using namespace simdjson::builtin;
using namespace simdjson;
int main() {
ondemand::parser parser;

View File

@ -2,7 +2,6 @@
#include "test_ondemand.h"
using namespace simdjson;
using namespace simdjson::builtin;
namespace active_tests {

View File

@ -6,7 +6,6 @@
#include "simdjson.h"
using namespace simdjson;
using namespace simdjson::builtin;
// This ensures the compiler can't rearrange them into the proper order (which causes it to work!)
simdjson_never_inline bool check_point(simdjson_result<ondemand::value> xval, simdjson_result<ondemand::value> yval) {

View File

@ -3,7 +3,6 @@
#include "test_ondemand.h"
using namespace simdjson;
using namespace simdjson::builtin;
#if SIMDJSON_EXCEPTIONS

View File

@ -2,7 +2,6 @@
#include "test_ondemand.h"
using namespace simdjson;
using namespace simdjson::builtin;
namespace dom_api_tests {
using namespace std;

View File

@ -2,7 +2,6 @@
#include "test_ondemand.h"
using namespace simdjson;
using namespace simdjson::builtin;
namespace error_tests {
using namespace std;

View File

@ -2,7 +2,6 @@
#include "test_ondemand.h"
using namespace simdjson;
using namespace simdjson::builtin;
namespace key_string_tests {
#if SIMDJSON_EXCEPTIONS

View File

@ -3,7 +3,6 @@
#include "test_ondemand.h"
using namespace simdjson;
using namespace simdjson::builtin;
namespace number_tests {
@ -166,7 +165,7 @@ namespace number_tests {
void github_issue_1273() {
padded_string bad(std::string_view("0.0300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000024000000000000000000000000000000000000000000000000000000000000122978293824"));
simdjson::builtin::ondemand::parser parser;
simdjson::ondemand::parser parser;
simdjson_unused auto blah=parser.iterate(bad);
double x;
simdjson_unused auto blah2=blah.get(x);

View File

@ -2,7 +2,6 @@
#include "test_ondemand.h"
using namespace simdjson;
using namespace simdjson::builtin;
namespace ordering_tests {
using namespace std;

View File

@ -2,7 +2,6 @@
#include "test_ondemand.h"
using namespace simdjson;
using namespace simdjson::builtin;
namespace parse_api_tests {
using namespace std;

View File

@ -3,7 +3,6 @@
using namespace std;
using namespace simdjson;
using namespace simdjson::builtin;
using error_code=simdjson::error_code;
#if SIMDJSON_EXCEPTIONS

View File

@ -3,7 +3,6 @@
#include "test_ondemand.h"
using namespace simdjson;
using namespace simdjson::builtin;
namespace twitter_tests {
using namespace std;

View File

@ -7,7 +7,7 @@
#include "test_macros.h"
template<typename T, typename F>
bool test_ondemand(simdjson::builtin::ondemand::parser &parser, const simdjson::padded_string &json, const F& f) {
bool test_ondemand(simdjson::ondemand::parser &parser, const simdjson::padded_string &json, const F& f) {
auto doc = parser.iterate(json);
T val;
ASSERT_SUCCESS( doc.get(val) );
@ -15,17 +15,17 @@ bool test_ondemand(simdjson::builtin::ondemand::parser &parser, const simdjson::
}
template<typename T, typename F>
bool test_ondemand(const simdjson::padded_string &json, const F& f) {
simdjson::builtin::ondemand::parser parser;
simdjson::ondemand::parser parser;
return test_ondemand<T, F>(parser, json, f);
}
template<typename F>
bool test_ondemand_doc(simdjson::builtin::ondemand::parser &parser, const simdjson::padded_string &json, const F& f) {
bool test_ondemand_doc(simdjson::ondemand::parser &parser, const simdjson::padded_string &json, const F& f) {
return f(parser.iterate(json));
}
template<typename F>
bool test_ondemand_doc(const simdjson::padded_string &json, const F& f) {
simdjson::builtin::ondemand::parser parser;
simdjson::ondemand::parser parser;
return test_ondemand_doc(parser, json, f);
}

View File

@ -32,7 +32,7 @@ simdjson_really_inline bool equals_expected<const char *, const char *>(const ch
return !strcmp(actual, expected);
}
template<>
simdjson_really_inline bool equals_expected<simdjson::builtin::ondemand::raw_json_string, const char *>(simdjson::builtin::ondemand::raw_json_string actual, const char * expected) {
simdjson_really_inline bool equals_expected<simdjson::ondemand::raw_json_string, const char *>(simdjson::ondemand::raw_json_string actual, const char * expected) {
return actual == expected;
}