Document navigation API
This commit is contained in:
parent
9a9ca974c2
commit
0b21203141
|
@ -57,6 +57,7 @@ objs
|
|||
/allparsingcompetition
|
||||
/basictests
|
||||
/benchfeatures
|
||||
/benchmark/bench_dom_api
|
||||
/benchmark/bench_parse_call
|
||||
/benchmark/get_corpus_benchmark
|
||||
/benchmark/parse
|
||||
|
|
|
@ -12,6 +12,10 @@ add_executable(perfdiff perfdiff.cpp)
|
|||
|
||||
# Google Benchmarks
|
||||
if (SIMDJSON_GOOGLE_BENCHMARKS)
|
||||
add_cpp_benchmark(bench_parse_call bench_parse_call.cpp)
|
||||
add_cpp_benchmark(bench_parse_call)
|
||||
target_link_libraries(bench_parse_call benchmark::benchmark)
|
||||
|
||||
add_cpp_benchmark(bench_dom_api)
|
||||
target_link_libraries(bench_dom_api benchmark::benchmark)
|
||||
target_compile_definitions(bench_dom_api PRIVATE JSON_TEST_PATH="${PROJECT_SOURCE_DIR}/jsonexamples/twitter.json")
|
||||
endif()
|
|
@ -0,0 +1,233 @@
|
|||
#include <benchmark/benchmark.h>
|
||||
#include "simdjson/document.h"
|
||||
#include "simdjson/jsonparser.h"
|
||||
using namespace simdjson;
|
||||
using namespace benchmark;
|
||||
using namespace std;
|
||||
|
||||
#ifndef JSON_TEST_PATH
|
||||
#define JSON_TEST_PATH "jsonexamples/twitter.json"
|
||||
#endif
|
||||
|
||||
const padded_string EMPTY_ARRAY("[]", 2);
|
||||
|
||||
// Benchmark: look up search_metadata.count in the corpus at JSON_TEST_PATH
// through the DOM API's implicit conversions (which throw on error).
// The document is parsed once, before the timed loop. The benchmark stops
// early if the count is not the expected 100.
static void twitter_count(State& state) {
  document doc = document::parse(get_corpus(JSON_TEST_PATH));
  for (auto _ : state) {
    const uint64_t count = doc["search_metadata"]["count"];
    if (count == 100) { continue; }
    return;
  }
}
|
||||
BENCHMARK(twitter_count);
|
||||
|
||||
// Benchmark: look up search_metadata.count in the corpus at JSON_TEST_PATH
// using only the error-code flavour of the DOM API.
// The document is parsed once, before the timed loop. The benchmark stops
// early on any lookup/type error or if the count is not the expected 100.
static void error_code_twitter_count(State& state) noexcept {
  // NOTE(review): converting the parse result to `document` throws on a parse
  // error, and get_corpus() presumably can fail as well; inside this noexcept
  // function that would terminate the program. Confirm this is acceptable.
  document doc = document::parse(get_corpus(JSON_TEST_PATH));
  for (auto _ : state) {
    // Ask for the integer through as_uint64_t() so that a wrong type comes
    // back as an error code instead of going through a throwing conversion.
    auto [count, error] = doc["search_metadata"]["count"].as_uint64_t();
    if (error) { return; }
    if (count != 100) { return; }
  }
}
|
||||
BENCHMARK(error_code_twitter_count);
|
||||
|
||||
// Benchmark: look up search_metadata.count in the corpus at JSON_TEST_PATH
// by walking the document with document::iterator.
// The document is parsed once, before the timed loop. The benchmark stops
// early if navigation fails or the count is not the expected 100.
static void iterator_twitter_count(State& state) {
  document doc = document::parse(get_corpus(JSON_TEST_PATH));
  for (auto _ : state) {
    document::iterator iter(doc);

    // Iterator equivalent of doc["search_metadata"]["count"]; the checks
    // short-circuit in the same order as separate early returns would.
    const bool on_count = iter.move_to_key("search_metadata")
                       && iter.move_to_key("count")
                       && iter.is_integer();
    if (!on_count) { return; }

    const int64_t count = iter.get_integer();
    if (count != 100) { return; }
  }
}
|
||||
BENCHMARK(iterator_twitter_count);
|
||||
|
||||
// Benchmark: collect the distinct screen names of users whose
// "default_profile" flag is true, using the DOM API's implicit conversions
// (which throw on error). 86 distinct names are expected; any other count
// ends the benchmark early.
static void twitter_default_profile(State& state) {
  document doc = document::parse(get_corpus(JSON_TEST_PATH));
  for (auto _ : state) {
    set<string_view> screen_names;
    for (document::object status : doc["statuses"].as_array()) {
      document::object author = status["user"];
      if (author["default_profile"]) {
        screen_names.insert(author["screen_name"]);
      }
    }
    if (screen_names.size() != 86) { return; }
  }
}
|
||||
BENCHMARK(twitter_default_profile);
|
||||
|
||||
// Benchmark: collect the distinct screen names of users whose
// "default_profile" flag is true, checking an error code after every lookup
// instead of relying on exceptions. 86 distinct names are expected; any error
// or any other count ends the benchmark early.
static void error_code_twitter_default_profile(State& state) noexcept {
  document doc = document::parse(get_corpus(JSON_TEST_PATH));
  for (auto _ : state) {
    set<string_view> screen_names;

    auto [statuses, statuses_error] = doc["statuses"].as_array();
    if (statuses_error) { return; }

    for (document::element status : statuses) {
      auto [author, author_error] = status["user"].as_object();
      if (author_error) { return; }

      auto [is_default, flag_error] = author["default_profile"].as_bool();
      if (flag_error) { return; }
      if (!is_default) { continue; }

      auto [screen_name, name_error] = author["screen_name"].as_string();
      if (name_error) { return; }
      screen_names.insert(screen_name);
    }

    if (screen_names.size() != 86) { return; }
  }
}
|
||||
BENCHMARK(error_code_twitter_default_profile);
|
||||
|
||||
// Benchmark: collect the distinct screen names of users whose
// "default_profile" flag is true, walking the document with
// document::iterator. 86 distinct names are expected; a navigation failure or
// any other count ends the benchmark early.
static void iterator_twitter_default_profile(State& state) {
  document doc = document::parse(get_corpus(JSON_TEST_PATH));
  for (auto _ : state) {
    set<string_view> screen_names;
    document::iterator iter(doc);

    // Enter doc["statuses"], which must be an array.
    if (!iter.move_to_key("statuses") || !iter.is_array()) { return; }
    if (iter.down()) { // now on the first status
      do {

        // status["user"] must be an object.
        if (!iter.move_to_key("user") || !iter.is_object()) { return; }

        // user["default_profile"]: only users with a true flag are recorded.
        if (iter.move_to_key("default_profile")) {
          if (iter.is_true()) {
            if (!iter.up()) { return; } // default_profile -> user

            // user["screen_name"] must be a string; remember it.
            if (!iter.move_to_key("screen_name") || !iter.is_string()) { return; }
            screen_names.insert(string_view(iter.get_string(), iter.get_string_length()));
          }
          if (!iter.up()) { return; } // current field -> user
        }

        if (!iter.up()) { return; } // user -> status

      } while (iter.next()); // advance to the next status
    }

    if (screen_names.size() != 86) { return; }
  }
}
|
||||
BENCHMARK(iterator_twitter_default_profile);
|
||||
|
||||
// Benchmark: collect the distinct (w, h) pairs found under
// statuses[*].entities.media[*].sizes.*, using the DOM API's implicit
// conversions (which throw on error). Statuses without entities.media are
// skipped. 15 distinct pairs are expected; any other count ends the
// benchmark early.
static void twitter_image_sizes(State& state) {
  document doc = document::parse(get_corpus(JSON_TEST_PATH));
  for (auto _ : state) {
    set<tuple<uint64_t, uint64_t>> dimensions;
    for (document::object status : doc["statuses"].as_array()) {
      auto [media, media_missing] = status["entities"]["media"];
      if (media_missing) { continue; }
      for (document::object image : media.as_array()) {
        for (auto [label, size] : image["sizes"].as_object()) {
          dimensions.insert({ size["w"], size["h"] });
        }
      }
    }
    if (dimensions.size() != 15) { return; }
  }
}
|
||||
BENCHMARK(twitter_image_sizes);
|
||||
|
||||
// Benchmark: collect the distinct (w, h) pairs found under
// statuses[*].entities.media[*].sizes.*, checking error codes instead of
// relying on exceptions. Statuses whose entities.media lookup reports an
// error are skipped; every other error ends the benchmark early, as does a
// final count other than the expected 15.
static void error_code_twitter_image_sizes(State& state) noexcept {
  document doc = document::parse(get_corpus(JSON_TEST_PATH));
  for (auto _ : state) {
    set<tuple<uint64_t, uint64_t>> dimensions;

    auto [statuses, statuses_error] = doc["statuses"].as_array();
    if (statuses_error) { return; }

    for (document::element status : statuses) {
      auto [images, media_missing] = status["entities"]["media"].as_array();
      if (media_missing) { continue; }

      for (document::element image : images) {
        auto [sizes, sizes_error] = image["sizes"].as_object();
        if (sizes_error) { return; }

        for (auto [label, size] : sizes) {
          // Both lookups run before either error is inspected.
          auto [width, width_error] = size["w"].as_uint64_t();
          auto [height, height_error] = size["h"].as_uint64_t();
          if (width_error || height_error) { return; }
          dimensions.insert({ width, height });
        }
      }
    }

    if (dimensions.size() != 15) { return; }
  }
}
|
||||
BENCHMARK(error_code_twitter_image_sizes);
|
||||
|
||||
// Benchmark: collect the distinct (w, h) pairs found under
// statuses[*].entities.media[*].sizes.*, walking the document with
// document::iterator. Statuses without "entities" or without "media" are
// skipped. A navigation or type failure ends the benchmark early, as does a
// final count other than the expected 15.
static void iterator_twitter_image_sizes(State& state) {
  document doc = document::parse(get_corpus(JSON_TEST_PATH));
  for (auto _ : state) {
    set<tuple<uint64_t, uint64_t>> image_sizes;
    document::iterator iter(doc);

    // for (document::object tweet : doc["statuses"].as_array()) {
    if (!(iter.move_to_key("statuses") && iter.is_array())) { return; }
    if (iter.down()) { // first status
      do {

        // auto [media, not_found] = tweet["entities"]["media"];
        // if (!not_found) {
        if (iter.move_to_key("entities")) {
          if (!iter.is_object()) { return; }
          if (iter.move_to_key("media")) {
            if (!iter.is_array()) { return; }

            // for (document::object image : media.as_array()) {
            if (iter.down()) { // first media
              do {

                // for (auto [key, size] : image["sizes"].as_object()) {
                if (!(iter.move_to_key("sizes") && iter.is_object())) { return; }
                if (iter.down()) { // first size
                  do {
                    iter.move_to_value();

                    // image_sizes.insert({ size["w"], size["h"] });
                    // Bail out if the key is missing OR the value is not an
                    // integer, so get_integer() is never called on a
                    // non-integer value.
                    if (!(iter.move_to_key("w") && iter.is_integer())) { return; }
                    uint64_t width = iter.get_integer();
                    if (!iter.up()) { return; } // back to size
                    if (!(iter.move_to_key("h") && iter.is_integer())) { return; }
                    uint64_t height = iter.get_integer();
                    if (!iter.up()) { return; } // back to size
                    image_sizes.insert({ width, height });

                  } while (iter.next()); // next size
                  if (!iter.up()) { return; } // back to sizes
                }
                if (!iter.up()) { return; } // back to image
              } while (iter.next()); // next image
              if (!iter.up()) { return; } // back to media
            }
            if (!iter.up()) { return; } // back to entities
          }
          if (!iter.up()) { return; } // back to status
        }
      } while (iter.next()); // next status
    }

    if (image_sizes.size() != 15) { return; }
  }
}
|
||||
BENCHMARK(iterator_twitter_image_sizes);
|
||||
|
||||
BENCHMARK_MAIN();
|
File diff suppressed because it is too large
Load Diff
|
@ -24,13 +24,16 @@ enum error_code {
|
|||
UNESCAPED_CHARS, // found unescaped characters in a string.
|
||||
UNCLOSED_STRING, // missing quote at the end
|
||||
UNSUPPORTED_ARCHITECTURE, // unsupported architecture
|
||||
INCORRECT_TYPE, // JSON element has a different type than user expected
|
||||
NUMBER_OUT_OF_RANGE, // JSON number does not fit in 64 bits
|
||||
NO_SUCH_FIELD, // JSON field not found in object
|
||||
UNEXPECTED_ERROR // indicative of a bug in simdjson
|
||||
};
|
||||
|
||||
const std::string &error_message(error_code error) noexcept;
|
||||
|
||||
struct invalid_json : public std::exception {
|
||||
invalid_json(error_code _error) : error{_error} {}
|
||||
invalid_json(error_code _error) : error{_error} { }
|
||||
const char *what() const noexcept { return error_message(error).c_str(); }
|
||||
error_code error;
|
||||
};
|
||||
|
|
|
@ -10,11 +10,243 @@
|
|||
// implementation.
|
||||
|
||||
#include "simdjson/implementation.h"
|
||||
|
||||
#include <iostream>
|
||||
namespace simdjson {
|
||||
|
||||
// TODO inline?
|
||||
document::doc_ref_result document::parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept {
|
||||
//
|
||||
// document::element_result<T> inline implementation
|
||||
//
|
||||
template<typename T>
|
||||
inline document::element_result<T>::element_result(T _value) noexcept : value(_value), error{SUCCESS} {}
|
||||
template<typename T>
|
||||
inline document::element_result<T>::element_result(error_code _error) noexcept : value(), error{_error} {}
|
||||
template<>
|
||||
inline document::element_result<std::string_view>::operator std::string_view() const noexcept(false) {
|
||||
if (error) { throw invalid_json(error); }
|
||||
return value;
|
||||
}
|
||||
template<>
|
||||
inline document::element_result<const char *>::operator const char *() const noexcept(false) {
|
||||
if (error) { throw invalid_json(error); }
|
||||
return value;
|
||||
}
|
||||
template<>
|
||||
inline document::element_result<bool>::operator bool() const noexcept(false) {
|
||||
if (error) { throw invalid_json(error); }
|
||||
return value;
|
||||
}
|
||||
template<>
|
||||
inline document::element_result<uint64_t>::operator uint64_t() const noexcept(false) {
|
||||
if (error) { throw invalid_json(error); }
|
||||
return value;
|
||||
}
|
||||
template<>
|
||||
inline document::element_result<int64_t>::operator int64_t() const noexcept(false) {
|
||||
if (error) { throw invalid_json(error); }
|
||||
return value;
|
||||
}
|
||||
template<>
|
||||
inline document::element_result<double>::operator double() const noexcept(false) {
|
||||
if (error) { throw invalid_json(error); }
|
||||
return value;
|
||||
}
|
||||
|
||||
//
|
||||
// document::element_result<document::array> inline implementation
|
||||
//
|
||||
inline document::element_result<document::array>::element_result(document::array _value) noexcept : value(_value), error{SUCCESS} {}
|
||||
inline document::element_result<document::array>::element_result(error_code _error) noexcept : value(), error{_error} {}
|
||||
inline document::element_result<document::array>::operator document::array() const noexcept(false) {
|
||||
if (error) { throw invalid_json(error); }
|
||||
return value;
|
||||
}
|
||||
inline document::array::iterator document::element_result<document::array>::begin() const noexcept(false) {
|
||||
if (error) { throw invalid_json(error); }
|
||||
return value.begin();
|
||||
}
|
||||
inline document::array::iterator document::element_result<document::array>::end() const noexcept(false) {
|
||||
if (error) { throw invalid_json(error); }
|
||||
return value.end();
|
||||
}
|
||||
|
||||
//
|
||||
// document::element_result<document::object> inline implementation
|
||||
//
|
||||
inline document::element_result<document::object>::element_result(document::object _value) noexcept : value(_value), error{SUCCESS} {}
|
||||
inline document::element_result<document::object>::element_result(error_code _error) noexcept : value(), error{_error} {}
|
||||
inline document::element_result<document::object>::operator document::object() const noexcept(false) {
|
||||
if (error) { throw invalid_json(error); }
|
||||
return value;
|
||||
}
|
||||
inline document::element_result<document::element> document::element_result<document::object>::operator[](const std::string_view &key) const noexcept {
|
||||
if (error) { return error; }
|
||||
return value[key];
|
||||
}
|
||||
inline document::element_result<document::element> document::element_result<document::object>::operator[](const char *key) const noexcept {
|
||||
if (error) { return error; }
|
||||
return value[key];
|
||||
}
|
||||
inline document::object::iterator document::element_result<document::object>::begin() const noexcept(false) {
|
||||
if (error) { throw invalid_json(error); }
|
||||
return value.begin();
|
||||
}
|
||||
inline document::object::iterator document::element_result<document::object>::end() const noexcept(false) {
|
||||
if (error) { throw invalid_json(error); }
|
||||
return value.end();
|
||||
}
|
||||
|
||||
//
|
||||
// document::element_result<document::element> inline implementation
|
||||
//
|
||||
inline document::element_result<document::element>::element_result(document::element _value) noexcept : value(_value), error{SUCCESS} {}
|
||||
inline document::element_result<document::element>::element_result(error_code _error) noexcept : value(), error{_error} {}
|
||||
inline document::element_result<bool> document::element_result<document::element>::is_null() const noexcept {
|
||||
if (error) { return error; }
|
||||
return value.is_null();
|
||||
}
|
||||
inline document::element_result<bool> document::element_result<document::element>::as_bool() const noexcept {
|
||||
if (error) { return error; }
|
||||
return value.as_bool();
|
||||
}
|
||||
inline document::element_result<const char*> document::element_result<document::element>::as_c_str() const noexcept {
|
||||
if (error) { return error; }
|
||||
return value.as_c_str();
|
||||
}
|
||||
inline document::element_result<std::string_view> document::element_result<document::element>::as_string() const noexcept {
|
||||
if (error) { return error; }
|
||||
return value.as_string();
|
||||
}
|
||||
inline document::element_result<uint64_t> document::element_result<document::element>::as_uint64_t() const noexcept {
|
||||
if (error) { return error; }
|
||||
return value.as_uint64_t();
|
||||
}
|
||||
inline document::element_result<int64_t> document::element_result<document::element>::as_int64_t() const noexcept {
|
||||
if (error) { return error; }
|
||||
return value.as_int64_t();
|
||||
}
|
||||
inline document::element_result<double> document::element_result<document::element>::as_double() const noexcept {
|
||||
if (error) { return error; }
|
||||
return value.as_double();
|
||||
}
|
||||
inline document::element_result<document::array> document::element_result<document::element>::as_array() const noexcept {
|
||||
if (error) { return error; }
|
||||
return value.as_array();
|
||||
}
|
||||
inline document::element_result<document::object> document::element_result<document::element>::as_object() const noexcept {
|
||||
if (error) { return error; }
|
||||
return value.as_object();
|
||||
}
|
||||
|
||||
inline document::element_result<document::element>::operator bool() const noexcept(false) {
|
||||
return as_bool();
|
||||
}
|
||||
inline document::element_result<document::element>::operator const char *() const noexcept(false) {
|
||||
return as_c_str();
|
||||
}
|
||||
inline document::element_result<document::element>::operator std::string_view() const noexcept(false) {
|
||||
return as_string();
|
||||
}
|
||||
inline document::element_result<document::element>::operator uint64_t() const noexcept(false) {
|
||||
return as_uint64_t();
|
||||
}
|
||||
inline document::element_result<document::element>::operator int64_t() const noexcept(false) {
|
||||
return as_int64_t();
|
||||
}
|
||||
inline document::element_result<document::element>::operator double() const noexcept(false) {
|
||||
return as_double();
|
||||
}
|
||||
inline document::element_result<document::element>::operator document::array() const noexcept(false) {
|
||||
return as_array();
|
||||
}
|
||||
inline document::element_result<document::element>::operator document::object() const noexcept(false) {
|
||||
return as_object();
|
||||
}
|
||||
inline document::element_result<document::element> document::element_result<document::element>::operator[](const std::string_view &key) const noexcept {
|
||||
if (error) { return *this; }
|
||||
return value[key];
|
||||
}
|
||||
inline document::element_result<document::element> document::element_result<document::element>::operator[](const char *key) const noexcept {
|
||||
if (error) { return *this; }
|
||||
return value[key];
|
||||
}
|
||||
|
||||
//
|
||||
// document inline implementation
|
||||
//
|
||||
inline document::element document::root() const noexcept {
|
||||
return document::element(this, 1);
|
||||
}
|
||||
inline document::element_result<document::array> document::as_array() const noexcept {
|
||||
return root().as_array();
|
||||
}
|
||||
inline document::element_result<document::object> document::as_object() const noexcept {
|
||||
return root().as_object();
|
||||
}
|
||||
inline document::operator document::element() const noexcept {
|
||||
return root();
|
||||
}
|
||||
inline document::operator document::array() const noexcept(false) {
|
||||
return root();
|
||||
}
|
||||
inline document::operator document::object() const noexcept(false) {
|
||||
return root();
|
||||
}
|
||||
inline document::element_result<document::element> document::operator[](const std::string_view &key) const noexcept {
|
||||
return root()[key];
|
||||
}
|
||||
inline document::element_result<document::element> document::operator[](const char *key) const noexcept {
|
||||
return root()[key];
|
||||
}
|
||||
|
||||
//
|
||||
// document::doc_ref_result inline implementation
|
||||
//
|
||||
inline document::doc_ref_result::doc_ref_result(document &_doc, error_code _error) noexcept : doc(_doc), error(_error) { }
|
||||
inline document::doc_ref_result::operator document&() noexcept(false) {
|
||||
if (error) {
|
||||
throw invalid_json(error);
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
inline const std::string &document::doc_ref_result::get_error_message() const noexcept {
|
||||
return error_message(error);
|
||||
}
|
||||
|
||||
//
|
||||
// document::doc_result inline implementation
|
||||
//
|
||||
inline document::doc_result::doc_result(document &&_doc, error_code _error) noexcept : doc(std::move(_doc)), error(_error) { }
|
||||
inline document::doc_result::doc_result(document &&_doc) noexcept : doc(std::move(_doc)), error(SUCCESS) { }
|
||||
inline document::doc_result::doc_result(error_code _error) noexcept : doc(), error(_error) { }
|
||||
inline document::doc_result::operator document() noexcept(false) {
|
||||
if (error) {
|
||||
throw invalid_json(error);
|
||||
}
|
||||
return std::move(doc);
|
||||
}
|
||||
inline const std::string &document::doc_result::get_error_message() const noexcept {
|
||||
return error_message(error);
|
||||
}
|
||||
|
||||
//
|
||||
// document::parser inline implementation
|
||||
//
|
||||
inline bool document::parser::is_valid() const noexcept { return valid; }
|
||||
inline int document::parser::get_error_code() const noexcept { return error; }
|
||||
inline std::string document::parser::get_error_message() const noexcept { return error_message(error); }
|
||||
inline bool document::parser::print_json(std::ostream &os) const noexcept {
|
||||
return is_valid() ? doc.print_json(os) : false;
|
||||
}
|
||||
inline bool document::parser::dump_raw_tape(std::ostream &os) const noexcept {
|
||||
return is_valid() ? doc.dump_raw_tape(os) : false;
|
||||
}
|
||||
inline const document &document::parser::get_document() const noexcept(false) {
|
||||
if (!is_valid()) {
|
||||
throw invalid_json(error);
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
inline document::doc_ref_result document::parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept {
|
||||
error_code code = init_parse(len);
|
||||
if (code) { return document::doc_ref_result(doc, code); }
|
||||
|
||||
|
@ -39,14 +271,13 @@ document::doc_ref_result document::parser::parse(const uint8_t *buf, size_t len,
|
|||
really_inline document::doc_ref_result document::parser::parse(const char *buf, size_t len, bool realloc_if_needed) noexcept {
|
||||
return parse((const uint8_t *)buf, len, realloc_if_needed);
|
||||
}
|
||||
really_inline document::doc_ref_result document::parser::parse(const std::string &s, bool realloc_if_needed) noexcept {
|
||||
return parse(s.data(), s.length(), realloc_if_needed);
|
||||
really_inline document::doc_ref_result document::parser::parse(const std::string &s) noexcept {
|
||||
return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING);
|
||||
}
|
||||
really_inline document::doc_ref_result document::parser::parse(const padded_string &s) noexcept {
|
||||
return parse(s.data(), s.length(), false);
|
||||
}
|
||||
|
||||
// TODO really_inline?
|
||||
inline document::doc_result document::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept {
|
||||
document::parser parser;
|
||||
if (!parser.allocate_capacity(len)) {
|
||||
|
@ -58,8 +289,8 @@ inline document::doc_result document::parse(const uint8_t *buf, size_t len, bool
|
|||
really_inline document::doc_result document::parse(const char *buf, size_t len, bool realloc_if_needed) noexcept {
|
||||
return parse((const uint8_t *)buf, len, realloc_if_needed);
|
||||
}
|
||||
really_inline document::doc_result document::parse(const std::string &s, bool realloc_if_needed) noexcept {
|
||||
return parse(s.data(), s.length(), realloc_if_needed);
|
||||
really_inline document::doc_result document::parse(const std::string &s) noexcept {
|
||||
return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING);
|
||||
}
|
||||
really_inline document::doc_result document::parse(const padded_string &s) noexcept {
|
||||
return parse(s.data(), s.length(), false);
|
||||
|
@ -70,7 +301,7 @@ really_inline document::doc_result document::parse(const padded_string &s) noexc
|
|||
//
|
||||
|
||||
WARN_UNUSED
|
||||
inline error_code document::parser::init_parse(size_t len) {
|
||||
inline error_code document::parser::init_parse(size_t len) noexcept {
|
||||
if (len > capacity()) {
|
||||
return error = CAPACITY;
|
||||
}
|
||||
|
@ -83,79 +314,79 @@ inline error_code document::parser::init_parse(size_t len) {
|
|||
return SUCCESS;
|
||||
}
|
||||
|
||||
inline void document::parser::init_stage2() {
|
||||
inline void document::parser::init_stage2() noexcept {
|
||||
current_string_buf_loc = doc.string_buf.get();
|
||||
current_loc = 0;
|
||||
valid = false;
|
||||
error = UNINITIALIZED;
|
||||
}
|
||||
|
||||
really_inline error_code document::parser::on_error(error_code new_error_code) {
|
||||
really_inline error_code document::parser::on_error(error_code new_error_code) noexcept {
|
||||
error = new_error_code;
|
||||
return new_error_code;
|
||||
}
|
||||
really_inline error_code document::parser::on_success(error_code success_code) {
|
||||
really_inline error_code document::parser::on_success(error_code success_code) noexcept {
|
||||
error = success_code;
|
||||
valid = true;
|
||||
return success_code;
|
||||
}
|
||||
really_inline bool document::parser::on_start_document(uint32_t depth) {
|
||||
really_inline bool document::parser::on_start_document(uint32_t depth) noexcept {
|
||||
containing_scope_offset[depth] = current_loc;
|
||||
write_tape(0, 'r');
|
||||
write_tape(0, tape_type::ROOT);
|
||||
return true;
|
||||
}
|
||||
really_inline bool document::parser::on_start_object(uint32_t depth) {
|
||||
really_inline bool document::parser::on_start_object(uint32_t depth) noexcept {
|
||||
containing_scope_offset[depth] = current_loc;
|
||||
write_tape(0, '{');
|
||||
write_tape(0, tape_type::START_OBJECT);
|
||||
return true;
|
||||
}
|
||||
really_inline bool document::parser::on_start_array(uint32_t depth) {
|
||||
really_inline bool document::parser::on_start_array(uint32_t depth) noexcept {
|
||||
containing_scope_offset[depth] = current_loc;
|
||||
write_tape(0, '[');
|
||||
write_tape(0, tape_type::START_ARRAY);
|
||||
return true;
|
||||
}
|
||||
// TODO we're not checking this bool
|
||||
really_inline bool document::parser::on_end_document(uint32_t depth) {
|
||||
really_inline bool document::parser::on_end_document(uint32_t depth) noexcept {
|
||||
// write our doc.tape location to the header scope
|
||||
// The root scope gets written *at* the previous location.
|
||||
annotate_previous_loc(containing_scope_offset[depth], current_loc);
|
||||
write_tape(containing_scope_offset[depth], 'r');
|
||||
write_tape(containing_scope_offset[depth], tape_type::ROOT);
|
||||
return true;
|
||||
}
|
||||
really_inline bool document::parser::on_end_object(uint32_t depth) {
|
||||
really_inline bool document::parser::on_end_object(uint32_t depth) noexcept {
|
||||
// write our doc.tape location to the header scope
|
||||
write_tape(containing_scope_offset[depth], '}');
|
||||
write_tape(containing_scope_offset[depth], tape_type::END_OBJECT);
|
||||
annotate_previous_loc(containing_scope_offset[depth], current_loc);
|
||||
return true;
|
||||
}
|
||||
really_inline bool document::parser::on_end_array(uint32_t depth) {
|
||||
really_inline bool document::parser::on_end_array(uint32_t depth) noexcept {
|
||||
// write our doc.tape location to the header scope
|
||||
write_tape(containing_scope_offset[depth], ']');
|
||||
write_tape(containing_scope_offset[depth], tape_type::END_ARRAY);
|
||||
annotate_previous_loc(containing_scope_offset[depth], current_loc);
|
||||
return true;
|
||||
}
|
||||
|
||||
really_inline bool document::parser::on_true_atom() {
|
||||
write_tape(0, 't');
|
||||
really_inline bool document::parser::on_true_atom() noexcept {
|
||||
write_tape(0, tape_type::TRUE_VALUE);
|
||||
return true;
|
||||
}
|
||||
really_inline bool document::parser::on_false_atom() {
|
||||
write_tape(0, 'f');
|
||||
really_inline bool document::parser::on_false_atom() noexcept {
|
||||
write_tape(0, tape_type::FALSE_VALUE);
|
||||
return true;
|
||||
}
|
||||
really_inline bool document::parser::on_null_atom() {
|
||||
write_tape(0, 'n');
|
||||
really_inline bool document::parser::on_null_atom() noexcept {
|
||||
write_tape(0, tape_type::NULL_VALUE);
|
||||
return true;
|
||||
}
|
||||
|
||||
really_inline uint8_t *document::parser::on_start_string() {
|
||||
really_inline uint8_t *document::parser::on_start_string() noexcept {
|
||||
/* we advance the point, accounting for the fact that we have a NULL
|
||||
* termination */
|
||||
write_tape(current_string_buf_loc - doc.string_buf.get(), '"');
|
||||
write_tape(current_string_buf_loc - doc.string_buf.get(), tape_type::STRING);
|
||||
return current_string_buf_loc + sizeof(uint32_t);
|
||||
}
|
||||
|
||||
really_inline bool document::parser::on_end_string(uint8_t *dst) {
|
||||
really_inline bool document::parser::on_end_string(uint8_t *dst) noexcept {
|
||||
uint32_t str_length = dst - (current_string_buf_loc + sizeof(uint32_t));
|
||||
// TODO check for overflow in case someone has a crazy string (>=4GB?)
|
||||
// But only add the overflow check when the document itself exceeds 4GB
|
||||
|
@ -168,25 +399,303 @@ really_inline bool document::parser::on_end_string(uint8_t *dst) {
|
|||
return true;
|
||||
}
|
||||
|
||||
really_inline bool document::parser::on_number_s64(int64_t value) {
|
||||
write_tape(0, 'l');
|
||||
really_inline bool document::parser::on_number_s64(int64_t value) noexcept {
|
||||
write_tape(0, tape_type::INT64);
|
||||
std::memcpy(&doc.tape[current_loc], &value, sizeof(value));
|
||||
++current_loc;
|
||||
return true;
|
||||
}
|
||||
really_inline bool document::parser::on_number_u64(uint64_t value) {
|
||||
write_tape(0, 'u');
|
||||
really_inline bool document::parser::on_number_u64(uint64_t value) noexcept {
|
||||
write_tape(0, tape_type::UINT64);
|
||||
doc.tape[current_loc++] = value;
|
||||
return true;
|
||||
}
|
||||
really_inline bool document::parser::on_number_double(double value) {
|
||||
write_tape(0, 'd');
|
||||
really_inline bool document::parser::on_number_double(double value) noexcept {
|
||||
write_tape(0, tape_type::DOUBLE);
|
||||
static_assert(sizeof(value) == sizeof(doc.tape[current_loc]), "mismatch size");
|
||||
memcpy(&doc.tape[current_loc++], &value, sizeof(double));
|
||||
// doc.tape[doc.current_loc++] = *((uint64_t *)&d);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Appends one word to the tape: the type tag t is shifted into the top byte
// (bits 56 and up) and OR'ed with val, then current_loc advances by one.
really_inline void document::parser::write_tape(uint64_t val, document::tape_type t) noexcept {
  const uint64_t tag = static_cast<uint64_t>(static_cast<char>(t)) << 56;
  doc.tape[current_loc] = val | tag;
  ++current_loc;
}
|
||||
|
||||
really_inline void document::parser::annotate_previous_loc(uint32_t saved_loc, uint64_t val) noexcept {
|
||||
doc.tape[saved_loc] |= val;
|
||||
}
|
||||
|
||||
//
|
||||
// document::tape_ref inline implementation
|
||||
//
|
||||
really_inline document::tape_ref::tape_ref() noexcept : doc{nullptr}, json_index{0} {}
|
||||
really_inline document::tape_ref::tape_ref(const document *_doc, size_t _json_index) noexcept : doc{_doc}, json_index{_json_index} {}
|
||||
|
||||
inline size_t document::tape_ref::after_element() const noexcept {
|
||||
switch (type()) {
|
||||
case tape_type::START_ARRAY:
|
||||
case tape_type::START_OBJECT:
|
||||
return tape_value();
|
||||
case tape_type::UINT64:
|
||||
case tape_type::INT64:
|
||||
case tape_type::DOUBLE:
|
||||
return json_index + 2;
|
||||
default:
|
||||
return json_index + 1;
|
||||
}
|
||||
}
|
||||
really_inline document::tape_type document::tape_ref::type() const noexcept {
|
||||
return static_cast<tape_type>(doc->tape[json_index] >> 56);
|
||||
}
|
||||
really_inline uint64_t document::tape_ref::tape_value() const noexcept {
|
||||
return doc->tape[json_index] & JSON_VALUE_MASK;
|
||||
}
|
||||
template<typename T>
|
||||
really_inline T document::tape_ref::next_tape_value() const noexcept {
|
||||
static_assert(sizeof(T) == sizeof(uint64_t));
|
||||
return *reinterpret_cast<const T*>(&doc->tape[json_index + 1]);
|
||||
}
|
||||
|
||||
//
|
||||
// document::array inline implementation
|
||||
//
|
||||
// Default array: wraps a default-constructed tape_ref.
really_inline document::array::array() noexcept
  : tape_ref() {}

// Array whose opening tape word is at `_json_index` in `*_doc`.
really_inline document::array::array(const document *_doc, size_t _json_index) noexcept
  : tape_ref(_doc, _json_index) {}
|
||||
// Iterator positioned on the tape word right after the array's opening word.
inline document::array::iterator document::array::begin() const noexcept {
  const size_t first_index = json_index + 1;
  return iterator(doc, first_index);
}
|
||||
// Past-the-end iterator: one word before the index reported by after_element().
inline document::array::iterator document::array::end() const noexcept {
  const size_t stop_index = after_element() - 1;
  return iterator(doc, stop_index);
}
|
||||
|
||||
|
||||
//
|
||||
// document::array::iterator inline implementation
|
||||
//
|
||||
// Iterator over array elements, positioned at tape index `_json_index`.
really_inline document::array::iterator::iterator(const document *_doc, size_t _json_index) noexcept
  : tape_ref(_doc, _json_index) {}
|
||||
// The element at the iterator's current tape index.
inline document::element document::array::iterator::operator*() const noexcept {
  element current(doc, json_index);
  return current;
}
|
||||
// Iterators differ when their tape indexes differ (the document pointer is not compared).
inline bool document::array::iterator::operator!=(const document::array::iterator& other) const noexcept {
  return !(json_index == other.json_index);
}
|
||||
// Advances past the current element to whatever follows it on the tape.
inline void document::array::iterator::operator++() noexcept {
  const size_t next_index = after_element();
  json_index = next_index;
}
|
||||
|
||||
//
|
||||
// document::object inline implementation
|
||||
//
|
||||
// Default object: wraps a default-constructed tape_ref.
really_inline document::object::object() noexcept : tape_ref() {}

// Object whose opening tape word is at `_json_index` in `*_doc`.
// (The stray ';' that followed this definition was an empty declaration and
// has been removed, matching the array constructor.)
really_inline document::object::object(const document *_doc, size_t _json_index) noexcept : tape_ref(_doc, _json_index) { }
|
||||
// Iterator positioned on the tape word right after the object's opening word.
inline document::object::iterator document::object::begin() const noexcept {
  const size_t first_index = json_index + 1;
  return iterator(doc, first_index);
}
|
||||
// Past-the-end iterator: one word before the index reported by after_element().
inline document::object::iterator document::object::end() const noexcept {
  const size_t stop_index = after_element() - 1;
  return iterator(doc, stop_index);
}
|
||||
// Linear search for the first field whose key equals `key`.
// Returns that field's value, or NO_SUCH_FIELD when no key matches.
inline document::element_result<document::element> document::object::operator[](const std::string_view &key) const noexcept {
  const iterator stop = end();
  iterator cursor = begin();
  while (cursor != stop) {
    if (key == cursor.key()) {
      return cursor.value();
    }
    ++cursor;
  }
  return NO_SUCH_FIELD;
}
|
||||
// Linear search for the first field whose key compares equal to the C string
// `key` under strcmp. Returns that field's value, or NO_SUCH_FIELD.
inline document::element_result<document::element> document::object::operator[](const char *key) const noexcept {
  const iterator stop = end();
  iterator cursor = begin();
  while (cursor != stop) {
    const bool matches = strcmp(key, cursor.key_c_str()) == 0;
    if (matches) {
      return cursor.value();
    }
    ++cursor;
  }
  return NO_SUCH_FIELD;
}
|
||||
|
||||
//
|
||||
// document::object::iterator inline implementation
|
||||
//
|
||||
// Iterator over object fields, positioned at tape index `_json_index`.
really_inline document::object::iterator::iterator(const document *_doc, size_t _json_index) noexcept
  : tape_ref(_doc, _json_index) {}
|
||||
// The current field as a (key, value) pair.
inline const document::key_value_pair document::object::iterator::operator*() const noexcept {
  key_value_pair field(key(), value());
  return field;
}
|
||||
// Iterators differ when their tape indexes differ (the document pointer is not compared).
inline bool document::object::iterator::operator!=(const document::object::iterator& other) const noexcept {
  return !(json_index == other.json_index);
}
|
||||
// Moves to the next field: step off the key word onto the value (value() reads
// json_index + 1), then jump past that whole value.
inline void document::object::iterator::operator++() noexcept {
  ++json_index;
  const size_t after_value = after_element();
  json_index = after_value;
}
|
||||
inline std::string_view document::object::iterator::key() const noexcept {
|
||||
size_t string_buf_index = tape_value();
|
||||
uint32_t len;
|
||||
memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len));
|
||||
return std::string_view(
|
||||
reinterpret_cast<const char *>(&doc->string_buf[string_buf_index + sizeof(uint32_t)]),
|
||||
len
|
||||
);
|
||||
}
|
||||
inline const char* document::object::iterator::key_c_str() const noexcept {
|
||||
return reinterpret_cast<const char *>(&doc->string_buf[tape_value() + sizeof(uint32_t)]);
|
||||
}
|
||||
// The current field's value: the element one tape word after the key.
inline document::element document::object::iterator::value() const noexcept {
  const size_t value_index = json_index + 1;
  return element(doc, value_index);
}
|
||||
|
||||
//
|
||||
// document::key_value_pair inline implementation
|
||||
//
|
||||
// Stores copies of the given key view and value element.
inline document::key_value_pair::key_value_pair(std::string_view _key, document::element _value) noexcept
  : key(_key),
    value(_value) {}
|
||||
|
||||
//
|
||||
// document::element inline implementation
|
||||
//
|
||||
// Default element: wraps a default-constructed tape_ref.
really_inline document::element::element() noexcept
  : tape_ref() {}

// Element located at tape index `_json_index` in `*_doc`.
really_inline document::element::element(const document *_doc, size_t _json_index) noexcept
  : tape_ref(_doc, _json_index) {}
|
||||
// Type predicates: each one inspects only the tag of the current tape word.

// True for a JSON null.
really_inline bool document::element::is_null() const noexcept {
  return tape_type::NULL_VALUE == type();
}

// True for JSON true or false.
really_inline bool document::element::is_bool() const noexcept {
  const tape_type kind = type();
  return kind == tape_type::TRUE_VALUE || kind == tape_type::FALSE_VALUE;
}

// True for any numeric tag: UINT64, INT64 or DOUBLE.
really_inline bool document::element::is_number() const noexcept {
  const tape_type kind = type();
  return kind == tape_type::UINT64 || kind == tape_type::INT64 || kind == tape_type::DOUBLE;
}

// True for the integer tags only: UINT64 or INT64.
really_inline bool document::element::is_integer() const noexcept {
  const tape_type kind = type();
  return kind == tape_type::UINT64 || kind == tape_type::INT64;
}

// True for a JSON string.
really_inline bool document::element::is_string() const noexcept {
  return tape_type::STRING == type();
}

// True when the element is the start of an array.
really_inline bool document::element::is_array() const noexcept {
  return tape_type::START_ARRAY == type();
}

// True when the element is the start of an object.
really_inline bool document::element::is_object() const noexcept {
  return tape_type::START_OBJECT == type();
}
|
||||
// Conversion operators. Each forwards to the matching as_*() accessor and
// converts the returned element_result to the target type. They are declared
// noexcept(false), unlike the as_*() accessors, which are noexcept.

inline document::element::operator bool() const noexcept(false) {
  return as_bool();
}

inline document::element::operator const char*() const noexcept(false) {
  return as_c_str();
}

inline document::element::operator std::string_view() const noexcept(false) {
  return as_string();
}

inline document::element::operator uint64_t() const noexcept(false) {
  return as_uint64_t();
}

inline document::element::operator int64_t() const noexcept(false) {
  return as_int64_t();
}

inline document::element::operator double() const noexcept(false) {
  return as_double();
}

inline document::element::operator document::array() const noexcept(false) {
  return as_array();
}

inline document::element::operator document::object() const noexcept(false) {
  return as_object();
}
|
||||
// Returns true/false for TRUE_VALUE/FALSE_VALUE elements, INCORRECT_TYPE otherwise.
inline document::element_result<bool> document::element::as_bool() const noexcept {
  const tape_type kind = type();
  if (kind == tape_type::TRUE_VALUE) {
    return true;
  }
  if (kind == tape_type::FALSE_VALUE) {
    return false;
  }
  return INCORRECT_TYPE;
}
|
||||
// For STRING elements, returns a pointer to the characters in string_buf (just
// past the uint32_t-sized prefix at the offset held in the tape payload).
// Any other type yields INCORRECT_TYPE.
inline document::element_result<const char *> document::element::as_c_str() const noexcept {
  if (type() != tape_type::STRING) {
    return INCORRECT_TYPE;
  }
  const size_t offset = tape_value();
  return reinterpret_cast<const char *>(&doc->string_buf[offset + sizeof(uint32_t)]);
}
|
||||
// For STRING elements, returns a string_view built from the uint32_t length
// stored at the tape payload's offset in string_buf and the characters that
// follow it. Any other type yields INCORRECT_TYPE.
inline document::element_result<std::string_view> document::element::as_string() const noexcept {
  if (type() != tape_type::STRING) {
    return INCORRECT_TYPE;
  }
  const size_t offset = tape_value();
  const auto *entry = &doc->string_buf[offset];
  uint32_t length;
  memcpy(&length, entry, sizeof(length));
  const char *chars = reinterpret_cast<const char *>(entry + sizeof(uint32_t));
  return std::string_view(chars, length);
}
|
||||
// Returns the element as an unsigned 64-bit integer.
//  - UINT64: the stored value.
//  - INT64: the stored value if non-negative, otherwise NUMBER_OUT_OF_RANGE.
//  - anything else: INCORRECT_TYPE.
inline document::element_result<uint64_t> document::element::as_uint64_t() const noexcept {
  const tape_type kind = type();
  if (kind == tape_type::UINT64) {
    return next_tape_value<uint64_t>();
  }
  if (kind == tape_type::INT64) {
    const int64_t signed_value = next_tape_value<int64_t>();
    if (signed_value < 0) {
      return NUMBER_OUT_OF_RANGE;
    }
    return static_cast<uint64_t>(signed_value);
  }
  return INCORRECT_TYPE;
}
|
||||
// Returns the element as a signed 64-bit integer.
//  - INT64: the stored value.
//  - UINT64: the stored value if it fits in int64_t, otherwise NUMBER_OUT_OF_RANGE.
//  - anything else: INCORRECT_TYPE.
inline document::element_result<int64_t> document::element::as_int64_t() const noexcept {
  switch (type()) {
    case tape_type::UINT64: {
      uint64_t result = next_tape_value<uint64_t>();
      // The limit must be int64_t's maximum. Comparing against uint64_t's
      // maximum can never be true, so large values wrapped to negative numbers.
      // Wrapping max in parens to handle Windows issue: https://stackoverflow.com/questions/11544073/how-do-i-deal-with-the-max-macro-in-windows-h-colliding-with-max-in-std
      if (result > static_cast<uint64_t>((std::numeric_limits<int64_t>::max)())) {
        return NUMBER_OUT_OF_RANGE;
      }
      return static_cast<int64_t>(result);
    }
    case tape_type::INT64:
      return next_tape_value<int64_t>();
    default:
      // Report the mismatch through the error code only, like the other
      // as_*() accessors; the leftover debug print to stdout is removed.
      return INCORRECT_TYPE;
  }
}
|
||||
// Returns the element as a double.
//  - UINT64 / INT64: the stored integer converted to double.
//  - DOUBLE: the stored value.
//  - anything else: INCORRECT_TYPE.
inline document::element_result<double> document::element::as_double() const noexcept {
  switch (type()) {
    case tape_type::UINT64:
      return static_cast<double>(next_tape_value<uint64_t>());
    case tape_type::INT64:
      // The unreachable block that followed this return has been removed. It
      // read tape_value() (the tag word's payload, not the number) and
      // rejected negative values, neither of which applies to a double.
      return static_cast<double>(next_tape_value<int64_t>());
    case tape_type::DOUBLE:
      return next_tape_value<double>();
    default:
      return INCORRECT_TYPE;
  }
}
|
||||
// Returns an array view at this tape index for START_ARRAY elements,
// INCORRECT_TYPE for anything else.
inline document::element_result<document::array> document::element::as_array() const noexcept {
  if (type() != tape_type::START_ARRAY) {
    return INCORRECT_TYPE;
  }
  return array(doc, json_index);
}
|
||||
// Returns an object view at this tape index for START_OBJECT elements,
// INCORRECT_TYPE for anything else.
inline document::element_result<document::object> document::element::as_object() const noexcept {
  if (type() != tape_type::START_OBJECT) {
    return INCORRECT_TYPE;
  }
  return object(doc, json_index);
}
|
||||
// Field lookup by string_view key. Fails with as_object()'s error when this
// element is not an object; otherwise defers to object::operator[].
inline document::element_result<document::element> document::element::operator[](const std::string_view &key) const noexcept {
  auto [as_obj, conversion_error] = as_object();
  if (conversion_error) {
    return conversion_error;
  }
  return as_obj[key];
}
|
||||
// Field lookup by C-string key. Fails with as_object()'s error when this
// element is not an object; otherwise defers to object::operator[].
inline document::element_result<document::element> document::element::operator[](const char *key) const noexcept {
  auto [as_obj, conversion_error] = as_object();
  if (conversion_error) {
    return conversion_error;
  }
  return as_obj[key];
}
|
||||
|
||||
} // namespace simdjson
|
||||
|
||||
#endif // SIMDJSON_INLINE_DOCUMENT_H
|
||||
|
|
|
@ -25,8 +25,7 @@ bool document::set_capacity(size_t capacity) {
|
|||
return string_buf && tape;
|
||||
}
|
||||
|
||||
WARN_UNUSED
|
||||
bool document::print_json(std::ostream &os, size_t max_depth) const {
|
||||
bool document::print_json(std::ostream &os, size_t max_depth) const noexcept {
|
||||
uint32_t string_length;
|
||||
size_t tape_idx = 0;
|
||||
uint64_t tape_val = tape[tape_idx];
|
||||
|
@ -132,8 +131,7 @@ bool document::print_json(std::ostream &os, size_t max_depth) const {
|
|||
return true;
|
||||
}
|
||||
|
||||
WARN_UNUSED
|
||||
bool document::dump_raw_tape(std::ostream &os) const {
|
||||
bool document::dump_raw_tape(std::ostream &os) const noexcept {
|
||||
uint32_t string_length;
|
||||
size_t tape_idx = 0;
|
||||
uint64_t tape_val = tape[tape_idx];
|
||||
|
|
|
@ -23,6 +23,9 @@ const std::map<int, const std::string> error_strings = {
|
|||
{UNSUPPORTED_ARCHITECTURE, "simdjson does not have an implementation"
|
||||
" supported by this CPU architecture (perhaps"
|
||||
" it's a non-SIMD CPU?)."},
|
||||
{INCORRECT_TYPE, "The JSON element does not have the requested type."},
|
||||
{NUMBER_OUT_OF_RANGE, "The JSON number is too large or too small to fit within the requested type."},
|
||||
{NO_SUCH_FIELD, "The JSON field referenced does not exist in this object."},
|
||||
{UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as"
|
||||
" you may have found a bug in simdjson"},
|
||||
};
|
||||
|
|
|
@ -209,7 +209,7 @@ really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) {
|
|||
really_inline uint64_t follows(const uint64_t match, const uint64_t filler, uint64_t &overflow) {
|
||||
uint64_t follows_match = follows(match, overflow);
|
||||
uint64_t result;
|
||||
overflow |= add_overflow(follows_match, filler, &result);
|
||||
overflow |= uint64_t(add_overflow(follows_match, filler, &result));
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -31,7 +31,6 @@ namespace simdjson::haswell::simd {
|
|||
really_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); }
|
||||
really_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); }
|
||||
really_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); }
|
||||
really_inline Child operator~() const { return *this ^ 0xFFu; }
|
||||
really_inline Child& operator|=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast | other; return *this_cast; }
|
||||
really_inline Child& operator&=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast & other; return *this_cast; }
|
||||
really_inline Child& operator^=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast ^ other; return *this_cast; }
|
||||
|
@ -71,6 +70,7 @@ namespace simdjson::haswell::simd {
|
|||
|
||||
really_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); }
|
||||
really_inline bool any() const { return !_mm256_testz_si256(*this, *this); }
|
||||
really_inline simd8<bool> operator~() const { return *this ^ true; }
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
|
@ -105,6 +105,9 @@ namespace simdjson::haswell::simd {
|
|||
really_inline simd8<T>& operator+=(const simd8<T> other) { *this = *this + other; return *(simd8<T>*)this; }
|
||||
really_inline simd8<T>& operator-=(const simd8<T> other) { *this = *this - other; return *(simd8<T>*)this; }
|
||||
|
||||
// Override to distinguish from bool version
|
||||
really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
|
||||
|
||||
// Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values)
|
||||
template<typename L>
|
||||
really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
|
||||
|
|
|
@ -31,7 +31,6 @@ namespace simdjson::westmere::simd {
|
|||
really_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); }
|
||||
really_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); }
|
||||
really_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); }
|
||||
really_inline Child operator~() const { return *this ^ 0xFFu; }
|
||||
really_inline Child& operator|=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast | other; return *this_cast; }
|
||||
really_inline Child& operator&=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast & other; return *this_cast; }
|
||||
really_inline Child& operator^=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast ^ other; return *this_cast; }
|
||||
|
@ -71,6 +70,7 @@ namespace simdjson::westmere::simd {
|
|||
|
||||
really_inline int to_bitmask() const { return _mm_movemask_epi8(*this); }
|
||||
really_inline bool any() const { return !_mm_testz_si128(*this, *this); }
|
||||
really_inline simd8<bool> operator~() const { return *this ^ true; }
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
|
@ -97,6 +97,9 @@ namespace simdjson::westmere::simd {
|
|||
// Store to array
|
||||
really_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); }
|
||||
|
||||
// Override to distinguish from bool version
|
||||
really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
|
||||
|
||||
// Addition/subtraction are the same for signed and unsigned
|
||||
really_inline simd8<T> operator+(const simd8<T> other) const { return _mm_add_epi8(*this, other); }
|
||||
really_inline simd8<T> operator-(const simd8<T> other) const { return _mm_sub_epi8(*this, other); }
|
||||
|
|
|
@ -18,9 +18,10 @@ add_cpp_test(jsonstream_test)
|
|||
add_cpp_test(pointercheck)
|
||||
add_cpp_test(integer_tests)
|
||||
|
||||
target_compile_definitions(basictests PRIVATE JSON_TEST_PATH="${PROJECT_SOURCE_DIR}/jsonexamples/twitter.json")
|
||||
|
||||
## This causes problems
|
||||
# add_executable(singleheader ./singleheadertest.cpp ${PROJECT_SOURCE_DIR}/singleheader/simdjson.cpp)
|
||||
# target_compile_definitions(singleheader PRIVATE JSON_TEST_PATH="${PROJECT_SOURCE_DIR}/jsonexamples/twitter.json")
|
||||
# target_link_libraries(singleheader ${SIMDJSON_LIB_NAME})
|
||||
# add_test(singleheader singleheader)
|
||||
|
||||
|
|
|
@ -7,11 +7,17 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
#include <cmath>
|
||||
#include <set>
|
||||
#include <string_view>
|
||||
|
||||
#include "simdjson/jsonparser.h"
|
||||
#include "simdjson/jsonstream.h"
|
||||
#include "simdjson/document.h"
|
||||
|
||||
#ifndef JSON_TEST_PATH
|
||||
#define JSON_TEST_PATH "jsonexamples/twitter.json"
|
||||
#endif
|
||||
|
||||
// ulp distance
|
||||
// Marc B. Reynolds, 2016-2019
|
||||
// Public Domain under http://unlicense.org, see link for details.
|
||||
|
@ -569,6 +575,205 @@ bool skyprophet_test() {
|
|||
return true;
|
||||
}
|
||||
|
||||
namespace dom_api {
|
||||
using namespace std;
|
||||
using namespace simdjson;
|
||||
bool object_iterator() {
|
||||
string json(R"({ "a": 1, "b": 2, "c": 3 })");
|
||||
const char* expected_key[] = { "a", "b", "c" };
|
||||
uint64_t expected_value[] = { 1, 2, 3 };
|
||||
int i = 0;
|
||||
|
||||
document doc = document::parse(json);
|
||||
for (auto [key, value] : document::object(doc)) {
|
||||
if (key != expected_key[i] || uint64_t(value) != expected_value[i]) { cerr << "Expected " << expected_key[i] << " = " << expected_value[i] << ", got " << key << "=" << uint64_t(value) << endl; return false; }
|
||||
i++;
|
||||
}
|
||||
if (i*sizeof(uint64_t) != sizeof(expected_value)) { cout << "Expected " << sizeof(expected_value) << " values, got " << i << endl; return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
bool array_iterator() {
|
||||
string json(R"([ 1, 10, 100 ])");
|
||||
uint64_t expected_value[] = { 1, 10, 100 };
|
||||
int i=0;
|
||||
|
||||
document doc = document::parse(json);
|
||||
for (uint64_t value : doc.as_array()) {
|
||||
if (value != expected_value[i]) { cerr << "Expected " << expected_value[i] << ", got " << value << endl; return false; }
|
||||
i++;
|
||||
}
|
||||
if (i*sizeof(uint64_t) != sizeof(expected_value)) { cout << "Expected " << sizeof(expected_value) << " values, got " << i << endl; return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
bool object_iterator_empty() {
|
||||
string json(R"({})");
|
||||
int i = 0;
|
||||
|
||||
document doc = document::parse(json);
|
||||
for (auto [key, value] : doc.as_object()) {
|
||||
cout << "Unexpected " << key << " = " << uint64_t(value) << endl;
|
||||
i++;
|
||||
}
|
||||
if (i > 0) { cout << "Expected 0 values, got " << i << endl; return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
bool array_iterator_empty() {
|
||||
string json(R"([])");
|
||||
int i=0;
|
||||
|
||||
document doc = document::parse(json);
|
||||
for (uint64_t value : doc.as_array()) {
|
||||
cout << "Unexpected value " << value << endl;
|
||||
i++;
|
||||
}
|
||||
if (i > 0) { cout << "Expected 0 values, got " << i << endl; return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
bool string_value() {
|
||||
string json(R"([ "hi", "has backslash\\" ])");
|
||||
document doc = document::parse(json);
|
||||
auto val = document::array(doc).begin();
|
||||
if (strcmp((const char*)*val, "hi")) { cerr << "Expected const char*(\"hi\") to be \"hi\", was " << (const char*)*val << endl; return false; }
|
||||
if (string_view(*val) != "hi") { cerr << "Expected string_view(\"hi\") to be \"hi\", was " << string_view(*val) << endl; return false; }
|
||||
++val;
|
||||
if (strcmp((const char*)*val, "has backslash\\")) { cerr << "Expected const char*(\"has backslash\\\\\") to be \"has backslash\\\", was " << (const char*)*val << endl; return false; }
|
||||
if (string_view(*val) != "has backslash\\") { cerr << "Expected string_view(\"has backslash\\\\\") to be \"has backslash\\\", was " << string_view(*val) << endl; return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
bool numeric_values() {
|
||||
string json(R"([ 0, 1, -1, 1.1 ])");
|
||||
document doc = document::parse(json);
|
||||
auto val = document::array(doc).begin();
|
||||
if (uint64_t(*val) != 0) { cerr << "Expected uint64_t(0) to be 0, was " << uint64_t(*val) << endl; return false; }
|
||||
if (int64_t(*val) != 0) { cerr << "Expected int64_t(0) to be 0, was " << int64_t(*val) << endl; return false; }
|
||||
if (double(*val) != 0) { cerr << "Expected double(0) to be 0, was " << double(*val) << endl; return false; }
|
||||
++val;
|
||||
if (uint64_t(*val) != 1) { cerr << "Expected uint64_t(1) to be 1, was " << uint64_t(*val) << endl; return false; }
|
||||
if (int64_t(*val) != 1) { cerr << "Expected int64_t(1) to be 1, was " << int64_t(*val) << endl; return false; }
|
||||
if (double(*val) != 1) { cerr << "Expected double(1) to be 1, was " << double(*val) << endl; return false; }
|
||||
++val;
|
||||
if (int64_t(*val) != -1) { cerr << "Expected int64_t(-1) to be -1, was " << int64_t(*val) << endl; return false; }
|
||||
if (double(*val) != -1) { cerr << "Expected double(-1) to be -1, was " << double(*val) << endl; return false; }
|
||||
++val;
|
||||
if (double(*val) != 1.1) { cerr << "Expected double(1.1) to be 1.1, was " << double(*val) << endl; return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
bool boolean_values() {
|
||||
string json(R"([ true, false ])");
|
||||
document doc = document::parse(json);
|
||||
auto val = document::array(doc).begin();
|
||||
if (bool(*val) != true) { cerr << "Expected bool(true) to be true, was " << bool(*val) << endl; return false; }
|
||||
++val;
|
||||
if (bool(*val) != false) { cerr << "Expected bool(false) to be false, was " << bool(*val) << endl; return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
bool null_value() {
|
||||
string json(R"([ null ])");
|
||||
document doc = document::parse(json);
|
||||
auto val = document::array(doc).begin();
|
||||
if (!(*val).is_null()) { cerr << "Expected null to be null!" << endl; return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
bool document_object_index() {
|
||||
string json(R"({ "a": 1, "b": 2, "c": 3})");
|
||||
document doc = document::parse(json);
|
||||
if (uint64_t(doc["a"]) != 1) { cerr << "Expected uint64_t(doc[\"a\"]) to be 1, was " << uint64_t(doc["a"]) << endl; return false; }
|
||||
if (uint64_t(doc["b"]) != 2) { cerr << "Expected uint64_t(doc[\"b\"]) to be 2, was " << uint64_t(doc["b"]) << endl; return false; }
|
||||
if (uint64_t(doc["c"]) != 3) { cerr << "Expected uint64_t(doc[\"c\"]) to be 3, was " << uint64_t(doc["c"]) << endl; return false; }
|
||||
// Check all three again in backwards order, to ensure we can go backwards
|
||||
if (uint64_t(doc["c"]) != 3) { cerr << "Expected uint64_t(doc[\"c\"]) to be 3, was " << uint64_t(doc["c"]) << endl; return false; }
|
||||
if (uint64_t(doc["b"]) != 2) { cerr << "Expected uint64_t(doc[\"b\"]) to be 2, was " << uint64_t(doc["b"]) << endl; return false; }
|
||||
if (uint64_t(doc["a"]) != 1) { cerr << "Expected uint64_t(doc[\"a\"]) to be 1, was " << uint64_t(doc["a"]) << endl; return false; }
|
||||
|
||||
auto [val, error] = doc["d"];
|
||||
if (error != simdjson::NO_SUCH_FIELD) { cerr << "Expected NO_SUCH_FIELD error for uint64_t(doc[\"d\"]), got " << error_message(error) << endl; return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
bool object_index() {
|
||||
string json(R"({ "obj": { "a": 1, "b": 2, "c": 3 } })");
|
||||
document doc = document::parse(json);
|
||||
if (uint64_t(doc["obj"]["a"]) != 1) { cerr << "Expected uint64_t(doc[\"obj\"][\"a\"]) to be 1, was " << uint64_t(doc["obj"]["a"]) << endl; return false; }
|
||||
document::object obj = doc["obj"];
|
||||
if (uint64_t(obj["a"]) != 1) { cerr << "Expected uint64_t(obj[\"a\"]) to be 1, was " << uint64_t(obj["a"]) << endl; return false; }
|
||||
if (uint64_t(obj["b"]) != 2) { cerr << "Expected uint64_t(obj[\"b\"]) to be 2, was " << uint64_t(obj["b"]) << endl; return false; }
|
||||
if (uint64_t(obj["c"]) != 3) { cerr << "Expected uint64_t(obj[\"c\"]) to be 3, was " << uint64_t(obj["c"]) << endl; return false; }
|
||||
// Check all three again in backwards order, to ensure we can go backwards
|
||||
if (uint64_t(obj["c"]) != 3) { cerr << "Expected uint64_t(obj[\"c\"]) to be 3, was " << uint64_t(obj["c"]) << endl; return false; }
|
||||
if (uint64_t(obj["b"]) != 2) { cerr << "Expected uint64_t(obj[\"b\"]) to be 2, was " << uint64_t(obj["b"]) << endl; return false; }
|
||||
if (uint64_t(obj["a"]) != 1) { cerr << "Expected uint64_t(obj[\"a\"]) to be 1, was " << uint64_t(obj["a"]) << endl; return false; }
|
||||
|
||||
auto [val, error] = obj["d"];
|
||||
if (error != simdjson::NO_SUCH_FIELD) { cerr << "Expected NO_SUCH_FIELD error for uint64_t(obj[\"d\"]), got " << error_message(error) << endl; return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
bool twitter_count() {
|
||||
// Prints the number of results in twitter.json
|
||||
document doc = document::parse(get_corpus(JSON_TEST_PATH));
|
||||
uint64_t result_count = doc["search_metadata"]["count"];
|
||||
if (result_count != 100) { cerr << "Expected twitter.json[metadata_count][count] = 100, got " << result_count << endl; return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
bool twitter_default_profile() {
|
||||
// Print users with a default profile.
|
||||
set<string_view> default_users;
|
||||
document doc = document::parse(get_corpus(JSON_TEST_PATH));
|
||||
for (document::object tweet : doc["statuses"].as_array()) {
|
||||
document::object user = tweet["user"];
|
||||
if (user["default_profile"]) {
|
||||
default_users.insert(user["screen_name"]);
|
||||
}
|
||||
}
|
||||
if (default_users.size() != 86) { cerr << "Expected twitter.json[statuses][user] to contain 86 default_profile users, got " << default_users.size() << endl; return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
bool twitter_image_sizes() {
|
||||
// Print image names and sizes
|
||||
set<tuple<uint64_t, uint64_t>> image_sizes;
|
||||
document doc = document::parse(get_corpus(JSON_TEST_PATH));
|
||||
for (document::object tweet : doc["statuses"].as_array()) {
|
||||
auto [media, not_found] = tweet["entities"]["media"];
|
||||
if (!not_found) {
|
||||
for (document::object image : media.as_array()) {
|
||||
for (auto [key, size] : image["sizes"].as_object()) {
|
||||
image_sizes.insert({ size["w"], size["h"] });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (image_sizes.size() != 15) { cerr << "Expected twitter.json[statuses][entities][media][sizes] to contain 15 different sizes, got " << image_sizes.size() << endl; return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
bool run_tests() {
|
||||
if (!object_iterator()) { return false; }
|
||||
if (!array_iterator()) { return false; }
|
||||
if (!object_iterator_empty()) { return false; }
|
||||
if (!array_iterator_empty()) { return false; }
|
||||
if (!string_value()) { return false; }
|
||||
if (!numeric_values()) { return false; }
|
||||
if (!boolean_values()) { return false; }
|
||||
if (!null_value()) { return false; }
|
||||
if (!document_object_index()) { return false; }
|
||||
if (!object_index()) { return false; }
|
||||
if (!twitter_count()) { return false; }
|
||||
if (!twitter_default_profile()) { return false; }
|
||||
if (!twitter_image_sizes()) { return false; }
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
// this is put here deliberately to check that the documentation is correct (README),
|
||||
// should this fail to compile, you should update the documentation:
|
||||
|
@ -596,6 +801,8 @@ int main() {
|
|||
return EXIT_FAILURE;
|
||||
if (!skyprophet_test())
|
||||
return EXIT_FAILURE;
|
||||
if (!dom_api::run_tests())
|
||||
return EXIT_FAILURE;
|
||||
std::cout << "Basic tests are ok." << std::endl;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
|
|
@ -5,6 +5,8 @@ using namespace std;
|
|||
using namespace simdjson;
|
||||
|
||||
void document_parse_error_code() {
|
||||
cout << __func__ << endl;
|
||||
|
||||
string json("[ 1, 2, 3 ]");
|
||||
auto [doc, error] = document::parse(json);
|
||||
if (error) { cerr << "Error: " << error_message(error) << endl; exit(1); }
|
||||
|
@ -13,6 +15,8 @@ void document_parse_error_code() {
|
|||
}
|
||||
|
||||
void document_parse_exception() {
|
||||
cout << __func__ << endl;
|
||||
|
||||
string json("[ 1, 2, 3 ]");
|
||||
document doc = document::parse(json);
|
||||
doc.print_json(cout);
|
||||
|
@ -20,6 +24,8 @@ void document_parse_exception() {
|
|||
}
|
||||
|
||||
void document_parse_padded_string() {
|
||||
cout << __func__ << endl;
|
||||
|
||||
padded_string json(string("[ 1, 2, 3 ]"));
|
||||
document doc = document::parse(json);
|
||||
doc.print_json(cout);
|
||||
|
@ -27,6 +33,8 @@ void document_parse_padded_string() {
|
|||
}
|
||||
|
||||
void document_parse_get_corpus() {
|
||||
cout << __func__ << endl;
|
||||
|
||||
padded_string json(get_corpus("jsonexamples/small/demo.json"));
|
||||
document doc = document::parse(json);
|
||||
doc.print_json(cout);
|
||||
|
@ -34,6 +42,8 @@ void document_parse_get_corpus() {
|
|||
}
|
||||
|
||||
void parser_parse() {
|
||||
cout << __func__ << endl;
|
||||
|
||||
// Allocate a parser big enough for all files
|
||||
document::parser parser;
|
||||
if (!parser.allocate_capacity(1024*1024)) { exit(1); }
|
||||
|
|
Loading…
Reference in New Issue