On-demand parse implementation

This commit is contained in:
John Keiser 2020-08-13 13:36:20 -07:00
parent 04267e0f6b
commit ebcb3c6b3b
30 changed files with 1897 additions and 19 deletions

View File

@ -5,6 +5,7 @@ link_libraries(simdjson-windows-headers test-data)
if (TARGET benchmark::benchmark) if (TARGET benchmark::benchmark)
add_executable(bench_sax bench_sax.cpp) add_executable(bench_sax bench_sax.cpp)
target_link_libraries(bench_sax simdjson-internal-flags simdjson-include-source benchmark::benchmark) target_link_libraries(bench_sax simdjson-internal-flags simdjson-include-source benchmark::benchmark)
target_compile_options(bench_sax PRIVATE -mavx2 -mbmi -mpclmul -mlzcnt)
endif (TARGET benchmark::benchmark) endif (TARGET benchmark::benchmark)
link_libraries(simdjson simdjson-flags) link_libraries(simdjson simdjson-flags)

View File

@ -26,6 +26,76 @@ const int REPETITIONS = 10;
#if SIMDJSON_IMPLEMENTATION_HASWELL #if SIMDJSON_IMPLEMENTATION_HASWELL
#include "twitter/tweet.h"
#include <vector>
SIMDJSON_TARGET_HASWELL
namespace ondemand_bench {
using namespace simdjson;
using namespace haswell;
// Converts a JSON value that may be null into an integer: null is reported as 0,
// anything else goes through the value's own conversion to uint64_t.
simdjson_really_inline uint64_t nullable_int(ondemand::value && value) {
  if (!value.is_null()) {
    return std::move(value);
  }
  return 0;
}
// Builds a twitter_user from a tweet's "user" object. The braced initializer
// performs the lookups left to right: "id" first, then "screen_name".
simdjson_really_inline twitter::twitter_user read_user(ondemand::object && u) {
  return twitter::twitter_user{ u["id"], u["screen_name"] };
}
// Parses `json` with the on-demand parser and appends one twitter::tweet to
// `tweets` for every element of the top-level "statuses" array.
simdjson_really_inline void read_tweets(ondemand::parser &parser, padded_string &json, std::vector<twitter::tweet> &tweets) {
  auto doc = parser.parse(json);
  for (ondemand::object status : doc["statuses"]) {
    // The braced initializer evaluates these lookups in the order written.
    tweets.emplace_back(twitter::tweet{
      status["created_at"],
      status["id"],
      status["text"],
      nullable_int(status["in_reply_to_status_id"]),
      read_user(status["user"]),
      status["retweet_count"],
      status["favorite_count"]
    });
  }
}
// Benchmark body: repeatedly parses the twitter JSON file on demand into a
// reused vector of tweets, then publishes byte, document and tweet counters.
static void bench_tweets(State &state) {
  // Read the input file once; bail out (after printing the error) if it cannot be loaded.
  padded_string json;
  if (auto error = padded_string::load(TWITTER_JSON).get(json)) {
    cerr << error << endl;
    return;
  }

  // Warm-up pass outside the measured loop, so the vector already has storage.
  std::vector<twitter::tweet> tweets;
  ondemand::parser parser;
  read_tweets(parser, json, tweets);

  // Measured loop: the vector is cleared (not reallocated) on every iteration.
  size_t total_bytes = 0;
  size_t total_tweets = 0;
  for (SIMDJSON_UNUSED auto _ : state) {
    tweets.clear();
    read_tweets(parser, json, tweets);
    total_bytes += json.size();
    total_tweets += tweets.size();
  }

  // All counters are registered with kIsRate. "Gigabytes" uses 1000-based units
  // (kIs1000); kIs1024 would give GiB instead.
  // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
  state.counters["Gigabytes"] = benchmark::Counter(
    double(total_bytes),
    benchmark::Counter::kIsRate,
    benchmark::Counter::OneK::kIs1000);
  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
  state.counters["tweets"] = Counter(double(total_tweets), benchmark::Counter::kIsRate);
}
// Registers bench_tweets: runs REPETITIONS repetitions, adds a custom "max" statistic
// (the largest value seen across repetitions), and displays only the aggregates.
BENCHMARK(bench_tweets)->Repetitions(REPETITIONS)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
return *(std::max_element(std::begin(v), std::end(v)));
})->DisplayAggregatesOnly(true);
} // namespace ondemand_bench
SIMDJSON_UNTARGET_REGION
#include "twitter/sax_tweet_reader.h" #include "twitter/sax_tweet_reader.h"
static void sax_tweets(State &state) { static void sax_tweets(State &state) {
@ -70,17 +140,15 @@ simdjson_really_inline uint64_t nullable_int(dom::element element) {
simdjson_really_inline void read_dom_tweets(dom::parser &parser, padded_string &json, std::vector<twitter::tweet> &tweets) { simdjson_really_inline void read_dom_tweets(dom::parser &parser, padded_string &json, std::vector<twitter::tweet> &tweets) {
for (dom::element tweet : parser.parse(json)["statuses"]) { for (dom::element tweet : parser.parse(json)["statuses"]) {
auto user = tweet["user"]; auto user = tweet["user"];
tweets.push_back( tweets.emplace_back(twitter::tweet{
{ tweet["created_at"],
tweet["id"], tweet["id"],
tweet["text"], tweet["text"],
tweet["created_at"], nullable_int(tweet["in_reply_to_status_id"]),
nullable_int(tweet["in_reply_to_status_id"]), { user["id"], user["screen_name"] },
tweet["retweet_count"], tweet["retweet_count"],
tweet["favorite_count"], tweet["favorite_count"]
{ user["id"], user["screen_name"] } });
}
);
} }
} }
@ -221,6 +289,45 @@ BENCHMARK(dom_parse_largerandom)->Repetitions(REPETITIONS)->ComputeStatistics("m
#if SIMDJSON_IMPLEMENTATION_HASWELL #if SIMDJSON_IMPLEMENTATION_HASWELL
SIMDJSON_TARGET_HASWELL
/***
* On Demand approach.
**/
// Benchmark body: iterates the document returned by get_my_json_str() with the
// on-demand parser, collecting one my_point (x, y, z) per array element.
static void ondemand_parse_largerandom(State &state) {
  using namespace haswell;

  // Fetch the input document shared by the benchmarks in this file.
  const padded_string& json = get_my_json_str();

  // Allocate parser capacity for the input up front; a failure is thrown as an error_code.
  ondemand::parser parser;
  error_code error = parser.allocate(json.size());
  if (error) { throw error; }

  // Measured loop: a fresh container is built on every iteration.
  size_t total_bytes = 0;
  for (SIMDJSON_UNUSED auto _ : state) {
    std::vector<my_point> points;
    for (ondemand::object p : parser.parse(json)) {
      points.emplace_back(my_point{p["x"], p["y"], p["z"]});
    }
    total_bytes += json.size();
    // Keep the compiler from discarding the parsed results.
    benchmark::DoNotOptimize(points.data());
  }

  // "Gigabytes" uses 1000-based units (kIs1000); kIs1024 would give GiB instead.
  // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
  state.counters["Gigabytes"] = benchmark::Counter(
    double(total_bytes),
    benchmark::Counter::kIsRate,
    benchmark::Counter::OneK::kIs1000);
  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
}
SIMDJSON_UNTARGET_REGION
// Registers ondemand_parse_largerandom: runs REPETITIONS repetitions, adds a custom "max"
// statistic (the largest value seen across repetitions), and displays only the aggregates.
BENCHMARK(ondemand_parse_largerandom)->Repetitions(REPETITIONS)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
return *(std::max_element(std::begin(v), std::end(v)));
})->DisplayAggregatesOnly(true);
/*** /***
* Next we are going to code the SAX approach. * Next we are going to code the SAX approach.
**/ **/

View File

@ -6,14 +6,31 @@
namespace twitter { namespace twitter {
// {
// "statuses": [
// {
// "created_at": "Sun Aug 31 00:29:15 +0000 2014",
// "id": 505874924095815700,
// "text": "@aym0566x \n\n名前:前田あゆみ\n第一印象:なんか怖っ!\n今の印象:とりあえずキモい。噛み合わない\n好きなところ:ぶすでキモいとこ😋✨✨\n思い出:んーーー、ありすぎ😊❤️\nLINE交換できる:あぁ……ごめん✋\nトプ画をみて:照れますがな😘✨\n一言:お前は一生もんのダチ💖",
// "in_reply_to_status_id": null,
// "user": {
// "id": 1186275104,
// "screen_name": "ayuu0123"
// },
// "retweet_count": 0,
// "favorite_count": 0
// }
// ]
// }
struct tweet { struct tweet {
std::string_view created_at{};
uint64_t id{}; uint64_t id{};
std::string_view text{}; std::string_view text{};
std::string_view created_at{};
uint64_t in_reply_to_status_id{}; uint64_t in_reply_to_status_id{};
twitter_user user{};
uint64_t retweet_count{}; uint64_t retweet_count{};
uint64_t favorite_count{}; uint64_t favorite_count{};
twitter_user user{};
}; };
} // namespace twitter } // namespace twitter

View File

@ -133,6 +133,11 @@ if(SIMDJSON_ENABLE_THREADS)
target_compile_definitions(simdjson-flags INTERFACE SIMDJSON_THREADS_ENABLED=1) # This will be set in the code automatically. target_compile_definitions(simdjson-flags INTERFACE SIMDJSON_THREADS_ENABLED=1) # This will be set in the code automatically.
endif() endif()
# Developer-only switch: when ON, every target linking simdjson-flags is compiled with
# SIMDJSON_VERBOSE_LOGGING=1. The option name must not be followed by a comma: CMake
# separates arguments with whitespace, so a trailing comma becomes part of the name.
option(SIMDJSON_VERBOSE_LOGGING "Enable verbose logging for internal simdjson library development." OFF)
if (SIMDJSON_VERBOSE_LOGGING)
  target_compile_definitions(simdjson-flags INTERFACE SIMDJSON_VERBOSE_LOGGING=1)
endif()
if(SIMDJSON_USE_LIBCPP) if(SIMDJSON_USE_LIBCPP)
target_link_libraries(simdjson-flags INTERFACE -stdlib=libc++ -lc++abi) target_link_libraries(simdjson-flags INTERFACE -stdlib=libc++ -lc++abi)
# instead of the above line, we could have used # instead of the above line, we could have used

View File

@ -34,6 +34,7 @@ enum error_code {
INVALID_JSON_POINTER, ///< Invalid JSON pointer reference INVALID_JSON_POINTER, ///< Invalid JSON pointer reference
INVALID_URI_FRAGMENT, ///< Invalid URI fragment INVALID_URI_FRAGMENT, ///< Invalid URI fragment
UNEXPECTED_ERROR, ///< indicative of a bug in simdjson UNEXPECTED_ERROR, ///< indicative of a bug in simdjson
PARSER_IN_USE, ///< parser is already in use.
/** @private Number of error codes */ /** @private Number of error codes */
NUM_ERROR_CODES NUM_ERROR_CODES
}; };

View File

@ -152,8 +152,20 @@ protected:
* Defaults to DEFAULT_MAX_DEPTH. * Defaults to DEFAULT_MAX_DEPTH.
*/ */
size_t _max_depth{0}; size_t _max_depth{0};
// Declaring these so that subclasses can use them to implement their constructors.
simdjson_really_inline dom_parser_implementation() noexcept;
simdjson_really_inline dom_parser_implementation(dom_parser_implementation &&other) noexcept;
simdjson_really_inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept;
simdjson_really_inline dom_parser_implementation(const dom_parser_implementation &) noexcept = delete;
simdjson_really_inline dom_parser_implementation &operator=(const dom_parser_implementation &other) noexcept = delete;
}; // class dom_parser_implementation }; // class dom_parser_implementation
simdjson_really_inline dom_parser_implementation::dom_parser_implementation() noexcept = default;
simdjson_really_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default;
simdjson_really_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default;
simdjson_really_inline size_t dom_parser_implementation::capacity() const noexcept { simdjson_really_inline size_t dom_parser_implementation::capacity() const noexcept {
return _capacity; return _capacity;
} }

View File

@ -27,7 +27,8 @@ namespace internal {
{ IO_ERROR, "Error reading the file." }, { IO_ERROR, "Error reading the file." },
{ INVALID_JSON_POINTER, "Invalid JSON pointer syntax." }, { INVALID_JSON_POINTER, "Invalid JSON pointer syntax." },
{ INVALID_URI_FRAGMENT, "Invalid URI fragment syntax." }, { INVALID_URI_FRAGMENT, "Invalid URI fragment syntax." },
{ UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson" } { UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson" },
{ PARSER_IN_USE, "Cannot parse a new document while a document is still in use." }
}; // error_messages[] }; // error_messages[]
} // namespace internal } // namespace internal

View File

@ -25,9 +25,11 @@ public:
/** Document passed to stage 2 */ /** Document passed to stage 2 */
dom::document *doc{}; dom::document *doc{};
simdjson_really_inline dom_parser_implementation(); simdjson_really_inline dom_parser_implementation() noexcept;
dom_parser_implementation(dom_parser_implementation &&other) noexcept;
dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept;
dom_parser_implementation(const dom_parser_implementation &) = delete; dom_parser_implementation(const dom_parser_implementation &) = delete;
dom_parser_implementation & operator=(const dom_parser_implementation &) = delete; dom_parser_implementation &operator=(const dom_parser_implementation &) = delete;
SIMDJSON_WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; SIMDJSON_WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final;
SIMDJSON_WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool partial) noexcept final; SIMDJSON_WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool partial) noexcept final;
@ -47,7 +49,9 @@ public:
namespace { namespace {
namespace SIMDJSON_IMPLEMENTATION { namespace SIMDJSON_IMPLEMENTATION {
simdjson_really_inline dom_parser_implementation::dom_parser_implementation() {} simdjson_really_inline dom_parser_implementation::dom_parser_implementation() noexcept = default;
simdjson_really_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default;
simdjson_really_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default;
// Leaving these here so they can be inlined if so desired // Leaving these here so they can be inlined if so desired
SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept {

19
src/generic/ondemand.h Normal file
View File

@ -0,0 +1,19 @@
#include "generic/ondemand/logger.h"
#include "generic/ondemand/raw_json_string.h"
#include "generic/ondemand/token_iterator.h"
#include "generic/ondemand/document.h"
#include "generic/ondemand/value.h"
#include "generic/ondemand/array.h"
#include "generic/ondemand/field.h"
#include "generic/ondemand/object.h"
#include "generic/ondemand/parser.h"
#include "generic/ondemand/logger-inl.h"
#include "generic/ondemand/raw_json_string-inl.h"
#include "generic/ondemand/token_iterator-inl.h"
#include "generic/ondemand/document-inl.h"
#include "generic/ondemand/value-inl.h"
#include "generic/ondemand/array-inl.h"
#include "generic/ondemand/field-inl.h"
#include "generic/ondemand/object-inl.h"
#include "generic/ondemand/parser-inl.h"

View File

@ -0,0 +1,137 @@
namespace {
namespace SIMDJSON_IMPLEMENTATION {
namespace ondemand {
//
// ## Live States
//
// While iterating or looking up values, doc->iter.depth >= depth. at_start may vary. Error is
// always SUCCESS:
//
// - Start: This is the state when the array is first found and the iterator is just past the `[`.
// In this state, at_start == true.
// - Next: After we hand a scalar value to the user, or an array/object which they then fully
// iterate over, the iterator is at the `,` before the next value (or `]`). In this state,
// depth == doc->iter.depth, at_start == false, and error == SUCCESS.
// - Unfinished Business: When we hand an array/object to the user which they do not fully
// iterate over, we need to finish that iteration by skipping child values until we reach the
// Next state. In this state, doc->iter.depth > depth, at_start == false, and error == SUCCESS.
//
// ## Error States
//
// In error states, we will yield exactly one more value before stopping. doc->iter.depth == depth
// and at_start is always false. We decrement after yielding the error, moving to the Finished
// state.
//
// - Chained Error: When the array iterator is part of an error chain--for example, in
// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an
// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and
// doc->iter.depth == depth, and at_start == false. We decrement depth when we yield the error.
// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements,
// we flag that as an error and treat it exactly the same as a Chained Error. In this state,
// error == TAPE_ERROR, doc->iter.depth == depth, and at_start == false.
//
// ## Terminal State
//
// The terminal state has doc->iter.depth < depth. at_start is always false.
//
// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this
// by decrementing depth. In this state, doc->iter.depth < depth, at_start == false, and
// error == SUCCESS.
//
// Default constructor: every member keeps its in-class default. array::end() returns such an
// object as the end sentinel.
simdjson_really_inline array::array() noexcept = default;
// Internal constructor. Records the document, captures the array's depth from the document
// iterator's current depth, and stores the error. at_start is set to !_error, so an array
// created with a non-zero error never runs the start-of-array check.
simdjson_really_inline array::array(document *_doc, error_code _error) noexcept
: doc{_doc}, depth{_doc->iter.depth}, at_start{!_error}, error{_error}
{
}
// Iteration is complete once the document iterator's depth has dropped below this array's depth.
simdjson_really_inline bool array::finished() const noexcept {
  return depth > doc->iter.depth;
}
// Marks iteration as complete by moving the document iterator's depth to one level above
// this array. When log_end is true, the end of the array is also logged.
simdjson_really_inline void array::finish(bool log_end) noexcept {
  doc->iter.depth = depth - 1;
  if (!log_end) { return; }
  logger::log_end_value(doc->iter, "array");
}
// Enters an array: bumps the document iterator's depth by one, then builds the array at
// that new depth (the constructor reads the depth back from the iterator).
simdjson_really_inline array array::begin(document *doc, error_code error) noexcept {
  doc->iter.depth += 1;
  return array(doc, error);
}
// The array acts as its own iterator: begin() returns a copy of this object.
simdjson_really_inline array array::begin() noexcept {
return *this;
}
// The end sentinel is a default-constructed array. operator!= ignores its argument, so the
// sentinel's contents are never inspected.
simdjson_really_inline array array::end() noexcept {
return {};
}
// Yields the current element. If an error is pending, the array is marked finished and the
// error is returned instead of a value.
simdjson_really_inline simdjson_result<value> array::operator*() noexcept {
  if (!error) {
    return value::start(doc);
  }
  finish();
  return { doc, error };
}
// Negation of operator!=. Note that operator!= is not a pure comparison: while at_start is
// set it inspects (and may advance) the document iterator, so this call has the same effects.
simdjson_really_inline bool array::operator==(const array &other) noexcept {
return !(*this != other);
}
// "Not at end" test used by range-for. The argument (the end sentinel) is ignored.
//
// On the first call for a freshly started array this also performs the start-of-array check:
// an immediate `]` means the array is empty, so it is consumed and the array is finished.
//
// Returns true while there is still an element (or a pending error) to yield.
simdjson_really_inline bool array::operator!=(const array &) noexcept {
  if (at_start) {
    // The start check must run exactly once. Leaving at_start set would repeat the empty-array
    // test (and its logging) before every element, and would break operator++, which assumes
    // !at_start.
    at_start = false;
    if (*doc->iter.peek() == ']') {
      doc->iter.advance();
      logger::log_value(doc->iter, "empty array");
      finish();
    } else {
      logger::log_start_value(doc->iter, "array");
    }
  }
  return !finished();
}
// Moves past the current element. Does nothing if the array is already finished.
//
// Any children the caller left unconsumed are skipped first, then the next token decides:
//   `,`  -> another element follows;
//   `]`  -> the array is finished (and its end is logged);
//   else -> the separator is missing: TAPE_ERROR is recorded for the next operator* to yield.
simdjson_really_inline array &array::operator++() noexcept {
  if (finished()) { return *this; }

  SIMDJSON_ASSUME(!error);
  SIMDJSON_ASSUME(!at_start);
  doc->iter.skip_unfinished_children(depth);
  switch (*doc->iter.advance()) {
    case ',':
      break;
    case ']':
      finish(true);
      break;
    default:
      logger::log_error(doc->iter, "Missing comma between array elements");
      // Do not finish() here. finished() must stay false so the loop runs once more and
      // operator* reports the error (operator* calls finish() itself when it does).
      // Finishing now would end the loop and silently drop the error.
      error = TAPE_ERROR;
      break;
  }
  return *this;
}
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // unnamed namespace
namespace simdjson {
// Success case: wraps an array, handing the rvalue on to the result base.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array>::simdjson_result(
  SIMDJSON_IMPLEMENTATION::ondemand::array &&value
) noexcept
  : internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::array>(std::move(value))
{
}
// Error case: builds an array from (doc, error) via the array's internal constructor and
// stores the same error code in the result base, so both the result and the array carry it.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array>::simdjson_result(
SIMDJSON_IMPLEMENTATION::ondemand::document *doc,
error_code error
) noexcept :
internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::array>({ doc, error }, error)
{
}
// Range-for support directly on the result: begin() returns a copy of the stored array
// (which may be an array constructed with an error), without checking the result's error here.
simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array>::begin() noexcept {
return first;
}
// The end sentinel is a default-constructed array.
simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array>::end() noexcept {
return {};
}
} // namespace simdjson

View File

@ -0,0 +1,111 @@
#include "simdjson/error.h"
namespace {
namespace SIMDJSON_IMPLEMENTATION {
namespace ondemand {
/**
* A forward-only JSON array.
*
* The array object doubles as its own iterator: begin() returns a copy of it and end()
* returns a default-constructed sentinel.
*/
class array {
public:
simdjson_really_inline array() noexcept;
simdjson_really_inline array begin() noexcept;
simdjson_really_inline array end() noexcept;
//
// Iterator interface
//
// Yields the current element to the user, or the pending error if there is one.
simdjson_really_inline simdjson_result<value> operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION.
// Assumes it's being compared with the end. true if doc->iter.depth < depth (iteration finished).
simdjson_really_inline bool operator==(const array &) noexcept;
// Assumes it's being compared with the end. true if doc->iter.depth >= depth (not finished).
// While at_start is set, this also checks for an empty array and may advance the iterator.
simdjson_really_inline bool operator!=(const array &) noexcept;
// Checks for ']' and ','
simdjson_really_inline array &operator++() noexcept;
protected:
/**
* Begin array iteration.
*
* @param doc The document containing the array. The iterator must be just after the opening `[`.
* doc->iter.depth will be incremented automatically to reflect the nesting level.
* @param error If this is not SUCCESS, creates an error chained array.
*/
static simdjson_really_inline array begin(document *doc, error_code error=SUCCESS) noexcept;
/**
* Internal array creation. Call array::begin(doc[, error]) instead of this.
*
* @param doc The document containing the array. doc->iter.depth must already be incremented to
* reflect the array's depth. If there is no error, the iterator must be just after
* the opening `[`.
* @param error The error to report. If the error is not SUCCESS, this is an error chained array.
*/
simdjson_really_inline array(document *doc, error_code error) noexcept;
/** Check whether iteration is complete (doc->iter.depth < depth). */
bool finished() const noexcept;
/**
* Marks iteration as complete by setting doc->iter.depth to depth - 1.
*
* @param log_end If true, the end of the array is also logged.
*/
void finish(bool log_end=false) noexcept;
/**
* Document containing this array.
*
* PERF NOTE: expected to be elided in favor of the parent document: this is set when the array
* is first used, and never changes afterwards.
*/
document *doc{};
/**
* Depth of the array.
*
* If doc->iter.depth < depth, we have finished.
*
* PERF NOTE: expected to be elided entirely, as any individual array's depth is a constant
* knowable at compile time, incremented each time we nest an object or array.
*/
uint32_t depth{};
/**
* Whether we're at the beginning of the array, or after.
*
* PERF NOTE: expected to be elided into inline control flow, as it is true for the first
* iteration and false thereafter, and compilers with SSA optimization tend to analyze the first
* iteration of any loop separately.
*/
bool at_start{};
/**
* Error, if there is one. Errors are only yielded once.
*
* PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first
* iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If
* this is not elided, we should make sure it's at least not using up a register. Failing that,
* we should store it in document so there's only one of them.
*/
error_code error{};
friend class value;
friend struct simdjson_result<value>;
friend struct simdjson_result<array>;
};
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace {
namespace simdjson {
/**
* Result wrapper for an on-demand array. It can be iterated directly with begin()/end(),
* in which case a stored error is carried by the array it hands out.
*/
template<>
struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array> : public internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::array> {
public:
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::array &&value) noexcept; ///< @private
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::document *doc, error_code error) noexcept; ///< @private
// NOTE(review): no definition of this operator[] is visible alongside the other
// simdjson_result<array> members in array-inl.h; confirm it is defined before relying on it.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> operator[](std::string_view key) noexcept;
simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array begin() noexcept;
simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array end() noexcept;
};
} // namespace simdjson

View File

@ -0,0 +1,130 @@
namespace {
namespace SIMDJSON_IMPLEMENTATION {
namespace ondemand {
// Move constructor. Takes over the other document's iterator and parser link, and leaves
// `other` detached (parser == nullptr) so its destructor releases nothing.
//
// A document may only be moved before it has been used; moving one that has already been
// advanced is treated as a programming error and aborts.
simdjson_really_inline document::document(document &&other) noexcept :
  iter{std::move(other.iter)},
  parser{other.parser}
{
  // at_start() dereferences parser, so only check documents that are attached to one. A
  // detached source (for example one that was already moved from) has nothing to validate.
  if (parser && !at_start()) { logger::log_error(iter, "Cannot move document after it has been used"); abort(); }
  other.parser = nullptr;
}
// Move assignment. Takes over the other document's iterator and parser link, and leaves
// `other` detached (parser == nullptr).
//
// A document may only be moved before it has been used; moving one that has already been
// advanced is treated as a programming error and aborts.
//
// NOTE(review): the parser previously referenced by *this is not released here (the
// destructor is what clears parser->current_string_buf_loc). Confirm whether assigning over
// a live document is an expected use.
simdjson_really_inline document &document::operator=(document &&other) noexcept {
  // Self-move must be a no-op; otherwise the parser link would be cleared below and lost.
  if (this == &other) { return *this; }
  iter = std::move(other.iter);
  parser = other.parser;
  // at_start() dereferences parser, so only check documents that are attached to one.
  if (parser && !at_start()) { logger::log_error(iter, "Cannot move document after it has been used"); abort(); }
  other.parser = nullptr;
  return *this;
}
// Starts a new document iteration on top of a parser. The token iterator is built from the
// parser's buf and structural_indexes (the third argument, 0, is presumably the initial
// depth -- confirm against token_iterator). The parser's current_string_buf_loc is pointed at
// the start of its string_buf, and the start of the document is logged.
simdjson_really_inline document::document(ondemand::parser *_parser) noexcept
: iter(_parser->dom_parser.buf, _parser->dom_parser.structural_indexes.get(), 0), parser{_parser}
{
logger::log_headers();
parser->current_string_buf_loc = parser->string_buf.get();
logger::log_start_value(iter, "document");
}
// Destructor. A document that is still attached to a parser logs its end and gives the
// parser's string buffer back; a detached document does nothing.
simdjson_really_inline document::~document() noexcept {
  if (!parser) { return; }
  logger::log_end_value(iter, "document");
  // Release the string buf so it can be reused by the next document
  parser->current_string_buf_loc = nullptr;
}
// Views the whole document as a single value. This is only legal while the document is still
// at its start; using it a second time is a programming error and aborts.
simdjson_really_inline value document::as_value() noexcept {
  const bool unused_so_far = at_start();
  if (!unused_so_far) {
    logger::log_error(iter, "Document value can only be used once! ondemand::document is a forward-only input iterator.");
    abort(); // TODO is there anything softer we can do? I'd rather not make this a simdjson_result just for user error.
  }
  return value::start(this);
}
// True while the iterator still points at the parser's first structural index, i.e. nothing
// has been consumed yet. Dereferences parser, so it must not be called on a document whose
// parser is null.
simdjson_really_inline bool document::at_start() const noexcept { return iter.index == parser->dom_parser.structural_indexes.get(); }
// Typed accessors. Each one treats the whole document as a single value via as_value() and
// forwards to the value's getter of the same name. Because as_value() aborts unless the
// document is still at its start, only one of these can be used per document.
simdjson_really_inline simdjson_result<array> document::get_array() & noexcept { return as_value().get_array(); }
simdjson_really_inline simdjson_result<object> document::get_object() & noexcept { return as_value().get_object(); }
simdjson_really_inline simdjson_result<uint64_t> document::get_uint64() noexcept { return as_value().get_uint64(); }
simdjson_really_inline simdjson_result<int64_t> document::get_int64() noexcept { return as_value().get_int64(); }
simdjson_really_inline simdjson_result<double> document::get_double() noexcept { return as_value().get_double(); }
simdjson_really_inline simdjson_result<std::string_view> document::get_string() & noexcept { return as_value().get_string(); }
simdjson_really_inline simdjson_result<raw_json_string> document::get_raw_json_string() & noexcept { return as_value().get_raw_json_string(); }
simdjson_really_inline simdjson_result<bool> document::get_bool() noexcept { return as_value().get_bool(); }
simdjson_really_inline bool document::is_null() noexcept { return as_value().is_null(); }
// Conversion operators, available only when exceptions are enabled. Each one returns
// as_value() and relies on the value's own conversion to the target type; all are declared
// noexcept(false). Like the getters, they consume the document's single use of as_value().
#if SIMDJSON_EXCEPTIONS
simdjson_really_inline document::operator array() & noexcept(false) { return as_value(); }
simdjson_really_inline document::operator object() & noexcept(false) { return as_value(); }
simdjson_really_inline document::operator uint64_t() noexcept(false) { return as_value(); }
simdjson_really_inline document::operator int64_t() noexcept(false) { return as_value(); }
simdjson_really_inline document::operator double() noexcept(false) { return as_value(); }
simdjson_really_inline document::operator std::string_view() & noexcept(false) { return as_value(); }
simdjson_really_inline document::operator raw_json_string() & noexcept(false) { return as_value(); }
simdjson_really_inline document::operator bool() noexcept(false) { return as_value(); }
#endif
// Range-for support: begin() treats the document as a value and starts iterating it as an
// array; end() is a default-constructed array sentinel.
simdjson_really_inline array document::begin() & noexcept { return as_value().begin(); }
simdjson_really_inline array document::end() & noexcept { return {}; }
// Field lookup: treats the document as a value and forwards the key to the value's operator[].
simdjson_really_inline simdjson_result<value> document::operator[](std::string_view key) & noexcept { return as_value()[key]; }
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace {
namespace simdjson {
// Success case: takes ownership of a document by moving it into the result base.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::simdjson_result(
  SIMDJSON_IMPLEMENTATION::ondemand::document &&value
) noexcept
  : internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::document>(std::move(value))
{
}
// Document-plus-error case: moves the document into the result base together with the
// error code to report.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::simdjson_result(
  SIMDJSON_IMPLEMENTATION::ondemand::document &&value,
  error_code error
) noexcept
  : internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::document>(std::move(value), error)
{
}
// Views the wrapped document as a value. If this result holds an error, the error is passed
// on in the returned result (together with a pointer to the stored document) and the
// document's own as_value() is not called.
// TODO make sure the passing of a pointer here isn't about to cause us trouble
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::as_value() noexcept {
if (error()) { return { &first, error() }; }
return first.as_value();
}
// Range-for and lookup support directly on the result. Everything goes through this result's
// as_value(), so a stored error is carried into whatever is returned. end() is a
// default-constructed array sentinel.
simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::begin() & noexcept { return as_value().begin(); }
simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::end() & noexcept { return {}; }
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator[](std::string_view key) & noexcept {
return as_value()[key];
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator[](const char *key) & noexcept {
return as_value()[key];
}
// Typed accessors on the result. Each forwards to the same-named getter on this result's
// as_value(), which passes a stored error along instead of touching the document.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get_array() & noexcept { return as_value().get_array(); }
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get_object() & noexcept { return as_value().get_object(); }
simdjson_really_inline simdjson_result<uint64_t> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get_uint64() noexcept { return as_value().get_uint64(); }
simdjson_really_inline simdjson_result<int64_t> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get_int64() noexcept { return as_value().get_int64(); }
simdjson_really_inline simdjson_result<double> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get_double() noexcept { return as_value().get_double(); }
simdjson_really_inline simdjson_result<std::string_view> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get_string() & noexcept { return as_value().get_string(); }
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get_raw_json_string() & noexcept { return as_value().get_raw_json_string(); }
simdjson_really_inline simdjson_result<bool> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get_bool() noexcept { return as_value().get_bool(); }
simdjson_really_inline bool simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::is_null() noexcept { return as_value().is_null(); }
// Conversion operators on the result, available only when exceptions are enabled. Each one
// returns this result's as_value() and relies on the conversion from simdjson_result<value>
// to the target type; all are declared noexcept(false).
#if SIMDJSON_EXCEPTIONS
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator SIMDJSON_IMPLEMENTATION::ondemand::array() & noexcept(false) { return as_value(); }
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator SIMDJSON_IMPLEMENTATION::ondemand::object() & noexcept(false) { return as_value(); }
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator uint64_t() noexcept(false) { return as_value(); }
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator int64_t() noexcept(false) { return as_value(); }
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator double() noexcept(false) { return as_value(); }
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator std::string_view() & noexcept(false) { return as_value(); }
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() & noexcept(false) { return as_value(); }
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator bool() noexcept(false) { return as_value(); }
#endif
} // namespace simdjson

View File

@ -0,0 +1,116 @@
#include "simdjson/error.h"
namespace {
namespace SIMDJSON_IMPLEMENTATION {
namespace ondemand {
class parser;
class array;
class object;
class value;
class raw_json_string;
/**
 * A JSON document iteration.
 *
 * Used by tokens to get text, and string buffer location.
 *
 * You must keep the document around during iteration.
 *
 * A document is move-only. It is forward-only as well: the document can be turned into a
 * value (through a getter, a conversion operator, begin() or operator[]) only once.
 */
class document {
public:
  /** Creates a document that is not attached to any parser. */
  simdjson_really_inline document() noexcept = default;
  /** Takes over another document; the source is left detached from its parser. */
  simdjson_really_inline document(document &&other) noexcept;
  simdjson_really_inline document &operator=(document &&other) noexcept;
  simdjson_really_inline document(const document &other) = delete;
  simdjson_really_inline document &operator=(const document &other) = delete;
  /** Releases the parser's string buffer if this document is still attached to a parser. */
  simdjson_really_inline ~document() noexcept;

  //
  // Typed accessors: each treats the whole document as a single value.
  //
  simdjson_really_inline simdjson_result<array> get_array() & noexcept;
  simdjson_really_inline simdjson_result<object> get_object() & noexcept;
  simdjson_really_inline simdjson_result<uint64_t> get_uint64() noexcept;
  simdjson_really_inline simdjson_result<int64_t> get_int64() noexcept;
  simdjson_really_inline simdjson_result<double> get_double() noexcept;
  simdjson_really_inline simdjson_result<std::string_view> get_string() & noexcept;
  simdjson_really_inline simdjson_result<raw_json_string> get_raw_json_string() & noexcept;
  simdjson_really_inline simdjson_result<bool> get_bool() noexcept;
  simdjson_really_inline bool is_null() noexcept;

#if SIMDJSON_EXCEPTIONS
  //
  // Conversion operators, available only when exceptions are enabled.
  //
  simdjson_really_inline operator array() & noexcept(false);
  simdjson_really_inline operator object() & noexcept(false);
  simdjson_really_inline operator uint64_t() noexcept(false);
  simdjson_really_inline operator int64_t() noexcept(false);
  simdjson_really_inline operator double() noexcept(false);
  simdjson_really_inline operator std::string_view() & noexcept(false);
  simdjson_really_inline operator raw_json_string() & noexcept(false);
  simdjson_really_inline operator bool() noexcept(false);
#endif

  /** Starts iterating the document as an array. */
  simdjson_really_inline array begin() & noexcept;
  /** End sentinel for array iteration. */
  simdjson_really_inline array end() & noexcept;
  /** Looks up a field, treating the document as a value. */
  simdjson_really_inline simdjson_result<value> operator[](std::string_view key) & noexcept;
  // NOTE(review): confirm that this overload has a definition; only the std::string_view
  // overload is defined next to the other document members in document-inl.h.
  simdjson_really_inline simdjson_result<value> operator[](const char *key) & noexcept;

protected:
  /** Starts a document iteration over the given parser's buffers. */
  simdjson_really_inline document(ondemand::parser *parser) noexcept;
  simdjson_really_inline const uint8_t *text(uint32_t idx) const noexcept;

  token_iterator iter; ///< Current position in the document
  /**
   * Parser this document reads from, or nullptr when detached (default-constructed or
   * moved-from).
   *
   * Explicitly initialized: the destructor tests this pointer, so a default-initialized
   * document must not leave it indeterminate.
   */
  ondemand::parser *parser{};

  /** Views the whole document as a value. Aborts if the document has already been used. */
  simdjson_really_inline value as_value() noexcept;
  /** Whether the iterator is still at the first structural index. Requires a non-null parser. */
  simdjson_really_inline bool at_start() const noexcept;

  friend struct simdjson_result<document>;
  friend class value;
  friend class ondemand::parser;
  friend class object;
  friend class array;
  friend class field;
  friend class token;
};
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace {
namespace simdjson {
/**
* Result wrapper for ondemand::document: holds a document together with an error code.
*
* It re-declares the document's accessors, conversions, iteration and lookup functions so they
* can be called directly on the result.
*/
template<>
struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document> : public internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::document> {
public:
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::document &&value) noexcept; ///< @private
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::document &&value, error_code error) noexcept; ///< @private
// Typed accessors; the overloads qualified with `&` require an lvalue result.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array> get_array() & noexcept;
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object> get_object() & noexcept;
simdjson_really_inline simdjson_result<uint64_t> get_uint64() noexcept;
simdjson_really_inline simdjson_result<int64_t> get_int64() noexcept;
simdjson_really_inline simdjson_result<double> get_double() noexcept;
simdjson_really_inline simdjson_result<std::string_view> get_string() & noexcept;
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> get_raw_json_string() & noexcept;
simdjson_really_inline simdjson_result<bool> get_bool() noexcept;
simdjson_really_inline bool is_null() noexcept;
#if SIMDJSON_EXCEPTIONS
// Implicit conversions; declared noexcept(false), so they may throw.
simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::array() & noexcept(false);
simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::object() & noexcept(false);
simdjson_really_inline operator uint64_t() noexcept(false);
simdjson_really_inline operator int64_t() noexcept(false);
simdjson_really_inline operator double() noexcept(false);
simdjson_really_inline operator std::string_view() & noexcept(false);
simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() & noexcept(false);
simdjson_really_inline operator bool() noexcept(false);
#endif
// Range-for support and field lookup (lvalue results only).
simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array begin() & noexcept;
simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array end() & noexcept;
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> operator[](std::string_view key) & noexcept;
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> operator[](const char *key) & noexcept;
protected:
// Views the wrapped document as a value result; the exception-enabled operator bool() forwards to it.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> as_value() noexcept;
};
} // namespace simdjson

View File

@ -0,0 +1,79 @@
namespace {
namespace SIMDJSON_IMPLEMENTATION {
namespace ondemand {
// Move construction and move assignment use the compiler-generated member-wise implementations.
simdjson_really_inline field::field(field &&other) noexcept = default;
simdjson_really_inline field &field::operator=(field &&other) noexcept = default;
// Builds a field from a raw key pointer and a value. The key initializes the pair's
// raw_json_string half; the value is moved into the other half.
simdjson_really_inline field::field(const uint8_t *key, ondemand::value &&value) noexcept
  : std::pair<raw_json_string, ondemand::value>(key, std::move(value))
{
}
// Reads the next field of an object, starting at its key.
//
// Consumes the key token and reports TAPE_ERROR if it does not begin with a double quote.
// Otherwise delegates to start(doc, key), which consumes the colon and starts the value.
//
// @param doc Document whose iterator is positioned at the field's key.
// @return The field, or an error result carrying TAPE_ERROR.
simdjson_really_inline simdjson_result<field> field::start(document *doc) noexcept {
  const uint8_t *key = doc->iter.advance();
  if ('"' != *key) {
    logger::log_error(doc->iter, "Missing key in object field");
    return { doc, TAPE_ERROR };
  }
  // raw_json_string points just *after* the opening quote (object::operator[] does the same),
  // so step past the quote before handing the key on.
  return field::start(doc, key + 1);
}
// Completes a field whose key token has already been consumed: expects a ':' next, then starts
// the value. Yields TAPE_ERROR when the colon is missing.
simdjson_really_inline simdjson_result<field> field::start(document *doc, const uint8_t *key) noexcept {
  const uint8_t separator = *doc->iter.advance();
  if (separator == ':') {
    return field(key, value::start(doc));
  }
  logger::log_error(doc->iter, "Missing colon in object field");
  return { doc, TAPE_ERROR };
}
// Starts only the value half of a field: consumes the ':' that follows an already-consumed key
// and begins the value. Yields TAPE_ERROR when the colon is missing.
simdjson_really_inline simdjson_result<value> field::start_value(document *doc) noexcept {
  const uint8_t separator = *doc->iter.advance();
  if (separator == ':') {
    return value::start(doc);
  }
  logger::log_error(doc->iter, "Missing colon in object field");
  return { doc, TAPE_ERROR };
}
// Returns a copy of the field's key, which is stored as the `first` member of the underlying pair.
simdjson_really_inline raw_json_string field::key() const noexcept {
return first;
}
// Returns a mutable reference to the field's value, stored as the `second` member of the
// underlying pair.
simdjson_really_inline value &field::value() noexcept {
return second;
}
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace {
namespace simdjson {
// Wraps a successfully read field: the field is moved into the result base.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::field>::simdjson_result(
  SIMDJSON_IMPLEMENTATION::ondemand::field &&value
) noexcept
  : internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::field>(std::move(value))
{
}
// Builds an error result. The payload is a placeholder field made from a null key and a value
// built from { doc, nullptr }; the error code is stored alongside it.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::field>::simdjson_result(
SIMDJSON_IMPLEMENTATION::ondemand::document *doc,
error_code error
) noexcept :
internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::field>(
{ nullptr, { doc, nullptr } },
error
)
{
}
// Returns the stored error if there is one, otherwise the key of the wrapped field.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::field>::key() noexcept {
if (error()) { return error(); }
return first.key();
}
// Moves the value out of the wrapped field and pairs it with this result's error code, so an
// error (if any) travels along with the returned value.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::field>::value() noexcept {
return { std::move(first.value()), error() };
}
} // namespace simdjson

View File

@ -0,0 +1,45 @@
#include "simdjson/error.h"
namespace {
namespace SIMDJSON_IMPLEMENTATION {
namespace ondemand {
/**
* A JSON field (key/value pair) in an object.
*
* Stored as a std::pair of the raw key and the value. Move-only: copying is deleted.
*/
class field : public std::pair<raw_json_string, value> {
public:
simdjson_really_inline field() noexcept = default;
simdjson_really_inline field(field &&other) noexcept;
simdjson_really_inline field &operator=(field &&other) noexcept;
simdjson_really_inline field(const field &other) noexcept = delete;
simdjson_really_inline field &operator=(const field &other) noexcept = delete;
// The key, still in its raw form as it appears in the JSON text.
simdjson_really_inline raw_json_string key() const noexcept;
// Mutable access to the field's value.
simdjson_really_inline ondemand::value &value() noexcept;
protected:
// Construction and the start* helpers are reserved for friends (object, simdjson_result<field>).
simdjson_really_inline field(const uint8_t *key, ondemand::value &&value) noexcept;
// Reads a whole field: key, colon, and the start of the value.
static simdjson_really_inline simdjson_result<field> start(document *doc) noexcept;
// Reads the colon and the start of the value for a key that was already consumed.
static simdjson_really_inline simdjson_result<field> start(document *doc, const uint8_t *key) noexcept;
// Reads the colon and the start of the value, returning only the value.
static simdjson_really_inline simdjson_result<ondemand::value> start_value(document *doc) noexcept;
friend struct simdjson_result<field>;
friend class object;
};
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace {
namespace simdjson {
/**
* Result wrapper for ondemand::field: holds a field together with an error code.
*/
template<>
struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::field> : public internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::field> {
public:
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::field &&value) noexcept; ///< @private
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::document *doc, error_code error) noexcept; ///< @private
// The field's key, or the stored error.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> key() noexcept;
// The field's value (moved out of the field), paired with the stored error code.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> value() noexcept;
};
} // namespace simdjson

View File

@ -0,0 +1,70 @@
namespace {
namespace SIMDJSON_IMPLEMENTATION {
namespace ondemand {
namespace logger {
// Column widths (in characters) of the log table.
static constexpr const int LOG_EVENT_LEN = 20;        // "Event" column
static constexpr const int LOG_BUFFER_LEN = 30;       // "Buffer" column: JSON text at the logged token
static constexpr const int LOG_SMALL_BUFFER_LEN = 10; // "Next" column: JSON text at the following token
static constexpr const int LOG_INDEX_LEN = 5;         // intended for the "Next#" index column
// Reuse the dash filler and the printable-character helper from the implementation-level logger.
using SIMDJSON_IMPLEMENTATION::logger::DASHES;
using SIMDJSON_IMPLEMENTATION::logger::printable_char;
// Thin wrappers around log_line(). They differ only in the title prefix and title they pass.

// Logs an event row: no prefix, `type` as the title.
simdjson_really_inline void log_event(const token_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept {
log_line(iter, "", type, detail, delta, depth_delta);
}
// Logs a value row: no prefix, `type` as the title.
simdjson_really_inline void log_value(const token_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept {
log_line(iter, "", type, detail, delta, depth_delta);
}
// Logs the start of a container: title prefixed with "+", empty detail.
simdjson_really_inline void log_start_value(const token_iterator &iter, const char *type, int delta, int depth_delta) noexcept {
log_line(iter, "+", type, "", delta, depth_delta);
}
// Logs the end of a container: title prefixed with "-", empty detail.
simdjson_really_inline void log_end_value(const token_iterator &iter, const char *type, int delta, int depth_delta) noexcept {
log_line(iter, "-", type, "", delta, depth_delta);
}
// Logs an error row: the title is "ERROR" and the message goes into the detail column.
simdjson_really_inline void log_error(const token_iterator &iter, const char *error, int delta, int depth_delta) noexcept {
log_line(iter, "", "ERROR", error, delta, depth_delta);
}
// Prints the header rows of the log table (column titles plus a dashed separator).
// Does nothing unless LOG_ENABLED is true.
simdjson_really_inline void log_headers() noexcept {
  if (LOG_ENABLED) {
    printf("\n");
    // The index column uses LOG_INDEX_LEN rather than a literal, so its width has a single definition.
    printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", LOG_INDEX_LEN, "Next#");
    printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, LOG_INDEX_LEN+2, DASHES);
  }
}
// Prints one row of the log table. Does nothing unless LOG_ENABLED is true.
//
// @param iter         Iterator whose position and depth are logged.
// @param title_prefix Short marker printed before the title (e.g. "+", "-" or "").
// @param title        Text for the "Event" column; indented two spaces per nesting level.
// @param detail       Free-form text for the "Detail" column.
// @param delta        Token to show, relative to the iterator (-1 = the token just consumed).
// @param depth_delta  Adjustment applied to iter.depth when computing the indentation.
simdjson_really_inline void log_line(const token_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept {
  if (LOG_ENABLED) {
    const int indent = (iter.depth+depth_delta)*2;
    printf("| %*s%s%-*s ",
      indent, "",
      title_prefix,
      LOG_EVENT_LEN - indent - int(strlen(title_prefix)), title
    );
    {
      // Print the current structural.
      const uint8_t *current = iter.peek(delta);
      printf("| ");
      for (int i=0;i<LOG_BUFFER_LEN;i++) {
        printf("%c", printable_char(current[i]));
      }
      printf(" ");
    }
    {
      // Print the next structural.
      const uint8_t *next = iter.peek(delta+1);
      printf("| ");
      for (int i=0;i<LOG_SMALL_BUFFER_LEN;i++) {
        printf("%c", printable_char(next[i]));
      }
      printf(" ");
    }
    // Index column: width comes from LOG_INDEX_LEN so it stays in sync with the header.
    printf("| %*u ", LOG_INDEX_LEN, iter.peek_index(delta+1));
    printf("| %.*s ", int(detail.size()), detail.data());
    printf("|\n");
  }
}
} // namespace logger
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // unnamed namespace

View File

@ -0,0 +1,22 @@
namespace {
namespace SIMDJSON_IMPLEMENTATION {
namespace ondemand {
class token_iterator;
// Logging helpers for the on-demand parser. Each function takes the token iterator whose state
// is logged; `delta` selects a token relative to the iterator (default -1) and `depth_delta`
// adjusts the depth used for indentation (default 0).
namespace logger {
// Mirrors the implementation-wide logging switch.
static constexpr const bool LOG_ENABLED = SIMDJSON_IMPLEMENTATION::logger::LOG_ENABLED;
// Prints the table header.
static simdjson_really_inline void log_headers() noexcept;
// Prints one table row; the functions below all forward to this.
static simdjson_really_inline void log_line(const token_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept;
static simdjson_really_inline void log_event(const token_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept;
static simdjson_really_inline void log_value(const token_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept;
static simdjson_really_inline void log_start_value(const token_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept;
static simdjson_really_inline void log_end_value(const token_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept;
static simdjson_really_inline void log_error(const token_iterator &iter, const char *error, int delta=-1, int depth_delta=0) noexcept;
} // namespace logger
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // unnamed namespace

View File

@ -0,0 +1,189 @@
namespace {
namespace SIMDJSON_IMPLEMENTATION {
namespace ondemand {
//
// ## Live States
//
// While iterating or looking up values, doc->iter.depth >= depth. at_start may vary. Error is
// always SUCCESS:
//
// - Start: This is the state when the object is first found and the iterator is just past the {.
// In this state, at_start == true.
// - Next: After we hand a scalar value to the user, or an array/object which they then fully
// iterate over, the iterator is at the , or } before the next value. In this state,
// depth == doc->iter.depth, at_start == false, and error == SUCCESS.
// - Unfinished Business: When we hand an array/object to the user which they do not fully
// iterate over, we need to finish that iteration by skipping child values until we reach the
// Next state. In this state, doc->iter.depth > depth, at_start == false, and error == SUCCESS.
//
// ## Error States
//
// In error states, we will yield exactly one more value before stopping. doc->iter.depth == depth
// and at_start is always false. We decrement after yielding the error, moving to the Finished
// state.
//
// - Chained Error: When the object iterator is part of an error chain--for example, in
// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an
// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and
// doc->iter.depth == depth, and at_start == false. We decrement depth when we yield the error.
// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields,
// we flag that as an error and treat it exactly the same as a Chained Error. In this state,
// error == TAPE_ERROR, doc->iter.depth == depth, and at_start == false.
//
// Errors that occur while reading a field to give to the user (such as when the key is not a
// string or the field is missing a colon) are yielded immediately. Depth is then decremented,
// moving to the Finished state without transitioning through an Error state at all.
//
// ## Terminal State
//
// The terminal state has doc->iter.depth < depth. at_start is always false.
//
// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth.
// In this state, doc->iter.depth < depth, at_start == false, and error == SUCCESS.
//
// Default construction: members keep their in-class initializers. object::end() returns such an
// object.
simdjson_really_inline object::object() noexcept = default;
// Internal constructor. Records the document's current iterator depth as this object's depth.
// at_start is true only when no error is passed in, so an error-chained object never tries to
// read a first field.
simdjson_really_inline object::object(document *_doc, error_code _error) noexcept
: doc{_doc}, depth{_doc->iter.depth}, at_start{!_error}, error{_error}
{
}
// True once the document iterator's depth has dropped below this object's depth.
simdjson_really_inline bool object::finished() const noexcept {
return doc->iter.depth < depth;
}
// Marks iteration as complete by setting the iterator's depth to one less than this object's
// depth. When log_end is true, also logs the end of the object.
simdjson_really_inline void object::finish(bool log_end) noexcept {
doc->iter.depth = depth - 1;
if (log_end) { logger::log_end_value(doc->iter, "object"); }
}
// Leaves the Start state. If the object turns out to be empty (next token is '}'), the closing
// brace is consumed and the object is marked finished; otherwise only the start is logged.
simdjson_really_inline void object::first_field() noexcept {
  at_start = false;
  const bool has_fields = ('}' != *doc->iter.peek());
  if (has_fields) {
    logger::log_start_value(doc->iter, "object", -1, -1);
    return;
  }
  // Empty object: consume the '}' and shut down.
  logger::log_value(doc->iter, "empty object", "", -1, -1);
  doc->iter.advance();
  finish();
}
// Looks up a field by key, scanning forward from the current position.
//
// Fields before the match are skipped. The scan never goes backwards, so keys must be requested
// in document order.
//
// @param key The key to look for, compared against the raw (still escaped) key text.
// @return The value of the matching field; NO_SUCH_FIELD if the object ends (or already ended)
//         without a match; TAPE_ERROR on a missing comma or a non-string key; or the chained
//         error this object was created with.
simdjson_really_inline simdjson_result<value> object::operator[](const std::string_view key) noexcept {
  if (finished()) { return { doc, NO_SUCH_FIELD }; }
  if (error) { finish(); return { doc, error }; }
  if (at_start) {
    first_field();
    // first_field() consumes the '}' and finishes when the object is empty. Scanning on from
    // here would read tokens that belong to the parent, so stop now.
    if (finished()) { return { doc, NO_SUCH_FIELD }; }
  } else {
    // Finish whatever the previous value left behind, then expect ',' or '}'.
    doc->iter.skip_unfinished_children(depth);
    switch (*doc->iter.advance()) {
      case ',':
        break;
      case '}':
        finish(true);
        return { doc, NO_SUCH_FIELD };
      default:
        logger::log_error(doc->iter, "Missing comma between object fields");
        finish();
        return { doc, TAPE_ERROR };
    }
  }
  while (true) {
    const uint8_t *actual_key = doc->iter.advance();
    // The post-increment steps past the opening quote so actual_key points at the key text.
    switch (*(actual_key++)) {
      case '"':
        if (raw_json_string(actual_key) == key) {
          logger::log_event(doc->iter, "match", key);
          return field::start_value(doc);
        }
        logger::log_event(doc->iter, "no match", key);
        doc->iter.advance(); // "key" :
        doc->iter.skip_value(); // "key" : <value>
        switch (*doc->iter.advance()) {
          case ',':
            break;
          case '}':
            logger::log_event(doc->iter, "no key found", key);
            finish(true);
            return { doc, NO_SUCH_FIELD };
          default:
            logger::log_error(doc->iter, "Missing comma between object fields");
            finish();
            return { doc, TAPE_ERROR };
        }
        break;
      default:
        logger::log_error(doc->iter, "Key is not a string");
        finish();
        return { doc, TAPE_ERROR };
    }
  }
}
// Starts iteration over an object: increments the document iterator's depth, then constructs the
// object (which records that depth) with the given error.
simdjson_really_inline object object::begin(document *doc, error_code error) noexcept {
doc->iter.depth++;
return object(doc, error);
}
// Range-for support: the object is its own iterator, so begin() returns a copy of it.
simdjson_really_inline object object::begin() noexcept {
return *this;
}
// Range-for support: the end sentinel is a default-constructed object. The comparison operators
// ignore its contents.
simdjson_really_inline object object::end() noexcept {
return {};
}
// Yields the current field. If an error is pending, marks the object finished and yields that
// error instead; otherwise reads the next key and value from the document.
simdjson_really_inline simdjson_result<field> object::operator*() noexcept {
if (error) { finish(); return { doc, error }; }
return field::start(doc);
}
// Defined as the negation of operator!=, so it shares its side effect (see below).
simdjson_really_inline bool object::operator==(const object &other) noexcept {
return !(*this != other);
}
// "Not at end" test; the argument is ignored. This is not a pure comparison: on the first call
// it consumes the start of the object via first_field().
simdjson_really_inline bool object::operator!=(const object &) noexcept {
// If we're at the start, check for the first field.
if (at_start) { first_field(); }
return !finished();
}
// Advances to the next field.
//
// Skips anything the user left unread in the current value, then consumes the separator:
// - ',' : another field follows.
// - '}' : the object is finished.
// - anything else: records TAPE_ERROR. The object is deliberately NOT finished here, so the
//   loop runs one more time and operator* yields the error (and finishes) exactly once. Calling
//   finish() at this point would end the loop and silently drop the error.
simdjson_really_inline object &object::operator++() noexcept {
  if (!finished()) {
    SIMDJSON_ASSUME(!error);
    SIMDJSON_ASSUME(!at_start);
    doc->iter.skip_unfinished_children(depth);
    switch (*doc->iter.advance()) {
      case ',':
        break;
      case '}':
        finish(true);
        break;
      default:
        logger::log_error(doc->iter, "Missing comma between object fields");
        error = TAPE_ERROR;
    }
  }
  return *this;
}
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace {
namespace simdjson {
// Success case: wraps an object.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object>::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::object &&value) noexcept
: internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::object>(std::forward<SIMDJSON_IMPLEMENTATION::ondemand::object>(value)) {}
// Error case: the payload is an object built from { doc, error }, so the error is stored both in
// the object and in the result.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object>::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::document *doc, error_code error) noexcept
: internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::object>({ doc, error }, error) {}
// Range-for support: returns a copy of the wrapped object, whether or not an error is set.
simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::object simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object>::begin() noexcept {
return first;
}
// Range-for support: the end sentinel is a default-constructed object.
simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::object simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object>::end() noexcept {
return {};
}
// Field lookup: returns the stored error if there is one, otherwise forwards to
// object::operator[].
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object>::operator[](std::string_view key) noexcept {
if (error()) { return { first.doc, error() }; }
return first[key];
}
} // namespace simdjson

View File

@ -0,0 +1,111 @@
#include "simdjson/error.h"
namespace {
namespace SIMDJSON_IMPLEMENTATION {
namespace ondemand {
/**
* A forward-only JSON object field iterator.
*
* The object acts as its own iterator: begin() returns a copy of it, end() returns a
* default-constructed sentinel, and the comparison operators ignore their argument.
*/
class object {
public:
simdjson_really_inline object() noexcept;
simdjson_really_inline object begin() noexcept;
simdjson_really_inline object end() noexcept;
// Looks up a field by key, scanning forward from the current position.
simdjson_really_inline simdjson_result<value> operator[](const std::string_view key) noexcept;
//
// Iterator interface
//
// Reads key and value, yielding them to the user.
simdjson_really_inline simdjson_result<field> operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION.
// Assumes it's being compared with the end. true if doc->iter.depth < depth (iteration finished).
simdjson_really_inline bool operator==(const object &) noexcept;
// Assumes it's being compared with the end. true if doc->iter.depth >= depth (not finished).
// On the first call this also checks for the first field, consuming the '}' of an empty object.
simdjson_really_inline bool operator!=(const object &) noexcept;
// Checks for '}' and ','
simdjson_really_inline object &operator++() noexcept;
protected:
/**
* Begin object iteration.
*
* @param doc The document containing the object. The iterator must be just after the opening `{`.
* @param error If this is not SUCCESS, creates an error chained object.
*/
static simdjson_really_inline object begin(document *doc, error_code error=SUCCESS) noexcept;
/**
* Internal object creation. Call object::begin(doc[, error]) instead of this.
*
* @param doc The document containing the object. doc->iter.depth must already be incremented to
* reflect the object's depth. If there is no error, the iterator must be just after
* the opening `{`.
* @param error The error to report. If the error is not SUCCESS, this is an error chained object.
*/
simdjson_really_inline object(document *doc, error_code error) noexcept;
/** Check whether iteration is complete. */
simdjson_really_inline bool finished() const noexcept;
/** Decrements depth to mark iteration as complete. */
simdjson_really_inline void finish(bool log_end=false) noexcept;
/** Leaves the start state; finishes immediately if the object is empty. */
simdjson_really_inline void first_field() noexcept;
/**
* Document containing the primary iterator.
*
* PERF NOTE: expected to be elided in favor of the parent document: this is set when the object
* is first used, and never changes afterwards.
*/
document *doc{};
/**
* Depth of the object.
*
* If doc->iter.depth < depth, we have finished.
*
* PERF NOTE: expected to be elided entirely, as any individual object's depth is a constant
* knowable at compile time, incremented each time we nest an object() or array().
*/
uint32_t depth{};
/**
* Whether we're at the beginning of the object, or after.
*
* PERF NOTE: expected to be elided into inline control flow, as it is true for the first
* iteration and false thereafter, and compilers with SSA optimization tend to analyze the first
* iteration of any loop separately.
*/
bool at_start{};
/**
* Error, if there is one. Errors are only yielded once.
*
* PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first
* iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If
* this is not elided, we should make sure it's at least not using up a register. Failing that,
* we should store it in document so there's only one of them.
*/
error_code error{};
friend class value;
friend class document;
friend struct simdjson_result<object>;
};
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace {
namespace simdjson {
/**
* Result wrapper for ondemand::object: holds an object together with an error code, and exposes
* iteration and key lookup directly on the result.
*/
template<>
struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object> : public internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::object> {
public:
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::object &&value) noexcept; ///< @private
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::document *doc, error_code error) noexcept; ///< @private
// Range-for support.
simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::object begin() noexcept;
simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::object end() noexcept;
// Field lookup by key; yields the stored error when there is one.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> operator[](std::string_view key) noexcept;
};
} // namespace simdjson

View File

@ -0,0 +1,42 @@
namespace {
namespace SIMDJSON_IMPLEMENTATION {
namespace ondemand {
// (Re)allocates the parser's buffers for documents of up to new_capacity bytes.
//
// Does nothing when the requested capacity and max depth match the current ones. While
// reallocating, _capacity and _max_depth are zeroed and only set again once every step has
// succeeded, so a failed allocation leaves the parser reporting no capacity.
//
// @param new_capacity  Largest document size, in bytes, the parser should handle.
// @param new_max_depth Recorded as the parser's max depth.
// @return SUCCESS, MEMALLOC if the string buffer cannot be allocated, or the error reported by
//         the underlying dom_parser.
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept {
  if (new_capacity == _capacity && new_max_depth == _max_depth) { return SUCCESS; }
  // string_capacity copied from document::allocate
  _capacity = 0;
  _max_depth = 0;
  // The most string buffer we could possibly need is capacity-2 (a string the whole document long).
  // Allocate up to capacity so we don't have to check for capacity == 0 or 1.
  string_buf.reset(new (std::nothrow) uint8_t[new_capacity]);
  // nothrow new reports failure by returning null; surface it instead of carrying a null buffer.
  if (!string_buf) { return MEMALLOC; }
  SIMDJSON_TRY( dom_parser.set_capacity(new_capacity) );
  // NOTE(review): the dom_parser is always given DEFAULT_MAX_DEPTH; new_max_depth is only recorded
  // in _max_depth below. Confirm whether that is intended before changing it, since parse() passes
  // the current _max_depth, which starts out as 0.
  SIMDJSON_TRY( dom_parser.set_max_depth(DEFAULT_MAX_DEPTH) );
  _capacity = new_capacity;
  _max_depth = new_max_depth;
  return SUCCESS;
}
// Starts parsing a JSON document.
//
// Fails with PARSER_IN_USE while current_string_buf_loc is set. Grows the parser's buffers when
// the input is larger than the current capacity, then runs stage 1 over the input. The returned
// result carries the stage 1 error code (or the allocation error).
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<document> parser::parse(const padded_string &buf) noexcept {
  if (current_string_buf_loc) { return { this, PARSER_IN_USE }; }

  // Allocate if needed
  const size_t json_len = buf.size();
  if (json_len > _capacity) {
    const error_code alloc_error = allocate(json_len, _max_depth);
    if (alloc_error) { return { this, alloc_error }; }
  }

  // Run stage 1.
  const uint8_t *json_bytes = reinterpret_cast<const uint8_t *>(buf.data());
  const error_code stage1_error = dom_parser.stage1(json_bytes, json_len, false);
  return { this, stage1_error };
}
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace {

View File

@ -0,0 +1,40 @@
#include "simdjson/error.h"
namespace {
namespace SIMDJSON_IMPLEMENTATION {
namespace ondemand {
class array;
class object;
class value;
class raw_json_string;
/**
* A JSON fragment iterator.
*
* This holds the actual iterator as well as the buffer for writing strings.
*
* Copying is deleted; move construction is defaulted.
*/
class parser {
public:
simdjson_really_inline parser() noexcept = default;
simdjson_really_inline parser(parser &&other) noexcept = default;
simdjson_really_inline parser(const parser &other) = delete;
simdjson_really_inline parser &operator=(const parser &other) = delete;
// Sizes the internal buffers for documents of up to `capacity` bytes.
SIMDJSON_WARN_UNUSED error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept;
// Runs stage 1 over `json` and returns a document for on-demand iteration (or an error).
SIMDJSON_WARN_UNUSED simdjson_result<document> parse(const padded_string &json) noexcept;
private:
dom_parser_implementation dom_parser{}; // used to run stage 1
size_t _capacity{0};                    // capacity recorded by the last successful allocate()
size_t _max_depth{0};                   // max depth recorded by the last successful allocate()
std::unique_ptr<uint8_t[]> string_buf{}; // buffer for writing strings
uint8_t *current_string_buf_loc{};      // while non-null, parse() reports PARSER_IN_USE
friend class raw_json_string;
friend class document;
friend class value;
};
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace {

View File

@ -0,0 +1,28 @@
namespace {
namespace SIMDJSON_IMPLEMENTATION {
namespace ondemand {
// A raw_json_string is a thin wrapper around one pointer into the JSON text; construction, copy
// and assignment only copy that pointer.
simdjson_really_inline raw_json_string::raw_json_string() noexcept : buf{nullptr} {} // for constructing a simdjson_result
simdjson_really_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {}
simdjson_really_inline raw_json_string::raw_json_string(const raw_json_string &other) noexcept : buf{other.buf} {}
simdjson_really_inline raw_json_string &raw_json_string::operator=(const raw_json_string &other) noexcept { buf = other.buf; return *this; }
// Returns the wrapped pointer as a char pointer. The text is still escaped and is not
// null-terminated.
simdjson_really_inline const char * raw_json_string::raw() const noexcept { return (const char *)buf; }
// Unescapes the string into the caller's buffer.
//
// @param dst In: where to write the unescaped text. Out: advanced to the position returned by
//            the string parser. Left untouched on error.
// @return A view of the unescaped text inside the destination buffer, or STRING_ERROR when the
//         string parser reports failure.
simdjson_really_inline SIMDJSON_WARN_UNUSED simdjson_result<std::string_view> raw_json_string::unescape(uint8_t *&dst) const noexcept {
  uint8_t *written_end = stage2::stringparsing::parse_string(buf, dst);
  if (written_end == nullptr) { return STRING_ERROR; }
  const char *text = reinterpret_cast<const char *>(dst);
  const size_t length = size_t(written_end - dst);
  dst = written_end;
  return std::string_view(text, length);
}
// Compares a raw JSON string with an unescaped key.
//
// The raw string matches only if its first b.size() bytes equal b AND the next byte is the
// closing quote. Without the quote check this is a prefix match, so a lookup for "id" would
// also match a key such as "id_str".
//
// The comparison is done on the raw text: b is expected to contain no characters that JSON
// would escape (in particular no '"').
simdjson_really_inline bool operator==(const raw_json_string &a, std::string_view b) noexcept {
  const char *raw_text = a.raw();
  // Compare the prefix first: only once those bytes are known to match is the byte after them
  // inspected.
  return !strncmp(raw_text, b.data(), b.size()) && raw_text[b.size()] == '"';
}
// Symmetric overload; forwards to the version above.
simdjson_really_inline bool operator==(std::string_view a, const raw_json_string &b) noexcept {
  return b == a;
}
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace {

View File

@ -0,0 +1,31 @@
#include "simdjson/error.h"
namespace {
namespace SIMDJSON_IMPLEMENTATION {
namespace ondemand {
/**
* A string escaped per JSON rules, terminated with quote (")
*
* (In other words, a pointer to the beginning of a string, just after the start quote, inside a
* JSON file.)
*
* The class stores only that pointer, so it is valid only while the JSON text it points into is.
*/
class raw_json_string {
public:
simdjson_really_inline raw_json_string() noexcept;
simdjson_really_inline raw_json_string(const uint8_t * _buf) noexcept;
simdjson_really_inline raw_json_string(const raw_json_string &other) noexcept;
simdjson_really_inline raw_json_string &operator=(const raw_json_string &other) noexcept;
// The raw, still-escaped text (not null-terminated).
simdjson_really_inline const char * raw() const noexcept;
// Unescapes into dst and advances dst past what was written.
simdjson_really_inline SIMDJSON_WARN_UNUSED simdjson_result<std::string_view> unescape(uint8_t *&dst) const noexcept;
private:
const uint8_t * buf;
friend class object;
};
// Comparison between a raw JSON string and plain text, in either operand order.
simdjson_really_inline bool operator==(const raw_json_string &a, std::string_view b) noexcept;
simdjson_really_inline bool operator==(std::string_view a, const raw_json_string &b) noexcept;
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace {

View File

@ -0,0 +1,76 @@
namespace {
namespace SIMDJSON_IMPLEMENTATION {
namespace ondemand {
// Default construction, move construction and move assignment are compiler-generated.
simdjson_really_inline token_iterator::token_iterator() noexcept = default;
simdjson_really_inline token_iterator::token_iterator(token_iterator &&other) noexcept = default;
simdjson_really_inline token_iterator &token_iterator::operator=(token_iterator &&other) noexcept = default;
// Builds an iterator over the JSON text `_buf`, starting at the token offset that `_index`
// points to, at nesting depth `_depth`.
simdjson_really_inline token_iterator::token_iterator(const uint8_t *_buf, uint32_t *_index, uint32_t _depth) noexcept
: buf{_buf}, index{_index}, depth{_depth}
{
}
// Returns a pointer to the JSON text of the token `delta` positions away from the current one,
// without moving the iterator.
simdjson_really_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept {
  const uint32_t offset = index[delta];
  return buf + offset;
}
// Returns a pointer to the JSON text of the current token, then moves to the next token.
simdjson_really_inline const uint8_t *token_iterator::advance() noexcept {
  const uint32_t offset = *index;
  ++index;
  return buf + offset;
}
// Returns the offset into the JSON text of the token `delta` positions away from the current one.
simdjson_really_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept {
  return index[delta];
}
// Consumes tokens until the iterator's depth is back at container_depth, tracking nesting as it
// goes: every '[' or '{' increases depth, every ']' or '}' decreases it. Scalars and separators
// are just logged and passed over.
simdjson_really_inline void token_iterator::skip_unfinished_children(uint32_t container_depth) noexcept {
  SIMDJSON_ASSUME(depth >= container_depth);
  while (depth != container_depth) {
    const uint8_t token = *advance();
    // TODO consider whether matching braces is a requirement: if non-matching braces indicates
    // *missing* braces, then future lookups are not in the object/arrays they think they are,
    // violating the rule "validate enough structure that the user can be confident they are
    // looking at the right values."
    if (token == ']' || token == '}') {
      depth--;
      logger::log_end_value(*this, "skip");
    } else if (token == '[' || token == '{') {
      // PERF TODO does it skip the depth check when we don't decrement depth?
      logger::log_start_value(*this, "skip");
      depth++;
    } else {
      logger::log_value(*this, "skip", "");
    }
  }
}
// Skips one JSON value, whether it is a scalar, an array or an object.
//
// Nesting is tracked in a local counter: an opening bracket/brace increases it, a closing one
// decreases it, and the value ends when the counter returns to zero.
//
// If the very first token is already a ']' or '}', there is no value to skip (e.g. `"key":}`).
// The closer is left for the caller to handle, which keeps the caller's container balanced.
// Decrementing the unsigned counter here would wrap it around and send the loop far past the
// end of the token stream.
simdjson_really_inline void token_iterator::skip_value() noexcept {
  uint32_t child_depth = 0;
  do {
    switch (*advance()) {
      // TODO consider whether matching braces is a requirement: if non-matching braces indicates
      // *missing* braces, then future lookups are not in the object/arrays they think they are,
      // violating the rule "validate enough structure that the user can be confident they are
      // looking at the right values."
      case ']': case '}':
        if (child_depth == 0) {
          logger::log_error(*this, "Missing value before close");
          index--; // un-consume the closer: it belongs to the enclosing container
          return;
        }
        child_depth--;
        logger::log_end_value(*this, "skip", -1, child_depth);
        break;
      // PERF TODO does it skip the depth check when we don't decrement depth?
      case '[': case '{': logger::log_start_value(*this, "skip", -1, child_depth); child_depth++; break;
      default: logger::log_value(*this, "skip", "", -1, child_depth); break;
    }
  } while (child_depth != 0);
}
// Comparison operators. Two iterators are ordered purely by their position pointer (`index`);
// the buffer pointer and the depth are not compared.
simdjson_really_inline bool token_iterator::operator==(const token_iterator &other) const noexcept {
return index == other.index;
}
simdjson_really_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept {
return index != other.index;
}
simdjson_really_inline bool token_iterator::operator>(const token_iterator &other) const noexcept {
return index > other.index;
}
simdjson_really_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept {
return index >= other.index;
}
simdjson_really_inline bool token_iterator::operator<(const token_iterator &other) const noexcept {
return index < other.index;
}
simdjson_really_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept {
return index <= other.index;
}
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace {

View File

@ -0,0 +1,88 @@
namespace {
namespace SIMDJSON_IMPLEMENTATION {
namespace ondemand {
/**
* A JSON token iterator.
*
* Walks a list of token offsets into the JSON text and carries the current nesting depth.
* Move-only: copying is deleted.
*
* @private This is not intended for external use.
*/
class token_iterator {
public:
simdjson_really_inline token_iterator() noexcept;
simdjson_really_inline token_iterator(token_iterator &&other) noexcept;
simdjson_really_inline token_iterator &operator=(token_iterator &&other) noexcept;
simdjson_really_inline token_iterator(const token_iterator &other) noexcept = delete;
simdjson_really_inline token_iterator &operator=(const token_iterator &other) noexcept = delete;
/**
* Get the JSON text for a given token (relative).
*
* This is not null-terminated; it is a view into the JSON.
*
* @param delta The relative position of the token to retrieve. e.g. 0 = current token,
* 1 = next token, -1 = prev token.
*
* TODO consider a string_view, assuming the length will get stripped out by the optimizer when
* it isn't used ...
*/
simdjson_really_inline const uint8_t *peek(int32_t delta=0) const noexcept;
/**
* Advance to the next token (returning the current one).
*
* Does not check or update depth/expect_value. Caller is responsible for that.
*/
simdjson_really_inline const uint8_t *advance() noexcept;
/**
* If children were left partially iterated / unfinished, this will complete the iteration so we
* are at a comma or end of document/array/object.
*
* @precondition The iterator MUST be at or above the given depth.
* @postcondition The iterator is at the given depth.
*/
simdjson_really_inline void skip_unfinished_children(uint32_t container_depth) noexcept;
/**
* Skips a JSON value, whether it is a scalar, array or object.
*/
simdjson_really_inline void skip_value() noexcept;
// NOTE: we don't support a full C++ iterator interface, because we expect people to make
// different calls to advance the iterator based on *their own* state.
simdjson_really_inline bool operator==(const token_iterator &other) const noexcept;
simdjson_really_inline bool operator!=(const token_iterator &other) const noexcept;
simdjson_really_inline bool operator>(const token_iterator &other) const noexcept;
simdjson_really_inline bool operator>=(const token_iterator &other) const noexcept;
simdjson_really_inline bool operator<(const token_iterator &other) const noexcept;
simdjson_really_inline bool operator<=(const token_iterator &other) const noexcept;
private:
simdjson_really_inline token_iterator(const uint8_t *buf, uint32_t *index, uint32_t depth) noexcept;
/**
* Get the index of a given token (relative): its offset into the JSON text, rather than a
* pointer to the text itself.
*
* @param delta The relative position of the token to retrieve. e.g. 0 = current token,
* 1 = next token, -1 = prev token.
*
*/
simdjson_really_inline uint32_t peek_index(int32_t delta=0) const noexcept;
const uint8_t *buf{};    // start of the JSON text
const uint32_t *index{}; // current position in the list of token offsets
uint32_t depth{};        // current nesting depth; maintained by the callers and the skip functions
friend class document;
friend class object;
friend class array;
friend class value;
friend simdjson_really_inline void logger::log_line(const token_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept;
};
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace {

View File

@ -0,0 +1,257 @@
namespace {
namespace SIMDJSON_IMPLEMENTATION {
namespace ondemand {
// Default construction leaves doc and json at their in-class defaults (both null), so the
// destructor of a default-constructed value does nothing.
simdjson_really_inline value::value() noexcept = default;
// Move construction delegates to move assignment, which transfers doc/json and nulls the
// source's json so that only one of the two objects runs the "unused value" logic on destruction.
// `other` is a plain rvalue reference (not a forwarding reference), so std::move is the correct
// cast here; the stray ';' that followed the function body has been removed.
simdjson_really_inline value::value(value &&other) noexcept {
  *this = std::move(other);
}
// Move assignment: takes over the source's document and JSON pointers.
// The source's json is then set to nullptr, which is the "already handled" marker checked by
// ~value(); the moved-from object therefore will not log or bump the depth when destroyed.
// Note that the previous contents of *this are overwritten without running the unused-value
// logic, and that a self-move leaves json == nullptr.
simdjson_really_inline value &value::operator=(value &&other) noexcept {
doc = other.doc;
json = other.json;
other.json = nullptr;
return *this;
}
// Creates a value bound to a document and to the first byte of the value's JSON text.
//
// Only doc is asserted non-null. A null json is a legitimate state: it is the "already handled"
// marker tested by ~value(), and simdjson_result<value>(document *, error_code) deliberately
// builds its placeholder as { doc, nullptr }. Asserting json != nullptr here would be violated
// on every such error result (and, if SIMDJSON_ASSUME is an optimizer hint, would let the
// compiler treat the error path as unreachable).
simdjson_really_inline value::value(document *_doc, const uint8_t *_json) noexcept : doc{_doc}, json{_json} {
  SIMDJSON_ASSUME(doc != nullptr);
}
// Handles values the user never consumed.
//
// A non-null json means no getter claimed the value. If it is the start of an array or object,
// the iterator depth is bumped so the enclosing array/object iteration routines work correctly;
// otherwise the value is only logged.
// PERF TODO this better be elided entirely when people actually use the value. Don't care if it
// gets bumped on the error path unless that's costing us something important.
simdjson_really_inline value::~value() noexcept {
  if (json == nullptr) { return; } // already handled (or moved from / error placeholder)

  const bool opens_container = (*json == '[') || (*json == '{');
  if (!opens_container) {
    logger::log_value(doc->iter, "unused");
    return;
  }
  logger::log_start_value(doc->iter, "unused");
  doc->iter.depth++;
}
// Builds a value from the token the document iterator currently points at, advancing the
// iterator by exactly one token in the process.
simdjson_really_inline value value::start(document *doc) noexcept {
  const uint8_t *token = doc->iter.advance();
  return value(doc, token);
}
// Interprets the value as an array.
// On success the value is marked as handled (json = nullptr) and array iteration begins.
// If the text does not start with '[', the error is logged and an array carrying INCORRECT_TYPE
// is returned; json is left untouched in that case.
simdjson_really_inline simdjson_result<array> value::get_array() noexcept {
  if (*json == '[') {
    json = nullptr; // Communicate that we have handled the value PERF TODO elided, right?
    return array::begin(doc);
  }
  log_error("not an array");
  return array::begin(doc, INCORRECT_TYPE);
}
// Interprets the value as an object.
// On success the value is marked as handled (json = nullptr) and object iteration begins.
// If the text does not start with '{', the error is logged and an object carrying INCORRECT_TYPE
// is returned; json is left untouched in that case.
simdjson_really_inline simdjson_result<object> value::get_object() noexcept {
  if (*json == '{') {
    json = nullptr; // Communicate that we have handled the value PERF TODO elided, right?
    return object::begin(doc);
  }
  log_error("not an object");
  return object::begin(doc, INCORRECT_TYPE);
}
// Returns the still-escaped string contents, starting just past the opening quote.
// Yields INCORRECT_TYPE (after logging) when the value does not begin with '"'.
simdjson_really_inline simdjson_result<raw_json_string> value::get_raw_json_string() noexcept {
  log_value("string");
  if (*json == '"') {
    auto text = raw_json_string{json + 1}; // skip the opening quote
    json = nullptr; // Communicate that we have handled the value PERF TODO elided, right?
    return text;
  }
  log_error("not a string");
  return INCORRECT_TYPE;
}
// Returns the unescaped string. The raw string is fetched first; on success it is unescaped
// into the parser's string buffer at current_string_buf_loc.
simdjson_really_inline simdjson_result<std::string_view> value::get_string() noexcept {
  auto [raw, failure] = get_raw_json_string();
  if (failure) {
    return failure;
  }
  return raw.unescape(doc->parser->current_string_buf_loc);
}
// Parses the value as a double. On failure the parser's error code is logged and returned and
// the value stays unhandled; on success the value is marked as handled.
simdjson_really_inline simdjson_result<double> value::get_double() noexcept {
  log_value("double");
  double parsed;
  const error_code status = stage2::numberparsing::parse_double(json).get(parsed);
  if (status) {
    log_error("not a double");
    return status;
  }
  json = nullptr; // Communicate that we have handled the value PERF TODO elided, right?
  return parsed;
}
// Parses the value as an unsigned 64-bit integer. On failure the parser's error code is logged
// and returned and the value stays unhandled; on success the value is marked as handled.
simdjson_really_inline simdjson_result<uint64_t> value::get_uint64() noexcept {
  log_value("unsigned");
  uint64_t parsed;
  const error_code status = stage2::numberparsing::parse_unsigned(json).get(parsed);
  if (status) {
    log_error("not a unsigned integer");
    return status;
  }
  json = nullptr; // Communicate that we have handled the value PERF TODO elided, right?
  return parsed;
}
// Parses the value as a signed 64-bit integer. On failure the parser's error code is logged
// and returned and the value stays unhandled; on success the value is marked as handled.
simdjson_really_inline simdjson_result<int64_t> value::get_int64() noexcept {
  log_value("integer");
  int64_t parsed;
  const error_code status = stage2::numberparsing::parse_integer(json).get(parsed);
  if (status) {
    log_error("not an integer");
    return status;
  }
  json = nullptr; // Communicate that we have handled the value PERF TODO elided, right?
  return parsed;
}
// Parses the value as a boolean.
//
// The text must be exactly "true" or "false" and must be followed by a structural character or
// whitespace; anything else is logged and reported as INCORRECT_TYPE, leaving the value
// unhandled. On success the value is marked as handled and the parsed boolean is returned.
simdjson_really_inline simdjson_result<bool> value::get_bool() noexcept {
  log_value("bool");
  // Both are zero on a match, non-zero otherwise.
  auto not_true = stage2::atomparsing::str4ncmp(json, "true");
  auto not_false = stage2::atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e');
  // The terminator sits after 4 characters for "true" and after 5 for "false".
  bool error = (not_true && not_false) || stage2::is_not_structural_or_whitespace(json[not_true ? 5 : 4]);
  if (error) { log_error("not a boolean"); return INCORRECT_TYPE; }
  json = nullptr; // Communicate that we have handled the value PERF TODO elided, right?
  // Past this point the parse succeeded, so the result is unconditionally SUCCESS.
  return simdjson_result<bool>(!not_true, SUCCESS);
}
// Checks whether the value is the JSON literal null.
//
// Returns true and marks the value as handled only when the text is "null" followed by a
// structural character or whitespace (the same terminator rule get_bool() applies), so that
// input such as "nullx" is not mistaken for null. Otherwise returns false and leaves the value
// untouched so the caller can still try another type.
simdjson_really_inline bool value::is_null() noexcept {
  log_value("null");
  // json[4] is only inspected once the first four bytes matched.
  const bool not_null = stage2::atomparsing::str4ncmp(json, "null")
                     || stage2::is_not_structural_or_whitespace(json[4]);
  if (not_null) { return false; }
  json = nullptr; // Communicate that we have handled the value PERF TODO elided, right?
  return true;
}
#if SIMDJSON_EXCEPTIONS
// Implicit conversions, available only when exceptions are enabled.
// Each one calls the matching get_*() accessor and converts the resulting simdjson_result to
// the target type; they are declared noexcept(false) because that conversion may throw when the
// result holds an error.
simdjson_really_inline value::operator array() noexcept(false) { return get_array(); }
simdjson_really_inline value::operator object() noexcept(false) { return get_object(); }
simdjson_really_inline value::operator uint64_t() noexcept(false) { return get_uint64(); }
simdjson_really_inline value::operator int64_t() noexcept(false) { return get_int64(); }
simdjson_really_inline value::operator double() noexcept(false) { return get_double(); }
simdjson_really_inline value::operator std::string_view() noexcept(false) { return get_string(); }
simdjson_really_inline value::operator raw_json_string() noexcept(false) { return get_raw_json_string(); }
simdjson_really_inline value::operator bool() noexcept(false) { return get_bool(); }
#endif
// Range-for support: begin() treats the value as an array (via get_array()) and returns that
// array's begin(); a type mismatch is carried by the returned array rather than reported here.
simdjson_really_inline array value::begin() noexcept { return get_array().begin(); }
// end() returns a default-constructed array used as the end marker.
simdjson_really_inline array value::end() noexcept { return {}; }
// Field lookup: treats the value as an object (via get_object()) and looks up `key` in it.
// TODO this CANNOT be reused. Each time you try, it will get you a new object.
// Probably make it move-only to avoid this issue.
simdjson_really_inline simdjson_result<value> value::operator[](std::string_view key) noexcept {
return get_object()[key];
}
// Same lookup as above, for a C-string key.
simdjson_really_inline simdjson_result<value> value::operator[](const char *key) noexcept {
return get_object()[key];
}
// Logging helpers: forward to the logger using this value's document iterator as context.
// `type` is the label of the value kind being read (e.g. "double", "bool").
simdjson_really_inline void value::log_value(const char *type) const noexcept {
logger::log_value(doc->iter, type);
}
// `message` describes why the value could not be read (e.g. "not an array").
simdjson_really_inline void value::log_error(const char *message) const noexcept {
logger::log_error(doc->iter, message);
}
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace {
namespace simdjson {
// Success result: takes ownership of the given value.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::simdjson_result(
  SIMDJSON_IMPLEMENTATION::ondemand::value &&value
) noexcept
  : internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::value>(std::move(value))
{
}
// Result holding both a value (moved in) and an explicit error code.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::simdjson_result(
  SIMDJSON_IMPLEMENTATION::ondemand::value &&value,
  error_code error
) noexcept
  : internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::value>(std::move(value), error)
{
}
// Error result: stores a placeholder value that keeps the document pointer (so that chained
// calls can still reach first.doc) but has a null json, together with the error code.
// NOTE(review): the placeholder is built through value(document *, const uint8_t *), whose body
// contains SIMDJSON_ASSUME(json != nullptr); passing nullptr here contradicts that assumption --
// confirm what SIMDJSON_ASSUME expands to and relax one side.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::simdjson_result(
SIMDJSON_IMPLEMENTATION::ondemand::document *doc,
error_code error
) noexcept :
internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::value>({ doc, nullptr }, error)
{
}
// Range-for support on a result.
//
// When the result holds an error, an array carrying that error is returned so the failure
// surfaces during iteration. The `return` is essential: without it the error array is discarded
// and execution falls through to first.begin(), which dereferences the placeholder value's null
// json pointer. Otherwise iteration starts on the wrapped value.
simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::begin() noexcept {
  if (error()) { return SIMDJSON_IMPLEMENTATION::ondemand::array::begin(first.doc, error()); }
  return first.begin();
}
// End marker for range-for: the wrapped value's end() when there is no error, otherwise a
// default-constructed array.
simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::end() noexcept {
  if (!error()) {
    return first.end();
  }
  return {};
}
// Field lookup by string_view key; an existing error is propagated (keeping the document
// pointer) instead of performing the lookup.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator[](std::string_view key) noexcept {
  if (!error()) {
    return first[key];
  }
  return { first.doc, error() };
}
// Field lookup by C-string key; an existing error is propagated (keeping the document pointer)
// instead of performing the lookup.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator[](const char *key) noexcept {
  if (!error()) {
    return first[key];
  }
  return { first.doc, error() };
}
// Array access: forwards to the wrapped value, or propagates the stored error together with the
// document pointer.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_array() noexcept {
  if (!error()) {
    return first.get_array();
  }
  return { first.doc, error() };
}
// Object access: forwards to the wrapped value, or propagates the stored error together with
// the document pointer.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_object() noexcept {
  if (!error()) {
    return first.get_object();
  }
  return { first.doc, error() };
}
// Unsigned integer access: forwards to the wrapped value unless an error is already stored.
simdjson_really_inline simdjson_result<uint64_t> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_uint64() noexcept {
  if (!error()) {
    return first.get_uint64();
  }
  return error();
}
// Signed integer access: forwards to the wrapped value unless an error is already stored.
simdjson_really_inline simdjson_result<int64_t> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_int64() noexcept {
  if (!error()) {
    return first.get_int64();
  }
  return error();
}
// Double access: forwards to the wrapped value unless an error is already stored.
simdjson_really_inline simdjson_result<double> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_double() noexcept {
  if (!error()) {
    return first.get_double();
  }
  return error();
}
// Unescaped string access: forwards to the wrapped value unless an error is already stored.
simdjson_really_inline simdjson_result<std::string_view> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_string() noexcept {
  if (!error()) {
    return first.get_string();
  }
  return error();
}
// Raw (still escaped) string access: forwards to the wrapped value unless an error is already
// stored.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_raw_json_string() noexcept {
  if (!error()) {
    return first.get_raw_json_string();
  }
  return error();
}
// Boolean access: forwards to the wrapped value unless an error is already stored.
simdjson_really_inline simdjson_result<bool> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_bool() noexcept {
  if (!error()) {
    return first.get_bool();
  }
  return error();
}
// Null check: an errored result is never considered null; otherwise the wrapped value decides.
simdjson_really_inline bool simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::is_null() noexcept {
  if (!error()) {
    return first.is_null();
  }
  return false;
}
#if SIMDJSON_EXCEPTIONS
// Throwing conversions (exceptions-enabled builds only). Each one throws simdjson_error when
// the result holds an error; otherwise it converts the wrapped value to the requested type
// through the value's own conversion operator.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator SIMDJSON_IMPLEMENTATION::ondemand::array() noexcept(false) {
  if (!error()) { return first; }
  throw simdjson_error(error());
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator SIMDJSON_IMPLEMENTATION::ondemand::object() noexcept(false) {
  if (!error()) { return first; }
  throw simdjson_error(error());
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator uint64_t() noexcept(false) {
  if (!error()) { return first; }
  throw simdjson_error(error());
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator int64_t() noexcept(false) {
  if (!error()) { return first; }
  throw simdjson_error(error());
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator double() noexcept(false) {
  if (!error()) { return first; }
  throw simdjson_error(error());
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator std::string_view() noexcept(false) {
  if (!error()) { return first; }
  throw simdjson_error(error());
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) {
  if (!error()) { return first; }
  throw simdjson_error(error());
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator bool() noexcept(false) {
  if (!error()) { return first; }
  throw simdjson_error(error());
}
#endif
} // namespace simdjson

View File

@ -0,0 +1,127 @@
#include "simdjson/error.h"
namespace {
namespace SIMDJSON_IMPLEMENTATION {
namespace ondemand {
class array;
class document;
class field;
class object;
class raw_json_string;
/**
 * An ephemeral JSON value returned during iteration.
 *
 * This object must be destroyed before any other iteration occurs.
 *
 * The value is move-only. A non-null json pointer means the value has not been consumed yet;
 * the accessors set it to nullptr once they succeed, and the destructor uses it to detect
 * unused values.
 */
class value {
public:
simdjson_really_inline value() noexcept;
simdjson_really_inline value(value &&other) noexcept;
simdjson_really_inline value &operator=(value && other) noexcept;
simdjson_really_inline value(const value &) noexcept = delete;
simdjson_really_inline value &operator=(const value &) noexcept = delete;
// Uses RAII to account for the value if it is unused: an unused array/object start bumps the
// iterator depth so that the enclosing iteration can deal with it.
// TODO assert if two values are ever alive at the same time, to ensure they get destroyed
simdjson_really_inline ~value() noexcept;
// NOTE(review): no definition of skip() appears alongside the other inline definitions of this
// class -- confirm it is defined elsewhere or remove the declaration.
simdjson_really_inline void skip() noexcept;
// Typed accessors. Each checks/parses the JSON text and, on success, marks the value as
// handled. On failure an error is returned and the value stays unhandled.
simdjson_really_inline simdjson_result<array> get_array() noexcept;
simdjson_really_inline simdjson_result<object> get_object() noexcept;
simdjson_really_inline simdjson_result<uint64_t> get_uint64() noexcept;
simdjson_really_inline simdjson_result<int64_t> get_int64() noexcept;
simdjson_really_inline simdjson_result<double> get_double() noexcept;
simdjson_really_inline simdjson_result<std::string_view> get_string() noexcept;
simdjson_really_inline simdjson_result<raw_json_string> get_raw_json_string() noexcept;
simdjson_really_inline simdjson_result<bool> get_bool() noexcept;
// Returns true (and marks the value as handled) if the text is null; false otherwise.
simdjson_really_inline bool is_null() noexcept;
#if SIMDJSON_EXCEPTIONS
// Implicit conversions built on the get_*() accessors; they may throw on error.
simdjson_really_inline operator array() noexcept(false);
simdjson_really_inline operator object() noexcept(false);
simdjson_really_inline operator uint64_t() noexcept(false);
simdjson_really_inline operator int64_t() noexcept(false);
simdjson_really_inline operator double() noexcept(false);
simdjson_really_inline operator std::string_view() noexcept(false);
simdjson_really_inline operator raw_json_string() noexcept(false);
simdjson_really_inline operator bool() noexcept(false);
#endif
// Range-for support: begin() treats the value as an array; end() is an empty array marker.
simdjson_really_inline array begin() noexcept;
simdjson_really_inline array end() noexcept;
// Field lookup: treats the value as an object and looks up the key.
simdjson_really_inline simdjson_result<value> operator[](std::string_view key) noexcept;
simdjson_really_inline simdjson_result<value> operator[](const char *key) noexcept;
protected:
/**
 * Create a value.
 *
 * Use value::start() instead of this.
 *
 * @param doc The document containing the value.
 * @param json Pointer to the first byte of the value's JSON text.
 */
simdjson_really_inline value(document *doc, const uint8_t *json) noexcept;
/**
 * Read a value.
 *
 * Advances the document iterator by one token. If the value is an array or object, only the
 * opening bracket or brace will be consumed.
 *
 * @param doc The document containing the value. Iterator must be at the value start position.
 */
static simdjson_really_inline value start(document *doc) noexcept;
simdjson_really_inline void log_value(const char *type) const noexcept;
simdjson_really_inline void log_error(const char *message) const noexcept;
document *doc{}; // For the string buffer (if we need it)
const uint8_t *json{}; // The JSON text of the value; nullptr once the value has been handled
friend class document;
friend class array;
friend class field;
friend struct simdjson_result<value>;
friend struct simdjson_result<document>;
friend struct simdjson_result<field>;
};
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace {
namespace simdjson {
/**
 * Result wrapper for an on-demand value: either a value or an error code.
 *
 * The accessors mirror those of value. When an error is stored they report that error (or, for
 * is_null(), return false) without touching the value; otherwise they forward to the wrapped
 * value.
 */
template<>
struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> : public internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::value> {
public:
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::value &&value) noexcept; ///< @private
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::value &&value, error_code error) noexcept; ///< @private
// Error result; keeps the document pointer so chained calls can continue to propagate errors.
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::document *doc, error_code error) noexcept; ///< @private
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array> get_array() noexcept;
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object> get_object() noexcept;
simdjson_really_inline simdjson_result<uint64_t> get_uint64() noexcept;
simdjson_really_inline simdjson_result<int64_t> get_int64() noexcept;
simdjson_really_inline simdjson_result<double> get_double() noexcept;
simdjson_really_inline simdjson_result<std::string_view> get_string() noexcept;
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> get_raw_json_string() noexcept;
simdjson_really_inline simdjson_result<bool> get_bool() noexcept;
simdjson_really_inline bool is_null() noexcept;
#if SIMDJSON_EXCEPTIONS
// Throwing conversions: throw simdjson_error if an error is stored, otherwise convert the value.
simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::array() noexcept(false);
simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::object() noexcept(false);
simdjson_really_inline operator uint64_t() noexcept(false);
simdjson_really_inline operator int64_t() noexcept(false);
simdjson_really_inline operator double() noexcept(false);
simdjson_really_inline operator std::string_view() noexcept(false);
simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false);
simdjson_really_inline operator bool() noexcept(false);
#endif
// Range-for support and field lookup, forwarded to the wrapped value when no error is stored.
simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array begin() noexcept;
simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array end() noexcept;
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> operator[](std::string_view key) noexcept;
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> operator[](const char *key) noexcept;
};
} // namespace simdjson

View File

@ -6,7 +6,11 @@ namespace logger {
static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------";
#if SIMDJSON_VERBOSE_LOGGING
static constexpr const bool LOG_ENABLED = true;
#else
static constexpr const bool LOG_ENABLED = false; static constexpr const bool LOG_ENABLED = false;
#endif
static constexpr const int LOG_EVENT_LEN = 20; static constexpr const int LOG_EVENT_LEN = 20;
static constexpr const int LOG_BUFFER_LEN = 30; static constexpr const int LOG_BUFFER_LEN = 30;
static constexpr const int LOG_SMALL_BUFFER_LEN = 10; static constexpr const int LOG_SMALL_BUFFER_LEN = 10;
@ -33,7 +37,13 @@ namespace logger {
} }
} }
// Logs a single line of static simdjson_really_inline void log_string(const char *message) {
if (LOG_ENABLED) {
printf("%s\n", message);
}
}
// Logs a single line from the stage 2 DOM parser
template<typename S> template<typename S>
static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) {
if (LOG_ENABLED) { if (LOG_ENABLED) {

View File

@ -161,4 +161,6 @@ SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *
} // namespace SIMDJSON_IMPLEMENTATION } // namespace SIMDJSON_IMPLEMENTATION
} // unnamed namespace } // unnamed namespace
#include "generic/ondemand.h"
#include "haswell/end_implementation.h" #include "haswell/end_implementation.h"

View File

@ -29,7 +29,7 @@ extern "C" {
SIMDJSON_POP_DISABLE_WARNINGS SIMDJSON_POP_DISABLE_WARNINGS
// fastjson has a tricky interface // fastjson has a tricky interface
void on_json_error(void *, UNUSED const fastjson::ErrorContext &ec) { void on_json_error(void *, SIMDJSON_UNUSED const fastjson::ErrorContext &ec) {
// std::cerr<<"ERROR: "<<ec.mesg<<std::endl; // std::cerr<<"ERROR: "<<ec.mesg<<std::endl;
} }
bool fastjson_parse(const char *input) { bool fastjson_parse(const char *input) {