diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 606796c0..2804dd25 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -5,6 +5,7 @@ link_libraries(simdjson-windows-headers test-data) if (TARGET benchmark::benchmark) add_executable(bench_sax bench_sax.cpp) target_link_libraries(bench_sax simdjson-internal-flags simdjson-include-source benchmark::benchmark) + target_compile_options(bench_sax PRIVATE -mavx2 -mbmi -mpclmul -mlzcnt) endif (TARGET benchmark::benchmark) link_libraries(simdjson simdjson-flags) diff --git a/benchmark/bench_sax.cpp b/benchmark/bench_sax.cpp index 1a554a52..874a55fe 100644 --- a/benchmark/bench_sax.cpp +++ b/benchmark/bench_sax.cpp @@ -26,6 +26,76 @@ const int REPETITIONS = 10; #if SIMDJSON_IMPLEMENTATION_HASWELL +#include "twitter/tweet.h" +#include + +SIMDJSON_TARGET_HASWELL + +namespace ondemand_bench { + +using namespace simdjson; +using namespace haswell; + +simdjson_really_inline uint64_t nullable_int(ondemand::value && value) { + if (value.is_null()) { return 0; } + return std::move(value); +} + +simdjson_really_inline twitter::twitter_user read_user(ondemand::object && u) { + return { u["id"], u["screen_name"] }; +} +simdjson_really_inline void read_tweets(ondemand::parser &parser, padded_string &json, std::vector &tweets) { + // Walk the document, parsing the tweets as we go + auto doc = parser.parse(json); + for (ondemand::object tweet : doc["statuses"]) { + tweets.emplace_back(twitter::tweet{ + tweet["created_at"], + tweet["id"], + tweet["text"], + nullable_int(tweet["in_reply_to_status_id"]), + read_user(tweet["user"]), + tweet["retweet_count"], + tweet["favorite_count"] + }); + } +} + +static void bench_tweets(State &state) { + // Load twitter.json to a buffer + padded_string json; + if (auto error = padded_string::load(TWITTER_JSON).get(json)) { cerr << error << endl; return; } + + // Allocate and warm the vector + std::vector tweets; + ondemand::parser parser; + read_tweets(parser, 
json, tweets); + + // Read tweets + size_t byte_count = 0; + size_t tweet_count = 0; + for (SIMDJSON_UNUSED auto _ : state) { + tweets.clear(); + read_tweets(parser, json, tweets); + byte_count += json.size(); + tweet_count += tweets.size(); + } + // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte + state.counters["Gigabytes"] = benchmark::Counter( + double(byte_count), benchmark::Counter::kIsRate, + benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024 + state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate); + state.counters["tweets"] = Counter(double(tweet_count), benchmark::Counter::kIsRate); +} + +BENCHMARK(bench_tweets)->Repetitions(REPETITIONS)->ComputeStatistics("max", [](const std::vector& v) -> double { + return *(std::max_element(std::begin(v), std::end(v))); + })->DisplayAggregatesOnly(true); + +} // namespace ondemand_bench + + +SIMDJSON_UNTARGET_REGION + #include "twitter/sax_tweet_reader.h" static void sax_tweets(State &state) { @@ -70,17 +140,15 @@ simdjson_really_inline uint64_t nullable_int(dom::element element) { simdjson_really_inline void read_dom_tweets(dom::parser &parser, padded_string &json, std::vector &tweets) { for (dom::element tweet : parser.parse(json)["statuses"]) { auto user = tweet["user"]; - tweets.push_back( - { - tweet["id"], - tweet["text"], - tweet["created_at"], - nullable_int(tweet["in_reply_to_status_id"]), - tweet["retweet_count"], - tweet["favorite_count"], - { user["id"], user["screen_name"] } - } - ); + tweets.emplace_back(twitter::tweet{ + tweet["created_at"], + tweet["id"], + tweet["text"], + nullable_int(tweet["in_reply_to_status_id"]), + { user["id"], user["screen_name"] }, + tweet["retweet_count"], + tweet["favorite_count"] + }); } } @@ -221,6 +289,45 @@ BENCHMARK(dom_parse_largerandom)->Repetitions(REPETITIONS)->ComputeStatistics("m #if SIMDJSON_IMPLEMENTATION_HASWELL +SIMDJSON_TARGET_HASWELL + +/*** + * On Demand approach. 
+ **/ +static void ondemand_parse_largerandom(State &state) { + using namespace haswell; + // Get the JSON buffer to parse from get_my_json_str() + const padded_string& json = get_my_json_str(); + + // Allocate + ondemand::parser parser; + error_code error; + if ((error = parser.allocate(json.size()))) { throw error; }; + + // Read + size_t bytes = 0; + for (SIMDJSON_UNUSED auto _ : state) { + std::vector container; + for (ondemand::object p : parser.parse(json)) { + container.emplace_back(my_point{p["x"], p["y"], p["z"]}); + } + bytes += json.size(); + benchmark::DoNotOptimize(container.data()); + + } + // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte + state.counters["Gigabytes"] = benchmark::Counter( + double(bytes), benchmark::Counter::kIsRate, + benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024 + state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate); +} + +SIMDJSON_UNTARGET_REGION + +BENCHMARK(ondemand_parse_largerandom)->Repetitions(REPETITIONS)->ComputeStatistics("max", [](const std::vector& v) -> double { + return *(std::max_element(std::begin(v), std::end(v))); + })->DisplayAggregatesOnly(true); + /*** * Next we are going to code the SAX approach. 
+ **/ diff --git a/benchmark/twitter/tweet.h b/benchmark/twitter/tweet.h index a7b11896..3d19c0a8 100644 --- a/benchmark/twitter/tweet.h +++ b/benchmark/twitter/tweet.h @@ -6,14 +6,31 @@ namespace twitter { +// { +// "statuses": [ +// { +// "created_at": "Sun Aug 31 00:29:15 +0000 2014", +// "id": 505874924095815700, +// "text": "@aym0566x \n\n名前:前田あゆみ\n第一印象:なんか怖っ!\n今の印象:とりあえずキモい。噛み合わない\n好きなところ:ぶすでキモいとこ😋✨✨\n思い出:んーーー、ありすぎ😊❤️\nLINE交換できる?:あぁ……ごめん✋\nトプ画をみて:照れますがな😘✨\n一言:お前は一生もんのダチ💖", +// "in_reply_to_status_id": null, +// "user": { +// "id": 1186275104, +// "screen_name": "ayuu0123" +// }, +// "retweet_count": 0, +// "favorite_count": 0 +// } +// ] +// } + struct tweet { + std::string_view created_at{}; uint64_t id{}; std::string_view text{}; - std::string_view created_at{}; uint64_t in_reply_to_status_id{}; + twitter_user user{}; uint64_t retweet_count{}; uint64_t favorite_count{}; - twitter_user user{}; }; } // namespace twitter diff --git a/cmake/simdjson-flags.cmake b/cmake/simdjson-flags.cmake index 1c7d33e5..5f549ea1 100644 --- a/cmake/simdjson-flags.cmake +++ b/cmake/simdjson-flags.cmake @@ -133,6 +133,11 @@ if(SIMDJSON_ENABLE_THREADS) target_compile_definitions(simdjson-flags INTERFACE SIMDJSON_THREADS_ENABLED=1) # This will be set in the code automatically. endif() +option(SIMDJSON_VERBOSE_LOGGING "Enable verbose logging for internal simdjson library development." 
OFF) +if (SIMDJSON_VERBOSE_LOGGING) + target_compile_definitions(simdjson-flags INTERFACE SIMDJSON_VERBOSE_LOGGING=1) +endif() + if(SIMDJSON_USE_LIBCPP) target_link_libraries(simdjson-flags INTERFACE -stdlib=libc++ -lc++abi) # instead of the above line, we could have used diff --git a/include/simdjson/error.h b/include/simdjson/error.h index 9f9ee06b..0b8a574b 100644 --- a/include/simdjson/error.h +++ b/include/simdjson/error.h @@ -34,6 +34,7 @@ enum error_code { INVALID_JSON_POINTER, ///< Invalid JSON pointer reference INVALID_URI_FRAGMENT, ///< Invalid URI fragment UNEXPECTED_ERROR, ///< indicative of a bug in simdjson + PARSER_IN_USE, ///< parser is already in use. /** @private Number of error codes */ NUM_ERROR_CODES }; diff --git a/include/simdjson/internal/dom_parser_implementation.h b/include/simdjson/internal/dom_parser_implementation.h index 9eca8efe..01bac46e 100644 --- a/include/simdjson/internal/dom_parser_implementation.h +++ b/include/simdjson/internal/dom_parser_implementation.h @@ -152,8 +152,20 @@ protected: * Defaults to DEFAULT_MAX_DEPTH. */ size_t _max_depth{0}; + + // Declaring these so that subclasses can use them to implement their constructors. 
+ simdjson_really_inline dom_parser_implementation() noexcept; + simdjson_really_inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + simdjson_really_inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + + simdjson_really_inline dom_parser_implementation(const dom_parser_implementation &) noexcept = delete; + simdjson_really_inline dom_parser_implementation &operator=(const dom_parser_implementation &other) noexcept = delete; }; // class dom_parser_implementation +simdjson_really_inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +simdjson_really_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +simdjson_really_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + simdjson_really_inline size_t dom_parser_implementation::capacity() const noexcept { return _capacity; } diff --git a/src/error.cpp b/src/error.cpp index 60fb2339..c08bb6c7 100644 --- a/src/error.cpp +++ b/src/error.cpp @@ -27,7 +27,8 @@ namespace internal { { IO_ERROR, "Error reading the file." }, { INVALID_JSON_POINTER, "Invalid JSON pointer syntax." }, { INVALID_URI_FRAGMENT, "Invalid URI fragment syntax." }, - { UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson" } + { UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson" }, + { PARSER_IN_USE, "Cannot parse a new document while a document is still in use." 
} }; // error_messages[] } // namespace internal diff --git a/src/generic/dom_parser_implementation.h b/src/generic/dom_parser_implementation.h index 28b6d2b9..cd62b146 100644 --- a/src/generic/dom_parser_implementation.h +++ b/src/generic/dom_parser_implementation.h @@ -25,9 +25,11 @@ public: /** Document passed to stage 2 */ dom::document *doc{}; - simdjson_really_inline dom_parser_implementation(); + simdjson_really_inline dom_parser_implementation() noexcept; + dom_parser_implementation(dom_parser_implementation &&other) noexcept; + dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation & operator=(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; SIMDJSON_WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; SIMDJSON_WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool partial) noexcept final; @@ -47,7 +49,9 @@ public: namespace { namespace SIMDJSON_IMPLEMENTATION { -simdjson_really_inline dom_parser_implementation::dom_parser_implementation() {} +simdjson_really_inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +simdjson_really_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +simdjson_really_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; // Leaving these here so they can be inlined if so desired SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { diff --git a/src/generic/ondemand.h b/src/generic/ondemand.h new file mode 100644 index 00000000..280f3e5e --- /dev/null +++ b/src/generic/ondemand.h @@ -0,0 +1,19 @@ +#include "generic/ondemand/logger.h" +#include 
"generic/ondemand/raw_json_string.h" +#include "generic/ondemand/token_iterator.h" +#include "generic/ondemand/document.h" +#include "generic/ondemand/value.h" +#include "generic/ondemand/array.h" +#include "generic/ondemand/field.h" +#include "generic/ondemand/object.h" +#include "generic/ondemand/parser.h" + +#include "generic/ondemand/logger-inl.h" +#include "generic/ondemand/raw_json_string-inl.h" +#include "generic/ondemand/token_iterator-inl.h" +#include "generic/ondemand/document-inl.h" +#include "generic/ondemand/value-inl.h" +#include "generic/ondemand/array-inl.h" +#include "generic/ondemand/field-inl.h" +#include "generic/ondemand/object-inl.h" +#include "generic/ondemand/parser-inl.h" diff --git a/src/generic/ondemand/array-inl.h b/src/generic/ondemand/array-inl.h new file mode 100644 index 00000000..92af6a62 --- /dev/null +++ b/src/generic/ondemand/array-inl.h @@ -0,0 +1,137 @@ +namespace { +namespace SIMDJSON_IMPLEMENTATION { +namespace ondemand { + +// +// ## Live States +// +// While iterating or looking up values, doc->iter.depth >= depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `[`. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == doc->iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, doc->iter.depth > depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. doc->iter.depth == depth +// and at_start is always false. 
We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// doc->iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, doc->iter.depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has doc->iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, doc->iter.depth < depth, at_start == false, and +// error == SUCCESS. 
+// + +simdjson_really_inline array::array() noexcept = default; +simdjson_really_inline array::array(document *_doc, error_code _error) noexcept + : doc{_doc}, depth{_doc->iter.depth}, at_start{!_error}, error{_error} +{ +} + +simdjson_really_inline bool array::finished() const noexcept { + return doc->iter.depth < depth; +} +simdjson_really_inline void array::finish(bool log_end) noexcept { + doc->iter.depth = depth - 1; + if (log_end) { logger::log_end_value(doc->iter, "array"); } +} + +simdjson_really_inline array array::begin(document *doc, error_code error) noexcept { + doc->iter.depth++; + return array(doc, error); +} +simdjson_really_inline array array::begin() noexcept { + return *this; +} +simdjson_really_inline array array::end() noexcept { + return {}; +} + +simdjson_really_inline simdjson_result array::operator*() noexcept { + if (error) { finish(); return { doc, error }; } + return value::start(doc); +} +simdjson_really_inline bool array::operator==(const array &other) noexcept { + return !(*this != other); +} +simdjson_really_inline bool array::operator!=(const array &) noexcept { + // If we're at the start, check for empty array. 
+ if (at_start) { at_start = false; // leave the Start state: operator++ assumes !at_start, and later comparisons must not re-run the empty-array check + if (*doc->iter.peek() == ']') { + doc->iter.advance(); + logger::log_value(doc->iter, "empty array"); + finish(); + } else { + logger::log_start_value(doc->iter, "array"); + } + } + return !finished(); +} +simdjson_really_inline array &array::operator++() noexcept { + if (!finished()) { + SIMDJSON_ASSUME(!error); + SIMDJSON_ASSUME(!at_start); + doc->iter.skip_unfinished_children(depth); + switch (*doc->iter.advance()) { + case ',': + break; + case ']': + finish(true); + break; + default: + logger::log_error(doc->iter, "Missing comma between array elements"); + doc->iter.depth = depth; // stay live (not finished) so operator* yields this error exactly once; operator* calls finish() itself + error = TAPE_ERROR; + } + } + return *this; +} + +} // namespace ondemand +} // namespace SIMDJSON_IMPLEMENTATION +} // unnamed namespace + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_IMPLEMENTATION::ondemand::array &&value +) noexcept : + internal::simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_IMPLEMENTATION::ondemand::document *doc, + error_code error +) noexcept : + internal::simdjson_result_base({ doc, error }, error) +{ +} + +simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array simdjson_result::begin() noexcept { + return first; +} +simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array simdjson_result::end() noexcept { + return {}; +} + +} // namespace simdjson diff --git a/src/generic/ondemand/array.h b/src/generic/ondemand/array.h new file mode 100644 index 00000000..04298a8e --- /dev/null +++ b/src/generic/ondemand/array.h @@ -0,0 +1,111 @@ +#include "simdjson/error.h" + +namespace { +namespace SIMDJSON_IMPLEMENTATION { +namespace ondemand { + +/** + * A forward-only JSON array. 
+ */ +class array { +public: + simdjson_really_inline array() noexcept; + + simdjson_really_inline array begin() noexcept; + simdjson_really_inline array end() noexcept; + + // + // Iterator interface + // + // Yields the current element (or the pending error) to the user. + simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if doc->iter.depth < depth (finished). + simdjson_really_inline bool operator==(const array &) noexcept; + // Assumes it's being compared with the end. true if doc->iter.depth >= depth (not finished). + simdjson_really_inline bool operator!=(const array &) noexcept; + // Checks for ']' and ',' + simdjson_really_inline array &operator++() noexcept; + +protected: + /** + * Begin array iteration. + * + * @param doc The document containing the array. The iterator must be just after the opening `[`. + * doc->iter.depth will be incremented automatically to reflect the nesting level. + * @param error If this is not SUCCESS, creates an error chained array. + */ + static simdjson_really_inline array begin(document *doc, error_code error=SUCCESS) noexcept; + + /** + * Internal array creation. Call array::begin(doc[, error]) instead of this. + * + * @param doc The document containing the array. doc->iter.depth must already be incremented to + * reflect the array's depth. If there is no error, the iterator must be just after + * the opening `[`. + * @param error The error to report. If the error is not SUCCESS, this is an error chained array. + */ + simdjson_really_inline array(document *doc, error_code error) noexcept; + + /** Check whether iteration is complete. */ + bool finished() const noexcept; + /** Sets doc->iter.depth to depth - 1 to mark iteration as complete. */ + void finish(bool log_end=false) noexcept; + + /** + * Document containing this array. + * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the array + * is first used, and never changes afterwards. 
+ */ + document *doc{}; + /** + * Depth of the array. + * + * If doc->iter.depth < depth, we have finished. + * + * PERF NOTE: expected to be elided entirely, as any individual array's depth is a constant + * knowable at compile time, incremented each time we nest an object or array. + */ + uint32_t depth{}; + /** + * Whether we're at the beginning of the array, or after. + * + * PERF NOTE: expected to be elided into inline control flow, as it is true for the first + * iteration and false thereafter, and compilers with SSA optimization tend to analyze the first + * iteration of any loop separately. + */ + bool at_start{}; + /** + * Error, if there is one. Errors are only yielded once. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. 
+ */ + error_code error{}; + + friend class value; + friend struct simdjson_result; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace { + +namespace simdjson { + +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::array &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::document *doc, error_code error) noexcept; ///< @private + + simdjson_really_inline simdjson_result operator[](std::string_view key) noexcept; + + simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array begin() noexcept; + simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array end() noexcept; +}; + +} // namespace simdjson diff --git a/src/generic/ondemand/document-inl.h b/src/generic/ondemand/document-inl.h new file mode 100644 index 00000000..68946697 --- /dev/null +++ b/src/generic/ondemand/document-inl.h @@ -0,0 +1,130 @@ +namespace { +namespace SIMDJSON_IMPLEMENTATION { +namespace ondemand { + +simdjson_really_inline document::document(document &&other) noexcept : + iter{std::forward(other.iter)}, + parser{other.parser} +{ + if (!at_start()) { logger::log_error(iter, "Cannot move document after it has been used"); abort(); } + other.parser = nullptr; +} +simdjson_really_inline document &document::operator=(document &&other) noexcept { + iter = std::forward(other.iter); + parser = other.parser; + if (!at_start()) { logger::log_error(iter, "Cannot move document after it has been used"); abort(); } + other.parser = nullptr; + return *this; +} + +simdjson_really_inline document::document(ondemand::parser *_parser) noexcept + : iter(_parser->dom_parser.buf, _parser->dom_parser.structural_indexes.get(), 0), parser{_parser} +{ + logger::log_headers(); + parser->current_string_buf_loc = parser->string_buf.get(); + logger::log_start_value(iter, 
"document"); +} +simdjson_really_inline document::~document() noexcept { + // Release the string buf so it can be reused by the next document + if (parser) { + logger::log_end_value(iter, "document"); + parser->current_string_buf_loc = nullptr; + } +} + +simdjson_really_inline value document::as_value() noexcept { + + if (!at_start()) { + logger::log_error(iter, "Document value can only be used once! ondemand::document is a forward-only input iterator."); + abort(); // TODO is there anything softer we can do? I'd rather not make this a simdjson_result just for user error. + } + return value::start(this); +} +simdjson_really_inline bool document::at_start() const noexcept { return iter.index == parser->dom_parser.structural_indexes.get(); } + +simdjson_really_inline simdjson_result document::get_array() & noexcept { return as_value().get_array(); } +simdjson_really_inline simdjson_result document::get_object() & noexcept { return as_value().get_object(); } +simdjson_really_inline simdjson_result document::get_uint64() noexcept { return as_value().get_uint64(); } +simdjson_really_inline simdjson_result document::get_int64() noexcept { return as_value().get_int64(); } +simdjson_really_inline simdjson_result document::get_double() noexcept { return as_value().get_double(); } +simdjson_really_inline simdjson_result document::get_string() & noexcept { return as_value().get_string(); } +simdjson_really_inline simdjson_result document::get_raw_json_string() & noexcept { return as_value().get_raw_json_string(); } +simdjson_really_inline simdjson_result document::get_bool() noexcept { return as_value().get_bool(); } +simdjson_really_inline bool document::is_null() noexcept { return as_value().is_null(); } + +#if SIMDJSON_EXCEPTIONS +simdjson_really_inline document::operator array() & noexcept(false) { return as_value(); } +simdjson_really_inline document::operator object() & noexcept(false) { return as_value(); } +simdjson_really_inline document::operator uint64_t() 
noexcept(false) { return as_value(); } +simdjson_really_inline document::operator int64_t() noexcept(false) { return as_value(); } +simdjson_really_inline document::operator double() noexcept(false) { return as_value(); } +simdjson_really_inline document::operator std::string_view() & noexcept(false) { return as_value(); } +simdjson_really_inline document::operator raw_json_string() & noexcept(false) { return as_value(); } +simdjson_really_inline document::operator bool() noexcept(false) { return as_value(); } +#endif + +simdjson_really_inline array document::begin() & noexcept { return as_value().begin(); } +simdjson_really_inline array document::end() & noexcept { return {}; } +simdjson_really_inline simdjson_result document::operator[](std::string_view key) & noexcept { return as_value()[key]; } + +} // namespace ondemand +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace { + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_IMPLEMENTATION::ondemand::document &&value +) noexcept : + internal::simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_IMPLEMENTATION::ondemand::document &&value, + error_code error +) noexcept : + internal::simdjson_result_base( + std::forward(value), + error + ) +{ +} + +// TODO make sure the passing of a pointer here isn't about to cause us trouble +simdjson_really_inline simdjson_result simdjson_result::as_value() noexcept { + if (error()) { return { &first, error() }; } + return first.as_value(); +} +simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array simdjson_result::begin() & noexcept { return as_value().begin(); } +simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array simdjson_result::end() & noexcept { return {}; } +simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + return as_value()[key]; +} +simdjson_really_inline simdjson_result 
simdjson_result::operator[](const char *key) & noexcept { + return as_value()[key]; +} + +simdjson_really_inline simdjson_result simdjson_result::get_array() & noexcept { return as_value().get_array(); } +simdjson_really_inline simdjson_result simdjson_result::get_object() & noexcept { return as_value().get_object(); } +simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { return as_value().get_uint64(); } +simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { return as_value().get_int64(); } +simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { return as_value().get_double(); } +simdjson_really_inline simdjson_result simdjson_result::get_string() & noexcept { return as_value().get_string(); } +simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() & noexcept { return as_value().get_raw_json_string(); } +simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { return as_value().get_bool(); } +simdjson_really_inline bool simdjson_result::is_null() noexcept { return as_value().is_null(); } + +#if SIMDJSON_EXCEPTIONS +simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::array() & noexcept(false) { return as_value(); } +simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::object() & noexcept(false) { return as_value(); } +simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { return as_value(); } +simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { return as_value(); } +simdjson_really_inline simdjson_result::operator double() noexcept(false) { return as_value(); } +simdjson_really_inline simdjson_result::operator std::string_view() & noexcept(false) { return as_value(); } +simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() & noexcept(false) { return as_value(); } +simdjson_really_inline 
simdjson_result::operator bool() noexcept(false) { return as_value(); } +#endif + +} // namespace simdjson diff --git a/src/generic/ondemand/document.h b/src/generic/ondemand/document.h new file mode 100644 index 00000000..5f5b0a64 --- /dev/null +++ b/src/generic/ondemand/document.h @@ -0,0 +1,116 @@ +#include "simdjson/error.h" + +namespace { +namespace SIMDJSON_IMPLEMENTATION { +namespace ondemand { + +class parser; +class array; +class object; +class value; +class raw_json_string; + +/** + * A JSON document iteration. + * + * Used by tokens to get text, and string buffer location. + * + * You must keep the document around during iteration. + */ +class document { +public: + simdjson_really_inline document() noexcept = default; + simdjson_really_inline document(document &&other) noexcept; + simdjson_really_inline document &operator=(document &&other) noexcept; + simdjson_really_inline document(const document &other) = delete; + simdjson_really_inline document &operator=(const document &other) = delete; + simdjson_really_inline ~document() noexcept; + + simdjson_really_inline simdjson_result get_array() & noexcept; + simdjson_really_inline simdjson_result get_object() & noexcept; + simdjson_really_inline simdjson_result get_uint64() noexcept; + simdjson_really_inline simdjson_result get_int64() noexcept; + simdjson_really_inline simdjson_result get_double() noexcept; + simdjson_really_inline simdjson_result get_string() & noexcept; + simdjson_really_inline simdjson_result get_raw_json_string() & noexcept; + simdjson_really_inline simdjson_result get_bool() noexcept; + simdjson_really_inline bool is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_really_inline operator array() & noexcept(false); + simdjson_really_inline operator object() & noexcept(false); + simdjson_really_inline operator uint64_t() noexcept(false); + simdjson_really_inline operator int64_t() noexcept(false); + simdjson_really_inline operator double() noexcept(false); + 
simdjson_really_inline operator std::string_view() & noexcept(false); + simdjson_really_inline operator raw_json_string() & noexcept(false); + simdjson_really_inline operator bool() noexcept(false); +#endif + + simdjson_really_inline array begin() & noexcept; + simdjson_really_inline array end() & noexcept; + simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; + +protected: + simdjson_really_inline document(ondemand::parser *parser) noexcept; + simdjson_really_inline const uint8_t *text(uint32_t idx) const noexcept; + + token_iterator iter; ///< Current position in the document + ondemand::parser *parser; + + simdjson_really_inline value as_value() noexcept; + simdjson_really_inline bool at_start() const noexcept; + + friend struct simdjson_result; + friend class value; + friend class ondemand::parser; + friend class object; + friend class array; + friend class field; + friend class token; +}; + +} // namespace ondemand +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace { + +namespace simdjson { + +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::document &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::document &&value, error_code error) noexcept; ///< @private + + simdjson_really_inline simdjson_result get_array() & noexcept; + simdjson_really_inline simdjson_result get_object() & noexcept; + simdjson_really_inline simdjson_result get_uint64() noexcept; + simdjson_really_inline simdjson_result get_int64() noexcept; + simdjson_really_inline simdjson_result get_double() noexcept; + simdjson_really_inline simdjson_result get_string() & noexcept; + simdjson_really_inline simdjson_result get_raw_json_string() & noexcept; + simdjson_really_inline simdjson_result get_bool() noexcept; 
+ simdjson_really_inline bool is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::array() & noexcept(false); + simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::object() & noexcept(false); + simdjson_really_inline operator uint64_t() noexcept(false); + simdjson_really_inline operator int64_t() noexcept(false); + simdjson_really_inline operator double() noexcept(false); + simdjson_really_inline operator std::string_view() & noexcept(false); + simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() & noexcept(false); + simdjson_really_inline operator bool() noexcept(false); +#endif + + simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array begin() & noexcept; + simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array end() & noexcept; + simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; + +protected: + simdjson_really_inline simdjson_result as_value() noexcept; +}; + +} // namespace simdjson diff --git a/src/generic/ondemand/field-inl.h b/src/generic/ondemand/field-inl.h new file mode 100644 index 00000000..6c6c9072 --- /dev/null +++ b/src/generic/ondemand/field-inl.h @@ -0,0 +1,79 @@ +namespace { +namespace SIMDJSON_IMPLEMENTATION { +namespace ondemand { + +simdjson_really_inline field::field(field &&other) noexcept = default; +simdjson_really_inline field &field::operator=(field &&other) noexcept = default; + +simdjson_really_inline field::field(const uint8_t *key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) +{ +} + +simdjson_really_inline simdjson_result field::start(document *doc) noexcept { + const uint8_t *key = doc->iter.advance(); // token that should be the key's opening quote + if ('"' != *key) { + logger::log_error(doc->iter, "Missing key in object field"); + return { doc, TAPE_ERROR }; + } + return field::start(doc, key + 1); // skip the opening quote: raw_json_string points just past it +} + 
+simdjson_really_inline simdjson_result field::start(document *doc, const uint8_t *key) noexcept { + if (':' != *doc->iter.advance()) { + logger::log_error(doc->iter, "Missing colon in object field"); + return { doc, TAPE_ERROR }; + } + return field(key, value::start(doc)); +} + +simdjson_really_inline simdjson_result field::start_value(document *doc) noexcept { + if (':' != *doc->iter.advance()) { + logger::log_error(doc->iter, "Missing colon in object field"); + return { doc, TAPE_ERROR }; + } + return value::start(doc); +} + +simdjson_really_inline raw_json_string field::key() const noexcept { + return first; +} + +simdjson_really_inline value &field::value() noexcept { + return second; +} + +} // namespace ondemand +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace { + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_IMPLEMENTATION::ondemand::field &&value +) noexcept : + internal::simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_IMPLEMENTATION::ondemand::document *doc, + error_code error +) noexcept : + internal::simdjson_result_base( + { nullptr, { doc, nullptr } }, + error + ) +{ +} + +simdjson_really_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); +} +simdjson_really_inline simdjson_result simdjson_result::value() noexcept { + return { std::move(first.value()), error() }; +} + +} // namespace simdjson diff --git a/src/generic/ondemand/field.h b/src/generic/ondemand/field.h new file mode 100644 index 00000000..ce80633f --- /dev/null +++ b/src/generic/ondemand/field.h @@ -0,0 +1,45 @@ +#include "simdjson/error.h" + +namespace { +namespace SIMDJSON_IMPLEMENTATION { +namespace ondemand { + +/** + * A JSON field (key/value pair) in an object. 
+ */ +class field : public std::pair { +public: + simdjson_really_inline field() noexcept = default; + simdjson_really_inline field(field &&other) noexcept; + simdjson_really_inline field &operator=(field &&other) noexcept; + simdjson_really_inline field(const field &other) noexcept = delete; + simdjson_really_inline field &operator=(const field &other) noexcept = delete; + + simdjson_really_inline raw_json_string key() const noexcept; + simdjson_really_inline ondemand::value &value() noexcept; +protected: + simdjson_really_inline field(const uint8_t *key, ondemand::value &&value) noexcept; + static simdjson_really_inline simdjson_result start(document *doc) noexcept; + static simdjson_really_inline simdjson_result start(document *doc, const uint8_t *key) noexcept; + static simdjson_really_inline simdjson_result start_value(document *doc) noexcept; + friend struct simdjson_result; + friend class object; +}; + +} // namespace ondemand +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace { + +namespace simdjson { + +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::field &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::document *doc, error_code error) noexcept; ///< @private + + simdjson_really_inline simdjson_result key() noexcept; + simdjson_really_inline simdjson_result value() noexcept; +}; + +} // namespace simdjson diff --git a/src/generic/ondemand/logger-inl.h b/src/generic/ondemand/logger-inl.h new file mode 100644 index 00000000..0fd38378 --- /dev/null +++ b/src/generic/ondemand/logger-inl.h @@ -0,0 +1,70 @@ +namespace { +namespace SIMDJSON_IMPLEMENTATION { +namespace ondemand { +namespace logger { + +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static constexpr const int 
LOG_INDEX_LEN = 5; +using SIMDJSON_IMPLEMENTATION::logger::DASHES; +using SIMDJSON_IMPLEMENTATION::logger::printable_char; + +simdjson_really_inline void log_event(const token_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta); +} +simdjson_really_inline void log_value(const token_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta); +} +simdjson_really_inline void log_start_value(const token_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta); +} +simdjson_really_inline void log_end_value(const token_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "-", type, "", delta, depth_delta); +} +simdjson_really_inline void log_error(const token_iterator &iter, const char *error, int delta, int depth_delta) noexcept { + log_line(iter, "", "ERROR", error, delta, depth_delta); +} + +simdjson_really_inline void log_headers() noexcept { + if (LOG_ENABLED) { + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } +} + +simdjson_really_inline void log_line(const token_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { + const int indent = (iter.depth+depth_delta)*2; + printf("| %*s%s%-*s ", + indent, "", + title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), title + ); + { + // Print the current structural. + printf("| "); + for (int i=0;i= doc->iter.depth. at_start may vary. 
Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == doc->iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > doc->iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. doc->iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// doc->iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, doc->iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has doc->iter.depth < depth. at_start is always false. 
+// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, doc->iter.depth < depth, at_start == false, and error == SUCCESS. +// + +simdjson_really_inline object::object() noexcept = default; +simdjson_really_inline object::object(document *_doc, error_code _error) noexcept + : doc{_doc}, depth{_doc->iter.depth}, at_start{!_error}, error{_error} +{ +} + +simdjson_really_inline bool object::finished() const noexcept { + return doc->iter.depth < depth; +} +simdjson_really_inline void object::finish(bool log_end) noexcept { + doc->iter.depth = depth - 1; + if (log_end) { logger::log_end_value(doc->iter, "object"); } +} + +simdjson_really_inline void object::first_field() noexcept { + at_start = false; + // If it's empty, shut down + if (*doc->iter.peek() == '}') { + logger::log_value(doc->iter, "empty object", "", -1, -1); + doc->iter.advance(); + finish(); + } else { + logger::log_start_value(doc->iter, "object", -1, -1); + } +} +simdjson_really_inline simdjson_result object::operator[](const std::string_view key) noexcept { + if (finished()) { return { doc, NO_SUCH_FIELD }; } + if (error) { finish(); return { doc, error }; } + + if (at_start) { + first_field(); + } else { + doc->iter.skip_unfinished_children(depth); + switch (*doc->iter.advance()) { + case ',': + break; + case '}': + finish(true); + return { doc, NO_SUCH_FIELD }; + default: + logger::log_error(doc->iter, "Missing comma between object fields"); + finish(); + return { doc, TAPE_ERROR }; + } + } + + while (true) { + const uint8_t *actual_key = doc->iter.advance(); + switch (*(actual_key++)) { + case '"': + if (raw_json_string(actual_key) == key) { + logger::log_event(doc->iter, "match", key); + return field::start_value(doc); + } + logger::log_event(doc->iter, "no match", key); + doc->iter.advance(); // "key" : + doc->iter.skip_value(); // "key" : + switch (*doc->iter.advance()) { + case ',': + break; + case '}': + 
logger::log_event(doc->iter, "no key found", key); + finish(true); + return { doc, NO_SUCH_FIELD }; + default: + logger::log_error(doc->iter, "Missing comma between object fields"); + finish(); + return { doc, TAPE_ERROR }; + } + break; + default: + logger::log_error(doc->iter, "Key is not a string"); + finish(); + return { doc, TAPE_ERROR }; + } + } +} + +simdjson_really_inline object object::begin(document *doc, error_code error) noexcept { + doc->iter.depth++; + return object(doc, error); +} +simdjson_really_inline object object::begin() noexcept { + return *this; +} +simdjson_really_inline object object::end() noexcept { + return {}; +} + +simdjson_really_inline simdjson_result object::operator*() noexcept { + if (error) { finish(); return { doc, error }; } + return field::start(doc); +} +simdjson_really_inline bool object::operator==(const object &other) noexcept { + return !(*this != other); +} +simdjson_really_inline bool object::operator!=(const object &) noexcept { + // If we're at the start, check for the first field. 
+ if (at_start) { first_field(); } + return !finished(); +} +simdjson_really_inline object &object::operator++() noexcept { + if (!finished()) { + SIMDJSON_ASSUME(!error); + SIMDJSON_ASSUME(!at_start); + doc->iter.skip_unfinished_children(depth); + switch (*doc->iter.advance()) { + case ',': + break; + case '}': + finish(true); + break; + default: + logger::log_error(doc->iter, "Missing comma between object fields"); + finish(); + error = TAPE_ERROR; + } + } + return *this; +} + +} // namespace ondemand +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace { + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::object &&value) noexcept + : internal::simdjson_result_base(std::forward(value)) {} +simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::document *doc, error_code error) noexcept + : internal::simdjson_result_base({ doc, error }, error) {} + +simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::object simdjson_result::begin() noexcept { + return first; +} +simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::object simdjson_result::end() noexcept { + return {}; +} +simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return { first.doc, error() }; } + return first[key]; +} + +} // namespace simdjson diff --git a/src/generic/ondemand/object.h b/src/generic/ondemand/object.h new file mode 100644 index 00000000..21826bf8 --- /dev/null +++ b/src/generic/ondemand/object.h @@ -0,0 +1,111 @@ +#include "simdjson/error.h" + +namespace { +namespace SIMDJSON_IMPLEMENTATION { +namespace ondemand { + +/** + * A forward-only JSON object field iterator. 
+ */ +class object { +public: + simdjson_really_inline object() noexcept; + + simdjson_really_inline object begin() noexcept; + simdjson_really_inline object end() noexcept; + simdjson_really_inline simdjson_result operator[](const std::string_view key) noexcept; + + // + // Iterator interface + // + // Reads key and value, yielding them to the user. + simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if doc->iter.depth < depth (iteration finished). + simdjson_really_inline bool operator==(const object &) noexcept; + // Assumes it's being compared with the end. true if doc->iter.depth >= depth (not finished yet). + simdjson_really_inline bool operator!=(const object &) noexcept; + // Checks for '}' and ',' + simdjson_really_inline object &operator++() noexcept; + +protected: + /** + * Begin object iteration. + * + * @param doc The document containing the object. The iterator must be just after the opening `{`. + * @param error If this is not SUCCESS, creates an error chained object. + */ + static simdjson_really_inline object begin(document *doc, error_code error=SUCCESS) noexcept; + + /** + * Internal object creation. Call object::begin(doc[, error]) instead of this. + * + * @param doc The document containing the object. doc->iter.depth must already be incremented to + * reflect the object's depth. If there is no error, the iterator must be just after + * the opening `{`. + * @param error The error to report. If the error is not SUCCESS, this is an error chained object. + */ + simdjson_really_inline object(document *doc, error_code error) noexcept; + + /** Check whether iteration is complete. */ + simdjson_really_inline bool finished() const noexcept; + /** Sets doc->iter.depth to depth - 1 to mark iteration as complete. */ + simdjson_really_inline void finish(bool log_end=false) noexcept; + simdjson_really_inline void first_field() noexcept; + + /** + * Document containing the primary iterator. 
+ * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object + * is first used, and never changes afterwards. + */ + document *doc{}; + /** + * Depth of the object. + * + * If doc->iter.depth < depth, we have finished. + * + * PERF NOTE: expected to be elided entirely, as any individual object's depth is a constant + * knowable at compile time, incremented each time we nest an object() or array(). + */ + uint32_t depth{}; + /** + * Whether we're at the beginning of the object, or after. + * + * PERF NOTE: expected to be elided into inline control flow, as it is true for the first + * iteration and false thereafter, and compilers with SSA optimization tend to analyze the first + * iteration of any loop separately. + */ + bool at_start{}; + /** + * Error, if there is one. Errors are only yielded once. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. 
+ */ + error_code error{}; + + friend class value; + friend class document; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace { + +namespace simdjson { + +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::object &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::document *doc, error_code error) noexcept; ///< @private + + simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::object begin() noexcept; + simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::object end() noexcept; + simdjson_really_inline simdjson_result operator[](std::string_view key) noexcept; +}; + +} // namespace simdjson diff --git a/src/generic/ondemand/parser-inl.h b/src/generic/ondemand/parser-inl.h new file mode 100644 index 00000000..5879449f --- /dev/null +++ b/src/generic/ondemand/parser-inl.h @@ -0,0 +1,42 @@ +namespace { +namespace SIMDJSON_IMPLEMENTATION { +namespace ondemand { + +SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity == _capacity && new_max_depth == _max_depth) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + _max_depth = 0; + // The most string buffer we could possibly need is capacity-2 (a string the whole document long). + // Allocate up to capacity so we don't have to check for capacity == 0 or 1. 
+ string_buf.reset(new (std::nothrow) uint8_t[new_capacity]); + SIMDJSON_TRY( dom_parser.set_capacity(new_capacity) ); + SIMDJSON_TRY( dom_parser.set_max_depth(DEFAULT_MAX_DEPTH) ); + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} + +SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result parser::parse(const padded_string &buf) noexcept { + if (current_string_buf_loc) { + return { this, PARSER_IN_USE }; + } + + // Allocate if needed + error_code error; + if (_capacity < buf.size()) { + error = allocate(buf.size(), _max_depth); + if (error) { + return { this, error }; + } + } + + // Run stage 1. + error = dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false); + return { this, error }; +} + +} // namespace ondemand +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace { diff --git a/src/generic/ondemand/parser.h b/src/generic/ondemand/parser.h new file mode 100644 index 00000000..6114d783 --- /dev/null +++ b/src/generic/ondemand/parser.h @@ -0,0 +1,40 @@ +#include "simdjson/error.h" + +namespace { +namespace SIMDJSON_IMPLEMENTATION { +namespace ondemand { + +class array; +class object; +class value; +class raw_json_string; + +/** + * A JSON fragment iterator. + * + * This holds the actual iterator as well as the buffer for writing strings. 
+ */ +class parser { +public: + simdjson_really_inline parser() noexcept = default; + simdjson_really_inline parser(parser &&other) noexcept = default; + simdjson_really_inline parser(const parser &other) = delete; + simdjson_really_inline parser &operator=(const parser &other) = delete; + + SIMDJSON_WARN_UNUSED error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + SIMDJSON_WARN_UNUSED simdjson_result parse(const padded_string &json) noexcept; +private: + dom_parser_implementation dom_parser{}; + size_t _capacity{0}; + size_t _max_depth{0}; + std::unique_ptr string_buf{}; + uint8_t *current_string_buf_loc{}; + + friend class raw_json_string; + friend class document; + friend class value; +}; + +} // namespace ondemand +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace { diff --git a/src/generic/ondemand/raw_json_string-inl.h b/src/generic/ondemand/raw_json_string-inl.h new file mode 100644 index 00000000..d45b4c10 --- /dev/null +++ b/src/generic/ondemand/raw_json_string-inl.h @@ -0,0 +1,28 @@ +namespace { +namespace SIMDJSON_IMPLEMENTATION { +namespace ondemand { + +simdjson_really_inline raw_json_string::raw_json_string() noexcept : buf{nullptr} {} // for constructing a simdjson_result +simdjson_really_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} +simdjson_really_inline raw_json_string::raw_json_string(const raw_json_string &other) noexcept : buf{other.buf} {} +simdjson_really_inline raw_json_string &raw_json_string::operator=(const raw_json_string &other) noexcept { buf = other.buf; return *this; } +simdjson_really_inline const char * raw_json_string::raw() const noexcept { return (const char *)buf; } +simdjson_really_inline SIMDJSON_WARN_UNUSED simdjson_result raw_json_string::unescape(uint8_t *&dst) const noexcept { + uint8_t *end = stage2::stringparsing::parse_string(buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result((const char *)dst, end-dst); + dst = end; + 
return result; +} + +simdjson_really_inline bool operator==(const raw_json_string &a, std::string_view b) noexcept { + return !strncmp(a.raw(), b.data(), b.size()); +} + +simdjson_really_inline bool operator==(std::string_view a, const raw_json_string &b) noexcept { + return b == a; +} + +} // namespace ondemand +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace { diff --git a/src/generic/ondemand/raw_json_string.h b/src/generic/ondemand/raw_json_string.h new file mode 100644 index 00000000..e4aea05e --- /dev/null +++ b/src/generic/ondemand/raw_json_string.h @@ -0,0 +1,31 @@ +#include "simdjson/error.h" + +namespace { +namespace SIMDJSON_IMPLEMENTATION { +namespace ondemand { + +/** + * A string escaped per JSON rules, terminated with quote (") + * + * (In other words, a pointer to the beginning of a string, just after the start quote, inside a + * JSON file.) + */ +class raw_json_string { +public: + simdjson_really_inline raw_json_string() noexcept; + simdjson_really_inline raw_json_string(const uint8_t * _buf) noexcept; + simdjson_really_inline raw_json_string(const raw_json_string &other) noexcept; + simdjson_really_inline raw_json_string &operator=(const raw_json_string &other) noexcept; + simdjson_really_inline const char * raw() const noexcept; + simdjson_really_inline SIMDJSON_WARN_UNUSED simdjson_result unescape(uint8_t *&dst) const noexcept; +private: + const uint8_t * buf; + friend class object; +}; + +simdjson_really_inline bool operator==(const raw_json_string &a, std::string_view b) noexcept; +simdjson_really_inline bool operator==(std::string_view a, const raw_json_string &b) noexcept; + +} // namespace ondemand +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace { diff --git a/src/generic/ondemand/token_iterator-inl.h b/src/generic/ondemand/token_iterator-inl.h new file mode 100644 index 00000000..215c546c --- /dev/null +++ b/src/generic/ondemand/token_iterator-inl.h @@ -0,0 +1,76 @@ +namespace { +namespace SIMDJSON_IMPLEMENTATION { 
+namespace ondemand { + +simdjson_really_inline token_iterator::token_iterator() noexcept = default; +simdjson_really_inline token_iterator::token_iterator(token_iterator &&other) noexcept = default; +simdjson_really_inline token_iterator &token_iterator::operator=(token_iterator &&other) noexcept = default; +simdjson_really_inline token_iterator::token_iterator(const uint8_t *_buf, uint32_t *_index, uint32_t _depth) noexcept + : buf{_buf}, index{_index}, depth{_depth} +{ +} + +simdjson_really_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(index+delta)]; +} +simdjson_really_inline const uint8_t *token_iterator::advance() noexcept { + return &buf[*(index++)]; +} +simdjson_really_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(index+delta); +} + +simdjson_really_inline void token_iterator::skip_unfinished_children(uint32_t container_depth) noexcept { + SIMDJSON_ASSUME(depth >= container_depth); + while (depth != container_depth) { + switch (*advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + case ']': case '}': depth--; logger::log_end_value(*this, "skip"); break; + // PERF TODO does it skip the depth check when we don't decrement depth? 
+ case '[': case '{': logger::log_start_value(*this, "skip"); depth++; break; + default: logger::log_value(*this, "skip", ""); break; + } + } +} + +simdjson_really_inline void token_iterator::skip_value() noexcept { + uint32_t child_depth = 0; + do { + switch (*advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + case ']': case '}': child_depth--; logger::log_end_value(*this, "skip", -1, child_depth); break; + // PERF TODO does it skip the depth check when we don't decrement depth? + case '[': case '{': logger::log_start_value(*this, "skip", -1, child_depth); child_depth++; break; + default: logger::log_value(*this, "skip", "", -1, child_depth); break; + } + } while (child_depth != 0); +} + +simdjson_really_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return index == other.index; +} +simdjson_really_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return index != other.index; +} +simdjson_really_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return index > other.index; +} +simdjson_really_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return index >= other.index; +} +simdjson_really_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return index < other.index; +} +simdjson_really_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return index <= other.index; +} + +} // namespace ondemand +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace { diff --git a/src/generic/ondemand/token_iterator.h b/src/generic/ondemand/token_iterator.h new file mode 100644 index 
00000000..6a7d9d4f --- /dev/null +++ b/src/generic/ondemand/token_iterator.h @@ -0,0 +1,88 @@ +namespace { +namespace SIMDJSON_IMPLEMENTATION { +namespace ondemand { + +/** + * A JSON token iterator. + * + * @private This is not intended for external use. + */ +class token_iterator { +public: + simdjson_really_inline token_iterator() noexcept; + simdjson_really_inline token_iterator(token_iterator &&other) noexcept; + simdjson_really_inline token_iterator &operator=(token_iterator &&other) noexcept; + simdjson_really_inline token_iterator(const token_iterator &other) noexcept = delete; + simdjson_really_inline token_iterator &operator=(const token_iterator &other) noexcept = delete; + + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_really_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Advance to the next token (returning the current one). + * + * Does not check or update depth/expect_value. Caller is responsible for that. + */ + simdjson_really_inline const uint8_t *advance() noexcept; + + /** + * If children were left partially iterated / unfinished, this will complete the iteration so we + * are at a comma or end of document/array/object. + * + * @precondition The iterator MUST be at or above the given depth. + * @postcondition The iterator is at the given depth. + */ + simdjson_really_inline void skip_unfinished_children(uint32_t container_depth) noexcept; + + /** + * Skips a JSON value, whether it is a scalar, array or object. 
+ */ + simdjson_really_inline void skip_value() noexcept; + + // NOTE: we don't support a full C++ iterator interface, because we expect people to make + // different calls to advance the iterator based on *their own* state. + + simdjson_really_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_really_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_really_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_really_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_really_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_really_inline bool operator<=(const token_iterator &other) const noexcept; + +private: + simdjson_really_inline token_iterator(const uint8_t *buf, uint32_t *index, uint32_t depth) noexcept; + + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. 
+ * + */ + simdjson_really_inline uint32_t peek_index(int32_t delta=0) const noexcept; + + const uint8_t *buf{}; + const uint32_t *index{}; + uint32_t depth{}; + + friend class document; + friend class object; + friend class array; + friend class value; + friend simdjson_really_inline void logger::log_line(const token_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; +}; + +} // namespace ondemand +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace { diff --git a/src/generic/ondemand/value-inl.h b/src/generic/ondemand/value-inl.h new file mode 100644 index 00000000..1ca697a5 --- /dev/null +++ b/src/generic/ondemand/value-inl.h @@ -0,0 +1,257 @@ +namespace { +namespace SIMDJSON_IMPLEMENTATION { +namespace ondemand { + +simdjson_really_inline value::value() noexcept = default; +simdjson_really_inline value::value(value &&other) noexcept { /* Move construction delegates to move assignment, which also clears other.json. */ + *this = std::forward(other); +}; +simdjson_really_inline value &value::operator=(value &&other) noexcept { /* Takes over doc/json and nulls other.json so that ~value() on other does nothing. NOTE(review): a json already held by *this is overwritten without the unused-value handling ~value() would apply -- confirm intended. */ + doc = other.doc; + json = other.json; + other.json = nullptr; + return *this; +} +simdjson_really_inline value::value(document *_doc, const uint8_t *_json) noexcept : doc{_doc}, json{_json} { /* Stores both pointers; both are assumed to be non-null. */ + SIMDJSON_ASSUME(doc != nullptr); + SIMDJSON_ASSUME(json != nullptr); +} + +simdjson_really_inline value::~value() noexcept { + // If the user didn't actually use the value, we need to check if it's an array/object and bump + // depth so that the array/object iteration routines will work correctly. + // PERF TODO this better be elided entirely when people actually use the value. 
Don't care if it + // gets bumped on the error path unless that's costing us something important. 
+ if (json) { /* json is nulled by the successful getters and by moves, so this branch only runs for a value nobody consumed */ + if (*json == '[' || *json == '{') { + logger::log_start_value(doc->iter, "unused"); + doc->iter.depth++; + } else { + logger::log_value(doc->iter, "unused"); + } + } +} + +simdjson_really_inline value value::start(document *doc) noexcept { /* Builds a value from doc and the pointer returned by doc->iter.advance(). */ + return { doc, doc->iter.advance() }; +} + +simdjson_really_inline simdjson_result value::get_array() noexcept { /* Requires the text to start with '['. On a mismatch json stays set, so ~value() still applies its unused-value handling. */ + if (*json != '[') { + log_error("not an array"); + return array::begin(doc, INCORRECT_TYPE); + } + json = nullptr; // Communicate that we have handled the value PERF TODO elided, right? + return array::begin(doc); +} +simdjson_really_inline simdjson_result value::get_object() noexcept { /* Same shape as get_array(), but requires '{' and hands off to object::begin(). */ + if (*json != '{') { + log_error("not an object"); + return object::begin(doc, INCORRECT_TYPE); + } + json = nullptr; // Communicate that we have handled the value PERF TODO elided, right? + return object::begin(doc); +} +simdjson_really_inline simdjson_result value::get_raw_json_string() noexcept { /* Requires an opening quote; the returned raw_json_string starts at the character after it. */ + log_value("string"); + if (*json != '"') { log_error("not a string"); return INCORRECT_TYPE; } + auto result = raw_json_string{&json[1]}; + json = nullptr; // Communicate that we have handled the value PERF TODO elided, right? + return result; +} +simdjson_really_inline simdjson_result value::get_string() noexcept { /* Fetches the raw string, then unescapes it using doc->parser->current_string_buf_loc. 
*/ + auto [str, error] = get_raw_json_string(); + if (error) { return error; } + return str.unescape(doc->parser->current_string_buf_loc); +} +simdjson_really_inline simdjson_result value::get_double() noexcept { /* Parses via numberparsing::parse_double; json is only nulled when parsing succeeds. */ + log_value("double"); + double result; + error_code error; + if ((error = stage2::numberparsing::parse_double(json).get(result))) { log_error("not a double"); return error; } + json = nullptr; // Communicate that we have handled the value PERF TODO elided, right? 
+ return result; +} +simdjson_really_inline simdjson_result value::get_uint64() noexcept { /* Parses via numberparsing::parse_unsigned; json is only nulled when parsing succeeds. */ + log_value("unsigned"); + uint64_t result; + error_code error; + if ((error = stage2::numberparsing::parse_unsigned(json).get(result))) { log_error("not a unsigned integer"); return error; } + json = nullptr; // Communicate that we have handled the value PERF TODO elided, right? + return result; +} +simdjson_really_inline simdjson_result value::get_int64() noexcept { /* Parses via numberparsing::parse_integer; json is only nulled when parsing succeeds. */ + log_value("integer"); + int64_t result; + error_code error; + if ((error = stage2::numberparsing::parse_integer(json).get(result))) { log_error("not an integer"); return error; } + json = nullptr; // Communicate that we have handled the value PERF TODO elided, right? + return result; +} +simdjson_really_inline simdjson_result value::get_bool() noexcept { /* not_true / not_false are presumably zero only when the text starts with "true" / "false"; the character right after the literal (index 4 or 5) must also pass the structural-or-whitespace check. */ + log_value("bool"); + auto not_true = stage2::atomparsing::str4ncmp(json, "true"); + auto not_false = stage2::atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || stage2::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); + if (error) { log_error("not a boolean"); return INCORRECT_TYPE; } + json = nullptr; // Communicate that we have handled the value PERF TODO elided, right? + /* NOTE(review): error is always false past the early return above, so the ternary below always yields SUCCESS. */ return simdjson_result(!not_true, error ? INCORRECT_TYPE : SUCCESS); +} +simdjson_really_inline bool value::is_null() noexcept { /* Returns true and consumes the value only when str4ncmp reports a match with "null". 
NOTE(review): unlike get_bool(), the character following the literal is not checked -- confirm intended. */ + log_value("null"); + if (stage2::atomparsing::str4ncmp(json, "null")) { return false; } + json = nullptr; // Communicate that we have handled the value PERF TODO elided, right? 
+ return true; +} + +#if SIMDJSON_EXCEPTIONS +simdjson_really_inline value::operator array() noexcept(false) { /* Each conversion operator below forwards to the matching get_*() call. */ return get_array(); } +simdjson_really_inline value::operator object() noexcept(false) { return get_object(); } +simdjson_really_inline value::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_really_inline value::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_really_inline value::operator double() noexcept(false) { return get_double(); } +simdjson_really_inline value::operator std::string_view() noexcept(false) { return get_string(); } +simdjson_really_inline value::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_really_inline value::operator bool() noexcept(false) { return get_bool(); } +#endif + +simdjson_really_inline array value::begin() noexcept { /* Iteration treats the value as an array. */ return get_array().begin(); } +simdjson_really_inline array value::end() noexcept { /* Returns a default-constructed array. */ return {}; } +// TODO this CANNOT be reused. Each time you try, it will get you a new object. +// Probably make it move-only to avoid this issue. 
+simdjson_really_inline simdjson_result value::operator[](std::string_view key) noexcept { + return get_object()[key]; +} +simdjson_really_inline simdjson_result value::operator[](const char *key) noexcept { + return get_object()[key]; +} + +simdjson_really_inline void value::log_value(const char *type) const noexcept { + logger::log_value(doc->iter, type); +} +simdjson_really_inline void value::log_error(const char *message) const noexcept { + logger::log_error(doc->iter, message); +} + +} // namespace ondemand +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace { + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_IMPLEMENTATION::ondemand::value &&value +) noexcept : + internal::simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_IMPLEMENTATION::ondemand::value &&value, + error_code error +) noexcept : + internal::simdjson_result_base( + std::forward(value), + error + ) +{ +} +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_IMPLEMENTATION::ondemand::document *doc, + error_code error +) noexcept : + internal::simdjson_result_base({ doc, nullptr }, error) +{ +} + +simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array simdjson_result::begin() noexcept { + if (error()) { SIMDJSON_IMPLEMENTATION::ondemand::array::begin(first.doc, error()); } + return first.begin(); +} +simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array simdjson_result::end() noexcept { + if (error()) { return {}; } + return first.end(); +} +simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return { first.doc, error() }; } + return first[key]; +} +simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return { first.doc, error() }; } + return first[key]; +} + +simdjson_really_inline simdjson_result 
simdjson_result::get_array() noexcept { + if (error()) { return { first.doc, error() }; } + return first.get_array(); +} +simdjson_really_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return { first.doc, error() }; } + return first.get_object(); +} +simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_really_inline simdjson_result simdjson_result::get_string() noexcept { + if (error()) { return error(); } + return first.get_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_really_inline bool simdjson_result::is_null() noexcept { + if (error()) { return false; } + return first.is_null(); +} + +#if SIMDJSON_EXCEPTIONS +simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} 
+simdjson_really_inline simdjson_result::operator double() noexcept(false) { /* Throws simdjson_error when this result holds an error; otherwise converts the held value. The operators below follow the same pattern. */ + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +} // namespace simdjson diff --git a/src/generic/ondemand/value.h b/src/generic/ondemand/value.h new file mode 100644 index 00000000..19753135 --- /dev/null +++ b/src/generic/ondemand/value.h @@ -0,0 +1,127 @@ +#include "simdjson/error.h" + +namespace { +namespace SIMDJSON_IMPLEMENTATION { +namespace ondemand { + +class array; +class document; +class field; +class object; +class raw_json_string; + +/** + * An ephemeral JSON value returned during iteration. + * + * This object must be destroyed before any other iteration occurs. + */ +class value { +public: + simdjson_really_inline value() noexcept; + simdjson_really_inline value(value &&other) noexcept; + simdjson_really_inline value &operator=(value && other) noexcept; + simdjson_really_inline value(const value &) noexcept = delete; + simdjson_really_inline value &operator=(const value &) noexcept = delete; + + // Uses RAII to ensure we skip the value if it is unused. 
+ // TODO assert if two values are ever alive at the same time, to ensure they get destroyed + simdjson_really_inline ~value() noexcept; + simdjson_really_inline void skip() noexcept; /* NOTE(review): no value::skip() definition appears in value-inl.h -- confirm it is defined before use. */ + simdjson_really_inline simdjson_result get_array() noexcept; + simdjson_really_inline simdjson_result get_object() noexcept; + simdjson_really_inline simdjson_result get_uint64() noexcept; + simdjson_really_inline simdjson_result get_int64() noexcept; + simdjson_really_inline simdjson_result get_double() noexcept; + simdjson_really_inline simdjson_result get_string() noexcept; + simdjson_really_inline simdjson_result get_raw_json_string() noexcept; + simdjson_really_inline simdjson_result get_bool() noexcept; + simdjson_really_inline bool is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_really_inline operator array() noexcept(false); + simdjson_really_inline operator object() noexcept(false); + simdjson_really_inline operator uint64_t() noexcept(false); + simdjson_really_inline operator int64_t() noexcept(false); + simdjson_really_inline operator double() noexcept(false); + simdjson_really_inline operator std::string_view() noexcept(false); + simdjson_really_inline operator raw_json_string() noexcept(false); + simdjson_really_inline operator bool() noexcept(false); +#endif + + simdjson_really_inline array begin() noexcept; + simdjson_really_inline array end() noexcept; + simdjson_really_inline simdjson_result operator[](std::string_view key) noexcept; + simdjson_really_inline simdjson_result operator[](const char *key) noexcept; + +protected: + /** + * Create a value. + * + * Use value::start() instead of this. + */ + simdjson_really_inline value(document *doc, const uint8_t *json) noexcept; + + /** + * Read a value. + * + * If the value is an array or object, only the opening brace will be consumed. + * + * @param doc The document containing the value. 
Iterator must be at the value start position. 
+ */ + static simdjson_really_inline value start(document *doc) noexcept; + + simdjson_really_inline void log_value(const char *type) const noexcept; + simdjson_really_inline void log_error(const char *message) const noexcept; + + document *doc{}; // For the string buffer (if we need it) + const uint8_t *json{}; // The JSON text of the value; set to nullptr once the value has been consumed or moved from + + friend class document; + friend class array; + friend class field; + friend struct simdjson_result; + friend struct simdjson_result; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace { + +namespace simdjson { + +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::value &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::value &&value, error_code error) noexcept; ///< @private + simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::document *doc, error_code error) noexcept; ///< @private + + simdjson_really_inline simdjson_result get_array() noexcept; + simdjson_really_inline simdjson_result get_object() noexcept; + simdjson_really_inline simdjson_result get_uint64() noexcept; + simdjson_really_inline simdjson_result get_int64() noexcept; + simdjson_really_inline simdjson_result get_double() noexcept; + simdjson_really_inline simdjson_result get_string() noexcept; + simdjson_really_inline simdjson_result get_raw_json_string() noexcept; + simdjson_really_inline simdjson_result get_bool() noexcept; + simdjson_really_inline bool is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::array() noexcept(false); + simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::object() noexcept(false); + simdjson_really_inline operator uint64_t() 
noexcept(false); + simdjson_really_inline operator int64_t() 
noexcept(false); + simdjson_really_inline operator double() noexcept(false); + simdjson_really_inline operator std::string_view() noexcept(false); + simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); + simdjson_really_inline operator bool() noexcept(false); +#endif + + simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array begin() noexcept; + simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array end() noexcept; + simdjson_really_inline simdjson_result operator[](std::string_view key) noexcept; + simdjson_really_inline simdjson_result operator[](const char *key) noexcept; +}; + +} // namespace simdjson diff --git a/src/generic/stage2/logger.h b/src/generic/stage2/logger.h index 5ceebfc1..d09402a5 100644 --- a/src/generic/stage2/logger.h +++ b/src/generic/stage2/logger.h @@ -6,7 +6,11 @@ namespace logger { static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else static constexpr const bool LOG_ENABLED = false; +#endif static constexpr const int LOG_EVENT_LEN = 20; static constexpr const int LOG_BUFFER_LEN = 30; static constexpr const int LOG_SMALL_BUFFER_LEN = 10; @@ -33,7 +37,13 @@ namespace logger { } } - // Logs a single line of + static simdjson_really_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser template static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { if (LOG_ENABLED) { diff --git a/src/haswell/dom_parser_implementation.cpp b/src/haswell/dom_parser_implementation.cpp index 
b2104967..60627a95 100644 --- a/src/haswell/dom_parser_implementation.cpp +++ b/src/haswell/dom_parser_implementation.cpp @@ -161,4 +161,6 @@ SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t * } // namespace SIMDJSON_IMPLEMENTATION } // unnamed namespace +#include "generic/ondemand.h" + #include "haswell/end_implementation.h" diff --git a/tests/allparserscheckfile.cpp b/tests/allparserscheckfile.cpp index 959268e6..9eda379f 100644 --- a/tests/allparserscheckfile.cpp +++ b/tests/allparserscheckfile.cpp @@ -29,7 +29,7 @@ extern "C" { SIMDJSON_POP_DISABLE_WARNINGS // fastjson has a tricky interface -void on_json_error(void *, UNUSED const fastjson::ErrorContext &ec) { +void on_json_error(void *, SIMDJSON_UNUSED const fastjson::ErrorContext &ec) { // std::cerr<<"ERROR: "<