diff --git a/include/simdjson/generic/ondemand/json_iterator-inl.h b/include/simdjson/generic/ondemand/json_iterator-inl.h index 6e5d66ca..cf28e4ed 100644 --- a/include/simdjson/generic/ondemand/json_iterator-inl.h +++ b/include/simdjson/generic/ondemand/json_iterator-inl.h @@ -20,12 +20,13 @@ simdjson_really_inline json_iterator &json_iterator::operator=(json_iterator &&o } simdjson_really_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept - : token(buf, _parser->dom_parser->structural_indexes.get()), + : token(buf, _parser->implementation->structural_indexes.get()), parser{_parser}, _string_buf_loc{parser->string_buf.get()}, - _depth{1} + _depth{0} { // Release the string buf so it can be reused by the next document + descend_to(1); logger::log_headers(); } @@ -69,7 +70,7 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::skip_child } // Now that we've considered the first value, we only increment/decrement for arrays/objects - auto end = &parser->dom_parser->structural_indexes[parser->dom_parser->n_structural_indexes]; + auto end = &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; while (token.index <= end) { switch (*advance()) { case '[': case '{': @@ -102,19 +103,19 @@ simdjson_really_inline bool json_iterator::at_root() const noexcept { } simdjson_really_inline token_position json_iterator::root_checkpoint() const noexcept { - return parser->dom_parser->structural_indexes.get(); + return parser->implementation->structural_indexes.get(); } simdjson_really_inline void json_iterator::assert_at_root() const noexcept { SIMDJSON_ASSUME( _depth == 1 ); // Visual Studio Clang treats unique_ptr.get() as "side effecting." #ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME( token.index == parser->dom_parser->structural_indexes.get() ); + SIMDJSON_ASSUME( token.index == parser->implementation->structural_indexes.get() ); #endif } simdjson_really_inline bool json_iterator::at_eof() const noexcept { - return token.index == &parser->dom_parser->structural_indexes[parser->dom_parser->n_structural_indexes]; + return token.index == &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } simdjson_really_inline bool json_iterator::is_alive() const noexcept { @@ -153,9 +154,13 @@ simdjson_really_inline void json_iterator::ascend_to(depth_t parent_depth) noexc } simdjson_really_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + descend_to(child_depth, token.index); +} +simdjson_really_inline void json_iterator::descend_to(depth_t child_depth, token_position start_position) noexcept { SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); SIMDJSON_ASSUME(_depth == child_depth - 1); _depth = child_depth; + parser->start_positions[_depth] = start_position; } simdjson_really_inline depth_t json_iterator::depth() const noexcept { @@ -179,6 +184,9 @@ simdjson_really_inline token_position json_iterator::position() const noexcept { simdjson_really_inline void json_iterator::set_position(token_position target_checkpoint) noexcept { token.set_position(target_checkpoint); } +simdjson_really_inline token_position json_iterator::nested_start_position(depth_t depth) const noexcept { + return parser->start_positions[depth]; +} simdjson_really_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { diff --git a/include/simdjson/generic/ondemand/json_iterator.h b/include/simdjson/generic/ondemand/json_iterator.h index c28c1491..ceeb0b3b 100644 --- a/include/simdjson/generic/ondemand/json_iterator.h +++ b/include/simdjson/generic/ondemand/json_iterator.h @@ -148,6 +148,7 @@ public: * @param child_depth the expected child depth. */ simdjson_really_inline void descend_to(depth_t parent_depth) noexcept; + simdjson_really_inline void descend_to(depth_t parent_depth, token_position start_position) noexcept; /** * Get current depth. @@ -180,6 +181,7 @@ public: simdjson_really_inline token_position position() const noexcept; simdjson_really_inline void set_position(token_position target_checkpoint) noexcept; + simdjson_really_inline token_position nested_start_position(depth_t depth) const noexcept; protected: simdjson_really_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; diff --git a/include/simdjson/generic/ondemand/parser-inl.h b/include/simdjson/generic/ondemand/parser-inl.h index 1ffc6209..369f148d 100644 --- a/include/simdjson/generic/ondemand/parser-inl.h +++ b/include/simdjson/generic/ondemand/parser-inl.h @@ -3,32 +3,29 @@ namespace SIMDJSON_IMPLEMENTATION { namespace ondemand { simdjson_warn_unused simdjson_really_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { - if (string_buf && new_capacity == _capacity && new_max_depth == _max_depth) { return SUCCESS; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } // string_capacity copied from document::allocate - _capacity = 0; - _max_depth = 0; size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); - if (dom_parser) { - SIMDJSON_TRY( dom_parser->set_capacity(new_capacity) ); - SIMDJSON_TRY( dom_parser->set_max_depth(new_max_depth) ); + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); } else { - SIMDJSON_TRY( simdjson::active_implementation->create_dom_parser_implementation(new_capacity, new_max_depth, dom_parser) ); + SIMDJSON_TRY( simdjson::active_implementation->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); } - _capacity = new_capacity; - _max_depth = new_max_depth; return SUCCESS; } simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const padded_string &buf) & noexcept { // Allocate if needed - if (_capacity < buf.size() || !string_buf) { - SIMDJSON_TRY( allocate(buf.size(), _max_depth) ); + if (capacity() < buf.size() || !string_buf) { + SIMDJSON_TRY( allocate(buf.size(), max_depth()) ); } // Run stage 1. - SIMDJSON_TRY( dom_parser->stage1(reinterpret_cast(buf.data()), buf.size(), false) ); + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(buf.data()), buf.size(), false) ); return document::start({ reinterpret_cast(buf.data()), this }); } @@ -41,15 +38,23 @@ simdjson_warn_unused simdjson_really_inline simdjson_result parser::it simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate_raw(const padded_string &buf) & noexcept { // Allocate if needed - if (_capacity < buf.size()) { - SIMDJSON_TRY( allocate(buf.size(), _max_depth) ); + if (capacity() < buf.size()) { + SIMDJSON_TRY( allocate(buf.size(), max_depth()) ); } // Run stage 1. - SIMDJSON_TRY( dom_parser->stage1(reinterpret_cast(buf.data()), buf.size(), false) ); + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(buf.data()), buf.size(), false) ); return json_iterator(reinterpret_cast(buf.data()), this); } +simdjson_really_inline size_t parser::capacity() const noexcept { + return implementation ? implementation->capacity() : 0; +} +simdjson_really_inline size_t parser::max_depth() const noexcept { + return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH; +} + + } // namespace ondemand } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson diff --git a/include/simdjson/generic/ondemand/parser.h b/include/simdjson/generic/ondemand/parser.h index 291d5141..41bb107e 100644 --- a/include/simdjson/generic/ondemand/parser.h +++ b/include/simdjson/generic/ondemand/parser.h @@ -103,12 +103,14 @@ public: */ simdjson_warn_unused simdjson_result iterate_raw(const padded_string &json) & noexcept; + simdjson_really_inline size_t capacity() const noexcept; + simdjson_really_inline size_t max_depth() const noexcept; + private: /** @private [for benchmarking access] The implementation to use */ - std::unique_ptr dom_parser{}; - size_t _capacity{0}; - size_t _max_depth{0}; + std::unique_ptr implementation{}; std::unique_ptr string_buf{}; + std::unique_ptr start_positions{}; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length diff --git a/include/simdjson/generic/ondemand/value_iterator-inl.h b/include/simdjson/generic/ondemand/value_iterator-inl.h index d16819b1..c61e10f5 100644 --- a/include/simdjson/generic/ondemand/value_iterator-inl.h +++ b/include/simdjson/generic/ondemand/value_iterator-inl.h @@ -24,7 +24,7 @@ simdjson_warn_unused simdjson_really_inline bool value_iterator::started_object( _json_iter->ascend_to(depth()-1); return false; } - _json_iter->descend_to(depth()+1); + _json_iter->descend_to(depth()+1); //, _start_position+3); // skip {"key": logger::log_start_value(*_json_iter, "object"); return true; } @@ -38,7 +38,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator _json_iter->ascend_to(depth()-1); return false; case ',': - _json_iter->descend_to(depth()+1); + _json_iter->descend_to(depth()+1); //, _json_iter->token.index+2); // index+2 skips "key": return true; default: return _json_iter->report_error(TAPE_ERROR, "Missing comma between object fields"); @@ -94,6 +94,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator // ``` // } else { + // if (_json_iter->nested_start_position(depth()) != _start_position+1) { return OUT_OF_ORDER_ITERATION; } if ((error = skip_child() )) { abandon(); return error; } if ((error = has_next_field().get(has_value) )) { abandon(); return error; } } @@ -168,6 +169,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator // ``` // } else { + // if (_json_iter->nested_start_position(depth()) != _start_position) { return OUT_OF_ORDER_ITERATION; } // Finish the previous value and see if , or } is next if ((error = skip_child() )) { abandon(); return error; } if ((error = has_next_field().get(has_value) )) { abandon(); return error; } @@ -217,7 +219,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator // (We have already run through the object before, so we've already validated its structure. We // don't check errors in this bit.) _json_iter->set_position(_start_position + 1); - _json_iter->descend_to(_depth); + _json_iter->descend_to(depth()); // , _start_position); has_value = started_object(); while (_json_iter->position() < search_start) { diff --git a/tests/ondemand/ondemand_object_tests.cpp b/tests/ondemand/ondemand_object_tests.cpp index bbcf5e91..e844413c 100644 --- a/tests/ondemand/ondemand_object_tests.cpp +++ b/tests/ondemand/ondemand_object_tests.cpp @@ -508,8 +508,11 @@ namespace object_tests { SUBTEST("ondemand::value", test_ondemand_doc(json, [&](auto doc_result) { ondemand::value object; ASSERT_SUCCESS( doc_result["outer"].get(object) ); - ASSERT_EQUAL( object["a"].get_uint64().value_unsafe(), 1 ); - ASSERT_EQUAL( object["b"].get_uint64().value_unsafe(), 2 ); + uint64_t v; + ASSERT_SUCCESS( object["a"].get(v) ); + ASSERT_EQUAL( v, 1 ); + ASSERT_SUCCESS( object["b"].get(v) ); + ASSERT_EQUAL( v, 2 ); ASSERT_EQUAL( object["c/d"].get_uint64().value_unsafe(), 3 ); ASSERT_EQUAL( object["a"].get_uint64().value_unsafe(), 1 );