Store start index of each depth for safety

This commit is contained in:
John Keiser 2021-02-05 10:17:28 -08:00
parent b2de2dfd1b
commit 9934f65987
6 changed files with 51 additions and 29 deletions

View File

@ -20,12 +20,13 @@ simdjson_really_inline json_iterator &json_iterator::operator=(json_iterator &&o
}
// Builds an iterator over buf using the structural indexes held by _parser.
// The string buffer location is initialized to the start of the parser's string_buf.
simdjson_really_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept
: token(buf, _parser->dom_parser->structural_indexes.get()),
: token(buf, _parser->implementation->structural_indexes.get()),
parser{_parser},
_string_buf_loc{parser->string_buf.get()},
_depth{1}
_depth{0}
{
// Release the string buf so it can be reused by the next document
// Depth 1 is entered through descend_to() so that the start position for that
// depth is recorded in the parser's start_positions array.
descend_to(1);
logger::log_headers();
}
@ -69,7 +70,7 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::skip_child
}
// Now that we've considered the first value, we only increment/decrement for arrays/objects
auto end = &parser->dom_parser->structural_indexes[parser->dom_parser->n_structural_indexes];
auto end = &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes];
while (token.index <= end) {
switch (*advance()) {
case '[': case '{':
@ -102,19 +103,19 @@ simdjson_really_inline bool json_iterator::at_root() const noexcept {
}
// Returns a pointer to the first element of the parser's structural index array.
simdjson_really_inline token_position json_iterator::root_checkpoint() const noexcept {
return parser->dom_parser->structural_indexes.get();
return parser->implementation->structural_indexes.get();
}
// States (via SIMDJSON_ASSUME) that the iterator is at depth 1 and, where the
// compiler allows it, that the token index equals the first structural index.
simdjson_really_inline void json_iterator::assert_at_root() const noexcept {
SIMDJSON_ASSUME( _depth == 1 );
// Visual Studio Clang treats unique_ptr.get() as "side effecting."
#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
SIMDJSON_ASSUME( token.index == parser->dom_parser->structural_indexes.get() );
SIMDJSON_ASSUME( token.index == parser->implementation->structural_indexes.get() );
#endif
}
// True when the token index points at structural_indexes[n_structural_indexes],
// i.e. the slot immediately after the n_structural_indexes recorded entries.
simdjson_really_inline bool json_iterator::at_eof() const noexcept {
return token.index == &parser->dom_parser->structural_indexes[parser->dom_parser->n_structural_indexes];
return token.index == &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes];
}
simdjson_really_inline bool json_iterator::is_alive() const noexcept {
@ -153,9 +154,13 @@ simdjson_really_inline void json_iterator::ascend_to(depth_t parent_depth) noexc
}
// Descends to child_depth, using the current token index as the start position
// recorded for that depth (delegates to the two-argument overload).
simdjson_really_inline void json_iterator::descend_to(depth_t child_depth) noexcept {
descend_to(child_depth, token.index);
}
/**
 * Step down exactly one level to child_depth and remember where that level begins.
 *
 * @param child_depth the depth being entered; must be the current depth plus one.
 * @param start_position the token position stored as the start of child_depth.
 */
simdjson_really_inline void json_iterator::descend_to(depth_t child_depth, token_position start_position) noexcept {
  // The target depth must be a valid, positive depth ...
  SIMDJSON_ASSUME(child_depth >= 1);
  SIMDJSON_ASSUME(child_depth < INT32_MAX);
  // ... and exactly one level below where we currently are.
  SIMDJSON_ASSUME(_depth == child_depth - 1);

  // Record the start of the new level in the parser-owned table, then enter it.
  parser->start_positions[child_depth] = start_position;
  _depth = child_depth;
}
simdjson_really_inline depth_t json_iterator::depth() const noexcept {
@ -179,6 +184,9 @@ simdjson_really_inline token_position json_iterator::position() const noexcept {
simdjson_really_inline void json_iterator::set_position(token_position target_checkpoint) noexcept {
token.set_position(target_checkpoint);
}
// Returns the token position stored in the parser's start_positions array for
// the given depth (the value written by descend_to() when that depth was entered).
simdjson_really_inline token_position json_iterator::nested_start_position(depth_t depth) const noexcept {
return parser->start_positions[depth];
}
simdjson_really_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept {

View File

@ -148,6 +148,7 @@ public:
* @param child_depth the expected child depth.
*/
simdjson_really_inline void descend_to(depth_t child_depth) noexcept;
/**
 * Descend one level, recording an explicit start position for the new depth.
 *
 * @param child_depth the expected child depth.
 * @param start_position the token position to record as the start of child_depth.
 */
simdjson_really_inline void descend_to(depth_t child_depth, token_position start_position) noexcept;
/**
* Get current depth.
@ -180,6 +181,7 @@ public:
simdjson_really_inline token_position position() const noexcept;
simdjson_really_inline void set_position(token_position target_checkpoint) noexcept;
simdjson_really_inline token_position nested_start_position(depth_t depth) const noexcept;
protected:
simdjson_really_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept;

View File

@ -3,32 +3,29 @@ namespace SIMDJSON_IMPLEMENTATION {
namespace ondemand {
// (Re)allocates the string buffer, the per-depth start position table and the
// underlying implementation for documents up to new_capacity bytes and
// new_max_depth nesting levels. Returns SUCCESS immediately when a string buffer
// exists and the requested capacity and depth match the current ones.
simdjson_warn_unused simdjson_really_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept {
if (string_buf && new_capacity == _capacity && new_max_depth == _max_depth) { return SUCCESS; }
if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; }
// string_capacity copied from document::allocate
_capacity = 0;
_max_depth = 0;
size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64);
// NOTE(review): the result of this nothrow allocation is not checked for null
// anywhere in the visible code -- confirm a failed allocation is handled.
string_buf.reset(new (std::nothrow) uint8_t[string_capacity]);
if (dom_parser) {
SIMDJSON_TRY( dom_parser->set_capacity(new_capacity) );
SIMDJSON_TRY( dom_parser->set_max_depth(new_max_depth) );
// NOTE(review): also an unchecked nothrow allocation. The table has
// new_max_depth entries and descend_to() indexes it by depth -- confirm the
// depth can never reach new_max_depth.
start_positions.reset(new (std::nothrow) token_position[new_max_depth]);
if (implementation) {
SIMDJSON_TRY( implementation->set_capacity(new_capacity) );
SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) );
} else {
SIMDJSON_TRY( simdjson::active_implementation->create_dom_parser_implementation(new_capacity, new_max_depth, dom_parser) );
SIMDJSON_TRY( simdjson::active_implementation->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) );
}
_capacity = new_capacity;
_max_depth = new_max_depth;
return SUCCESS;
}
// Runs stage 1 over buf and returns a document positioned at its start.
// Buffers are (re)allocated first when the current capacity is smaller than
// buf.size() or no string buffer exists yet. Any error from allocation or
// stage 1 is returned through SIMDJSON_TRY.
simdjson_warn_unused simdjson_really_inline simdjson_result<document> parser::iterate(const padded_string &buf) & noexcept {
// Allocate if needed
if (_capacity < buf.size() || !string_buf) {
SIMDJSON_TRY( allocate(buf.size(), _max_depth) );
if (capacity() < buf.size() || !string_buf) {
SIMDJSON_TRY( allocate(buf.size(), max_depth()) );
}
// Run stage 1.
SIMDJSON_TRY( dom_parser->stage1(reinterpret_cast<const uint8_t *>(buf.data()), buf.size(), false) );
SIMDJSON_TRY( implementation->stage1(reinterpret_cast<const uint8_t *>(buf.data()), buf.size(), false) );
return document::start({ reinterpret_cast<const uint8_t *>(buf.data()), this });
}
@ -41,15 +38,23 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<document> parser::it
// Runs stage 1 over buf and returns a bare json_iterator over it, allocating
// first when the current capacity is smaller than buf.size().
// NOTE(review): unlike iterate(), the allocation guard here does not also test
// !string_buf -- confirm whether a zero-length buf on a fresh parser can reach
// the json_iterator constructor without a string buffer.
simdjson_warn_unused simdjson_really_inline simdjson_result<json_iterator> parser::iterate_raw(const padded_string &buf) & noexcept {
// Allocate if needed
if (_capacity < buf.size()) {
SIMDJSON_TRY( allocate(buf.size(), _max_depth) );
if (capacity() < buf.size()) {
SIMDJSON_TRY( allocate(buf.size(), max_depth()) );
}
// Run stage 1.
SIMDJSON_TRY( dom_parser->stage1(reinterpret_cast<const uint8_t *>(buf.data()), buf.size(), false) );
SIMDJSON_TRY( implementation->stage1(reinterpret_cast<const uint8_t *>(buf.data()), buf.size(), false) );
return json_iterator(reinterpret_cast<const uint8_t *>(buf.data()), this);
}
/**
 * Capacity reported by the underlying implementation.
 *
 * @return the implementation's capacity, or 0 when no implementation has been created yet.
 */
simdjson_really_inline size_t parser::capacity() const noexcept {
  // Nothing allocated yet: report zero capacity.
  if (!implementation) { return 0; }
  return implementation->capacity();
}
/**
 * Maximum depth reported by the underlying implementation.
 *
 * @return the implementation's max depth, or DEFAULT_MAX_DEPTH when no
 *         implementation has been created yet.
 */
simdjson_really_inline size_t parser::max_depth() const noexcept {
  // Before the first allocation, fall back to the default depth.
  if (!implementation) { return DEFAULT_MAX_DEPTH; }
  return implementation->max_depth();
}
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace simdjson

View File

@ -103,12 +103,14 @@ public:
*/
simdjson_warn_unused simdjson_result<json_iterator> iterate_raw(const padded_string &json) & noexcept;
simdjson_really_inline size_t capacity() const noexcept;
simdjson_really_inline size_t max_depth() const noexcept;
private:
/** @private [for benchmarking access] The implementation to use */
std::unique_ptr<internal::dom_parser_implementation> dom_parser{};
size_t _capacity{0};
size_t _max_depth{0};
std::unique_ptr<internal::dom_parser_implementation> implementation{};
std::unique_ptr<uint8_t[]> string_buf{};
std::unique_ptr<token_position[]> start_positions{};
/**
* Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length

View File

@ -24,7 +24,7 @@ simdjson_warn_unused simdjson_really_inline bool value_iterator::started_object(
_json_iter->ascend_to(depth()-1);
return false;
}
_json_iter->descend_to(depth()+1);
_json_iter->descend_to(depth()+1); //, _start_position+3); // skip {"key":
logger::log_start_value(*_json_iter, "object");
return true;
}
@ -38,7 +38,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<bool> value_iterator
_json_iter->ascend_to(depth()-1);
return false;
case ',':
_json_iter->descend_to(depth()+1);
_json_iter->descend_to(depth()+1); //, _json_iter->token.index+2); // index+2 skips "key":
return true;
default:
return _json_iter->report_error(TAPE_ERROR, "Missing comma between object fields");
@ -94,6 +94,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<bool> value_iterator
// ```
//
} else {
// if (_json_iter->nested_start_position(depth()) != _start_position+1) { return OUT_OF_ORDER_ITERATION; }
if ((error = skip_child() )) { abandon(); return error; }
if ((error = has_next_field().get(has_value) )) { abandon(); return error; }
}
@ -168,6 +169,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<bool> value_iterator
// ```
//
} else {
// if (_json_iter->nested_start_position(depth()) != _start_position) { return OUT_OF_ORDER_ITERATION; }
// Finish the previous value and see if , or } is next
if ((error = skip_child() )) { abandon(); return error; }
if ((error = has_next_field().get(has_value) )) { abandon(); return error; }
@ -217,7 +219,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<bool> value_iterator
// (We have already run through the object before, so we've already validated its structure. We
// don't check errors in this bit.)
_json_iter->set_position(_start_position + 1);
_json_iter->descend_to(_depth);
_json_iter->descend_to(depth()); // , _start_position);
has_value = started_object();
while (_json_iter->position() < search_start) {

View File

@ -508,8 +508,11 @@ namespace object_tests {
SUBTEST("ondemand::value", test_ondemand_doc(json, [&](auto doc_result) {
ondemand::value object;
ASSERT_SUCCESS( doc_result["outer"].get(object) );
ASSERT_EQUAL( object["a"].get_uint64().value_unsafe(), 1 );
ASSERT_EQUAL( object["b"].get_uint64().value_unsafe(), 2 );
uint64_t v;
ASSERT_SUCCESS( object["a"].get(v) );
ASSERT_EQUAL( v, 1 );
ASSERT_SUCCESS( object["b"].get(v) );
ASSERT_EQUAL( v, 2 );
ASSERT_EQUAL( object["c/d"].get_uint64().value_unsafe(), 3 );
ASSERT_EQUAL( object["a"].get_uint64().value_unsafe(), 1 );