Check for end object/array at top level
This avoids a very unlikely buffer overrun that can occur in a particular kind of invalid JSON: - the document is invalid with an unclosed top level array or object - the last thing in the document is a number that ends at EOF - the padding is filled entirely with numeric digits
This commit is contained in:
parent
9d747642fe
commit
bcab8d3abf
|
@ -50,6 +50,11 @@ simdjson_really_inline simdjson_result<array> array::start(value_iterator &iter)
|
|||
SIMDJSON_TRY( iter.start_array().get(has_value) );
|
||||
return array(iter);
|
||||
}
|
||||
simdjson_really_inline simdjson_result<array> array::start_root(value_iterator &iter) noexcept {
|
||||
simdjson_unused bool has_value;
|
||||
SIMDJSON_TRY( iter.start_root_array().get(has_value) );
|
||||
return array(iter);
|
||||
}
|
||||
simdjson_really_inline array array::started(value_iterator &iter) noexcept {
|
||||
simdjson_unused bool has_value = iter.started_array();
|
||||
return array(iter);
|
||||
|
|
|
@ -41,6 +41,15 @@ protected:
|
|||
* @error INCORRECT_TYPE if the iterator is not at [.
|
||||
*/
|
||||
static simdjson_really_inline simdjson_result<array> start(value_iterator &iter) noexcept;
|
||||
/**
|
||||
* Begin array iteration from the root.
|
||||
*
|
||||
* @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the
|
||||
* resulting array.
|
||||
* @error INCORRECT_TYPE if the iterator is not at [.
|
||||
* @error TAPE_ERROR if there is no closing ] at the end of the document.
|
||||
*/
|
||||
static simdjson_really_inline simdjson_result<array> start_root(value_iterator &iter) noexcept;
|
||||
/**
|
||||
* Begin array iteration.
|
||||
*
|
||||
|
@ -68,6 +77,7 @@ protected:
|
|||
value_iterator iter{};
|
||||
|
||||
friend class value;
|
||||
friend class document;
|
||||
friend struct simdjson_result<value>;
|
||||
friend struct simdjson_result<array>;
|
||||
friend class array_iterator;
|
||||
|
|
|
@ -26,10 +26,12 @@ simdjson_really_inline value document::get_root_value() noexcept {
|
|||
}
|
||||
|
||||
simdjson_really_inline simdjson_result<array> document::get_array() & noexcept {
|
||||
return get_root_value().get_array();
|
||||
auto value = get_root_value_iterator();
|
||||
return array::start_root(value);
|
||||
}
|
||||
simdjson_really_inline simdjson_result<object> document::get_object() & noexcept {
|
||||
return get_root_value().get_object();
|
||||
auto value = get_root_value_iterator();
|
||||
return object::start_root(value);
|
||||
}
|
||||
simdjson_really_inline simdjson_result<uint64_t> document::get_uint64() noexcept {
|
||||
return get_root_value_iterator().get_root_uint64();
|
||||
|
|
|
@ -145,6 +145,14 @@ simdjson_really_inline uint32_t json_iterator::peek_length(token_position positi
|
|||
return token.peek_length(position);
|
||||
}
|
||||
|
||||
simdjson_really_inline token_position json_iterator::last_document_position() const noexcept {
|
||||
SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0);
|
||||
return &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes - 1];
|
||||
}
|
||||
simdjson_really_inline const uint8_t *json_iterator::peek_last() const noexcept {
|
||||
return token.peek(last_document_position());
|
||||
}
|
||||
|
||||
simdjson_really_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept {
|
||||
SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1);
|
||||
SIMDJSON_ASSUME(_depth == parent_depth + 1);
|
||||
|
|
|
@ -130,6 +130,15 @@ public:
|
|||
* @param index The position of the token to retrieve.
|
||||
*/
|
||||
simdjson_really_inline uint32_t peek_length(token_position position) const noexcept;
|
||||
/**
|
||||
* Get the JSON text for the last token in the document.
|
||||
*
|
||||
* This is not null-terminated; it is a view into the JSON.
|
||||
*
|
||||
* TODO consider a string_view, assuming the length will get stripped out by the optimizer when
|
||||
* it isn't used ...
|
||||
*/
|
||||
simdjson_really_inline const uint8_t *peek_last() const noexcept;
|
||||
|
||||
/**
|
||||
* Ascend one level.
|
||||
|
@ -188,6 +197,7 @@ public:
|
|||
|
||||
protected:
|
||||
simdjson_really_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept;
|
||||
simdjson_really_inline token_position last_document_position() const noexcept;
|
||||
|
||||
friend class document;
|
||||
friend class object;
|
||||
|
|
|
@ -38,6 +38,11 @@ simdjson_really_inline simdjson_result<object> object::start(value_iterator &ite
|
|||
SIMDJSON_TRY( iter.start_object().get(has_value) );
|
||||
return object(iter);
|
||||
}
|
||||
simdjson_really_inline simdjson_result<object> object::start_root(value_iterator &iter) noexcept {
|
||||
simdjson_unused bool has_value;
|
||||
SIMDJSON_TRY( iter.start_root_object().get(has_value) );
|
||||
return object(iter);
|
||||
}
|
||||
simdjson_really_inline object object::started(value_iterator &iter) noexcept {
|
||||
simdjson_unused bool has_value = iter.started_object();
|
||||
return iter;
|
||||
|
|
|
@ -72,6 +72,7 @@ public:
|
|||
|
||||
protected:
|
||||
static simdjson_really_inline simdjson_result<object> start(value_iterator &iter) noexcept;
|
||||
static simdjson_really_inline simdjson_result<object> start_root(value_iterator &iter) noexcept;
|
||||
static simdjson_really_inline object started(value_iterator &iter) noexcept;
|
||||
static simdjson_really_inline object resume(const value_iterator &iter) noexcept;
|
||||
simdjson_really_inline object(const value_iterator &iter) noexcept;
|
||||
|
|
|
@ -16,6 +16,13 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<bool> value_iterator
|
|||
return started_object();
|
||||
}
|
||||
|
||||
simdjson_warn_unused simdjson_really_inline simdjson_result<bool> value_iterator::start_root_object() noexcept {
|
||||
bool result;
|
||||
SIMDJSON_TRY( start_object().get(result) );
|
||||
if (*_json_iter->peek_last() != '}') { return _json_iter->report_error(TAPE_ERROR, "object invalid: { at beginning of document unmatched by } at end of document"); }
|
||||
return result;
|
||||
}
|
||||
|
||||
simdjson_warn_unused simdjson_really_inline bool value_iterator::started_object() noexcept {
|
||||
assert_at_container_start();
|
||||
if (*_json_iter->peek() == '}') {
|
||||
|
@ -278,6 +285,13 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<bool> value_iterator
|
|||
return started_array();
|
||||
}
|
||||
|
||||
simdjson_warn_unused simdjson_really_inline simdjson_result<bool> value_iterator::start_root_array() noexcept {
|
||||
bool result;
|
||||
SIMDJSON_TRY( start_array().get(result) );
|
||||
if (*_json_iter->peek_last() != ']') { return _json_iter->report_error(TAPE_ERROR, "array invalid: [ at beginning of document unmatched by ] at end of document"); }
|
||||
return result;
|
||||
}
|
||||
|
||||
simdjson_warn_unused simdjson_really_inline bool value_iterator::started_array() noexcept {
|
||||
assert_at_container_start();
|
||||
if (*_json_iter->peek() == ']') {
|
||||
|
|
|
@ -97,12 +97,13 @@ public:
|
|||
*/
|
||||
simdjson_warn_unused simdjson_really_inline simdjson_result<bool> start_object() noexcept;
|
||||
/**
|
||||
* Check for an opening { and start an object iteration.
|
||||
* Start an object iteration from the root.
|
||||
*
|
||||
* @returns Whether the object had any fields (returns false for empty).
|
||||
* @error INCORRECT_TYPE if there is no opening {
|
||||
* @error TAPE_ERROR if there is no matching } at end of document
|
||||
*/
|
||||
simdjson_warn_unused simdjson_really_inline simdjson_result<bool> try_start_object() noexcept;
|
||||
simdjson_warn_unused simdjson_really_inline simdjson_result<bool> start_root_object() noexcept;
|
||||
|
||||
/**
|
||||
* Start an object iteration after the user has already checked and moved past the {.
|
||||
|
@ -203,11 +204,18 @@ public:
|
|||
/**
|
||||
* Check for an opening [ and start an array iteration.
|
||||
*
|
||||
* @param json A pointer to the potential [.
|
||||
* @returns Whether the array had any elements (returns false for empty).
|
||||
* @error INCORRECT_TYPE If there is no [.
|
||||
*/
|
||||
simdjson_warn_unused simdjson_really_inline simdjson_result<bool> start_array() noexcept;
|
||||
/**
|
||||
* Check for an opening [ and start an array iteration while at the root.
|
||||
*
|
||||
* @returns Whether the array had any elements (returns false for empty).
|
||||
* @error INCORRECT_TYPE If there is no [.
|
||||
* @error TAPE_ERROR if there is no matching ] at end of document
|
||||
*/
|
||||
simdjson_warn_unused simdjson_really_inline simdjson_result<bool> start_root_array() noexcept;
|
||||
|
||||
/**
|
||||
* Start an array iteration after the user has already checked and moved past the [.
|
||||
|
|
|
@ -68,14 +68,12 @@ namespace array_error_tests {
|
|||
}
|
||||
bool top_level_array_iterate_unclosed_error() {
|
||||
TEST_START();
|
||||
ONDEMAND_SUBTEST("unclosed extra comma", "[,", assert_iterate(doc, { INCORRECT_TYPE, TAPE_ERROR }));
|
||||
ONDEMAND_SUBTEST("unclosed ", "[1 ", assert_iterate(doc, { int64_t(1) }, { TAPE_ERROR }));
|
||||
// TODO These pass the user values that may run past the end of the buffer if they aren't careful
|
||||
// In particular, if the padding is decorated with the wrong values, we could cause overrun!
|
||||
ONDEMAND_SUBTEST("unclosed extra comma", "[,,", assert_iterate(doc, { INCORRECT_TYPE, INCORRECT_TYPE, TAPE_ERROR }));
|
||||
ONDEMAND_SUBTEST("unclosed ", "[1,", assert_iterate(doc, { int64_t(1) }, { INCORRECT_TYPE, TAPE_ERROR }));
|
||||
ONDEMAND_SUBTEST("unclosed ", "[1", assert_iterate(doc, { NUMBER_ERROR, TAPE_ERROR }));
|
||||
ONDEMAND_SUBTEST("unclosed ", "[", assert_iterate(doc, { INCORRECT_TYPE, TAPE_ERROR }));
|
||||
ONDEMAND_SUBTEST("unclosed extra comma", "[,", assert_iterate(doc, { TAPE_ERROR }));
|
||||
ONDEMAND_SUBTEST("unclosed ", "[1 ", assert_iterate(doc, { TAPE_ERROR }));
|
||||
ONDEMAND_SUBTEST("unclosed extra comma", "[,,", assert_iterate(doc, { TAPE_ERROR }));
|
||||
ONDEMAND_SUBTEST("unclosed ", "[1,", assert_iterate(doc, { TAPE_ERROR }));
|
||||
ONDEMAND_SUBTEST("unclosed ", "[1", assert_iterate(doc, { TAPE_ERROR }));
|
||||
ONDEMAND_SUBTEST("unclosed ", "[", assert_iterate(doc, { TAPE_ERROR }));
|
||||
TEST_SUCCEED();
|
||||
}
|
||||
|
||||
|
|
|
@ -61,13 +61,13 @@ namespace object_error_tests {
|
|||
}
|
||||
bool object_iterate_unclosed_error() {
|
||||
TEST_START();
|
||||
ONDEMAND_SUBTEST("unclosed", R"({ "a": 1, )", assert_iterate_object(doc.get_object(), { "a" }, { int64_t(1) }, { TAPE_ERROR }));
|
||||
ONDEMAND_SUBTEST("unclosed", R"({ "a": 1, )", assert_iterate_object(doc.get_object(), { TAPE_ERROR }));
|
||||
// TODO These next two pass the user a value that may run past the end of the buffer if they aren't careful.
|
||||
// In particular, if the padding is decorated with the wrong values, we could cause overrun!
|
||||
ONDEMAND_SUBTEST("unclosed", R"({ "a": 1 )", assert_iterate_object(doc.get_object(), { "a" }, { int64_t(1) }, { TAPE_ERROR }));
|
||||
ONDEMAND_SUBTEST("unclosed", R"({ "a": )", assert_iterate_object(doc.get_object(), { INCORRECT_TYPE, TAPE_ERROR }));
|
||||
ONDEMAND_SUBTEST("unclosed", R"({ "a" )", assert_iterate_object(doc.get_object(), { TAPE_ERROR }));
|
||||
ONDEMAND_SUBTEST("unclosed", R"({ )", assert_iterate_object(doc.get_object(), { TAPE_ERROR }));
|
||||
ONDEMAND_SUBTEST("unclosed", R"({ "a": 1 )", assert_iterate_object(doc.get_object(), { TAPE_ERROR }));
|
||||
ONDEMAND_SUBTEST("unclosed", R"({ "a": )", assert_iterate_object(doc.get_object(), { TAPE_ERROR }));
|
||||
ONDEMAND_SUBTEST("unclosed", R"({ "a" )", assert_iterate_object(doc.get_object(), { TAPE_ERROR }));
|
||||
ONDEMAND_SUBTEST("unclosed", R"({ )", assert_iterate_object(doc.get_object(), { TAPE_ERROR }));
|
||||
TEST_SUCCEED();
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue