Check for end object/array at top level

This avoids a very unlikely buffer overrun that can occur in a particular kind of invalid JSON:
- the document is invalid with an unclosed top level array or object
- the last thing in the document is a number that ends at EOF
- the padding is filled entirely with numeric digits
This commit is contained in:
John Keiser 2021-02-21 13:23:44 -08:00 committed by John Keiser
parent 9d747642fe
commit bcab8d3abf
11 changed files with 79 additions and 18 deletions

View File

@ -50,6 +50,11 @@ simdjson_really_inline simdjson_result<array> array::start(value_iterator &iter)
SIMDJSON_TRY( iter.start_array().get(has_value) ); SIMDJSON_TRY( iter.start_array().get(has_value) );
return array(iter); return array(iter);
} }
simdjson_really_inline simdjson_result<array> array::start_root(value_iterator &iter) noexcept {
simdjson_unused bool has_value;
SIMDJSON_TRY( iter.start_root_array().get(has_value) );
return array(iter);
}
simdjson_really_inline array array::started(value_iterator &iter) noexcept { simdjson_really_inline array array::started(value_iterator &iter) noexcept {
simdjson_unused bool has_value = iter.started_array(); simdjson_unused bool has_value = iter.started_array();
return array(iter); return array(iter);

View File

@ -41,6 +41,15 @@ protected:
* @error INCORRECT_TYPE if the iterator is not at [. * @error INCORRECT_TYPE if the iterator is not at [.
*/ */
static simdjson_really_inline simdjson_result<array> start(value_iterator &iter) noexcept; static simdjson_really_inline simdjson_result<array> start(value_iterator &iter) noexcept;
/**
* Begin array iteration from the root.
*
* @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the
* resulting array.
* @error INCORRECT_TYPE if the iterator is not at [.
* @error TAPE_ERROR if there is no closing ] at the end of the document.
*/
static simdjson_really_inline simdjson_result<array> start_root(value_iterator &iter) noexcept;
/** /**
* Begin array iteration. * Begin array iteration.
* *
@ -68,6 +77,7 @@ protected:
value_iterator iter{}; value_iterator iter{};
friend class value; friend class value;
friend class document;
friend struct simdjson_result<value>; friend struct simdjson_result<value>;
friend struct simdjson_result<array>; friend struct simdjson_result<array>;
friend class array_iterator; friend class array_iterator;

View File

@ -26,10 +26,12 @@ simdjson_really_inline value document::get_root_value() noexcept {
} }
simdjson_really_inline simdjson_result<array> document::get_array() & noexcept { simdjson_really_inline simdjson_result<array> document::get_array() & noexcept {
return get_root_value().get_array(); auto value = get_root_value_iterator();
return array::start_root(value);
} }
simdjson_really_inline simdjson_result<object> document::get_object() & noexcept { simdjson_really_inline simdjson_result<object> document::get_object() & noexcept {
return get_root_value().get_object(); auto value = get_root_value_iterator();
return object::start_root(value);
} }
simdjson_really_inline simdjson_result<uint64_t> document::get_uint64() noexcept { simdjson_really_inline simdjson_result<uint64_t> document::get_uint64() noexcept {
return get_root_value_iterator().get_root_uint64(); return get_root_value_iterator().get_root_uint64();

View File

@ -145,6 +145,14 @@ simdjson_really_inline uint32_t json_iterator::peek_length(token_position positi
return token.peek_length(position); return token.peek_length(position);
} }
simdjson_really_inline token_position json_iterator::last_document_position() const noexcept {
SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0);
return &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes - 1];
}
simdjson_really_inline const uint8_t *json_iterator::peek_last() const noexcept {
return token.peek(last_document_position());
}
simdjson_really_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { simdjson_really_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept {
SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1);
SIMDJSON_ASSUME(_depth == parent_depth + 1); SIMDJSON_ASSUME(_depth == parent_depth + 1);

View File

@ -130,6 +130,15 @@ public:
* @param index The position of the token to retrieve. * @param index The position of the token to retrieve.
*/ */
simdjson_really_inline uint32_t peek_length(token_position position) const noexcept; simdjson_really_inline uint32_t peek_length(token_position position) const noexcept;
/**
* Get the JSON text for the last token in the document.
*
* This is not null-terminated; it is a view into the JSON.
*
* TODO consider a string_view, assuming the length will get stripped out by the optimizer when
* it isn't used ...
*/
simdjson_really_inline const uint8_t *peek_last() const noexcept;
/** /**
* Ascend one level. * Ascend one level.
@ -188,6 +197,7 @@ public:
protected: protected:
simdjson_really_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; simdjson_really_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept;
simdjson_really_inline token_position last_document_position() const noexcept;
friend class document; friend class document;
friend class object; friend class object;

View File

@ -38,6 +38,11 @@ simdjson_really_inline simdjson_result<object> object::start(value_iterator &ite
SIMDJSON_TRY( iter.start_object().get(has_value) ); SIMDJSON_TRY( iter.start_object().get(has_value) );
return object(iter); return object(iter);
} }
simdjson_really_inline simdjson_result<object> object::start_root(value_iterator &iter) noexcept {
simdjson_unused bool has_value;
SIMDJSON_TRY( iter.start_root_object().get(has_value) );
return object(iter);
}
simdjson_really_inline object object::started(value_iterator &iter) noexcept { simdjson_really_inline object object::started(value_iterator &iter) noexcept {
simdjson_unused bool has_value = iter.started_object(); simdjson_unused bool has_value = iter.started_object();
return iter; return iter;

View File

@ -72,6 +72,7 @@ public:
protected: protected:
static simdjson_really_inline simdjson_result<object> start(value_iterator &iter) noexcept; static simdjson_really_inline simdjson_result<object> start(value_iterator &iter) noexcept;
static simdjson_really_inline simdjson_result<object> start_root(value_iterator &iter) noexcept;
static simdjson_really_inline object started(value_iterator &iter) noexcept; static simdjson_really_inline object started(value_iterator &iter) noexcept;
static simdjson_really_inline object resume(const value_iterator &iter) noexcept; static simdjson_really_inline object resume(const value_iterator &iter) noexcept;
simdjson_really_inline object(const value_iterator &iter) noexcept; simdjson_really_inline object(const value_iterator &iter) noexcept;

View File

@ -16,6 +16,13 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<bool> value_iterator
return started_object(); return started_object();
} }
simdjson_warn_unused simdjson_really_inline simdjson_result<bool> value_iterator::start_root_object() noexcept {
bool result;
SIMDJSON_TRY( start_object().get(result) );
if (*_json_iter->peek_last() != '}') { return _json_iter->report_error(TAPE_ERROR, "object invalid: { at beginning of document unmatched by } at end of document"); }
return result;
}
simdjson_warn_unused simdjson_really_inline bool value_iterator::started_object() noexcept { simdjson_warn_unused simdjson_really_inline bool value_iterator::started_object() noexcept {
assert_at_container_start(); assert_at_container_start();
if (*_json_iter->peek() == '}') { if (*_json_iter->peek() == '}') {
@ -278,6 +285,13 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<bool> value_iterator
return started_array(); return started_array();
} }
simdjson_warn_unused simdjson_really_inline simdjson_result<bool> value_iterator::start_root_array() noexcept {
bool result;
SIMDJSON_TRY( start_array().get(result) );
if (*_json_iter->peek_last() != ']') { return _json_iter->report_error(TAPE_ERROR, "array invalid: [ at beginning of document unmatched by ] at end of document"); }
return result;
}
simdjson_warn_unused simdjson_really_inline bool value_iterator::started_array() noexcept { simdjson_warn_unused simdjson_really_inline bool value_iterator::started_array() noexcept {
assert_at_container_start(); assert_at_container_start();
if (*_json_iter->peek() == ']') { if (*_json_iter->peek() == ']') {

View File

@ -97,12 +97,13 @@ public:
*/ */
simdjson_warn_unused simdjson_really_inline simdjson_result<bool> start_object() noexcept; simdjson_warn_unused simdjson_really_inline simdjson_result<bool> start_object() noexcept;
/** /**
* Check for an opening { and start an object iteration. * Start an object iteration from the root.
* *
* @returns Whether the object had any fields (returns false for empty). * @returns Whether the object had any fields (returns false for empty).
* @error INCORRECT_TYPE if there is no opening { * @error INCORRECT_TYPE if there is no opening {
* @error TAPE_ERROR if there is no matching } at end of document
*/ */
simdjson_warn_unused simdjson_really_inline simdjson_result<bool> try_start_object() noexcept; simdjson_warn_unused simdjson_really_inline simdjson_result<bool> start_root_object() noexcept;
/** /**
* Start an object iteration after the user has already checked and moved past the {. * Start an object iteration after the user has already checked and moved past the {.
@ -203,11 +204,18 @@ public:
/** /**
* Check for an opening [ and start an array iteration. * Check for an opening [ and start an array iteration.
* *
* @param json A pointer to the potential [.
* @returns Whether the array had any elements (returns false for empty). * @returns Whether the array had any elements (returns false for empty).
* @error INCORRECT_TYPE If there is no [. * @error INCORRECT_TYPE If there is no [.
*/ */
simdjson_warn_unused simdjson_really_inline simdjson_result<bool> start_array() noexcept; simdjson_warn_unused simdjson_really_inline simdjson_result<bool> start_array() noexcept;
/**
* Check for an opening [ and start an array iteration while at the root.
*
* @returns Whether the array had any elements (returns false for empty).
* @error INCORRECT_TYPE If there is no [.
* @error TAPE_ERROR if there is no matching ] at end of document
*/
simdjson_warn_unused simdjson_really_inline simdjson_result<bool> start_root_array() noexcept;
/** /**
* Start an array iteration after the user has already checked and moved past the [. * Start an array iteration after the user has already checked and moved past the [.

View File

@ -68,14 +68,12 @@ namespace array_error_tests {
} }
bool top_level_array_iterate_unclosed_error() { bool top_level_array_iterate_unclosed_error() {
TEST_START(); TEST_START();
ONDEMAND_SUBTEST("unclosed extra comma", "[,", assert_iterate(doc, { INCORRECT_TYPE, TAPE_ERROR })); ONDEMAND_SUBTEST("unclosed extra comma", "[,", assert_iterate(doc, { TAPE_ERROR }));
ONDEMAND_SUBTEST("unclosed ", "[1 ", assert_iterate(doc, { int64_t(1) }, { TAPE_ERROR })); ONDEMAND_SUBTEST("unclosed ", "[1 ", assert_iterate(doc, { TAPE_ERROR }));
// TODO These pass the user values that may run past the end of the buffer if they aren't careful ONDEMAND_SUBTEST("unclosed extra comma", "[,,", assert_iterate(doc, { TAPE_ERROR }));
// In particular, if the padding is decorated with the wrong values, we could cause overrun! ONDEMAND_SUBTEST("unclosed ", "[1,", assert_iterate(doc, { TAPE_ERROR }));
ONDEMAND_SUBTEST("unclosed extra comma", "[,,", assert_iterate(doc, { INCORRECT_TYPE, INCORRECT_TYPE, TAPE_ERROR })); ONDEMAND_SUBTEST("unclosed ", "[1", assert_iterate(doc, { TAPE_ERROR }));
ONDEMAND_SUBTEST("unclosed ", "[1,", assert_iterate(doc, { int64_t(1) }, { INCORRECT_TYPE, TAPE_ERROR })); ONDEMAND_SUBTEST("unclosed ", "[", assert_iterate(doc, { TAPE_ERROR }));
ONDEMAND_SUBTEST("unclosed ", "[1", assert_iterate(doc, { NUMBER_ERROR, TAPE_ERROR }));
ONDEMAND_SUBTEST("unclosed ", "[", assert_iterate(doc, { INCORRECT_TYPE, TAPE_ERROR }));
TEST_SUCCEED(); TEST_SUCCEED();
} }

View File

@ -61,13 +61,13 @@ namespace object_error_tests {
} }
bool object_iterate_unclosed_error() { bool object_iterate_unclosed_error() {
TEST_START(); TEST_START();
ONDEMAND_SUBTEST("unclosed", R"({ "a": 1, )", assert_iterate_object(doc.get_object(), { "a" }, { int64_t(1) }, { TAPE_ERROR })); ONDEMAND_SUBTEST("unclosed", R"({ "a": 1, )", assert_iterate_object(doc.get_object(), { TAPE_ERROR }));
// TODO These next two pass the user a value that may run past the end of the buffer if they aren't careful. // TODO These next two pass the user a value that may run past the end of the buffer if they aren't careful.
// In particular, if the padding is decorated with the wrong values, we could cause overrun! // In particular, if the padding is decorated with the wrong values, we could cause overrun!
ONDEMAND_SUBTEST("unclosed", R"({ "a": 1 )", assert_iterate_object(doc.get_object(), { "a" }, { int64_t(1) }, { TAPE_ERROR })); ONDEMAND_SUBTEST("unclosed", R"({ "a": 1 )", assert_iterate_object(doc.get_object(), { TAPE_ERROR }));
ONDEMAND_SUBTEST("unclosed", R"({ "a": )", assert_iterate_object(doc.get_object(), { INCORRECT_TYPE, TAPE_ERROR })); ONDEMAND_SUBTEST("unclosed", R"({ "a": )", assert_iterate_object(doc.get_object(), { TAPE_ERROR }));
ONDEMAND_SUBTEST("unclosed", R"({ "a" )", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); ONDEMAND_SUBTEST("unclosed", R"({ "a" )", assert_iterate_object(doc.get_object(), { TAPE_ERROR }));
ONDEMAND_SUBTEST("unclosed", R"({ )", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); ONDEMAND_SUBTEST("unclosed", R"({ )", assert_iterate_object(doc.get_object(), { TAPE_ERROR }));
TEST_SUCCEED(); TEST_SUCCEED();
} }