From bcab8d3abfc032bf3c49c3d018f46a0584ba2998 Mon Sep 17 00:00:00 2001 From: John Keiser Date: Sun, 21 Feb 2021 13:23:44 -0800 Subject: [PATCH] Check for end object/array at top level This avoids a very unlikely buffer overrun that can occur in a particular kind of invalid JSON: - the document is invalid with an unclosed top level array or object - the last thing in the document is a number that ends at EOF - the padding is filled entirely with numeric digits --- include/simdjson/generic/ondemand/array-inl.h | 5 +++++ include/simdjson/generic/ondemand/array.h | 10 ++++++++++ include/simdjson/generic/ondemand/document-inl.h | 6 ++++-- .../simdjson/generic/ondemand/json_iterator-inl.h | 8 ++++++++ include/simdjson/generic/ondemand/json_iterator.h | 10 ++++++++++ include/simdjson/generic/ondemand/object-inl.h | 5 +++++ include/simdjson/generic/ondemand/object.h | 1 + .../simdjson/generic/ondemand/value_iterator-inl.h | 14 ++++++++++++++ include/simdjson/generic/ondemand/value_iterator.h | 14 +++++++++++--- tests/ondemand/ondemand_array_error_tests.cpp | 14 ++++++-------- tests/ondemand/ondemand_object_error_tests.cpp | 10 +++++----- 11 files changed, 79 insertions(+), 18 deletions(-) diff --git a/include/simdjson/generic/ondemand/array-inl.h b/include/simdjson/generic/ondemand/array-inl.h index 1018e9ac..048ab356 100644 --- a/include/simdjson/generic/ondemand/array-inl.h +++ b/include/simdjson/generic/ondemand/array-inl.h @@ -50,6 +50,11 @@ simdjson_really_inline simdjson_result array::start(value_iterator &iter) SIMDJSON_TRY( iter.start_array().get(has_value) ); return array(iter); } +simdjson_really_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} simdjson_really_inline array array::started(value_iterator &iter) noexcept { simdjson_unused bool has_value = iter.started_array(); return array(iter); diff --git a/include/simdjson/generic/ondemand/array.h b/include/simdjson/generic/ondemand/array.h index 867d93af..656c321e 100644 --- a/include/simdjson/generic/ondemand/array.h +++ b/include/simdjson/generic/ondemand/array.h @@ -41,6 +41,15 @@ protected: * @error INCORRECT_TYPE if the iterator is not at [. */ static simdjson_really_inline simdjson_result start(value_iterator &iter) noexcept; + /** + * Begin array iteration from the root. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + * @error TAPE_ERROR if there is no closing ] at the end of the document. + */ + static simdjson_really_inline simdjson_result start_root(value_iterator &iter) noexcept; /** * Begin array iteration. * @@ -68,6 +77,7 @@ protected: value_iterator iter{}; friend class value; + friend class document; friend struct simdjson_result; friend struct simdjson_result; friend class array_iterator; diff --git a/include/simdjson/generic/ondemand/document-inl.h b/include/simdjson/generic/ondemand/document-inl.h index 8053e443..f532fdf9 100644 --- a/include/simdjson/generic/ondemand/document-inl.h +++ b/include/simdjson/generic/ondemand/document-inl.h @@ -26,10 +26,12 @@ simdjson_really_inline value document::get_root_value() noexcept { } simdjson_really_inline simdjson_result document::get_array() & noexcept { - return get_root_value().get_array(); + auto value = get_root_value_iterator(); + return array::start_root(value); } simdjson_really_inline simdjson_result document::get_object() & noexcept { - return get_root_value().get_object(); + auto value = get_root_value_iterator(); + return object::start_root(value); } simdjson_really_inline simdjson_result document::get_uint64() noexcept { return get_root_value_iterator().get_root_uint64(); diff --git a/include/simdjson/generic/ondemand/json_iterator-inl.h b/include/simdjson/generic/ondemand/json_iterator-inl.h index 04263893..86ed9c09 100644 --- a/include/simdjson/generic/ondemand/json_iterator-inl.h +++ b/include/simdjson/generic/ondemand/json_iterator-inl.h @@ -145,6 +145,14 @@ simdjson_really_inline uint32_t json_iterator::peek_length(token_position positi return token.peek_length(position); } +simdjson_really_inline token_position json_iterator::last_document_position() const noexcept { + SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + return &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes - 1]; +} +simdjson_really_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_document_position()); +} + simdjson_really_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); SIMDJSON_ASSUME(_depth == parent_depth + 1); diff --git a/include/simdjson/generic/ondemand/json_iterator.h b/include/simdjson/generic/ondemand/json_iterator.h index d423217c..05626440 100644 --- a/include/simdjson/generic/ondemand/json_iterator.h +++ b/include/simdjson/generic/ondemand/json_iterator.h @@ -130,6 +130,15 @@ public: * @param index The position of the token to retrieve. */ simdjson_really_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the JSON text for the last token in the document. + * + * This is not null-terminated; it is a view into the JSON. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_really_inline const uint8_t *peek_last() const noexcept; /** * Ascend one level. @@ -188,6 +197,7 @@ public: protected: simdjson_really_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; + simdjson_really_inline token_position last_document_position() const noexcept; friend class document; friend class object; diff --git a/include/simdjson/generic/ondemand/object-inl.h b/include/simdjson/generic/ondemand/object-inl.h index b1d8620c..0789b1b6 100644 --- a/include/simdjson/generic/ondemand/object-inl.h +++ b/include/simdjson/generic/ondemand/object-inl.h @@ -38,6 +38,11 @@ simdjson_really_inline simdjson_result object::start(value_iterator &ite SIMDJSON_TRY( iter.start_object().get(has_value) ); return object(iter); } +simdjson_really_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_object().get(has_value) ); + return object(iter); +} simdjson_really_inline object object::started(value_iterator &iter) noexcept { simdjson_unused bool has_value = iter.started_object(); return iter; diff --git a/include/simdjson/generic/ondemand/object.h b/include/simdjson/generic/ondemand/object.h index 23e49090..1233bff5 100644 --- a/include/simdjson/generic/ondemand/object.h +++ b/include/simdjson/generic/ondemand/object.h @@ -72,6 +72,7 @@ public: protected: static simdjson_really_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_really_inline simdjson_result start_root(value_iterator &iter) noexcept; static simdjson_really_inline object started(value_iterator &iter) noexcept; static simdjson_really_inline object resume(const value_iterator &iter) noexcept; simdjson_really_inline object(const value_iterator &iter) noexcept; diff --git a/include/simdjson/generic/ondemand/value_iterator-inl.h b/include/simdjson/generic/ondemand/value_iterator-inl.h index 8c7dcc2f..5b84e142 100644 --- a/include/simdjson/generic/ondemand/value_iterator-inl.h +++ b/include/simdjson/generic/ondemand/value_iterator-inl.h @@ -16,6 +16,13 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator return started_object(); } +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_root_object() noexcept { + bool result; + SIMDJSON_TRY( start_object().get(result) ); + if (*_json_iter->peek_last() != '}') { return _json_iter->report_error(TAPE_ERROR, "object invalid: { at beginning of document unmatched by } at end of document"); } + return result; +} + simdjson_warn_unused simdjson_really_inline bool value_iterator::started_object() noexcept { assert_at_container_start(); if (*_json_iter->peek() == '}') { @@ -278,6 +285,13 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator return started_array(); } +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_root_array() noexcept { + bool result; + SIMDJSON_TRY( start_array().get(result) ); + if (*_json_iter->peek_last() != ']') { return _json_iter->report_error(TAPE_ERROR, "array invalid: [ at beginning of document unmatched by ] at end of document"); } + return result; +} + simdjson_warn_unused simdjson_really_inline bool value_iterator::started_array() noexcept { assert_at_container_start(); if (*_json_iter->peek() == ']') { diff --git a/include/simdjson/generic/ondemand/value_iterator.h b/include/simdjson/generic/ondemand/value_iterator.h index 0e49a65e..1bb0e602 100644 --- a/include/simdjson/generic/ondemand/value_iterator.h +++ b/include/simdjson/generic/ondemand/value_iterator.h @@ -97,12 +97,13 @@ public: */ simdjson_warn_unused simdjson_really_inline simdjson_result start_object() noexcept; /** - * Check for an opening { and start an object iteration. + * Start an object iteration from the root. * * @returns Whether the object had any fields (returns false for empty). * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document */ - simdjson_warn_unused simdjson_really_inline simdjson_result try_start_object() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result start_root_object() noexcept; /** * Start an object iteration after the user has already checked and moved past the {. @@ -203,11 +204,18 @@ public: /** * Check for an opening [ and start an array iteration. * - * @param json A pointer to the potential [. * @returns Whether the array had any elements (returns false for empty). * @error INCORRECT_TYPE If there is no [. */ simdjson_warn_unused simdjson_really_inline simdjson_result start_array() noexcept; + /** + * Check for an opening [ and start an array iteration while at the root. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_really_inline simdjson_result start_root_array() noexcept; /** * Start an array iteration after the user has already checked and moved past the [. diff --git a/tests/ondemand/ondemand_array_error_tests.cpp b/tests/ondemand/ondemand_array_error_tests.cpp index 5bb0970e..12a5b64f 100644 --- a/tests/ondemand/ondemand_array_error_tests.cpp +++ b/tests/ondemand/ondemand_array_error_tests.cpp @@ -68,14 +68,12 @@ namespace array_error_tests { } bool top_level_array_iterate_unclosed_error() { TEST_START(); - ONDEMAND_SUBTEST("unclosed extra comma", "[,", assert_iterate(doc, { INCORRECT_TYPE, TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed ", "[1 ", assert_iterate(doc, { int64_t(1) }, { TAPE_ERROR })); - // TODO These pass the user values that may run past the end of the buffer if they aren't careful - // In particular, if the padding is decorated with the wrong values, we could cause overrun! - ONDEMAND_SUBTEST("unclosed extra comma", "[,,", assert_iterate(doc, { INCORRECT_TYPE, INCORRECT_TYPE, TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed ", "[1,", assert_iterate(doc, { int64_t(1) }, { INCORRECT_TYPE, TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed ", "[1", assert_iterate(doc, { NUMBER_ERROR, TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed ", "[", assert_iterate(doc, { INCORRECT_TYPE, TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed extra comma", "[,", assert_iterate(doc, { TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed ", "[1 ", assert_iterate(doc, { TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed extra comma", "[,,", assert_iterate(doc, { TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed ", "[1,", assert_iterate(doc, { TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed ", "[1", assert_iterate(doc, { TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed ", "[", assert_iterate(doc, { TAPE_ERROR })); TEST_SUCCEED(); } diff --git a/tests/ondemand/ondemand_object_error_tests.cpp b/tests/ondemand/ondemand_object_error_tests.cpp index 10cad3da..12062036 100644 --- a/tests/ondemand/ondemand_object_error_tests.cpp +++ b/tests/ondemand/ondemand_object_error_tests.cpp @@ -61,13 +61,13 @@ namespace object_error_tests { } bool object_iterate_unclosed_error() { TEST_START(); - ONDEMAND_SUBTEST("unclosed", R"({ "a": 1, )", assert_iterate_object(doc.get_object(), { "a" }, { int64_t(1) }, { TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed", R"({ "a": 1, )", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); // TODO These next two pass the user a value that may run past the end of the buffer if they aren't careful. // In particular, if the padding is decorated with the wrong values, we could cause overrun! - ONDEMAND_SUBTEST("unclosed", R"({ "a": 1 )", assert_iterate_object(doc.get_object(), { "a" }, { int64_t(1) }, { TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed", R"({ "a": )", assert_iterate_object(doc.get_object(), { INCORRECT_TYPE, TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed", R"({ "a" )", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed", R"({ )", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed", R"({ "a": 1 )", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed", R"({ "a": )", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed", R"({ "a" )", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed", R"({ )", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); TEST_SUCCEED(); }