Merge pull request #1416 from simdjson/jkeiser/safe-iterators-2

Add safety checks for out-of-order array/object iteration and indexing
This commit is contained in:
John Keiser 2021-02-05 09:47:03 -08:00 committed by GitHub
commit b2de2dfd1b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 186 additions and 29 deletions

View File

@ -35,6 +35,7 @@ enum error_code {
INVALID_URI_FRAGMENT, ///< Invalid URI fragment
UNEXPECTED_ERROR, ///< indicative of a bug in simdjson
PARSER_IN_USE, ///< parser is already in use.
OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order
/** @private Number of error codes */
NUM_ERROR_CODES
};

View File

@ -55,11 +55,12 @@ simdjson_really_inline array array::started(value_iterator &iter) noexcept {
return array(iter);
}
simdjson_really_inline array_iterator array::begin() noexcept {
return iter;
simdjson_really_inline simdjson_result<array_iterator> array::begin() noexcept {
if (!iter.is_at_container_start()) { return OUT_OF_ORDER_ITERATION; }
return array_iterator(iter);
}
simdjson_really_inline array_iterator array::end() noexcept {
return {};
simdjson_really_inline simdjson_result<array_iterator> array::end() noexcept {
return array_iterator();
}
} // namespace ondemand

View File

@ -24,13 +24,13 @@ public:
*
* Part of the std::iterable interface.
*/
simdjson_really_inline array_iterator begin() noexcept;
simdjson_really_inline simdjson_result<array_iterator> begin() noexcept;
/**
* Sentinel representing the end of the array.
*
* Part of the std::iterable interface.
*/
simdjson_really_inline array_iterator end() noexcept;
simdjson_really_inline simdjson_result<array_iterator> end() noexcept;
protected:
/**

View File

@ -51,13 +51,12 @@ simdjson_really_inline object::object(const value_iterator &_iter) noexcept
{
}
simdjson_really_inline object_iterator object::begin() noexcept {
// Expanded version of SIMDJSON_ASSUME( iter.at_field_start() || !iter.is_open() )
SIMDJSON_ASSUME( (iter._json_iter->token.index == iter._start_position + 1) || (iter._json_iter->_depth < iter._depth) );
return iter;
simdjson_really_inline simdjson_result<object_iterator> object::begin() noexcept {
if (!iter.is_at_container_start()) { return OUT_OF_ORDER_ITERATION; }
return object_iterator(iter);
}
simdjson_really_inline object_iterator object::end() noexcept {
return {};
simdjson_really_inline simdjson_result<object_iterator> object::end() noexcept {
return object_iterator();
}
} // namespace ondemand

View File

@ -16,8 +16,8 @@ public:
*/
simdjson_really_inline object() noexcept = default;
simdjson_really_inline object_iterator begin() noexcept;
simdjson_really_inline object_iterator end() noexcept;
simdjson_really_inline simdjson_result<object_iterator> begin() noexcept;
simdjson_really_inline simdjson_result<object_iterator> end() noexcept;
/**
* Look up a field by name on an object (order-sensitive).

View File

@ -10,7 +10,9 @@ simdjson_really_inline value_iterator::value_iterator(json_iterator *json_iter,
}
simdjson_warn_unused simdjson_really_inline simdjson_result<bool> value_iterator::start_object() noexcept {
if (*advance_container_start("object") != '{') { return incorrect_type_error("Not an object"); }
const uint8_t *json;
SIMDJSON_TRY( advance_container_start("object", json) );
if (*json != '{') { return incorrect_type_error("Not an object"); }
return started_object();
}
@ -56,6 +58,10 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<bool> value_iterator
// { "a": [ 1, 2 ], "b": [ 3, 4 ] }
// ^ (depth 2, index 1)
// ```
//
if (at_first_field()) {
has_value = true;
//
// 2. When a previous search did not yield a value or the object is empty:
//
@ -66,9 +72,12 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<bool> value_iterator
// ^ (depth 0, index 2)
// ```
//
if (!is_open()) { return false; }
if (at_first_field()) {
has_value = true;
} else if (!is_open()) {
// If we're past the end of the object, we're being iterated out of order.
// Note: this isn't perfect detection. It's possible the user is inside some other object; if so,
// this object iterator will blithely scan that object for fields.
if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; }
has_value = false;
// 3. When a previous search found a field or an iterator yielded a value:
//
@ -138,6 +147,10 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<bool> value_iterator
// ```
//
} else if (!is_open()) {
// If we're past the end of the object, we're being iterated out of order.
// Note: this isn't perfect detection. It's possible the user is inside some other object; if so,
// this object iterator will blithely scan that object for fields.
if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; }
has_value = false;
// 3. When a previous search found a field or an iterator yielded a value:
@ -248,7 +261,9 @@ simdjson_warn_unused simdjson_really_inline error_code value_iterator::field_val
}
simdjson_warn_unused simdjson_really_inline simdjson_result<bool> value_iterator::start_array() noexcept {
if (*advance_container_start("array") != '[') { return incorrect_type_error("Not an array"); }
const uint8_t *json;
SIMDJSON_TRY( advance_container_start("array", json) );
if (*json != '[') { return incorrect_type_error("Not an array"); }
return started_array();
}
@ -428,15 +443,20 @@ simdjson_really_inline const uint8_t *value_iterator::advance_start(const char *
_json_iter->ascend_to(depth()-1);
return result;
}
simdjson_really_inline const uint8_t *value_iterator::advance_container_start(const char *type) const noexcept {
// If we're not at the position anymore, we don't want to advance the cursor.
if (is_at_container_start()) { return peek_start(); }
simdjson_really_inline error_code value_iterator::advance_container_start(const char *type, const uint8_t *&json) const noexcept {
logger::log_start_value(*_json_iter, _start_position, depth(), type);
// If we're not at the position anymore, we don't want to advance the cursor.
if (!is_at_start()) {
if (!is_at_container_start()) { return OUT_OF_ORDER_ITERATION; }
json = peek_start();
return SUCCESS;
}
// Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value.
assert_at_start();
return _json_iter->advance();
json = _json_iter->advance();
return SUCCESS;
}
simdjson_really_inline const uint8_t *value_iterator::advance_root_scalar(const char *type) const noexcept {
logger::log_value(*_json_iter, _start_position, depth(), type);

View File

@ -274,7 +274,7 @@ protected:
simdjson_really_inline const uint8_t *peek_start() const noexcept;
simdjson_really_inline uint32_t peek_start_length() const noexcept;
simdjson_really_inline const uint8_t *advance_start(const char *type) const noexcept;
simdjson_really_inline const uint8_t *advance_container_start(const char *type) const noexcept;
simdjson_really_inline error_code advance_container_start(const char *type, const uint8_t *&json) const noexcept;
simdjson_really_inline const uint8_t *advance_root_scalar(const char *type) const noexcept;
simdjson_really_inline const uint8_t *advance_non_root_scalar(const char *type) const noexcept;

View File

@ -28,7 +28,8 @@ namespace internal {
{ INVALID_JSON_POINTER, "Invalid JSON pointer syntax." },
{ INVALID_URI_FRAGMENT, "Invalid URI fragment syntax." },
{ UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson" },
{ PARSER_IN_USE, "Cannot parse a new document while a document is still in use." }
{ PARSER_IN_USE, "Cannot parse a new document while a document is still in use." },
{ OUT_OF_ORDER_ITERATION, "Objects and arrays can only be iterated when they are first encountered." }
}; // error_messages[]
} // namespace internal

View File

@ -562,10 +562,140 @@ namespace error_tests {
ASSERT_EQUAL( val.is_null(), true );
TEST_SUCCEED();
}));
TEST_SUCCEED();
}
//
// Do it again for bool
//
// Verifies that asking a nested array for begin() a second time, after it has
// already been iterated to completion, yields OUT_OF_ORDER_ITERATION.
//
// The document is an outer array holding a single inner array, [ [ 1, 2 ] ].
// The same scenario is repeated for four ways of holding the inner array, as
// named by the subtest labels: the loop element itself, an ondemand::value,
// the result of get_array(), and an ondemand::array.
//
// NOTE(review): TEST_START, SUBTEST, ASSERT_SUCCESS, ASSERT_ERROR and
// TEST_SUCCEED are harness macros defined outside this view; presumably a
// failed assertion returns false from the enclosing lambda -- confirm.
bool out_of_order_array_iteration_error() {
TEST_START();
auto json = R"([ [ 1, 2 ] ])"_padded;
// Case 1: iterate the loop element directly, then call begin() on it again.
SUBTEST("simdjson_result<value>", test_ondemand_doc(json, [&](auto doc) {
for (auto element : doc) {
// First pass over the inner array: every subelement must be readable.
for (auto subelement : element) { ASSERT_SUCCESS(subelement); }
// Second begin() on the same inner array must be rejected.
ASSERT_ERROR( element.begin(), OUT_OF_ORDER_ITERATION );
}
return true;
}));
// Case 2: unwrap the element into an ondemand::value first.
SUBTEST("value", test_ondemand_doc(json, [&](auto doc) {
for (auto element : doc) {
ondemand::value val;
ASSERT_SUCCESS( element.get(val) );
for (auto subelement : val) { ASSERT_SUCCESS(subelement); }
ASSERT_ERROR( val.begin(), OUT_OF_ORDER_ITERATION );
}
return true;
}));
// Case 3: keep whatever get_array() returns (not unwrapped) and iterate that.
SUBTEST("simdjson_result<array>", test_ondemand_doc(json, [&](auto doc) {
for (auto element : doc) {
auto arr = element.get_array();
for (auto subelement : arr) { ASSERT_SUCCESS(subelement); }
ASSERT_ERROR( arr.begin(), OUT_OF_ORDER_ITERATION );
}
return true;
}));
// Case 4: unwrap the element into an ondemand::array first.
SUBTEST("array", test_ondemand_doc(json, [&](auto doc) {
for (auto element : doc) {
ondemand::array arr;
ASSERT_SUCCESS( element.get(arr) );
for (auto subelement : arr) { ASSERT_SUCCESS(subelement); }
ASSERT_ERROR( arr.begin(), OUT_OF_ORDER_ITERATION );
}
return true;
}));
TEST_SUCCEED();
}
// Verifies that asking a nested object for begin() a second time, after all
// of its fields have already been iterated, yields OUT_OF_ORDER_ITERATION.
//
// The document is an array holding one object, [ { "x": 1, "y": 2 } ]. The
// scenario runs twice: once on the value returned by get_object() as-is, and
// once on an ondemand::object filled in through element.get(obj).
//
// NOTE(review): the TEST_* / SUBTEST / ASSERT_* macros are defined outside
// this view; their exact failure behaviour should be confirmed there.
bool out_of_order_object_iteration_error() {
TEST_START();
auto json = R"([ { "x": 1, "y": 2 } ])"_padded;
// Case 1: iterate the result of get_object() without unwrapping it.
SUBTEST("simdjson_result<object>", test_ondemand_doc(json, [&](auto doc) {
for (auto element : doc) {
auto obj = element.get_object();
// First pass: every field must be readable.
for (auto field : obj) { ASSERT_SUCCESS(field); }
// Restarting iteration on the consumed object must be rejected.
ASSERT_ERROR( obj.begin(), OUT_OF_ORDER_ITERATION );
}
return true;
}));
// Case 2: unwrap into an ondemand::object, then repeat the same sequence.
SUBTEST("object", test_ondemand_doc(json, [&](auto doc) {
for (auto element : doc) {
ondemand::object obj;
ASSERT_SUCCESS( element.get(obj) );
for (auto field : obj) { ASSERT_SUCCESS(field); }
ASSERT_ERROR( obj.begin(), OUT_OF_ORDER_ITERATION );
}
return true;
}));
TEST_SUCCEED();
}
// Verifies that indexing an object with obj["x"] after the surrounding array
// loop has finished yields OUT_OF_ORDER_ITERATION.
//
// Unlike the iteration tests, the object handle is declared outside the loop
// over the document so that it is still in scope once that loop has ended.
// The lookup is then attempted on that leftover handle.
//
// NOTE(review): the TEST_* / SUBTEST / ASSERT_* macros are defined outside
// this view; their exact failure behaviour should be confirmed there.
bool out_of_order_object_index_error() {
TEST_START();
auto json = R"([ { "x": 1, "y": 2 } ])"_padded;
// Case 1: the handle is a simdjson_result<ondemand::object>.
SUBTEST("simdjson_result<object>", test_ondemand_doc(json, [&](auto doc) {
simdjson_result<ondemand::object> obj;
for (auto element : doc) {
obj = element.get_object();
// Consume every field so the object has been fully iterated.
for (auto field : obj) { ASSERT_SUCCESS(field); }
}
// The outer loop is done; a keyed lookup on the old handle must fail.
ASSERT_ERROR( obj["x"], OUT_OF_ORDER_ITERATION );
return true;
}));
// Case 2: the handle is a plain ondemand::object.
SUBTEST("object", test_ondemand_doc(json, [&](auto doc) {
ondemand::object obj;
for (auto element : doc) {
ASSERT_SUCCESS( element.get(obj) );
for (auto field : obj) { ASSERT_SUCCESS(field); }
}
ASSERT_ERROR( obj["x"], OUT_OF_ORDER_ITERATION );
return true;
}));
TEST_SUCCEED();
}
// Verifies that find_field("x") on an object handle, called after the
// surrounding array loop has finished, yields OUT_OF_ORDER_ITERATION.
//
// Same shape as the obj["x"] test: the handle is declared outside the loop
// over the document, the object's fields are fully iterated inside the loop,
// and the lookup is attempted afterwards.
//
// NOTE(review): the TEST_* / SUBTEST / ASSERT_* macros are defined outside
// this view; their exact failure behaviour should be confirmed there.
bool out_of_order_object_find_field_error() {
TEST_START();
auto json = R"([ { "x": 1, "y": 2 } ])"_padded;
// Case 1: the handle is a simdjson_result<ondemand::object>.
SUBTEST("simdjson_result<object>", test_ondemand_doc(json, [&](auto doc) {
simdjson_result<ondemand::object> obj;
for (auto element : doc) {
obj = element.get_object();
// Consume every field so the object has been fully iterated.
for (auto field : obj) { ASSERT_SUCCESS(field); }
}
// Lookup on the old handle after the outer loop must be rejected.
ASSERT_ERROR( obj.find_field("x"), OUT_OF_ORDER_ITERATION );
return true;
}));
// Case 2: the handle is a plain ondemand::object.
SUBTEST("object", test_ondemand_doc(json, [&](auto doc) {
ondemand::object obj;
for (auto element : doc) {
ASSERT_SUCCESS( element.get(obj) );
for (auto field : obj) { ASSERT_SUCCESS(field); }
}
ASSERT_ERROR( obj.find_field("x"), OUT_OF_ORDER_ITERATION );
return true;
}));
TEST_SUCCEED();
}
// Verifies that find_field_unordered("x") on an object handle, called after
// the surrounding array loop has finished, yields OUT_OF_ORDER_ITERATION.
//
// Mirrors the find_field test, substituting find_field_unordered for the
// lookup: the handle is declared outside the loop over the document, the
// object's fields are fully iterated inside, and the lookup comes afterwards.
//
// NOTE(review): the TEST_* / SUBTEST / ASSERT_* macros are defined outside
// this view; their exact failure behaviour should be confirmed there.
bool out_of_order_object_find_field_unordered_error() {
TEST_START();
auto json = R"([ { "x": 1, "y": 2 } ])"_padded;
// Case 1: the handle is a simdjson_result<ondemand::object>.
SUBTEST("simdjson_result<object>", test_ondemand_doc(json, [&](auto doc) {
simdjson_result<ondemand::object> obj;
for (auto element : doc) {
obj = element.get_object();
// Consume every field so the object has been fully iterated.
for (auto field : obj) { ASSERT_SUCCESS(field); }
}
// Lookup on the old handle after the outer loop must be rejected.
ASSERT_ERROR( obj.find_field_unordered("x"), OUT_OF_ORDER_ITERATION );
return true;
}));
// Case 2: the handle is a plain ondemand::object.
SUBTEST("object", test_ondemand_doc(json, [&](auto doc) {
ondemand::object obj;
for (auto element : doc) {
ASSERT_SUCCESS( element.get(obj) );
for (auto field : obj) { ASSERT_SUCCESS(field); }
}
ASSERT_ERROR( obj.find_field_unordered("x"), OUT_OF_ORDER_ITERATION );
return true;
}));
TEST_SUCCEED();
}
@ -588,6 +718,11 @@ namespace error_tests {
object_lookup_miss_next_error() &&
get_fail_then_succeed_bool() &&
get_fail_then_succeed_null() &&
out_of_order_array_iteration_error() &&
out_of_order_object_iteration_error() &&
out_of_order_object_index_error() &&
out_of_order_object_find_field_error() &&
out_of_order_object_find_field_unordered_error() &&
true;
}
}