From 5533f8d87b5158e8ddcd0e23c16d062d95f07734 Mon Sep 17 00:00:00 2001 From: John Keiser Date: Sun, 4 Oct 2020 11:37:10 -0700 Subject: [PATCH] Add object iteration error tests --- .../generic/ondemand/array_iterator-inl.h | 3 + .../generic/ondemand/object_iterator-inl.h | 15 +++- .../generic/ondemand/raw_json_string-inl.h | 14 ++++ .../generic/ondemand/raw_json_string.h | 2 + tests/ondemand/ondemand_basictests.cpp | 83 +++++++++++++++++-- tests/test_macros.h | 4 + 6 files changed, 109 insertions(+), 12 deletions(-) diff --git a/include/simdjson/generic/ondemand/array_iterator-inl.h b/include/simdjson/generic/ondemand/array_iterator-inl.h index bdfbba13..194f2950 100644 --- a/include/simdjson/generic/ondemand/array_iterator-inl.h +++ b/include/simdjson/generic/ondemand/array_iterator-inl.h @@ -27,6 +27,9 @@ simdjson_really_inline bool array_iterator::operator!=(const array_iterator simdjson_really_inline array_iterator &array_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter->is_iterator_alive()) { return *this; } // Iterator will be released if there is an error bool has_value; error_code error = iter->get_iterator().has_next_element().get(has_value); // If there's an error, has_next stays true. if (!(error || has_value)) { iter->iteration_finished(); } diff --git a/include/simdjson/generic/ondemand/object_iterator-inl.h b/include/simdjson/generic/ondemand/object_iterator-inl.h index 86a4c99b..68d71885 100644 --- a/include/simdjson/generic/ondemand/object_iterator-inl.h +++ b/include/simdjson/generic/ondemand/object_iterator-inl.h @@ -9,8 +9,13 @@ namespace ondemand { simdjson_really_inline object_iterator::object_iterator(json_iterator_ref &_iter) noexcept : iter{&_iter} {} simdjson_really_inline simdjson_result object_iterator::operator*() noexcept { - if ((*iter)->error()) { iter->release(); return (*iter)->error(); } - return field::start(iter->borrow()); + error_code error = (*iter)->error(); + if (error) { iter->release(); return error; } + auto result = field::start(iter->borrow()); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter->release(); } + return result; } simdjson_really_inline bool object_iterator::operator==(const object_iterator &other) noexcept { return !(*this != other); @@ -19,10 +24,12 @@ simdjson_really_inline bool object_iterator::operator!=(const object_iterator &) return iter->is_alive(); } simdjson_really_inline object_iterator &object_iterator::operator++() noexcept { - if ((*iter)->error()) { return *this; } + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter->is_alive()) { return *this; } // Iterator will be released if there is an error bool has_value; error_code error = (*iter)->has_next_field().get(has_value); - if (!(error || has_value)) { iter->release(); } + if (!(error || has_value)) { std::cout << std::endl; iter->release(); } return *this; } diff --git a/include/simdjson/generic/ondemand/raw_json_string-inl.h b/include/simdjson/generic/ondemand/raw_json_string-inl.h index 02069ee8..e8392f30 100644 --- a/include/simdjson/generic/ondemand/raw_json_string-inl.h +++ b/include/simdjson/generic/ondemand/raw_json_string-inl.h @@ -33,6 +33,20 @@ SIMDJSON_UNUSED simdjson_really_inline bool operator!=(std::string_view a, const return !(a == b); } +SIMDJSON_UNUSED simdjson_really_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; + } +} + } // namespace ondemand } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson diff --git a/include/simdjson/generic/ondemand/raw_json_string.h b/include/simdjson/generic/ondemand/raw_json_string.h index 7ab1ba47..87de8aea 100644 --- a/include/simdjson/generic/ondemand/raw_json_string.h +++ b/include/simdjson/generic/ondemand/raw_json_string.h @@ -72,6 +72,8 @@ SIMDJSON_UNUSED simdjson_really_inline bool operator==(std::string_view a, const SIMDJSON_UNUSED simdjson_really_inline bool operator!=(const raw_json_string &a, std::string_view b) noexcept; SIMDJSON_UNUSED simdjson_really_inline bool operator!=(std::string_view a, const raw_json_string &b) noexcept; +SIMDJSON_UNUSED simdjson_really_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; + } // namespace ondemand } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson diff --git a/tests/ondemand/ondemand_basictests.cpp b/tests/ondemand/ondemand_basictests.cpp index 7351633d..4c3f0143 100644 --- a/tests/ondemand/ondemand_basictests.cpp +++ b/tests/ondemand/ondemand_basictests.cpp @@ -936,8 +936,8 @@ namespace error_tests { V actual; auto actual_error = elem.get(actual); if (count >= N) { - ASSERT(count < (N+N2), "Extra error reported") ASSERT_ERROR(actual_error, expected_error[count - N]); + ASSERT(count < (N+N2), "Extra error reported"); } else { ASSERT_SUCCESS(actual_error); ASSERT_EQUAL(actual, expected[count]); @@ -963,16 +963,82 @@ namespace error_tests { return assert_iterate(array, expected, N, nullptr, 0); } + bool array_iterate_error() { TEST_START(); - ONDEMAND_SUBTEST("missing comma ", "[1 1]", assert_iterate(doc, { int64_t(1) }, { TAPE_ERROR })); - ONDEMAND_SUBTEST("extra comma ", "[1,,1]", assert_iterate(doc, { int64_t(1) }, { NUMBER_ERROR, TAPE_ERROR })); - ONDEMAND_SUBTEST("extra comma ", "[,", assert_iterate(doc, { NUMBER_ERROR, TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed array", "[1 ", assert_iterate(doc, { int64_t(1) }, { TAPE_ERROR })); + ONDEMAND_SUBTEST("missing comma", "[1 1]", assert_iterate(doc, { int64_t(1) }, { TAPE_ERROR })); + ONDEMAND_SUBTEST("extra comma ", "[1,,1]", assert_iterate(doc, { int64_t(1) }, { NUMBER_ERROR, TAPE_ERROR })); + ONDEMAND_SUBTEST("extra comma ", "[,", assert_iterate(doc, { NUMBER_ERROR, TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed ", "[1 ", assert_iterate(doc, { int64_t(1) }, { TAPE_ERROR })); // TODO These pass the user values that may run past the end of the buffer if they aren't careful - ONDEMAND_SUBTEST("unclosed array", "[1,", assert_iterate(doc, { int64_t(1) }, { NUMBER_ERROR, TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed array", "[1", assert_iterate(doc, { NUMBER_ERROR, TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed array", "[", assert_iterate(doc, { NUMBER_ERROR, TAPE_ERROR })); + // In particular, if the padding is decorated with the wrong values, we could cause overrun! + ONDEMAND_SUBTEST("unclosed ", "[1,", assert_iterate(doc, { int64_t(1) }, { NUMBER_ERROR, TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed ", "[1", assert_iterate(doc, { NUMBER_ERROR, TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed ", "[", assert_iterate(doc, { NUMBER_ERROR, TAPE_ERROR })); + TEST_SUCCEED(); + } + + template + bool assert_iterate_object(T &&object, const char **expected_key, V *expected, size_t N, simdjson::error_code *expected_error, size_t N2) { + size_t count = 0; + for (auto field : object) { + V actual; + auto actual_error = field.value().get(actual); + if (count >= N) { + ASSERT((count - N) < N2, "Extra error reported"); + ASSERT_ERROR(actual_error, expected_error[count - N]); + } else { + ASSERT_SUCCESS(actual_error); + ASSERT_EQUAL(field.key().first, expected_key[count]); + ASSERT_EQUAL(actual, expected[count]); + } + count++; + } + ASSERT_EQUAL(count, N+N2); + return true; + } + + template + bool assert_iterate_object(T &&object, const char *(&&expected_key)[N], V (&&expected)[N], simdjson::error_code (&&expected_error)[N2]) { + return assert_iterate_object(std::forward(object), expected_key, expected, N, expected_error, N2); + } + + template + bool assert_iterate_object(T &&object, simdjson::error_code (&&expected_error)[N2]) { + return assert_iterate_object(std::forward(object), nullptr, nullptr, 0, expected_error, N2); + } + + template + bool assert_iterate_object(T &&object, const char *(&&expected_key)[N], V (&&expected)[N]) { + return assert_iterate_object(std::forward(object), expected_key, expected, N, nullptr, 0); + } + + bool object_iterate_error() { + TEST_START(); + + ONDEMAND_SUBTEST("missing semicolon", R"({ "a" 1, "b": 2 })", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); + + ONDEMAND_SUBTEST("missing key ", R"({ : 1, "b": 2 })", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); + + ONDEMAND_SUBTEST("missing value ", R"({ "a": , "b": 2 })", assert_iterate_object(doc.get_object(), { NUMBER_ERROR, TAPE_ERROR })); + + ONDEMAND_SUBTEST("missing comma ", R"({ "a": 1 "b": 2 })", assert_iterate_object(doc.get_object(), { "a" }, { int64_t(1) }, { TAPE_ERROR })); + + ONDEMAND_SUBTEST("wrong key type ", R"({ 1: 1, "b": 2 })", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); + ONDEMAND_SUBTEST("wrong key type ", R"({ true: 1, "b": 2 })", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); + ONDEMAND_SUBTEST("wrong key type ", R"({ false: 1, "b": 2 })", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); + ONDEMAND_SUBTEST("wrong key type ", R"({ null: 1, "b": 2 })", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); + ONDEMAND_SUBTEST("wrong key type ", R"({ []: 1, "b": 2 })", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); + ONDEMAND_SUBTEST("wrong key type ", R"({ {}: 1, "b": 2 })", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); + + ONDEMAND_SUBTEST("unclosed ", R"({ "a": 1, )", assert_iterate_object(doc.get_object(), { "a" }, { int64_t(1) }, { TAPE_ERROR })); + // TODO These next two pass the user a value that may run past the end of the buffer if they aren't careful. + // In particular, if the padding is decorated with the wrong values, we could cause overrun! + ONDEMAND_SUBTEST("unclosed ", R"({ "a": 1 )", assert_iterate_object(doc.get_object(), { "a" }, { int64_t(1) }, { TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed ", R"({ "a": )", assert_iterate_object(doc.get_object(), { NUMBER_ERROR, TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed ", R"({ "a" )", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed ", R"({ )", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); + TEST_SUCCEED(); } @@ -981,6 +1047,7 @@ namespace error_tests { empty_document_error() && wrong_type() && array_iterate_error() && + object_iterate_error() && true; } } diff --git a/tests/test_macros.h b/tests/test_macros.h index 5508aa96..2e3ac7c5 100644 --- a/tests/test_macros.h +++ b/tests/test_macros.h @@ -31,6 +31,10 @@ template<> simdjson_really_inline bool equals_expected(const char *actual, const char *expected) { return !strcmp(actual, expected); } +template<> +simdjson_really_inline bool equals_expected(simdjson::builtin::ondemand::raw_json_string actual, const char *expected) { + return actual == expected; +} simdjson_really_inline simdjson::error_code to_error_code(simdjson::error_code error) { return error;