Add object iteration error tests

2020-10-04 11:37:10 -07:00 · 2020-10-04 11:37:10 -07:00 · 5533f8d87b
parent 93af7b61ce
commit 5533f8d87b
6 changed files with 109 additions and 12 deletions
--- a/include/simdjson/generic/ondemand/array_iterator-inl.h
+++ b/include/simdjson/generic/ondemand/array_iterator-inl.h
@ -27,6 +27,9 @@ simdjson_really_inline bool array_iterator<T>::operator!=(const array_iterator<T
 }
 template<typename T>
 simdjson_really_inline array_iterator<T> &array_iterator<T>::operator++() noexcept {
+  // TODO this is a safety rail ... users should exit loops as soon as they receive an error.
+  // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free.
+  if (!iter->is_iterator_alive()) { return *this; } // Iterator will be released if there is an error
  bool has_value;
  error_code error = iter->get_iterator().has_next_element().get(has_value); // If there's an error, has_next stays true.
  if (!(error || has_value)) { iter->iteration_finished(); }
--- a/include/simdjson/generic/ondemand/object_iterator-inl.h
+++ b/include/simdjson/generic/ondemand/object_iterator-inl.h
@ -9,8 +9,13 @@ namespace ondemand {
 simdjson_really_inline object_iterator::object_iterator(json_iterator_ref &_iter) noexcept : iter{&_iter} {}

 simdjson_really_inline simdjson_result<field> object_iterator::operator*() noexcept {
-  if ((*iter)->error()) { iter->release(); return (*iter)->error(); }
-  return field::start(iter->borrow());
+  error_code error = (*iter)->error();
+  if (error) { iter->release(); return error; }
+  auto result = field::start(iter->borrow());
+  // TODO this is a safety rail ... users should exit loops as soon as they receive an error.
+  // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free.
+  if (result.error()) { iter->release(); }
+  return result;
 }
 simdjson_really_inline bool object_iterator::operator==(const object_iterator &other) noexcept {
  return !(*this != other);
@ -19,10 +24,12 @@ simdjson_really_inline bool object_iterator::operator!=(const object_iterator &)
  return iter->is_alive();
 }
 simdjson_really_inline object_iterator &object_iterator::operator++() noexcept {
-  if ((*iter)->error()) { return *this; }
+  // TODO this is a safety rail ... users should exit loops as soon as they receive an error.
+  // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free.
+  if (!iter->is_alive()) { return *this; } // Iterator will be released if there is an error
  bool has_value;
  error_code error = (*iter)->has_next_field().get(has_value);
-  if (!(error || has_value)) { iter->release(); }
+  if (!(error || has_value)) { std::cout << std::endl; iter->release(); }
  return *this;
 }

--- a/include/simdjson/generic/ondemand/raw_json_string-inl.h
+++ b/include/simdjson/generic/ondemand/raw_json_string-inl.h
@ -33,6 +33,20 @@ SIMDJSON_UNUSED simdjson_really_inline bool operator!=(std::string_view a, const
  return !(a == b);
 }

+SIMDJSON_UNUSED simdjson_really_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept {
+  bool in_escape = false;
+  const char *s = str.raw();
+  while (true) {
+    switch (*s) {
+      case '\\': in_escape = !in_escape; break;
+      case '"': if (in_escape) { in_escape = false; } else { return out; } break;
+      default: if (in_escape) { in_escape = false; }
+    }
+    out << *s;
+    s++;
+  }
+}
+
 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
 } // namespace simdjson
--- a/include/simdjson/generic/ondemand/raw_json_string.h
+++ b/include/simdjson/generic/ondemand/raw_json_string.h
@ -72,6 +72,8 @@ SIMDJSON_UNUSED simdjson_really_inline bool operator==(std::string_view a, const
 SIMDJSON_UNUSED simdjson_really_inline bool operator!=(const raw_json_string &a, std::string_view b) noexcept;
 SIMDJSON_UNUSED simdjson_really_inline bool operator!=(std::string_view a, const raw_json_string &b) noexcept;

+SIMDJSON_UNUSED simdjson_really_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept;
+
 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
 } // namespace simdjson
--- a/tests/ondemand/ondemand_basictests.cpp
+++ b/tests/ondemand/ondemand_basictests.cpp
@ -936,8 +936,8 @@ namespace error_tests {
      V actual;
      auto actual_error = elem.get(actual);
      if (count >= N) {
-        ASSERT(count < (N+N2), "Extra error reported")
        ASSERT_ERROR(actual_error, expected_error[count - N]);
+        ASSERT(count < (N+N2), "Extra error reported");
      } else {
        ASSERT_SUCCESS(actual_error);
        ASSERT_EQUAL(actual, expected[count]);
@ -963,16 +963,82 @@ namespace error_tests {
    return assert_iterate<V, T>(array, expected, N, nullptr, 0);
  }

+
  bool array_iterate_error() {
    TEST_START();
-    ONDEMAND_SUBTEST("missing comma ", "[1 1]",  assert_iterate(doc, { int64_t(1) }, { TAPE_ERROR }));
-    ONDEMAND_SUBTEST("extra comma   ", "[1,,1]", assert_iterate(doc, { int64_t(1) }, { NUMBER_ERROR, TAPE_ERROR }));
-    ONDEMAND_SUBTEST("extra comma   ", "[,",     assert_iterate(doc,                 { NUMBER_ERROR, TAPE_ERROR }));
-    ONDEMAND_SUBTEST("unclosed array", "[1 ",    assert_iterate(doc, { int64_t(1) }, { TAPE_ERROR }));
+    ONDEMAND_SUBTEST("missing comma", "[1 1]",  assert_iterate(doc, { int64_t(1) }, { TAPE_ERROR }));
+    ONDEMAND_SUBTEST("extra comma  ", "[1,,1]", assert_iterate(doc, { int64_t(1) }, { NUMBER_ERROR, TAPE_ERROR }));
+    ONDEMAND_SUBTEST("extra comma  ", "[,",     assert_iterate(doc,                 { NUMBER_ERROR, TAPE_ERROR }));
+    ONDEMAND_SUBTEST("unclosed     ", "[1 ",    assert_iterate(doc, { int64_t(1) }, { TAPE_ERROR }));
    // TODO These pass the user values that may run past the end of the buffer if they aren't careful
-    ONDEMAND_SUBTEST("unclosed array", "[1,",    assert_iterate(doc, { int64_t(1) }, { NUMBER_ERROR, TAPE_ERROR }));
-    ONDEMAND_SUBTEST("unclosed array", "[1",     assert_iterate(doc,                 { NUMBER_ERROR, TAPE_ERROR }));
-    ONDEMAND_SUBTEST("unclosed array", "[",      assert_iterate(doc,                 { NUMBER_ERROR, TAPE_ERROR }));
+    // In particular, if the padding is decorated with the wrong values, we could cause overrun!
+    ONDEMAND_SUBTEST("unclosed     ", "[1,",    assert_iterate(doc, { int64_t(1) }, { NUMBER_ERROR, TAPE_ERROR }));
+    ONDEMAND_SUBTEST("unclosed     ", "[1",     assert_iterate(doc,                 { NUMBER_ERROR, TAPE_ERROR }));
+    ONDEMAND_SUBTEST("unclosed     ", "[",      assert_iterate(doc,                 { NUMBER_ERROR, TAPE_ERROR }));
+    TEST_SUCCEED();
+  }
+
+  template<typename V, typename T>
+  bool assert_iterate_object(T &&object, const char **expected_key, V *expected, size_t N, simdjson::error_code *expected_error, size_t N2) {
+    size_t count = 0;
+    for (auto field : object) {
+      V actual;
+      auto actual_error = field.value().get(actual);
+      if (count >= N) {
+        ASSERT((count - N) < N2, "Extra error reported");
+        ASSERT_ERROR(actual_error, expected_error[count - N]);
+      } else {
+        ASSERT_SUCCESS(actual_error);
+        ASSERT_EQUAL(field.key().first, expected_key[count]);
+        ASSERT_EQUAL(actual, expected[count]);
+      }
+      count++;
+    }
+    ASSERT_EQUAL(count, N+N2);
+    return true;
+  }
+
+  template<typename V, size_t N, size_t N2, typename T>
+  bool assert_iterate_object(T &&object, const char *(&&expected_key)[N], V (&&expected)[N], simdjson::error_code (&&expected_error)[N2]) {
+    return assert_iterate_object<V, T>(std::forward<T>(object), expected_key, expected, N, expected_error, N2);
+  }
+
+  template<size_t N2, typename T>
+  bool assert_iterate_object(T &&object, simdjson::error_code (&&expected_error)[N2]) {
+    return assert_iterate_object<int64_t, T>(std::forward<T>(object), nullptr, nullptr, 0, expected_error, N2);
+  }
+
+  template<typename V, size_t N, typename T>
+  bool assert_iterate_object(T &&object, const char *(&&expected_key)[N], V (&&expected)[N]) {
+    return assert_iterate_object<V, T>(std::forward<T>(object), expected_key, expected, N, nullptr, 0);
+  }
+
+  bool object_iterate_error() {
+    TEST_START();
+
+    ONDEMAND_SUBTEST("missing semicolon", R"({ "a"  1, "b": 2 })",    assert_iterate_object(doc.get_object(),                          { TAPE_ERROR }));
+
+    ONDEMAND_SUBTEST("missing key      ", R"({    : 1, "b": 2 })",    assert_iterate_object(doc.get_object(),                          { TAPE_ERROR }));
+
+    ONDEMAND_SUBTEST("missing value    ", R"({ "a":  , "b": 2 })",    assert_iterate_object(doc.get_object(),                          { NUMBER_ERROR, TAPE_ERROR }));
+
+    ONDEMAND_SUBTEST("missing comma    ", R"({ "a": 1  "b": 2 })",    assert_iterate_object(doc.get_object(), { "a" }, { int64_t(1) }, { TAPE_ERROR }));
+
+    ONDEMAND_SUBTEST("wrong key type   ", R"({ 1:   1, "b": 2 })",    assert_iterate_object(doc.get_object(),                          { TAPE_ERROR }));
+    ONDEMAND_SUBTEST("wrong key type   ", R"({ true: 1, "b": 2 })",   assert_iterate_object(doc.get_object(),                          { TAPE_ERROR }));
+    ONDEMAND_SUBTEST("wrong key type   ", R"({ false: 1, "b": 2 })",  assert_iterate_object(doc.get_object(),                          { TAPE_ERROR }));
+    ONDEMAND_SUBTEST("wrong key type   ", R"({ null: 1, "b": 2 })",   assert_iterate_object(doc.get_object(),                          { TAPE_ERROR }));
+    ONDEMAND_SUBTEST("wrong key type   ", R"({ []:  1, "b": 2 })",    assert_iterate_object(doc.get_object(),                          { TAPE_ERROR }));
+    ONDEMAND_SUBTEST("wrong key type   ", R"({ {}:  1, "b": 2 })",    assert_iterate_object(doc.get_object(),                          { TAPE_ERROR }));
+
+    ONDEMAND_SUBTEST("unclosed         ", R"({ "a": 1,         )",    assert_iterate_object(doc.get_object(), { "a" }, { int64_t(1) }, { TAPE_ERROR }));
+    // TODO These next two pass the user a value that may run past the end of the buffer if they aren't careful.
+    // In particular, if the padding is decorated with the wrong values, we could cause overrun!
+    ONDEMAND_SUBTEST("unclosed         ", R"({ "a": 1          )",    assert_iterate_object(doc.get_object(), { "a" }, { int64_t(1) }, { TAPE_ERROR }));
+    ONDEMAND_SUBTEST("unclosed         ", R"({ "a":            )",    assert_iterate_object(doc.get_object(),                          { NUMBER_ERROR, TAPE_ERROR }));
+    ONDEMAND_SUBTEST("unclosed         ", R"({ "a"             )",    assert_iterate_object(doc.get_object(),                          { TAPE_ERROR }));
+    ONDEMAND_SUBTEST("unclosed         ", R"({                 )",    assert_iterate_object(doc.get_object(),                          { TAPE_ERROR }));
+
    TEST_SUCCEED();
  }

@ -981,6 +1047,7 @@ namespace error_tests {
           empty_document_error() &&
           wrong_type() &&
           array_iterate_error() &&
+           object_iterate_error() &&
           true;
  }
 }
--- a/tests/test_macros.h
+++ b/tests/test_macros.h
@ -31,6 +31,10 @@ template<>
 simdjson_really_inline bool equals_expected<const char *, const char *>(const char *actual, const char *expected) {
  return !strcmp(actual, expected);
 }
+template<>
+simdjson_really_inline bool equals_expected<simdjson::builtin::ondemand::raw_json_string, const char *>(simdjson::builtin::ondemand::raw_json_string actual, const char *expected) {
+  return actual == expected;
+}

 simdjson_really_inline simdjson::error_code to_error_code(simdjson::error_code error) {
  return error;