From 4c63956624c5bd1b3f379afa0e893bbecc2517e8 Mon Sep 17 00:00:00 2001 From: John Keiser Date: Sun, 6 Dec 2020 12:00:20 -0800 Subject: [PATCH] Enable object["x"]["y"] --- benchmark/distinctuserid/ondemand.h | 18 +-- include/simdjson/generic/ondemand.h | 1 + include/simdjson/generic/ondemand/document.h | 25 ++-- include/simdjson/generic/ondemand/object.h | 41 +++--- include/simdjson/generic/ondemand/value-inl.h | 117 +++++++++++------- include/simdjson/generic/ondemand/value.h | 44 +++++++ tests/ondemand/ondemand_basictests.cpp | 58 +++++++-- 7 files changed, 205 insertions(+), 99 deletions(-) diff --git a/benchmark/distinctuserid/ondemand.h b/benchmark/distinctuserid/ondemand.h index 73201295..933e630f 100644 --- a/benchmark/distinctuserid/ondemand.h +++ b/benchmark/distinctuserid/ondemand.h @@ -35,25 +35,13 @@ simdjson_really_inline bool OnDemand::Run(const padded_string &json) { auto doc = parser.iterate(json); for (ondemand::object tweet : doc["statuses"]) { // We believe that all statuses have a matching - // user, and we are willing to throw when they do not: - // - // You might think that you do not need the braces, but - // you do, otherwise you will get the wrong answer. That is - // because you can only have one active object or array - // at a time. - { - ondemand::object user = tweet["user"]; - int64_t id = user["id"]; - ids.push_back(id); - } + // user, and we are willing to throw when they do not. + ids.push_back(tweet["user"]["id"]); // Not all tweets have a "retweeted_status", but when they do // we want to go and find the user within. 
auto retweet = tweet["retweeted_status"]; if(!retweet.error()) { - ondemand::object retweet_content = retweet; - ondemand::object reuser = retweet_content["user"]; - int64_t rid = reuser["id"]; - ids.push_back(rid); + ids.push_back(retweet["user"]["id"]); } } remove_duplicates(ids); diff --git a/include/simdjson/generic/ondemand.h b/include/simdjson/generic/ondemand.h index 91e7c1d1..0c9dd7ce 100644 --- a/include/simdjson/generic/ondemand.h +++ b/include/simdjson/generic/ondemand.h @@ -6,6 +6,7 @@ namespace SIMDJSON_IMPLEMENTATION { * Designed for maximum speed and a lower memory profile. */ namespace ondemand { + /** Represents the depth of a JSON value (number of nested arrays/objects). */ using depth_t = int32_t; } // namespace ondemand } // namespace SIMDJSON_IMPLEMENTATION diff --git a/include/simdjson/generic/ondemand/document.h b/include/simdjson/generic/ondemand/document.h index 0a42a2d7..81a4f808 100644 --- a/include/simdjson/generic/ondemand/document.h +++ b/include/simdjson/generic/ondemand/document.h @@ -208,23 +208,20 @@ public: /** * Look up a field by name on an object. - * - * This method may only be called once on a given value. If you want to look up multiple fields, - * you must first get the object using value.get_object() or object(value). - * + * + * Important notes: + * + * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * * **Once Only:** You may only look up a single field on a document. To look up multiple fields, + * use `.get_object()` or cast to `object`. + * * @param key The key to look up. - * @returns INCORRECT_TYPE If the JSON value is not an array. + * @returns The value of the field, NO_SUCH_FIELD if the field is not in the object, or + * INCORRECT_TYPE if the JSON value is not an object. 
*/ simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - /** - * Look up a field by name on an object. - * - * This method may only be called once on a given value. If you want to look up multiple fields, - * you must first get the object using value.get_object() or object(value). - * - * @param key The key to look up. - * @returns INCORRECT_TYPE If the JSON value is not an array. - */ + /** @overload simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; */ simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; protected: diff --git a/include/simdjson/generic/ondemand/object.h b/include/simdjson/generic/ondemand/object.h index c9abcd15..54e1659f 100644 --- a/include/simdjson/generic/ondemand/object.h +++ b/include/simdjson/generic/ondemand/object.h @@ -23,26 +23,37 @@ public: simdjson_really_inline object_iterator begin() noexcept; simdjson_really_inline object_iterator end() noexcept; - simdjson_really_inline simdjson_result operator[](const std::string_view key) & noexcept; - simdjson_really_inline simdjson_result operator[](const std::string_view key) && noexcept; + + /** + * Look up a field by name on an object. + * + * Important notes: + * + * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * * **Order Sensitive:** Each field lookup will only move forward in the object. In particular, + * the following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::builtin::ondemand::parser parser; + * auto obj = parser.iterate(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj["z"]; + * double y = obj["y"]; + * double x = obj["x"]; + * ``` + * + * @param key The key to look up. 
+ * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; */ + simdjson_really_inline simdjson_result operator[](std::string_view key) && noexcept; protected: - /** - * Begin object iteration. - * - * @param doc The document containing the object. The iterator must be just after the opening `{`. - * @param error If this is not SUCCESS, creates an error chained object. - */ static simdjson_really_inline simdjson_result start(value_iterator &iter) noexcept; static simdjson_really_inline simdjson_result try_start(value_iterator &iter) noexcept; static simdjson_really_inline object started(value_iterator &iter) noexcept; - - /** - * Internal object creation. Call object::begin(doc) instead of this. - * - * @param doc The document containing the object. doc->depth must already be incremented to - * reflect the object's depth. The iterator must be just after the opening `{`. 
- */ simdjson_really_inline object(const value_iterator &_iter) noexcept; simdjson_warn_unused simdjson_really_inline error_code find_field_raw(const std::string_view key) noexcept; diff --git a/include/simdjson/generic/ondemand/value-inl.h b/include/simdjson/generic/ondemand/value-inl.h index e88dc80f..376527a7 100644 --- a/include/simdjson/generic/ondemand/value-inl.h +++ b/include/simdjson/generic/ondemand/value-inl.h @@ -145,14 +145,18 @@ simdjson_really_inline simdjson_result value::end() & noexcept { return {}; } -// simdjson_really_inline void value::log_value(const char *type) const noexcept { -// char json_char[]{char(json[0]), '\0'}; -// logger::log_value(*iter, type, json_char); -// } -// simdjson_really_inline void value::log_error(const char *message) const noexcept { -// char json_char[]{char(json[0]), '\0'}; -// logger::log_error(*iter, message, json_char); -// } +simdjson_really_inline simdjson_result value::operator[](std::string_view key) & noexcept { + return get_object()[key]; +} +simdjson_really_inline simdjson_result value::operator[](std::string_view key) && noexcept { + return std::forward(*this).get_object()[key]; +} +simdjson_really_inline simdjson_result value::operator[](const char *key) & noexcept { + return get_object()[key]; +} +simdjson_really_inline simdjson_result value::operator[](const char *key) && noexcept { + return std::forward(*this).get_object()[key]; +} } // namespace ondemand } // namespace SIMDJSON_IMPLEMENTATION @@ -184,78 +188,95 @@ simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} 
+simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} + +simdjson_really_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} simdjson_really_inline simdjson_result simdjson_result::get_array() && noexcept { if (error()) { return error(); } return std::forward(first).get_array(); } -simdjson_really_inline simdjson_result simdjson_result::get_array() & noexcept { +simdjson_really_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } - return std::forward(first).get_array(); + return first.get_object(); } simdjson_really_inline simdjson_result simdjson_result::get_object() && noexcept { if (error()) { return error(); } return std::forward(first).get_object(); } -simdjson_really_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_really_inline simdjson_result simdjson_result::get_uint64() & noexcept { if (error()) { return error(); } - return std::forward(first).get_object(); + return first.get_uint64(); } simdjson_really_inline simdjson_result simdjson_result::get_uint64() && noexcept { if (error()) { return error(); } return std::forward(first).get_uint64(); } -simdjson_really_inline simdjson_result simdjson_result::get_uint64() & noexcept { +simdjson_really_inline simdjson_result simdjson_result::get_int64() & noexcept { if (error()) { return error(); } - return std::forward(first).get_uint64(); + return first.get_int64(); } simdjson_really_inline simdjson_result simdjson_result::get_int64() && noexcept { if (error()) { return error(); } return std::forward(first).get_int64(); } -simdjson_really_inline simdjson_result simdjson_result::get_int64() & noexcept { +simdjson_really_inline simdjson_result simdjson_result::get_double() & noexcept { if (error()) { return error(); } - return 
std::forward(first).get_int64(); + return first.get_double(); } simdjson_really_inline simdjson_result simdjson_result::get_double() && noexcept { if (error()) { return error(); } return std::forward(first).get_double(); } -simdjson_really_inline simdjson_result simdjson_result::get_double() & noexcept { +simdjson_really_inline simdjson_result simdjson_result::get_string() & noexcept { if (error()) { return error(); } - return std::forward(first).get_double(); + return first.get_string(); } simdjson_really_inline simdjson_result simdjson_result::get_string() && noexcept { if (error()) { return error(); } return std::forward(first).get_string(); } -simdjson_really_inline simdjson_result simdjson_result::get_string() & noexcept { +simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() & noexcept { if (error()) { return error(); } - return std::forward(first).get_string(); + return first.get_raw_json_string(); } simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() && noexcept { if (error()) { return error(); } return std::forward(first).get_raw_json_string(); } -simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() & noexcept { +simdjson_really_inline simdjson_result simdjson_result::get_bool() & noexcept { if (error()) { return error(); } - return std::forward(first).get_raw_json_string(); + return first.get_bool(); } simdjson_really_inline simdjson_result simdjson_result::get_bool() && noexcept { if (error()) { return error(); } return std::forward(first).get_bool(); } -simdjson_really_inline simdjson_result simdjson_result::get_bool() & noexcept { - if (error()) { return error(); } - return std::forward(first).get_bool(); +simdjson_really_inline bool simdjson_result::is_null() & noexcept { + if (error()) { return false; } + return first.is_null(); } simdjson_really_inline bool simdjson_result::is_null() && noexcept { if (error()) { return false; } return std::forward(first).is_null(); } 
-simdjson_really_inline bool simdjson_result::is_null() & noexcept { - if (error()) { return false; } - return std::forward(first).is_null(); -} template simdjson_really_inline simdjson_result simdjson_result::get() & noexcept { if (error()) { return error(); } @@ -287,66 +308,66 @@ template<> simdjson_really_inline error_code simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::array() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::array() & noexcept(false) { if (error()) { throw simdjson_error(error()); } - return std::forward(first); + return first; } -simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::object() && noexcept(false) { +simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::array() && noexcept(false) { if (error()) { throw simdjson_error(error()); } return std::forward(first); } simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::object() && noexcept(false) { if (error()) { throw simdjson_error(error()); } return std::forward(first); } simdjson_really_inline simdjson_result::operator uint64_t() && noexcept(false) { if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -simdjson_really_inline simdjson_result::operator uint64_t() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); + return first; } simdjson_really_inline simdjson_result::operator int64_t() && noexcept(false) { if (error()) { throw simdjson_error(error()); } return std::forward(first); } -simdjson_really_inline simdjson_result::operator int64_t() & noexcept(false) { +simdjson_really_inline 
simdjson_result::operator uint64_t() & noexcept(false) { if (error()) { throw simdjson_error(error()); } - return std::forward(first); + return first; } simdjson_really_inline simdjson_result::operator double() && noexcept(false) { if (error()) { throw simdjson_error(error()); } return std::forward(first); } -simdjson_really_inline simdjson_result::operator double() & noexcept(false) { +simdjson_really_inline simdjson_result::operator int64_t() & noexcept(false) { if (error()) { throw simdjson_error(error()); } - return std::forward(first); + return first; } simdjson_really_inline simdjson_result::operator std::string_view() && noexcept(false) { if (error()) { throw simdjson_error(error()); } return std::forward(first); } -simdjson_really_inline simdjson_result::operator std::string_view() & noexcept(false) { +simdjson_really_inline simdjson_result::operator double() & noexcept(false) { if (error()) { throw simdjson_error(error()); } - return std::forward(first); + return first; } simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() && noexcept(false) { if (error()) { throw simdjson_error(error()); } return std::forward(first); } -simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() & noexcept(false) { +simdjson_really_inline simdjson_result::operator std::string_view() & noexcept(false) { if (error()) { throw simdjson_error(error()); } - return std::forward(first); + return first; } simdjson_really_inline simdjson_result::operator bool() && noexcept(false) { if (error()) { throw simdjson_error(error()); } return std::forward(first); } +simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} simdjson_really_inline simdjson_result::operator bool() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return 
std::forward(first); diff --git a/include/simdjson/generic/ondemand/value.h b/include/simdjson/generic/ondemand/value.h index 379dfc7b..e8d1d800 100644 --- a/include/simdjson/generic/ondemand/value.h +++ b/include/simdjson/generic/ondemand/value.h @@ -250,6 +250,28 @@ public: */ simdjson_really_inline simdjson_result end() & noexcept; + /** + * Look up a field by name on an object. + * + * Important notes: + * + * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * * **Once Only:** You may only look up a single field on a value. To look up multiple fields, + * you must cast to object or call `.get_object()`. + * + * @param key The key to look up. + * @returns The value of the field, NO_SUCH_FIELD if the field is not in the object, or + * INCORRECT_TYPE if the JSON value is not an object. + */ + simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; */ + simdjson_really_inline simdjson_result operator[](std::string_view key) && noexcept; + /** @overload simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; */ + simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; + /** @overload simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; */ + simdjson_really_inline simdjson_result operator[](const char *key) && noexcept; + protected: /** * Create a value. @@ -347,6 +369,28 @@ public: simdjson_really_inline simdjson_result begin() & noexcept; simdjson_really_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object. + * + * Important notes: + * + * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. 
`object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * * **Once Only:** You may only look up a single field on a value. To look up multiple fields, + * you must cast to object or call `.get_object()`. + * + * @param key The key to look up. + * @returns The value of the field, NO_SUCH_FIELD if the field is not in the object, or + * INCORRECT_TYPE if the JSON value is not an object. + */ + simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; */ + simdjson_really_inline simdjson_result operator[](std::string_view key) && noexcept; + /** @overload simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; */ + simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; + /** @overload simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; */ + simdjson_really_inline simdjson_result operator[](const char *key) && noexcept; }; } // namespace simdjson diff --git a/tests/ondemand/ondemand_basictests.cpp b/tests/ondemand/ondemand_basictests.cpp index 13d61174..f2ed2aea 100644 --- a/tests/ondemand/ondemand_basictests.cpp +++ b/tests/ondemand/ondemand_basictests.cpp @@ -655,6 +655,44 @@ namespace dom_api_tests { TEST_SUCCEED(); } + bool nested_object_index() { + TEST_START(); + auto json = R"({ "x": { "y": { "z": 2 } } }})"_padded; + SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { + ASSERT_EQUAL( doc_result["x"]["y"]["z"].get_uint64().first, 2 ); + return true; + })); + SUBTEST("ondemand::document", test_ondemand_doc(json, [&](auto doc_result) { + ondemand::document doc; + ASSERT_SUCCESS( std::move(doc_result).get(doc) ); + ASSERT_EQUAL( doc["x"]["y"]["z"].get_uint64().first, 2 ); + return true; + })); + SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { + simdjson_result object = 
doc_result.get_object(); + ASSERT_EQUAL( object["x"]["y"]["z"].get_uint64().first, 2 ); + return true; + })); + SUBTEST("ondemand::object", test_ondemand_doc(json, [&](auto doc_result) { + ondemand::object object; + ASSERT_SUCCESS( doc_result.get(object) ); + ASSERT_EQUAL( object["x"]["y"]["z"].get_uint64().first, 2 ); + return true; + })); + SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { + simdjson_result x = doc_result["x"]; + ASSERT_EQUAL( x["y"]["z"].get_uint64().first, 2 ); + return true; + })); + SUBTEST("ondemand::value", test_ondemand_doc(json, [&](auto doc_result) { + ondemand::value x; + ASSERT_SUCCESS( doc_result["x"].get(x) ); + ASSERT_EQUAL( x["y"]["z"].get_uint64().first, 2 ); + return true; + })); + TEST_SUCCEED(); + } + #if SIMDJSON_EXCEPTIONS bool iterate_object_exception() { @@ -776,6 +814,15 @@ namespace dom_api_tests { })); TEST_SUCCEED(); } + bool nested_object_index_exception() { + TEST_START(); + auto json = R"({ "x": { "y": { "z": 2 } } }})"_padded; + SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { + ASSERT_EQUAL( uint64_t(doc_result["x"]["y"]["z"]), 2 ); + return true; + })); + TEST_SUCCEED(); + } #endif @@ -790,6 +837,7 @@ namespace dom_api_tests { boolean_values() && null_value() && object_index() && + nested_object_index() && #if SIMDJSON_EXCEPTIONS iterate_object_exception() && iterate_array_exception() && @@ -797,6 +845,7 @@ namespace dom_api_tests { numeric_values_exception() && boolean_values_exception() && object_index_exception() && + nested_object_index_exception() && #endif true; } @@ -887,9 +936,8 @@ namespace twitter_tests { padded_string json; ASSERT_SUCCESS( padded_string::load(TWITTER_JSON).get(json) ); ASSERT_TRUE(test_ondemand_doc(json, [&](auto doc_result) { - auto metadata = doc_result["search_metadata"].get_object(); uint64_t count; - ASSERT_SUCCESS( metadata["count"].get(count) ); + ASSERT_SUCCESS( doc_result["search_metadata"]["count"].get(count) ); ASSERT_EQUAL( 
count, 100 ); return true; })); @@ -905,11 +953,7 @@ namespace twitter_tests { for (ondemand::object tweet : doc["statuses"]) { uint64_t id = tweet["id"]; std::string_view text = tweet["text"]; - std::string_view screen_name; - { - ondemand::object user = tweet["user"]; - screen_name = user["screen_name"]; - } + std::string_view screen_name = tweet["user"]["screen_name"]; uint64_t retweets = tweet["retweet_count"]; uint64_t favorites = tweet["favorite_count"]; (void) id;