From 2dac3705d2e842fa37c88431a96161a828caed18 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 19 Jul 2021 10:24:36 -0400 Subject: [PATCH] renames 'to_string' to 'to_json_string' and makes it ridiculously fast (#1642) * Changing the name of the function to 'to_json_string' from 'to_string' to avoid confusion. * Moving to a fast string_view model * Making it exception-safe. * Tweaking. * Workaround for exceptions. * more robust to_json_string (#1651) * WIP. * Fuzzing timeout (bug fix) (#1650) * prove pull request #1648 introduces an infinite loop * Interesting bug! * Tweak. Co-authored-by: Paul Dreik * It should now work. * Moving car examples to exception mode * Simplifying somewhat. * I forgot to abandon. Let us do that. * Adding more tests. * WIP. * It should now work. * Moving car examples to exception mode * Simplifying somewhat. * I forgot to abandon. Let us do that. * Adding more tests. Co-authored-by: Paul Dreik Co-authored-by: Paul Dreik --- .vscode/settings.json | 2 +- doc/basics.md | 42 ++- include/simdjson/generic/ondemand/array-inl.h | 13 + include/simdjson/generic/ondemand/array.h | 10 + .../simdjson/generic/ondemand/document-inl.h | 16 + include/simdjson/generic/ondemand/document.h | 12 +- .../generic/ondemand/json_iterator-inl.h | 22 +- .../simdjson/generic/ondemand/json_iterator.h | 2 +- .../simdjson/generic/ondemand/object-inl.h | 34 ++ include/simdjson/generic/ondemand/object.h | 11 + .../generic/ondemand/serialization-inl.h | 340 +++++++++--------- .../simdjson/generic/ondemand/serialization.h | 220 ++---------- .../generic/ondemand/token_iterator-inl.h | 2 +- .../generic/ondemand/token_iterator.h | 2 +- .../generic/ondemand/value_iterator-inl.h | 28 +- .../generic/ondemand/value_iterator.h | 11 +- .../ondemand/ondemand_json_pointer_tests.cpp | 73 +++- tests/ondemand/ondemand_parse_api_tests.cpp | 14 +- tests/ondemand/ondemand_readme_examples.cpp | 2 +- tests/ondemand/ondemand_tostring_tests.cpp | 303 ++++++++++++++-- tests/test_macros.h | 
2 +- 21 files changed, 744 insertions(+), 417 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index f0fda06b..683931cc 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -11,6 +11,7 @@ "chrono": "cpp", "optional": "cpp", "__locale": "cpp", + "__tuple": "cpp", "__bit_reference": "cpp", "__config": "cpp", "__debug": "cpp", @@ -24,7 +25,6 @@ "__string": "cpp", "__threading_support": "cpp", "__tree": "cpp", - "__tuple": "cpp", "algorithm": "cpp", "atomic": "cpp", "bit": "cpp", diff --git a/doc/basics.md b/doc/basics.md index 175e4e5f..9034bf82 100644 --- a/doc/basics.md +++ b/doc/basics.md @@ -253,19 +253,49 @@ support for users who avoid exceptions. See [the simdjson error handling documen - `field.value()` will get you the value, which you can then use all these other methods on. * **Array Index:** Because it is forward-only, you cannot look up an array element by index. Instead, you will need to iterate through the array and keep an index yourself. -* **Output to strings (simdjson 1.0 or better):** Given a document or an element (or node) out of a JSON document, you can output a JSON string version suitable to be parsed again as JSON content: `simdjson::to_string(element)` returns a `simdjson::simdjson_result` instance. You can cast it to `std::string` and it will throw when an error was encountered (`std::string(simdjson::to_string(element))`). Or else you can do `std::string s; if(simdjson::to_string(element).get(s) == simdjson::SUCCESS) { ... }`. This consumes fully the element: if you apply it on a document, the JSON pointer is advanced to the end of the document. The returned string contains a serialized version of the element or document that is suitable to be parsed again. It is also a newly allocated `std::string` that is independent from the simdjson parser. 
The `to_string` function should not be confused with retrieving the value of a string instance which are escaped and represented using a lightweight `std::string_view` instance pointing at an internal string buffer inside the parser instance. To illustrate, the first of the following two code segments will print the unescaped string `"test"` complete with the quote whereas the second one will print the escaped content of the string (without the quotes). Th +* **Output to strings (simdjson 1.0 or better):** Given a document, a value, an array or an object in a JSON document, you can output a JSON string version suitable to be parsed again as JSON content: `simdjson::to_json_string(element)`. A call to `to_json_string` fully consumes the element: if you apply it to a document, the JSON pointer is advanced to the end of the document. The `simdjson::to_json_string` function does not allocate memory. The `to_json_string` function should not be confused with retrieving the value of a string instance, which is unescaped and represented using a lightweight `std::string_view` instance pointing at an internal string buffer inside the parser instance. To illustrate, the first of the following two code segments will print the escaped JSON string `"result"` complete with the quotes, whereas the second one will print the unescaped content of the string (without the quotes).
> ```C++ > // serialize a JSON to an escaped std::string instance so that it can be parsed again as JSON - > auto cars_json = R"( { "test": "result" } )"_padded; - > ondemand::document doc = parser.iterate(cars_json); - > std::cout << simdjson::to_string(doc["test"]) << std::endl; // Requires simdjson 1.0 or better + > auto silly_json = R"( { "test": "result" } )"_padded; + > ondemand::document doc = parser.iterate(silly_json); + > std::cout << simdjson::to_json_string(doc["test"]) << std::endl; // Requires simdjson 1.0 or better >```` > ```C++ > // retrieves an unescaped string value as a string_view instance - > auto cars_json = R"( { "test": "result" } )"_padded; - > ondemand::document doc = parser.iterate(cars_json); + > auto silly_json = R"( { "test": "result" } )"_padded; + > ondemand::document doc = parser.iterate(silly_json); > std::cout << std::string_view(doc["test"]) << std::endl; >```` +You can use `to_json_string` to efficiently extract components of a JSON document to reconstruct a new JSON document, as in the following example: + > ```C++ + > auto cars_json = R"( [ + > { "make": "Toyota", "model": "Camry", "year": 2018, "tire_pressure": [ 40.1, 39.9, 37.7, 40.4 ] }, + > { "make": "Kia", "model": "Soul", "year": 2012, "tire_pressure": [ 30.1, 31.0, 28.6, 28.7 ] }, + > { "make": "Toyota", "model": "Tercel", "year": 1999, "tire_pressure": [ 29.8, 30.0, 30.2, 30.5 ] } + > ] )"_padded; + > std::vector arrays; + > // We are going to collect string_view instances which point inside the `cars_json` string + > // and are therefore valid as long as `cars_json` remains in scope. 
+ > { + > ondemand::parser parser; + > for (ondemand::object car : parser.iterate(cars_json)) { + > if(uint64_t(car["year"]) > 2000) { + > arrays.push_back(simdjson::to_json_string(car["tire_pressure"])); + > } + > } + > } + > // We can now convert to a JSON string: + > std::ostringstream oss; + > oss << "["; + > for(size_t i = 0; i < arrays.size(); i++) { + > if(i>0) { oss << ","; } + > oss << arrays[i]; + > } + > oss << "]"; + > auto json_string = oss.str(); + > // json_string == "[[ 40.1, 39.9, 37.7, 40.4 ],[ 30.1, 31.0, 28.6, 28.7 ]]" + >```` + ### Examples diff --git a/include/simdjson/generic/ondemand/array-inl.h b/include/simdjson/generic/ondemand/array-inl.h index 0a3d1931..88a72e2d 100644 --- a/include/simdjson/generic/ondemand/array-inl.h +++ b/include/simdjson/generic/ondemand/array-inl.h @@ -71,6 +71,19 @@ simdjson_really_inline simdjson_result array::begin() noexcept { simdjson_really_inline simdjson_result array::end() noexcept { return array_iterator(iter); } +simdjson_really_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_really_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek(0)}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} simdjson_really_inline simdjson_result array::count_elements() & noexcept { size_t count{0}; diff --git a/include/simdjson/generic/ondemand/array.h b/include/simdjson/generic/ondemand/array.h index d1381dd4..fe428c85 100644 --- a/include/simdjson/generic/ondemand/array.h +++ b/include/simdjson/generic/ondemand/array.h @@ -73,8 +73,18 @@ public: * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) 
noexcept; + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + */ + simdjson_really_inline simdjson_result raw_json() noexcept; protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_really_inline error_code consume() noexcept; + /** * Begin array iteration. * diff --git a/include/simdjson/generic/ondemand/document-inl.h b/include/simdjson/generic/ondemand/document-inl.h index 75b0cd29..44b4de25 100644 --- a/include/simdjson/generic/ondemand/document-inl.h +++ b/include/simdjson/generic/ondemand/document-inl.h @@ -125,6 +125,22 @@ simdjson_really_inline simdjson_result document::operator[](const char *k return resume_value()[key]; } +simdjson_really_inline error_code document::consume() noexcept { + auto error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_really_inline simdjson_result document::raw_json() noexcept { + // Record where the root value starts first: consume() below advances the iterator. + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter.peek(0)}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + simdjson_really_inline simdjson_result document::type() noexcept { return get_root_value_iterator().type(); } diff --git a/include/simdjson/generic/ondemand/document.h b/include/simdjson/generic/ondemand/document.h index 4e4a0731..75805b5d 100644 --- a/include/simdjson/generic/ondemand/document.h +++ b/include/simdjson/generic/ondemand/document.h @@ -351,8 +351,18 @@ public: * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; - + /** + * Consumes the document and returns a string_view instance corresponding to the + * 
document as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_really_inline simdjson_result raw_json() noexcept; protected: + /** + * Consumes the document. + */ + simdjson_really_inline error_code consume() noexcept; + simdjson_really_inline document(ondemand::json_iterator &&iter) noexcept; simdjson_really_inline const uint8_t *text(uint32_t idx) const noexcept; diff --git a/include/simdjson/generic/ondemand/json_iterator-inl.h b/include/simdjson/generic/ondemand/json_iterator-inl.h index f0a1cac4..ad01f9c2 100644 --- a/include/simdjson/generic/ondemand/json_iterator-inl.h +++ b/include/simdjson/generic/ondemand/json_iterator-inl.h @@ -43,9 +43,19 @@ inline void json_iterator::rewind() noexcept { SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_warn_unused simdjson_really_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + /*** + * WARNING: + * Inside an object, a string value is at a depth of +1 compared to the object. Yet a key + * is at the same depth as the object. + * But json_iterator cannot easily tell whether we are pointing at a key or a string value. + * Instead, it assumes that if you are pointing at a string, then it is a value, not a key. + * To be clear... + * the following code assumes that we are *not* pointing at a key. If we are, then a bug + * will follow. Unfortunately, it is not possible for the json_iterator itself to make this + * check. 
+ */ if (depth() <= parent_depth) { return SUCCESS; } - - switch (*advance()) { + switch (*return_current_and_advance()) { // TODO consider whether matching braces is a requirement: if non-matching braces indicates // *missing* braces, then future lookups are not in the object/arrays they think they are, // violating the rule "validate enough structure that the user can be confident they are @@ -92,7 +102,7 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::skip_child // Now that we've considered the first value, we only increment/decrement for arrays/objects auto end = &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; while (token.index <= end) { - switch (*advance()) { + switch (*return_current_and_advance()) { case '[': case '{': logger::log_start_value(*this, "skip"); _depth++; @@ -157,8 +167,8 @@ simdjson_really_inline void json_iterator::abandon() noexcept { _depth = 0; } -simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { - return token.advance(); +simdjson_really_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { + return token.return_current_and_advance(); } simdjson_really_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { @@ -272,7 +282,7 @@ simdjson_warn_unused simdjson_really_inline bool json_iterator::peek_to_buffer(u template simdjson_warn_unused simdjson_really_inline bool json_iterator::advance_to_buffer(uint8_t (&tmpbuf)[N]) noexcept { auto max_len = peek_length(); - auto json = advance(); + auto json = return_current_and_advance(); return copy_to_buffer(json, max_len, tmpbuf); } diff --git a/include/simdjson/generic/ondemand/json_iterator.h b/include/simdjson/generic/ondemand/json_iterator.h index 16d9e5b1..257609af 100644 --- a/include/simdjson/generic/ondemand/json_iterator.h +++ b/include/simdjson/generic/ondemand/json_iterator.h @@ -89,7 +89,7 @@ public: /** * Advance the current token. 
*/ - simdjson_really_inline const uint8_t *advance() noexcept; + simdjson_really_inline const uint8_t *return_current_and_advance() noexcept; /** * Get the JSON text for a given token (relative). diff --git a/include/simdjson/generic/ondemand/object-inl.h b/include/simdjson/generic/ondemand/object-inl.h index 9a86cb71..319b744f 100644 --- a/include/simdjson/generic/ondemand/object-inl.h +++ b/include/simdjson/generic/ondemand/object-inl.h @@ -45,6 +45,40 @@ simdjson_really_inline simdjson_result object::start_root(value_iterator SIMDJSON_TRY( iter.start_root_object().get(has_value) ); return object(iter); } +simdjson_really_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. 
+ if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; +} + +simdjson_really_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek(0)}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + simdjson_really_inline object object::started(value_iterator &iter) noexcept { simdjson_unused bool has_value = iter.started_object(); return iter; diff --git a/include/simdjson/generic/ondemand/object.h b/include/simdjson/generic/ondemand/object.h index 8723821d..08e34efe 100644 --- a/include/simdjson/generic/ondemand/object.h +++ b/include/simdjson/generic/ondemand/object.h @@ -110,7 +110,18 @@ public: */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_really_inline simdjson_result raw_json() noexcept; + protected: + /** + * Go to the end of the object, no matter where you are right now. 
+ */ + simdjson_really_inline error_code consume() noexcept; static simdjson_really_inline simdjson_result start(value_iterator &iter) noexcept; static simdjson_really_inline simdjson_result start_root(value_iterator &iter) noexcept; static simdjson_really_inline object started(value_iterator &iter) noexcept; diff --git a/include/simdjson/generic/ondemand/serialization-inl.h b/include/simdjson/generic/ondemand/serialization-inl.h index 9a9d6d67..dae088b1 100644 --- a/include/simdjson/generic/ondemand/serialization-inl.h +++ b/include/simdjson/generic/ondemand/serialization-inl.h @@ -1,184 +1,196 @@ namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - - -template -inline simdjson::error_code string_builder::append(document& element) noexcept { - json_type t; - auto e = element.type().get(t); - if(e != simdjson::SUCCESS) { return e; } - switch (t) { - case ondemand::json_type::array: - { - array x; - simdjson::error_code error = element.get_array().get(x); - if(error == simdjson::SUCCESS) { - append(x); - } - return error; - } - case ondemand::json_type::object: - { - object x; - simdjson::error_code error = element.get_object().get(x); - if(error == simdjson::SUCCESS) { - append(x); - } - return error; - } - case ondemand::json_type::number: - // Assume it fits in a double. We do not detect integer types. This could be improved. - { - double x; - simdjson::error_code error = element.get_double().get(x); - if(error == simdjson::SUCCESS) { - format.number(x); - } - return error; - } - case ondemand::json_type::string: - { - std::string_view x; - simdjson::error_code error = element.get_string().get(x); - if(error == simdjson::SUCCESS) { - format.string(x); - } - return error; - } - case ondemand::json_type::boolean: - { - bool x; - simdjson::error_code error = element.get_bool().get(x); - if(error == simdjson::SUCCESS) { - x ? 
format.true_atom() : format.false_atom(); - } - return error; - } - case ondemand::json_type::null: - format.null_atom(); - return simdjson::SUCCESS; - } - return simdjson::INCORRECT_TYPE; +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); } -template -inline simdjson::error_code string_builder::append(value element) noexcept { - json_type t; - auto e = element.type().get(t); - if(e != simdjson::SUCCESS) { return e; } - switch (t) { - case ondemand::json_type::array: - { - array x; - simdjson::error_code error = element.get_array().get(x); - if(error == simdjson::SUCCESS) { - append(x); - } - return error; - } - case ondemand::json_type::object: - { - object x; - simdjson::error_code error = element.get_object().get(x); - if(error == simdjson::SUCCESS) { - append(x); - } - return error; - } - case ondemand::json_type::number: - // Assume it fits in a double. We do not detect integer types. This could be improved. - { - double x; - simdjson::error_code error = element.get_double().get(x); - if(error == simdjson::SUCCESS) { - format.number(x); - } - return error; - } - case ondemand::json_type::string: - { - std::string_view x; - simdjson::error_code error = element.get_string().get(x); - if(error == simdjson::SUCCESS) { - format.string(x); - } - return error; - } - break; - case ondemand::json_type::boolean: - { - bool x; - simdjson::error_code error = element.get_bool().get(x); - if(error == simdjson::SUCCESS) { - x ? 
format.true_atom() : format.false_atom(); - } - return error; - } - case ondemand::json_type::null: - format.null_atom(); - return simdjson::SUCCESS; - } - return simdjson::INCORRECT_TYPE; -} -template -inline simdjson::error_code string_builder::append(simdjson::SIMDJSON_IMPLEMENTATION::ondemand::field x) noexcept { - // Performance note: There is a sizeable performance opportunity here to avoid unescaping - // and the re-escaping the key!!!! +inline simdjson_result to_json_string(SIMDJSON_IMPLEMENTATION::ondemand::document& x) noexcept { std::string_view v; - auto error = x.unescaped_key().get(v); - if (error) { return error; } - format.key(v); - return append(x.value()); + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } - -template -inline simdjson::error_code string_builder::append(simdjson::SIMDJSON_IMPLEMENTATION::ondemand::array x) noexcept { - format.start_array(); - bool first{true}; - for(simdjson::simdjson_result v: x) { - simdjson::SIMDJSON_IMPLEMENTATION::ondemand::value element; - simdjson::error_code error = std::move(v).get(element); - if(error != simdjson::SUCCESS) { return error; } - if(first) { first = false; } else { format.comma(); }; - error = append(element); - if(error != simdjson::SUCCESS) { return error; } +inline simdjson_result to_json_string(SIMDJSON_IMPLEMENTATION::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. 
+ */ + using namespace SIMDJSON_IMPLEMENTATION::ondemand; + SIMDJSON_IMPLEMENTATION::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + SIMDJSON_IMPLEMENTATION::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + SIMDJSON_IMPLEMENTATION::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); } - format.end_array(); - return simdjson::SUCCESS; } -template -inline simdjson::error_code string_builder::append(simdjson::SIMDJSON_IMPLEMENTATION::ondemand::object x) noexcept { - format.start_object(); - bool first{true}; - for(simdjson::simdjson_result r: x) { - simdjson::SIMDJSON_IMPLEMENTATION::ondemand::field element; - simdjson::error_code error = std::move(r).get(element); - if(error != simdjson::SUCCESS) { return error; } - if(first) { first = false; } else { format.comma(); }; - error = append(element); - if(error != simdjson::SUCCESS) { return error; } - } - format.end_object(); - return simdjson::SUCCESS; +inline simdjson_result to_json_string(SIMDJSON_IMPLEMENTATION::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -template -simdjson_really_inline void string_builder::clear() { - format.clear(); +inline simdjson_result to_json_string(SIMDJSON_IMPLEMENTATION::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -template -simdjson_really_inline std::string_view string_builder::str() const { - return format.str(); +#if SIMDJSON_EXCEPTIONS + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value()); } -} 
// namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value()); +} +#endif } // namespace simdjson + + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_IMPLEMENTATION::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_IMPLEMENTATION::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_IMPLEMENTATION::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_IMPLEMENTATION::ondemand::array value) { + std::string_view v; + auto error = 
simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_IMPLEMENTATION::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_IMPLEMENTATION::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_IMPLEMENTATION::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_IMPLEMENTATION::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif diff --git a/include/simdjson/generic/ondemand/serialization.h b/include/simdjson/generic/ondemand/serialization.h index 1e17782e..bf33891a 100644 --- a/include/simdjson/generic/ondemand/serialization.h +++ 
b/include/simdjson/generic/ondemand/serialization.h @@ -2,37 +2,35 @@ #include "simdjson/error.h" namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. + */ +inline simdjson_result to_json_string(SIMDJSON_IMPLEMENTATION::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. + */ +inline simdjson_result to_json_string(SIMDJSON_IMPLEMENTATION::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. + */ +inline simdjson_result to_json_string(SIMDJSON_IMPLEMENTATION::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. + */ +inline simdjson_result to_json_string(SIMDJSON_IMPLEMENTATION::ondemand::array& x) noexcept; +#if SIMDJSON_EXCEPTIONS +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +#endif +} // namespace simdjson -template -class string_builder { -public: - /** Append an document to the builder (to be printed), numbers are - * assumed to be 64-bit floating-point numbers. 
- **/ - inline simdjson::error_code append(document& value) noexcept; - /** Append an element to the builder (to be printed) **/ - inline simdjson::error_code append(value element) noexcept; - /** Append an array to the builder (to be printed) **/ - inline simdjson::error_code append(array value) noexcept; - /** Append an object to the builder (to be printed) **/ - inline simdjson::error_code append(object value) noexcept; - /** Append a field to the builder (to be printed) **/ - inline simdjson::error_code append(field value) noexcept; - /** Reset the builder (so that it would print the empty string) **/ - simdjson_really_inline void clear(); - /** - * Get access to the string. The string_view is owned by the builder - * and it is invalid to use it after the string_builder has been - * destroyed. - * However you can make a copy of the string_view on memory that you - * own. - */ - simdjson_really_inline std::string_view str() const; -private: - formatter format{}; -}; /** * Print JSON to an output stream. @@ -41,30 +39,9 @@ private: * @param value The element. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
*/ +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_IMPLEMENTATION::ondemand::value x); #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, value x) { - simdjson::SIMDJSON_IMPLEMENTATION::ondemand::string_builder<> sb; - auto err = sb.append(x); - if(err == simdjson::SUCCESS) { - return (out << sb.str()); - } else { - throw simdjson::simdjson_error(err); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, value x) { - simdjson::SIMDJSON_IMPLEMENTATION::ondemand::string_builder<> sb; - auto error = sb.append(x); - if(error == simdjson::SUCCESS) { - return (out << sb.str()); - } else { - return (out << error); - } -} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. @@ -73,30 +50,9 @@ inline std::ostream& operator<<(std::ostream& out, value x) { * @param value The array. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
*/ +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_IMPLEMENTATION::ondemand::array value); #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, array value) { - simdjson::SIMDJSON_IMPLEMENTATION::ondemand::string_builder<> sb; - auto err = sb.append(value); - if(err == simdjson::SUCCESS) { - return (out << sb.str()); - } else { - throw simdjson::simdjson_error(err); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, array value) { - simdjson::SIMDJSON_IMPLEMENTATION::ondemand::string_builder<> sb; - auto error = sb.append(value); - if(error == simdjson::SUCCESS) { - return (out << sb.str()); - } else { - return (out << error); - } -} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. @@ -105,30 +61,9 @@ inline std::ostream& operator<<(std::ostream& out, array value) { * @param value The array. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
*/ +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_IMPLEMENTATION::ondemand::document& value); #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, document& value) { - simdjson::SIMDJSON_IMPLEMENTATION::ondemand::string_builder<> sb; - auto err = sb.append(value); - if(err == simdjson::SUCCESS) { - return (out << sb.str()); - } else { - throw simdjson::simdjson_error(err); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, document& value) { - simdjson::SIMDJSON_IMPLEMENTATION::ondemand::string_builder<> sb; - auto error = sb.append(value); - if(error == simdjson::SUCCESS) { - return (out << sb.str()); - } else { - return (out << error); - } -} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. @@ -137,90 +72,7 @@ inline std::ostream& operator<<(std::ostream& out, document& value) { * @param value The object. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
*/ +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_IMPLEMENTATION::ondemand::object value); #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, object value) { - simdjson::SIMDJSON_IMPLEMENTATION::ondemand::string_builder<> sb; - auto err = sb.append(value); - if(err == simdjson::SUCCESS) { - return (out << sb.str()); - } else { - throw simdjson::simdjson_error(err); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, object value) { - simdjson::SIMDJSON_IMPLEMENTATION::ondemand::string_builder<> sb; - auto error = sb.append(value); - if(error == simdjson::SUCCESS) { - return (out << sb.str()); - } else { - return (out << error); - } -} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -inline simdjson::simdjson_result to_string(simdjson::SIMDJSON_IMPLEMENTATION::ondemand::document& x) { - simdjson::SIMDJSON_IMPLEMENTATION::ondemand::string_builder<> sb; - auto error = sb.append(x); - if(error != simdjson::SUCCESS) { return error; } - std::string_view answer = sb.str(); - return std::string(answer.data(), answer.size()); -} - -inline simdjson::simdjson_result to_string(simdjson::SIMDJSON_IMPLEMENTATION::ondemand::value& x) { - simdjson::SIMDJSON_IMPLEMENTATION::ondemand::string_builder<> sb; - auto error = sb.append(x); - if(error != simdjson::SUCCESS) { return error; } - std::string_view answer = sb.str(); - return std::string(answer.data(), answer.size()); -} - -inline simdjson::simdjson_result to_string(simdjson::SIMDJSON_IMPLEMENTATION::ondemand::object& x) { - simdjson::SIMDJSON_IMPLEMENTATION::ondemand::string_builder<> sb; - auto error = sb.append(x); - if(error != 
simdjson::SUCCESS) { return error; } - std::string_view answer = sb.str(); - return std::string(answer.data(), answer.size()); -} - -inline simdjson::simdjson_result to_string(simdjson::SIMDJSON_IMPLEMENTATION::ondemand::array& x) { - simdjson::SIMDJSON_IMPLEMENTATION::ondemand::string_builder<> sb; - auto error = sb.append(x); - if(error != simdjson::SUCCESS) { return error; } - std::string_view answer = sb.str(); - return std::string(answer.data(), answer.size()); -} - -#if SIMDJSON_EXCEPTIONS - -inline std::string to_string(simdjson_result x) { - if (x.error()) { throw simdjson_error(x.error()); } - return to_string(x.value()); -} - -inline std::string to_string(simdjson_result x) { - if (x.error()) { throw simdjson_error(x.error()); } - return to_string(x.value()); -} - -inline std::string to_string(simdjson_result x) { - if (x.error()) { throw simdjson_error(x.error()); } - return to_string(x.value()); -} - -inline std::string to_string(simdjson_result x) { - if (x.error()) { throw simdjson_error(x.error()); } - return to_string(x.value()); -} -#endif -} // namespace simdjson diff --git a/include/simdjson/generic/ondemand/token_iterator-inl.h b/include/simdjson/generic/ondemand/token_iterator-inl.h index 034ab9e8..7ef82359 100644 --- a/include/simdjson/generic/ondemand/token_iterator-inl.h +++ b/include/simdjson/generic/ondemand/token_iterator-inl.h @@ -12,7 +12,7 @@ simdjson_really_inline uint32_t token_iterator::current_offset() const noexcept } -simdjson_really_inline const uint8_t *token_iterator::advance() noexcept { +simdjson_really_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { return &buf[*(index++)]; } diff --git a/include/simdjson/generic/ondemand/token_iterator.h b/include/simdjson/generic/ondemand/token_iterator.h index 042a762d..dafc1d75 100644 --- a/include/simdjson/generic/ondemand/token_iterator.h +++ b/include/simdjson/generic/ondemand/token_iterator.h @@ -26,7 +26,7 @@ public: * * Does not check or update 
depth/expect_value. Caller is responsible for that. */ - simdjson_really_inline const uint8_t *advance() noexcept; + simdjson_really_inline const uint8_t *return_current_and_advance() noexcept; /** * Reports the current offset in bytes from the start of the underlying buffer. */ diff --git a/include/simdjson/generic/ondemand/value_iterator-inl.h b/include/simdjson/generic/ondemand/value_iterator-inl.h index 04c43ee6..ed2b22fe 100644 --- a/include/simdjson/generic/ondemand/value_iterator-inl.h +++ b/include/simdjson/generic/ondemand/value_iterator-inl.h @@ -30,7 +30,7 @@ simdjson_warn_unused simdjson_really_inline bool value_iterator::started_object( #endif if (*_json_iter->peek() == '}') { logger::log_value(*_json_iter, "empty object"); - _json_iter->advance(); + _json_iter->return_current_and_advance(); _json_iter->ascend_to(depth()-1); return false; } @@ -41,7 +41,7 @@ simdjson_warn_unused simdjson_really_inline bool value_iterator::started_object( simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::has_next_field() noexcept { assert_at_next(); - switch (*_json_iter->advance()) { + switch (*_json_iter->return_current_and_advance()) { case '}': logger::log_end_value(*_json_iter, "object"); _json_iter->ascend_to(depth()-1); @@ -340,7 +340,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::field_key() noexcept { assert_at_next(); - const uint8_t *key = _json_iter->advance(); + const uint8_t *key = _json_iter->return_current_and_advance(); if (*(key++) != '"') { return _json_iter->report_error(TAPE_ERROR, "Object key is not a string"); } return raw_json_string(key); } @@ -348,7 +348,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result val simdjson_warn_unused simdjson_really_inline error_code value_iterator::field_value() noexcept { assert_at_next(); - if (*_json_iter->advance() != ':') { return 
_json_iter->report_error(TAPE_ERROR, "Missing colon in object field"); } + if (*_json_iter->return_current_and_advance() != ':') { return _json_iter->report_error(TAPE_ERROR, "Missing colon in object field"); } _json_iter->descend_to(depth()+1); return SUCCESS; } @@ -378,7 +378,7 @@ simdjson_warn_unused simdjson_really_inline bool value_iterator::started_array() assert_at_container_start(); if (*_json_iter->peek() == ']') { logger::log_value(*_json_iter, "empty array"); - _json_iter->advance(); + _json_iter->return_current_and_advance(); _json_iter->ascend_to(depth()-1); return false; } @@ -393,7 +393,7 @@ simdjson_warn_unused simdjson_really_inline bool value_iterator::started_array() simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::has_next_element() noexcept { assert_at_next(); - switch (*_json_iter->advance()) { + switch (*_json_iter->return_current_and_advance()) { case ']': logger::log_end_value(*_json_iter, "array"); _json_iter->ascend_to(depth()-1); @@ -554,7 +554,7 @@ simdjson_really_inline const uint8_t *value_iterator::advance_start(const char * // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. assert_at_start(); - auto result = _json_iter->advance(); + auto result = _json_iter->return_current_and_advance(); _json_iter->ascend_to(depth()-1); return result; } @@ -572,7 +572,7 @@ simdjson_really_inline error_code value_iterator::advance_container_start(const // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. 
assert_at_start(); - json = _json_iter->advance(); + json = _json_iter->return_current_and_advance(); return SUCCESS; } simdjson_really_inline const uint8_t *value_iterator::advance_root_scalar(const char *type) const noexcept { @@ -580,7 +580,7 @@ simdjson_really_inline const uint8_t *value_iterator::advance_root_scalar(const if (!is_at_start()) { return peek_start(); } assert_at_root(); - auto result = _json_iter->advance(); + auto result = _json_iter->return_current_and_advance(); _json_iter->ascend_to(depth()-1); return result; } @@ -589,7 +589,7 @@ simdjson_really_inline const uint8_t *value_iterator::advance_non_root_scalar(co if (!is_at_start()) { return peek_start(); } assert_at_non_root_start(); - auto result = _json_iter->advance(); + auto result = _json_iter->return_current_and_advance(); _json_iter->ascend_to(depth()-1); return result; } @@ -602,6 +602,14 @@ simdjson_really_inline error_code value_iterator::incorrect_type_error(const cha simdjson_really_inline bool value_iterator::is_at_start() const noexcept { return _json_iter->token.index == _start_position; } + +simdjson_really_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} + simdjson_really_inline bool value_iterator::is_at_iterator_start() const noexcept { // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). 
auto delta = _json_iter->token.index - _start_position; diff --git a/include/simdjson/generic/ondemand/value_iterator.h b/include/simdjson/generic/ondemand/value_iterator.h index 7d3d7253..558258dc 100644 --- a/include/simdjson/generic/ondemand/value_iterator.h +++ b/include/simdjson/generic/ondemand/value_iterator.h @@ -313,7 +313,6 @@ protected: simdjson_really_inline bool parse_null(const uint8_t *json) const noexcept; simdjson_really_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; - simdjson_really_inline const uint8_t *peek_start() const noexcept; simdjson_really_inline uint32_t peek_start_length() const noexcept; simdjson_really_inline const uint8_t *advance_start(const char *type) const noexcept; @@ -331,6 +330,16 @@ protected: * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) */ simdjson_really_inline bool is_at_iterator_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. 
+ */ + simdjson_really_inline bool is_at_key() const noexcept; + inline void assert_at_start() const noexcept; inline void assert_at_container_start() const noexcept; inline void assert_at_root() const noexcept; diff --git a/tests/ondemand/ondemand_json_pointer_tests.cpp b/tests/ondemand/ondemand_json_pointer_tests.cpp index 6f623600..5aa3d696 100644 --- a/tests/ondemand/ondemand_json_pointer_tests.cpp +++ b/tests/ondemand/ondemand_json_pointer_tests.cpp @@ -43,10 +43,10 @@ namespace json_pointer_tests { ondemand::parser parser; ondemand::document doc; ondemand::value val; - std::string actual; + std::string_view actual; ASSERT_SUCCESS(parser.iterate(json).get(doc)); ASSERT_SUCCESS(doc.at_pointer(json_pointer).get(val)); - ASSERT_SUCCESS(simdjson::to_string(val).get(actual)); + ASSERT_SUCCESS(simdjson::to_json_string(val).get(actual)); ASSERT_EQUAL(actual,expected); TEST_SUCCEED(); } @@ -275,8 +275,19 @@ namespace json_pointer_tests { json_pointer_invalidation() && demo_test() && demo_relative_path() && - run_success_test(TEST_RFC_JSON,"",R"({"foo":["bar","baz"],"":0,"a/b":1,"c%d":2,"e^f":3,"g|h":4,"i\\j":5,"k\"l":6," ":7,"m~n":8})") && - run_success_test(TEST_RFC_JSON,"/foo",R"(["bar","baz"])") && + run_success_test(TEST_RFC_JSON,"",R"({ + "foo": ["bar", "baz"], + "": 0, + "a/b": 1, + "c%d": 2, + "e^f": 3, + "g|h": 4, + "i\\j": 5, + "k\"l": 6, + " ": 7, + "m~n": 8 + })") && + run_success_test(TEST_RFC_JSON,"/foo",R"(["bar", "baz"])") && run_success_test(TEST_RFC_JSON,"/foo/0",R"("bar")") && run_success_test(TEST_RFC_JSON,"/",R"(0)") && run_success_test(TEST_RFC_JSON,"/a~1b",R"(1)") && @@ -287,16 +298,60 @@ namespace json_pointer_tests { run_success_test(TEST_RFC_JSON,R"(/k\"l)",R"(6)") && run_success_test(TEST_RFC_JSON,"/ ",R"(7)") && run_success_test(TEST_RFC_JSON,"/m~0n",R"(8)") && - run_success_test(TEST_JSON, "", R"({"/~01abc":[0,{"\\\" 0":["value0","value1"]}],"0":"0 ok","01":"01 ok","":"empty ok","arr":[]})") && - run_success_test(TEST_JSON, R"(/~1~001abc)", 
R"([0,{"\\\" 0":["value0","value1"]}])") && - run_success_test(TEST_JSON, R"(/~1~001abc/1)", R"({"\\\" 0":["value0","value1"]})") && - run_success_test(TEST_JSON, R"(/~1~001abc/1/\\\" 0)", R"(["value0","value1"])") && + run_success_test(TEST_JSON, "", R"({ + "/~01abc": [ + 0, + { + "\\\" 0": [ + "value0", + "value1" + ] + } + ], + "0": "0 ok", + "01": "01 ok", + "": "empty ok", + "arr": [] + })") && + run_success_test(TEST_JSON, R"(/~1~001abc)", R"([ + 0, + { + "\\\" 0": [ + "value0", + "value1" + ] + } + ])") && + run_success_test(TEST_JSON, R"(/~1~001abc/1)", R"({ + "\\\" 0": [ + "value0", + "value1" + ] + })") && + run_success_test(TEST_JSON, R"(/~1~001abc/1/\\\" 0)", R"([ + "value0", + "value1" + ])") && run_success_test(TEST_JSON, R"(/~1~001abc/1/\\\" 0/0)", "\"value0\"") && run_success_test(TEST_JSON, R"(/~1~001abc/1/\\\" 0/1)", "\"value1\"") && run_success_test(TEST_JSON, "/arr", R"([])") && run_success_test(TEST_JSON, "/0", "\"0 ok\"") && run_success_test(TEST_JSON, "/01", "\"01 ok\"") && - run_success_test(TEST_JSON, "", R"({"/~01abc":[0,{"\\\" 0":["value0","value1"]}],"0":"0 ok","01":"01 ok","":"empty ok","arr":[]})") && + run_success_test(TEST_JSON, "", R"({ + "/~01abc": [ + 0, + { + "\\\" 0": [ + "value0", + "value1" + ] + } + ], + "0": "0 ok", + "01": "01 ok", + "": "empty ok", + "arr": [] + })") && run_failure_test(TEST_JSON, R"(/~1~001abc/1/\\\" 0/2)", INDEX_OUT_OF_BOUNDS) && run_failure_test(TEST_JSON, "/arr/0", INDEX_OUT_OF_BOUNDS) && run_failure_test(TEST_JSON, "~1~001abc", INVALID_JSON_POINTER) && diff --git a/tests/ondemand/ondemand_parse_api_tests.cpp b/tests/ondemand/ondemand_parse_api_tests.cpp index 5a157c8c..d8a4570e 100644 --- a/tests/ondemand/ondemand_parse_api_tests.cpp +++ b/tests/ondemand/ondemand_parse_api_tests.cpp @@ -173,14 +173,14 @@ namespace parse_api_tests { auto json = R"({"key": "value"})"_padded; auto jsonbad = R"({"key": "value")"_padded; // deliberaty broken auto jsonunclosedstring = 
"{\"coordinates:[{\"x\":1.1,\"y\":2.2,\"z\":3.3}]}"_padded; - std::string output; + std::string_view output; ondemand::parser parser; std::cout << "correct document (1)" << std::endl; ASSERT_SUCCESS( parser.iterate(json).get(doc) ); - ASSERT_SUCCESS(simdjson::to_string(doc).get(output)); + ASSERT_SUCCESS(simdjson::to_json_string(doc).get(output)); std::cout << output << std::endl; std::cout << "correct document (2)" << std::endl; @@ -200,10 +200,8 @@ namespace parse_api_tests { } std::cout << "truncated document " << std::endl; - ASSERT_SUCCESS( parser.iterate(jsonbad).get(doc) ); - - ASSERT_EQUAL( simdjson::to_string(doc).get(output), TAPE_ERROR ); + ASSERT_EQUAL( simdjson::to_json_string(doc).get(output), TAPE_ERROR ); std::cout << "correct document with new doc" << std::endl; ondemand::document doc2; @@ -222,7 +220,7 @@ namespace parse_api_tests { std::cout << "unclosed string document " << std::endl; ASSERT_SUCCESS( parser.iterate(jsonbad).get(doc) ); - ASSERT_EQUAL( simdjson::to_string(doc).get(output), TAPE_ERROR ); + ASSERT_EQUAL( simdjson::to_json_string(doc).get(output), TAPE_ERROR ); // next two lines are terrible code. 
doc.~document(); @@ -232,7 +230,7 @@ namespace parse_api_tests { std::cout << "correct document (4)" << std::endl; ASSERT_SUCCESS( parser.iterate(json).get(doc) ); - ASSERT_SUCCESS( simdjson::to_string(doc).get(output) ); + ASSERT_SUCCESS( simdjson::to_json_string(doc).get(output) ); std::cout << output << std::endl; std::cout << "unclosed string document " << std::endl; @@ -253,7 +251,7 @@ namespace parse_api_tests { std::cout << "correct document (5)" << std::endl; ASSERT_SUCCESS( parser.iterate(json).get(doc) ); - ASSERT_SUCCESS( simdjson::to_string(doc).get(output) ); + ASSERT_SUCCESS( simdjson::to_json_string(doc).get(output) ); std::cout << output << std::endl; TEST_SUCCEED(); diff --git a/tests/ondemand/ondemand_readme_examples.cpp b/tests/ondemand/ondemand_readme_examples.cpp index e200b7c1..43604508 100644 --- a/tests/ondemand/ondemand_readme_examples.cpp +++ b/tests/ondemand/ondemand_readme_examples.cpp @@ -117,7 +117,7 @@ bool json_array_count_complex() { std::cout << "Number of elements: " << count << std::endl; size_t c = 0; for(ondemand::object elem : test_array) { - std::cout << simdjson::to_string(elem); + std::cout << simdjson::to_json_string(elem); c++; } std::cout << std::endl; diff --git a/tests/ondemand/ondemand_tostring_tests.cpp b/tests/ondemand/ondemand_tostring_tests.cpp index 97ab66c7..23d8e1db 100644 --- a/tests/ondemand/ondemand_tostring_tests.cpp +++ b/tests/ondemand/ondemand_tostring_tests.cpp @@ -24,11 +24,12 @@ const char *test_files[] = { #if SIMDJSON_EXCEPTIONS bool issue1607() { TEST_START(); - auto cars_json = R"( { "test": "result" } )"_padded; + auto silly_json = R"( { "test": "result" } )"_padded; ondemand::parser parser; - ondemand::document doc = parser.iterate(cars_json); - std::string expected = R"("result")"; - std::string result = simdjson::to_string(doc["test"]); + ondemand::document doc = parser.iterate(silly_json); + std::string_view expected = R"("result")"; + std::string_view result = 
simdjson::to_json_string(doc["test"]); + std::cout << "'"<< result << "'" << std::endl; ASSERT_EQUAL(result, expected); TEST_SUCCEED(); } @@ -36,22 +37,53 @@ bool issue1607() { bool minify_demo() { TEST_START(); ondemand::parser parser; - auto cars_json = R"( { "test": "result" } )"_padded; + auto silly_json = R"( { "test": "result" } )"_padded; ondemand::document doc; - ASSERT_SUCCESS( parser.iterate(cars_json).get(doc) ); - std::cout << simdjson::to_string(doc["test"]) << std::endl; + ASSERT_SUCCESS( parser.iterate(silly_json).get(doc) ); + std::cout << simdjson::to_json_string(doc["test"]) << std::endl; TEST_SUCCEED(); } bool minify_demo2() { TEST_START(); ondemand::parser parser; - auto cars_json = R"( { "test": "result" } )"_padded; + auto silly_json = R"( { "test": "result" } )"_padded; ondemand::document doc; - ASSERT_SUCCESS( parser.iterate(cars_json).get(doc) ); + ASSERT_SUCCESS( parser.iterate(silly_json).get(doc) ); std::cout << std::string_view(doc["test"]) << std::endl; TEST_SUCCEED(); } +bool car_example() { + TEST_START(); + auto cars_json = R"( [ + { "make": "Toyota", "model": "Camry", "year": 2018, "tire_pressure": [ 40.1, 39.9, 37.7, 40.4 ] }, + { "make": "Kia", "model": "Soul", "year": 2012, "tire_pressure": [ 30.1, 31.0, 28.6, 28.7 ] }, + { "make": "Toyota", "model": "Tercel", "year": 1999, "tire_pressure": [ 29.8, 30.0, 30.2, 30.5 ] } + ] )"_padded; + std::vector arrays; + // We are going to collect string_view instances which point inside the `cars_json` string + // and are therefore valid as long as `cars_json` remains in scope. + { + ondemand::parser parser; + for (ondemand::object car : parser.iterate(cars_json)) { + if(uint64_t(car["year"]) > 2000) { // Pick the recent cars only! 
+ arrays.push_back(simdjson::to_json_string(car["tire_pressure"])); + } + } + } + // We can now convert to a JSON string: + std::ostringstream oss; + oss << "["; + for(size_t i = 0; i < arrays.size(); i++) { + if(i>0) { oss << ","; } + oss << arrays[i]; + } + oss << "]"; + auto json_string = oss.str(); + ASSERT_EQUAL(json_string, "[[ 40.1, 39.9, 37.7, 40.4 ],[ 30.1, 31.0, 28.6, 28.7 ]]"); + TEST_SUCCEED(); +} + /** * The general idea of these tests if that if you take a JSON file, @@ -72,25 +104,26 @@ bool load_to_string(const char *filename) { } std::cout << "file loaded: " << docdata.size() << " bytes." << std::endl; simdjson::ondemand::document doc; - error = parser.iterate(docdata).get(doc); + auto silly_json = R"( { "test": "result" } )"_padded; + + error = parser.iterate(silly_json).get(doc); if (error) { std::cerr << error << std::endl; return false; } std::cout << "serializing once." << std::endl; - std::string serial1 = simdjson::to_string(doc); - serial1.reserve(serial1.size() + simdjson::SIMDJSON_PADDING); - error = parser.iterate(serial1).get(doc); + std::string_view serial1 = simdjson::to_json_string(doc); + error = parser.iterate(serial1, serial1.size() + simdjson::SIMDJSON_PADDING).get(doc); if (error) { std::cerr << error << std::endl; return false; } std::cout << "serializing twice." << std::endl; - std::string serial2 = simdjson::to_string(doc); + std::string_view serial2 = simdjson::to_json_string(doc); bool match = (serial1 == serial2); if (match) { std::cout << "Parsing to_string and calling to_string again results in the " - "same content." + "same content. " << "Got " << serial1.size() << " bytes." << std::endl; } return match; @@ -126,21 +159,20 @@ bool load_to_string_exceptionless(const char *filename) { return false; } std::cout << "serializing once." 
<< std::endl; - std::string serial1; - error = simdjson::to_string(doc).get(serial1); + std::string_view serial1; + error = simdjson::to_json_string(doc).get(serial1); if (error) { std::cerr << error << std::endl; return false; } - serial1.reserve(serial1.size() + simdjson::SIMDJSON_PADDING); - error = parser.iterate(serial1).get(doc); + error = parser.iterate(serial1, serial1.size() + simdjson::SIMDJSON_PADDING).get(doc); if (error) { std::cerr << error << std::endl; return false; } std::cout << "serializing twice." << std::endl; - std::string serial2; - error = simdjson::to_string(doc).get(serial2); + std::string_view serial2; + error = simdjson::to_json_string(doc).get(serial2); if (error) { std::cerr << error << std::endl; return false; @@ -148,7 +180,7 @@ bool load_to_string_exceptionless(const char *filename) { bool match = (serial1 == serial2); if (match) { std::cout << "Parsing to_string and calling to_string again results in the " - "same content." + "same content. " << "Got " << serial1.size() << " bytes." 
<< std::endl; } return match; @@ -165,13 +197,240 @@ bool minify_exceptionless_test() { return true; } +bool empty_object() { + TEST_START(); + ondemand::parser parser; + auto arr_json = R"({})"_padded; + ondemand::document doc; + ASSERT_SUCCESS( parser.iterate(arr_json).get(doc)); + std::string_view serial; + ASSERT_SUCCESS( simdjson::to_json_string(doc).get(serial)); + ASSERT_EQUAL(serial, R"({})"); + TEST_SUCCEED(); +} + +bool empty_array() { + TEST_START(); + ondemand::parser parser; + auto arr_json = R"([])"_padded; + ondemand::document doc; + ASSERT_SUCCESS( parser.iterate(arr_json).get(doc)); + std::string_view serial; + ASSERT_SUCCESS( simdjson::to_json_string(doc).get(serial)); + ASSERT_EQUAL(serial, R"([])"); + TEST_SUCCEED(); +} + +bool single_digit_document() { + TEST_START(); + ondemand::parser parser; + auto arr_json = R"(9)"_padded; + ondemand::document doc; + ASSERT_SUCCESS( parser.iterate(arr_json).get(doc) ); + std::string_view serial; + ASSERT_SUCCESS( simdjson::to_json_string(doc).get(serial)); + ASSERT_EQUAL(serial, R"(9)"); + TEST_SUCCEED(); +} + +bool single_string_document() { + TEST_START(); + ondemand::parser parser; + auto arr_json = R"("")"_padded; + ondemand::document doc; + ASSERT_SUCCESS( parser.iterate(arr_json).get(doc) ); + std::string_view serial; + ASSERT_SUCCESS( simdjson::to_json_string(doc).get(serial)); + ASSERT_EQUAL(serial, R"("")"); + TEST_SUCCEED(); +} + +bool at_start_array() { + TEST_START(); + ondemand::parser parser; + auto arr_json = R"( [111,2,3,5] )"_padded; + ondemand::document doc; + ASSERT_SUCCESS( parser.iterate(arr_json).get(doc) ); + ondemand::array array; + ASSERT_SUCCESS( doc.get_array().get(array) ); + std::string_view serial; + ASSERT_SUCCESS( simdjson::to_json_string(array).get(serial)); + ASSERT_EQUAL(serial, "[111,2,3,5]"); + TEST_SUCCEED(); +} + + +bool at_start_object() { + TEST_START(); + ondemand::parser parser; + auto arr_json = R"( {"a":1, "b":2, "c": 3 } )"_padded; + ondemand::document doc; + 
ASSERT_SUCCESS( parser.iterate(arr_json).get(doc) ); + ondemand::object object; + ASSERT_SUCCESS( doc.get_object().get(object) ); + std::string_view serial; + ASSERT_SUCCESS( simdjson::to_json_string(object).get(serial)); + ASSERT_EQUAL(serial, R"({"a":1, "b":2, "c": 3 })"); + ASSERT_SUCCESS( simdjson::to_json_string(object).get(serial)); + ASSERT_EQUAL(serial, R"({"a":1, "b":2, "c": 3 })"); + TEST_SUCCEED(); +} + +bool in_middle_array() { + TEST_START(); + ondemand::parser parser; + auto arr_json = R"( [111,{"a":1},3,5] )"_padded; + ondemand::document doc; + ASSERT_SUCCESS( parser.iterate(arr_json).get(doc) ); + ondemand::array array; + ASSERT_SUCCESS( doc.get_array().get(array) ); + auto i = array.begin(); + int64_t x; + ASSERT_SUCCESS( (*i).get_int64().get(x) ); + ASSERT_EQUAL(x, 111); + std::string_view serial; + ASSERT_SUCCESS( simdjson::to_json_string(array).get(serial)); + ASSERT_EQUAL(serial, "[111,{\"a\":1},3,5]"); + ASSERT_SUCCESS( simdjson::to_json_string(doc).get(serial)); + ASSERT_EQUAL(serial, "[111,{\"a\":1},3,5]"); + TEST_SUCCEED(); +} + +bool at_middle_object() { + TEST_START(); + ondemand::parser parser; + auto arr_json = R"( {"a":1, "b":2, "c": 3 } )"_padded; + ondemand::document doc; + ASSERT_SUCCESS( parser.iterate(arr_json).get(doc) ); + ondemand::object object; + ASSERT_SUCCESS( doc.get_object().get(object) ); + int64_t x; + ASSERT_SUCCESS(object["b"].get_int64().get(x)); + ASSERT_EQUAL(x,2); + std::string_view serial; + ASSERT_SUCCESS( simdjson::to_json_string(object).get(serial)); + ASSERT_EQUAL(serial, R"({"a":1, "b":2, "c": 3 })"); + ASSERT_SUCCESS( simdjson::to_json_string(doc).get(serial)); + ASSERT_EQUAL(serial, R"({"a":1, "b":2, "c": 3 })"); + TEST_SUCCEED(); +} + +bool at_middle_object_just_key() { + TEST_START(); + ondemand::parser parser; + auto arr_json = R"( {"a":1, "b":2, "c": 3 } )"_padded; + ondemand::document doc; + ASSERT_SUCCESS( parser.iterate(arr_json).get(doc) ); + ondemand::object object; + ASSERT_SUCCESS( 
doc.get_object().get(object) ); + ondemand::value x; + ASSERT_SUCCESS(object["b"].get(x)); + std::string_view serial; + ASSERT_SUCCESS( simdjson::to_json_string(object).get(serial)); + ASSERT_EQUAL(serial, R"({"a":1, "b":2, "c": 3 })"); + ASSERT_SUCCESS( simdjson::to_json_string(doc).get(serial)); + ASSERT_EQUAL(serial, R"({"a":1, "b":2, "c": 3 })"); + TEST_SUCCEED(); +} + + +bool at_end_object() { + TEST_START(); + ondemand::parser parser; + auto arr_json = R"( {"a":1, "b":2, "c": 3 } )"_padded; + ondemand::document doc; + ASSERT_SUCCESS( parser.iterate(arr_json).get(doc) ); + ondemand::object object; + ASSERT_SUCCESS( doc.get_object().get(object) ); + int64_t x; + ASSERT_ERROR(object["bcc"].get_int64().get(x), NO_SUCH_FIELD); + std::string_view serial; + ASSERT_SUCCESS( simdjson::to_json_string(object).get(serial)); + ASSERT_EQUAL(serial, R"({"a":1, "b":2, "c": 3 })"); + ASSERT_SUCCESS( simdjson::to_json_string(doc).get(serial)); + ASSERT_EQUAL(serial, R"({"a":1, "b":2, "c": 3 })"); + TEST_SUCCEED(); +} + +bool at_array_end() { + TEST_START(); + ondemand::parser parser; + std::string_view serial; + auto arr_json = R"( [111,2,3,5] )"_padded; + ondemand::document doc; + ASSERT_SUCCESS( parser.iterate(arr_json).get(doc) ); + ondemand::array array; + ASSERT_SUCCESS( doc.get_array().get(array) ); + auto i = array.begin(); + int64_t x; + ASSERT_SUCCESS( (*i).get_int64().get(x) ); + ASSERT_EQUAL(x, 111); + ++i; + ASSERT_SUCCESS( (*i).get_int64().get(x) ); + ASSERT_EQUAL(x, 2); + ++i; + ASSERT_SUCCESS( (*i).get_int64().get(x) ); + ASSERT_EQUAL(x, 3); + ++i; + ASSERT_SUCCESS( (*i).get_int64().get(x) ); + ASSERT_EQUAL(x, 5); + ASSERT_SUCCESS( simdjson::to_json_string(array).get(serial)); + ASSERT_EQUAL(serial, "[111,2,3,5]"); + ASSERT_SUCCESS( simdjson::to_json_string(doc).get(serial)); + ASSERT_EQUAL(serial, "[111,2,3,5]"); + TEST_SUCCEED(); +} + +bool complex_case() { + TEST_START(); + ondemand::parser parser; + auto arr_json = R"( {"array":[1,2,3], "objects":[{"id":1}, 
{"id":2}, {"id":3}]} )"_padded; + ondemand::document doc; + ASSERT_SUCCESS( parser.iterate(arr_json).get(doc) ); + ondemand::object obj; + ASSERT_SUCCESS( doc.get_object().get(obj) ); + ondemand::array array; + ASSERT_SUCCESS( obj["objects"].get_array().get(array) ); + std::string_view serial; + for(auto v : array) { + ondemand::object object; + ASSERT_SUCCESS( v.get_object().get(object) ); + int64_t x; + ASSERT_SUCCESS(object["id"].get_int64().get(x)); + if(x / 2 * 2 != x) { + ASSERT_SUCCESS( simdjson::to_json_string(object).get(serial)); + ASSERT_EQUAL(serial, "{\"id\":"+std::to_string(x)+"}"); + } + } + ASSERT_SUCCESS( simdjson::to_json_string(array).get(serial)); + ASSERT_EQUAL(serial, R"([{"id":1}, {"id":2}, {"id":3}])"); + ASSERT_SUCCESS( simdjson::to_json_string(obj).get(serial)); + ASSERT_EQUAL(serial, R"({"array":[1,2,3], "objects":[{"id":1}, {"id":2}, {"id":3}]})"); + ASSERT_SUCCESS( simdjson::to_json_string(doc).get(serial)); + ASSERT_EQUAL(serial, R"({"array":[1,2,3], "objects":[{"id":1}, {"id":2}, {"id":3}]})"); + TEST_SUCCEED(); +} + bool run() { return + empty_object() && + empty_array() && + single_digit_document() && + single_string_document() && + complex_case() && + at_start_object() && + at_middle_object() && + at_middle_object_just_key() && + at_end_object() && + at_start_array() && + in_middle_array() && + at_array_end() && #if SIMDJSON_EXCEPTIONS issue1607() && minify_demo() && minify_demo2() && minify_test() && + car_example() && #endif // SIMDJSON_EXCEPTIONS minify_exceptionless_test() && true; diff --git a/tests/test_macros.h b/tests/test_macros.h index 29a500cc..1e0a1ae7 100644 --- a/tests/test_macros.h +++ b/tests/test_macros.h @@ -56,7 +56,7 @@ simdjson_really_inline bool assert_success(const T &actual, const char *operatio template simdjson_really_inline bool assert_equal(const A &actual, const E &expected, const char *operation = "result") { if (!equals_expected(actual, expected)) { - std::cerr << "FAIL: " << operation << " returned " 
<< actual << " (expected " << expected << ")" << std::flush; + std::cerr << "FAIL: " << operation << " returned '" << actual << "' (expected '" << expected << "')" << std::flush; std::cerr << std::endl; return false; }