diff --git a/include/simdjson/error.h b/include/simdjson/error.h index 87ff34d8..d56c230f 100644 --- a/include/simdjson/error.h +++ b/include/simdjson/error.h @@ -284,6 +284,13 @@ struct simdjson_result : public internal::simdjson_result_base { }; // struct simdjson_result +#if SIMDJSON_EXCEPTIONS +/** Streams the value held by a simdjson_result. Deliberately not noexcept: value() is expected to throw when the result holds an error (hence the SIMDJSON_EXCEPTIONS guard), and a noexcept operator would turn that throw into std::terminate. */ +template +inline std::ostream& operator<<(std::ostream& out, simdjson_result value) { return out << value.value(); } + +#endif // SIMDJSON_EXCEPTIONS + #ifndef SIMDJSON_DISABLE_DEPRECATED_API /** * @deprecated This is an alias and will be removed, use error_code instead diff --git a/include/simdjson/generic/ondemand/document-inl.h b/include/simdjson/generic/ondemand/document-inl.h index d584c35a..fe56568f 100644 --- a/include/simdjson/generic/ondemand/document-inl.h +++ b/include/simdjson/generic/ondemand/document-inl.h @@ -119,6 +119,11 @@ simdjson_really_inline simdjson_result document::type() noexcept { return get_root_value_iterator().type(); } +simdjson_really_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); +} + } // namespace ondemand } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson @@ -283,4 +288,9 @@ simdjson_really_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + } // namespace simdjson diff --git a/include/simdjson/generic/ondemand/document.h b/include/simdjson/generic/ondemand/document.h index 5f671a81..ba9581b2 100644 --- a/include/simdjson/generic/ondemand/document.h +++ b/include/simdjson/generic/ondemand/document.h @@ -274,6 +274,30 @@ public: */ simdjson_really_inline simdjson_result type() noexcept; + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. 
+ * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. If this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view may be the padded buffer. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_really_inline simdjson_result raw_json_token() noexcept; + protected: simdjson_really_inline document(ondemand::json_iterator &&iter) noexcept; simdjson_really_inline const uint8_t *text(uint32_t idx) const noexcept; @@ -350,6 +374,9 @@ public: simdjson_really_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_really_inline simdjson_result type() noexcept; + + /** @copydoc simdjson_really_inline simdjson_result<std::string_view> document::raw_json_token() noexcept */ + simdjson_really_inline simdjson_result raw_json_token() noexcept; }; } // namespace simdjson diff --git a/include/simdjson/generic/ondemand/value-inl.h b/include/simdjson/generic/ondemand/value-inl.h index 60ba9038..59f3a178 100644 --- a/include/simdjson/generic/ondemand/value-inl.h +++ b/include/simdjson/generic/ondemand/value-inl.h @@ -121,6 +121,10 @@ simdjson_really_inline simdjson_result value::type() noexcept { return iter.type(); } +simdjson_really_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + } // namespace ondemand } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson @@ -274,4 +278,9 @@ simdjson_really_inline simdjson_result } #endif +simdjson_really_inline simdjson_result 
simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + } // namespace simdjson diff --git a/include/simdjson/generic/ondemand/value.h b/include/simdjson/generic/ondemand/value.h index c092dccc..f8310954 100644 --- a/include/simdjson/generic/ondemand/value.h +++ b/include/simdjson/generic/ondemand/value.h @@ -273,6 +273,31 @@ public: */ simdjson_really_inline simdjson_result type() noexcept; + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. However, if this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view is guaranteed to be + * a non-space token. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_really_inline std::string_view raw_json_token() noexcept; + protected: /** * Create a value. @@ -416,6 +441,9 @@ public: * let it throw an exception). 
*/ simdjson_really_inline simdjson_result type() noexcept; + + /** @copydoc simdjson_really_inline std::string_view value::raw_json_token() noexcept */ + simdjson_really_inline simdjson_result raw_json_token() noexcept; }; } // namespace simdjson diff --git a/tests/ondemand/CMakeLists.txt b/tests/ondemand/CMakeLists.txt index 49f34288..4244996d 100644 --- a/tests/ondemand/CMakeLists.txt +++ b/tests/ondemand/CMakeLists.txt @@ -9,6 +9,7 @@ add_cpp_test(ondemand_array_error_tests LABELS ondemand acceptance per_impl add_cpp_test(ondemand_compilation_tests LABELS ondemand acceptance per_implementation) add_cpp_test(ondemand_error_tests LABELS ondemand acceptance per_implementation) add_cpp_test(ondemand_key_string_tests LABELS ondemand acceptance per_implementation) +add_cpp_test(ondemand_misc_tests LABELS ondemand acceptance per_implementation) add_cpp_test(ondemand_number_tests LABELS ondemand acceptance per_implementation) add_cpp_test(ondemand_object_tests LABELS ondemand acceptance per_implementation) add_cpp_test(ondemand_object_error_tests LABELS ondemand acceptance per_implementation) diff --git a/tests/ondemand/ondemand_misc_tests.cpp b/tests/ondemand/ondemand_misc_tests.cpp new file mode 100644 index 00000000..6717caa5 --- /dev/null +++ b/tests/ondemand/ondemand_misc_tests.cpp @@ -0,0 +1,75 @@ +#include "simdjson.h" +#include "test_ondemand.h" + +using namespace simdjson; + +namespace misc_tests { + using namespace std; + // Checks raw_json_token() twice: on json parsed as the whole document, then on json embedded as the value of key "a". In both cases the token must equal expected_token and begin expected_start_index bytes into the copy of json held in the padded buffer. + simdjson_warn_unused bool test_raw_json_token(string_view json, string_view expected_token, int expected_start_index = 0) { + string title = "'"; + title.append(json.data(), json.length()); + title += "'"; + padded_string json_padded = json; + SUBTEST(title, test_ondemand_doc(json_padded, [&](auto doc) { + string_view token; + ASSERT_SUCCESS( doc.raw_json_token().get(token) ); + ASSERT_EQUAL( token, expected_token ); + // Validate the text is inside the original buffer + ASSERT_EQUAL( reinterpret_cast(token.data()), 
reinterpret_cast(&json_padded.data()[expected_start_index])); + return true; + })); + + // Test values + auto json_in_hash = string(R"({"a":)"); + json_in_hash.append(json.data(), json.length()); + json_in_hash += "}"; + json_padded = json_in_hash; + title = "'"; + title.append(json_in_hash.data(), json_in_hash.length()); + title += "'"; + SUBTEST(title, test_ondemand_doc(json_padded, [&](auto doc) { + string_view token; + ASSERT_SUCCESS( doc["a"].raw_json_token().get(token) ); + ASSERT_EQUAL( token, expected_token ); + // Validate the text is inside the original buffer + // Adjust for the {"a": + ASSERT_EQUAL( reinterpret_cast(token.data()), reinterpret_cast(&json_padded.data()[5+expected_start_index])); + return true; + })); + + return true; + } + + bool raw_json_token() { + TEST_START(); + return + test_raw_json_token("{}", "{") && + test_raw_json_token("{ }", "{ ") && + test_raw_json_token("{ \n }", "{ \n ") && + test_raw_json_token(" \n { \n } \n ", "{ \n ", 3) && + test_raw_json_token("[]", "[") && + test_raw_json_token("1", "1") && + test_raw_json_token(" \n 1 \n ", "1 \n ", 3) && + test_raw_json_token("-123.456e-789", "-123.456e-789") && + test_raw_json_token(" \n -123.456e-789 \n ", "-123.456e-789 \n ", 3) && + test_raw_json_token("true", "true") && + test_raw_json_token("false", "false") && + test_raw_json_token("null", "null") && + test_raw_json_token("blah2", "blah2") && + test_raw_json_token("true false", "true ") && + test_raw_json_token("true \n false", "true \n ") && + true; + } + + bool run() { + return + raw_json_token() && + true; + } + +} // namespace misc_tests + +int main(int argc, char *argv[]) { + return test_main(argc, argv, misc_tests::run); +}