Merge pull request #1479 from simdjson/jkeiser/raw_json_token

Add value.raw_json_token()
This commit is contained in:
John Keiser 2021-03-05 10:24:36 -08:00 committed by GitHub
commit 0948573e63
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 157 additions and 0 deletions

View File

@ -284,6 +284,13 @@ struct simdjson_result : public internal::simdjson_result_base<T> {
}; // struct simdjson_result
#if SIMDJSON_EXCEPTIONS
/**
 * Write the value held by a simdjson_result to an output stream.
 *
 * Deliberately NOT noexcept: stream insertion may throw (e.g. when the stream has its
 * exception mask set), and value() is presumably the throwing accessor, given that this
 * overload is only compiled when SIMDJSON_EXCEPTIONS is enabled. Marking the function
 * noexcept would turn either exception into std::terminate() instead of letting the
 * caller handle it.
 *
 * @param out The stream to write to.
 * @param value The result whose value is written.
 * @return out, to allow chaining.
 */
template<typename T>
inline std::ostream& operator<<(std::ostream& out, simdjson_result<T> value) { return out << value.value(); }
#endif // SIMDJSON_EXCEPTIONS
#ifndef SIMDJSON_DISABLE_DEPRECATED_API
/**
* @deprecated This is an alias and will be removed, use error_code instead

View File

@ -119,6 +119,11 @@ simdjson_really_inline simdjson_result<json_type> document::type() noexcept {
return get_root_value_iterator().type();
}
simdjson_really_inline simdjson_result<std::string_view> document::raw_json_token() noexcept {
  // Build the view from the root value iterator: peek_start() gives the first byte,
  // peek_start_length() gives the number of bytes in the view.
  auto root = get_root_value_iterator();
  const char *token_start = reinterpret_cast<const char*>(root.peek_start());
  return std::string_view(token_start, root.peek_start_length());
}
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace simdjson
@ -283,4 +288,9 @@ simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::docume
}
#endif
simdjson_really_inline simdjson_result<std::string_view> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::raw_json_token() noexcept {
  // Only ask the document for its token when no error is stored; otherwise forward the error.
  if (!error()) {
    return first.raw_json_token();
  }
  return error();
}
} // namespace simdjson

View File

@ -274,6 +274,30 @@ public:
*/
simdjson_really_inline simdjson_result<json_type> type() noexcept;
/**
* Get the raw JSON for this token.
*
* The string_view will always point into the input buffer.
*
* The string_view will start at the beginning of the token, and include the entire token
* *as well as all spaces until the next token (or EOF).* This means, for example, that a
* string token always begins with a " and is always terminated by the final ", possibly
* followed by a number of spaces.
*
* For an object or array, only the opening { or [ (plus any spaces that follow it) is
* covered, not the whole container.
*
* The string_view is *not* null-terminated. If this is a scalar (string, number,
* boolean, or null), the character after the end of the string_view may lie in the
* buffer's padding.
*
* Tokens include:
* - {
* - [
* - "a string (possibly with UTF-8 or backslashed characters like \\\")".
* - -1.2e-100
* - true
* - false
* - null
*
* @returns A string_view over the token and any spaces that follow it.
*/
simdjson_really_inline simdjson_result<std::string_view> raw_json_token() noexcept;
protected:
simdjson_really_inline document(ondemand::json_iterator &&iter) noexcept;
simdjson_really_inline const uint8_t *text(uint32_t idx) const noexcept;
@ -350,6 +374,9 @@ public:
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> find_field_unordered(const char *key) & noexcept;
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::json_type> type() noexcept;
/** @copydoc simdjson_really_inline simdjson_result<std::string_view> document::raw_json_token() noexcept */
simdjson_really_inline simdjson_result<std::string_view> raw_json_token() noexcept;
};
} // namespace simdjson

View File

@ -121,6 +121,10 @@ simdjson_really_inline simdjson_result<json_type> value::type() noexcept {
return iter.type();
}
simdjson_really_inline std::string_view value::raw_json_token() noexcept {
  // The view starts at iter.peek_start() and spans iter.peek_start_length() bytes.
  const auto token_length = iter.peek_start_length();
  const auto token_begin = reinterpret_cast<const char*>(iter.peek_start());
  return std::string_view(token_begin, token_length);
}
} // namespace ondemand
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace simdjson
@ -274,4 +278,9 @@ simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>
}
#endif
simdjson_really_inline simdjson_result<std::string_view> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::raw_json_token() noexcept {
  // Forward a stored error untouched; the wrapped value is only consulted on success.
  if (!error()) {
    return first.raw_json_token();
  }
  return error();
}
} // namespace simdjson

View File

@ -273,6 +273,31 @@ public:
*/
simdjson_really_inline simdjson_result<json_type> type() noexcept;
/**
* Get the raw JSON for this token.
*
* The string_view will always point into the input buffer.
*
* The string_view will start at the beginning of the token, and include the entire token
* *as well as all spaces until the next token (or EOF).* This means, for example, that a
* string token always begins with a " and is always terminated by the final ", possibly
* followed by a number of spaces.
*
* For an object or array, only the opening { or [ (plus any spaces that follow it) is
* covered, not the whole container.
*
* The string_view is *not* null-terminated. However, if this is a scalar (string, number,
* boolean, or null), the character after the end of the string_view is guaranteed to be
* a non-space token.
*
* Tokens include:
* - {
* - [
* - "a string (possibly with UTF-8 or backslashed characters like \\\")".
* - -1.2e-100
* - true
* - false
* - null
*
* @returns A string_view over the token and any spaces that follow it.
*/
simdjson_really_inline std::string_view raw_json_token() noexcept;
protected:
/**
* Create a value.
@ -416,6 +441,9 @@ public:
* let it throw an exception).
*/
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::json_type> type() noexcept;
/** @copydoc simdjson_really_inline std::string_view value::raw_json_token() noexcept */
simdjson_really_inline simdjson_result<std::string_view> raw_json_token() noexcept;
};
} // namespace simdjson

View File

@ -9,6 +9,7 @@ add_cpp_test(ondemand_array_error_tests LABELS ondemand acceptance per_impl
add_cpp_test(ondemand_compilation_tests LABELS ondemand acceptance per_implementation)
add_cpp_test(ondemand_error_tests LABELS ondemand acceptance per_implementation)
add_cpp_test(ondemand_key_string_tests LABELS ondemand acceptance per_implementation)
add_cpp_test(ondemand_misc_tests LABELS ondemand acceptance per_implementation)
add_cpp_test(ondemand_number_tests LABELS ondemand acceptance per_implementation)
add_cpp_test(ondemand_object_tests LABELS ondemand acceptance per_implementation)
add_cpp_test(ondemand_object_error_tests LABELS ondemand acceptance per_implementation)

View File

@ -0,0 +1,75 @@
#include "simdjson.h"
#include "test_ondemand.h"
using namespace simdjson;
namespace misc_tests {
  using namespace std;

  /**
   * Check raw_json_token() on `json` twice: once with `json` as the whole document, and once
   * with `json` embedded as the value of field "a" in a wrapping object.
   *
   * Both subtests also verify that the returned view points into the padded input buffer.
   *
   * @param json The JSON text under test.
   * @param expected_token The text raw_json_token() is expected to produce.
   * @param expected_start_index Offset within `json` at which the token is expected to begin.
   * @return true if both subtests pass.
   */
  simdjson_warn_unused bool test_raw_json_token(string_view json, string_view expected_token, int expected_start_index = 0) {
    // Subtest titles show the JSON under test wrapped in single quotes.
    string title("'");
    title.append(json.data(), json.length()).append("'");

    padded_string json_padded = json;
    SUBTEST(title, test_ondemand_doc(json_padded, [&](auto doc) {
      string_view token;
      ASSERT_SUCCESS( doc.raw_json_token().get(token) );
      ASSERT_EQUAL( token, expected_token );
      // Validate the text is inside the original buffer
      ASSERT_EQUAL( reinterpret_cast<const void*>(token.data()), reinterpret_cast<const void*>(&json_padded.data()[expected_start_index]));
      return true;
    }));

    // Test values: wrap the same JSON as the value of field "a" and repeat the checks.
    string json_in_hash(R"({"a":)");
    json_in_hash.append(json.data(), json.length()).append("}");
    json_padded = json_in_hash;
    title.assign("'").append(json_in_hash).append("'");
    SUBTEST(title, test_ondemand_doc(json_padded, [&](auto doc) {
      string_view token;
      ASSERT_SUCCESS( doc["a"].raw_json_token().get(token) );
      ASSERT_EQUAL( token, expected_token );
      // Validate the text is inside the original buffer
      // Adjust for the {"a":
      ASSERT_EQUAL( reinterpret_cast<const void*>(token.data()), reinterpret_cast<const void*>(&json_padded.data()[5+expected_start_index]));
      return true;
    }));
    return true;
  }

  /**
   * Run test_raw_json_token() over a fixed list of inputs, stopping at the first failure.
   */
  bool raw_json_token() {
    TEST_START();

    // One row per scenario: input JSON, expected token text, expected offset of the token.
    struct token_case {
      const char *json;
      const char *expected_token;
      int expected_start_index;
    };
    const token_case cases[] = {
      { "{}", "{", 0 },
      { "{ }", "{ ", 0 },
      { "{ \n }", "{ \n ", 0 },
      { " \n { \n } \n ", "{ \n ", 3 },
      { "[]", "[", 0 },
      { "1", "1", 0 },
      { " \n 1 \n ", "1 \n ", 3 },
      { "-123.456e-789", "-123.456e-789", 0 },
      { " \n -123.456e-789 \n ", "-123.456e-789 \n ", 3 },
      { "true", "true", 0 },
      { "false", "false", 0 },
      { "null", "null", 0 },
      { "blah2", "blah2", 0 },
      { "true false", "true ", 0 },
      { "true \n false", "true \n ", 0 },
    };
    for (const auto &tc : cases) {
      if (!test_raw_json_token(tc.json, tc.expected_token, tc.expected_start_index)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Entry point for this test file: runs every test in the namespace.
   */
  bool run() {
    return raw_json_token();
  }
} // namespace misc_tests
int main(int argc, char *argv[]) {
  // Hand the command line and this file's test runner to the shared test driver.
  const int exit_code = test_main(argc, argv, misc_tests::run);
  return exit_code;
}