Documentation for issue 1562 (Accessing escaped key with on-demand API) (#1563)

* Documentation for issue 1562.

* Making exception-free.

* Improving wording.
This commit is contained in:
Daniel Lemire 2021-06-04 09:21:52 -04:00 committed by GitHub
parent d90714e8df
commit f44a53271d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 78 additions and 6 deletions

View File

@ -166,11 +166,26 @@ support for users who avoid exceptions. See [the simdjson error handling documen
> NOTE: simdjson does *not* unescape keys when matching. This is not generally a problem for > NOTE: simdjson does *not* unescape keys when matching. This is not generally a problem for
> applications with well-defined key names (which generally do not use escapes). If you do need this > applications with well-defined key names (which generally do not use escapes). If you do need this
> support, it's best to iterate through the object fields to find the field you are looking for. > support, it's best to iterate through the object fields to find the field you are looking for. The
> method `unescaped_key()` provides the desired unescaped keys by parsing and writing out the
> unescaped keys to a string buffer and returning a `std::string_view` instance. You should expect
> a performance penalty when using `unescaped_key()`.
> ```c++
> auto json = R"({"k\u0065y": 1})"_padded;
> ondemand::parser parser;
> auto doc = parser.iterate(json);
> ondemand::object object = doc.get_object();
> for(auto field : object) {
> // parses and writes out the key, after unescaping it,
> // to a string buffer.
> std::string_view keyv = field.unescaped_key();
> if(keyv == "key") { std::cout << uint64_t(field.value()); }
> }
> ```
> >
> By default, field lookup is order-insensitive, so you can look up values in any order. However, > By default, field lookup is order-insensitive, so you can look up values in any order. However,
> we still encourage you to look up fields in the order you expect them in the JSON, as it is still > we still encourage you to look up fields in the order you expect them in the JSON, as it is still
> much faster. > faster.
> >
> If you want to enforce finding fields in order, you can use `object.find_field("foo")` instead. > If you want to enforce finding fields in order, you can use `object.find_field("foo")` instead.
> This will only look forward, and will fail to find fields in the wrong order: for example, this > This will only look forward, and will fail to find fields in the wrong order: for example, this
@ -198,7 +213,7 @@ support for users who avoid exceptions. See [the simdjson error handling documen
If you know the type of the value, you can cast it right there, too! `for (double value : array) { ... }`. If you know the type of the value, you can cast it right there, too! `for (double value : array) { ... }`.
* **Object Iteration:** You can iterate through an object's fields, as well: `for (auto field : object) { ... }` * **Object Iteration:** You can iterate through an object's fields, as well: `for (auto field : object) { ... }`
- `field.unescaped_key()` will get you the key string. - `field.unescaped_key()` will get you the unescaped key string.
- `field.value()` will get you the value, which you can then use all these other methods on. - `field.value()` will get you the value, which you can then use all these other methods on.
* **Array Index:** Because it is forward-only, you cannot look up an array element by index. Instead, * **Array Index:** Because it is forward-only, you cannot look up an array element by index. Instead,
you will need to iterate through the array and keep an index yourself. you will need to iterate through the array and keep an index yourself.
@ -328,9 +343,9 @@ void recursive_print_json(T&& element) {
if (add_comma) { if (add_comma) {
cout << ","; cout << ",";
} }
// key() returns the unescaped key, if we // key() returns the key as it appears in the raw
// want the escaped key, we should do // JSON document, if we want the unescaped key,
// field.unescaped_key(). // we should do field.unescaped_key().
cout << "\"" << field.key() << "\": "; cout << "\"" << field.key() << "\": ";
recursive_print_json(field.value()); recursive_print_json(field.value());
add_comma = true; add_comma = true;

View File

@ -268,6 +268,25 @@ void implementation_selection_1() {
cout << "(" << simdjson::active_implementation->description() << ")" << endl; cout << "(" << simdjson::active_implementation->description() << ")" << endl;
} }
void unescaped_key() {
auto json = R"({"k\u0065y": 1})"_padded;
ondemand::parser parser;
auto doc = parser.iterate(json);
ondemand::object object = doc.get_object();
for(auto field : object) {
// parses and writes out the key, after unescaping it,
// to a string buffer. This should be expected to be much
// more costly than accessing the raw string ("key()").
std::string_view keyv = field.unescaped_key();
if(keyv == "key") {
std::cout << uint64_t(field.value());
}
// You can access the raw value like so:
// ondemand::raw_json_string keyv = field.key();
// if(keyv == R"(k\u0065y)") {
}
}
void implementation_selection_2() { void implementation_selection_2() {
for (auto implementation : simdjson::available_implementations) { for (auto implementation : simdjson::available_implementations) {
cout << implementation->name() << ": " << implementation->description() << endl; cout << implementation->name() << ": " << implementation->description() << endl;

View File

@ -698,6 +698,43 @@ namespace object_tests {
})); }));
TEST_SUCCEED(); TEST_SUCCEED();
} }
// Checks that an object key written with a \u escape can be matched both in
// its unescaped form (through unescaped_key()) and in its raw form (through
// key()). Runs two subtests over the same document.
// NOTE(review): TEST_START / SUBTEST / ASSERT_* / TEST_SUCCEED are macros
// defined outside this view; presumably a failed assertion leaves the
// enclosing lambda early -- confirm against the test helpers.
bool value_search_unescaped_key() {
TEST_START();
// Raw string literal: the document text contains the six characters \u0065
// (the JSON escape for 'e'), not the letter itself.
auto json = R"({"k\u0065y": 1})"_padded;
// Subtest 1: compare against the unescaped key.
SUBTEST("ondemand::unescapedkey", test_ondemand_doc(json, [&](auto doc_result) {
ondemand::object object;
// Stays false unless a field with the expected key is seen.
bool got_key = false;
ASSERT_SUCCESS( doc_result.get(object) );
for (auto field : object) {
std::string_view keyv;
// Error-code style access: the unescaped key is written into keyv.
ASSERT_SUCCESS( field.unescaped_key().get(keyv) );
// After unescaping, the key compares equal to the plain text "key".
if(keyv == "key") {
int64_t value;
ASSERT_SUCCESS( field.value().get(value) );
ASSERT_EQUAL( value, 1);
got_key = true;
}
}
// The lambda reports whether the key was found.
return got_key;
}));
// Subtest 2: compare against the raw key, exactly as spelled in the document.
SUBTEST("ondemand::rawkey", test_ondemand_doc(json, [&](auto doc_result) {
ondemand::object object;
ASSERT_SUCCESS( doc_result.get(object) );
// Stays false unless a field with the expected key is seen.
bool got_key = false;
for (auto field : object) {
ondemand::raw_json_string keyv;
// key() yields the key without unescaping it.
ASSERT_SUCCESS( field.key().get(keyv) );
// The comparison text is a raw string literal, so it still contains
// the \u0065 escape sequence verbatim.
if(keyv == R"(k\u0065y)") {
int64_t value;
ASSERT_SUCCESS( field.value().get(value) );
ASSERT_EQUAL( value, 1);
got_key = true;
}
}
// The lambda reports whether the key was found.
return got_key;
}));
TEST_SUCCEED();
}
bool value_object_index() { bool value_object_index() {
TEST_START(); TEST_START();
auto json = R"({ "outer": { "a": 1, "b": 2, "c/d": 3 } })"_padded; auto json = R"({ "outer": { "a": 1, "b": 2, "c/d": 3 } })"_padded;
@ -1079,6 +1116,7 @@ namespace object_tests {
bool run() { bool run() {
return return
value_search_unescaped_key() &&
missing_key_continue() && missing_key_continue() &&
no_missing_keys() && no_missing_keys() &&
missing_keys() && missing_keys() &&