Documenting raw string access. (#1566)

* Documenting raw string access.

* Removing trailing space.
This commit is contained in:
Daniel Lemire 2021-05-20 13:57:48 -04:00 committed by GitHub
parent a27367210a
commit ad1cd6a2ce
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 73 additions and 53 deletions

View File

@ -18,7 +18,7 @@ An overview of what you need to know to use simdjson, with examples.
* [Error Handling](#error-handling)
* [Error Handling Example](#error-handling-example)
* [Exceptions](#exceptions)
* [Tree Walking and JSON Element Types](#tree-walking-and-json-element-types)
* [Direct Access to the Raw String](#direct-access-to-the-raw-string)
* [Newline-Delimited JSON (ndjson) and JSON lines](#newline-delimited-json-ndjson-and-json-lines)
* [Thread Safety](#thread-safety)
* [Standard Compliance](#standard-compliance)
@ -725,61 +725,29 @@ int main(void) {
}
```
Direct Access to the Raw String
--------------------------------
Tree Walking and JSON Element Types
-----------------------------------
The simdjson library makes explicit assumptions about types. For example, numbers
must be integers (up to 64-bit integers) or binary64 floating-point numbers. Some users
have different needs. For example, some users might want to support big integers.
The library makes this possible by providing a `raw_json_token` method which returns
a `std::string_view` instance containing the value as a string which you may then
parse as you see fit.
Sometimes you don't necessarily have a document with a known type, and are trying to generically
inspect or walk over JSON elements. To do that, you can use iterators and the type() method. For
example, here's a quick and dirty recursive function that verbosely prints the JSON document as JSON
(* ignoring nuances like trailing commas and escaping strings, for brevity's sake):
```c++
// Recursively writes `element` to standard output in a verbose, JSON-like form.
// This is a deliberately rough sketch: every array item is followed by a comma
// (the last one included), object fields are written without any separator,
// string contents are not escaped, and each scalar is followed by a newline.
void print_json(dom::element element) {
  switch (element.type()) {
    case dom::element_type::ARRAY: {
      std::cout << "[";
      for (dom::element item : dom::array(element)) {
        print_json(item);
        std::cout << ",";
      }
      std::cout << "]";
      return;
    }
    case dom::element_type::OBJECT: {
      std::cout << "{";
      for (dom::key_value_pair member : dom::object(element)) {
        // Key first, wrapped in quotes, then the value via recursion.
        std::cout << "\"" << member.key << "\": ";
        print_json(member.value);
      }
      std::cout << "}";
      return;
    }
    // Scalars: convert to the matching C++ type and print it on its own line.
    case dom::element_type::INT64: {
      std::cout << int64_t(element) << std::endl;
      return;
    }
    case dom::element_type::UINT64: {
      std::cout << uint64_t(element) << std::endl;
      return;
    }
    case dom::element_type::DOUBLE: {
      std::cout << double(element) << std::endl;
      return;
    }
    case dom::element_type::STRING: {
      std::cout << std::string_view(element) << std::endl;
      return;
    }
    case dom::element_type::BOOL: {
      std::cout << bool(element) << std::endl;
      return;
    }
    case dom::element_type::NULL_VALUE: {
      std::cout << "null" << std::endl;
      return;
    }
  }
}
void basics_treewalk_1() {
dom::parser parser;
print_json(parser.load("twitter.json"));
}
```C++
// The "value" field below holds an integer with far more digits than a
// 64-bit integer can represent.
simdjson::ondemand::parser parser;
simdjson::padded_string docdata = R"({"value":12321323213213213213213213213211223})"_padded;
simdjson::ondemand::document doc = parser.iterate(docdata);
simdjson::ondemand::object obj = doc.get_object();
// Ask for the field's text as a std::string_view rather than a parsed number,
// so the caller can parse it however they see fit.
std::string_view token = obj["value"].raw_json_token();
// token has value "12321323213213213213213213213211223"
```
The `raw_json_token` method even works when the JSON value is a string. In such cases, it
will return the complete string, including the surrounding quotes, with any escape sequences
left exactly as they appear in the source document.
Newline-Delimited JSON (ndjson) and JSON lines
----------------------------------------------

View File

@ -5,7 +5,32 @@ using namespace simdjson;
namespace misc_tests {
using namespace std;
// Verifies that raw_json_token() yields the unparsed text of a numeric field
// whose integer value has too many digits for a 64-bit type.
simdjson_warn_unused bool big_integer() {
  TEST_START();
  simdjson::ondemand::parser reader;
  simdjson::padded_string json = R"({"value":12321323213213213213213213213211223})"_padded;

  simdjson::ondemand::document document;
  ASSERT_SUCCESS(reader.iterate(json).get(document));

  simdjson::ondemand::object root;
  ASSERT_SUCCESS(document.get_object().get(root));

  string_view raw;
  ASSERT_SUCCESS(root["value"].raw_json_token().get(raw));
  // The digits are expected back exactly as written in the input.
  ASSERT_EQUAL(raw, "12321323213213213213213213213211223");
  return true;
}
// Verifies that raw_json_token() on a string-valued field yields the string
// together with its surrounding double quotes.
simdjson_warn_unused bool big_integer_in_string() {
  TEST_START();
  simdjson::ondemand::parser reader;
  simdjson::padded_string json = R"({"value":"12321323213213213213213213213211223"})"_padded;

  simdjson::ondemand::document document;
  ASSERT_SUCCESS(reader.iterate(json).get(document));

  simdjson::ondemand::object root;
  ASSERT_SUCCESS(document.get_object().get(root));

  string_view raw;
  ASSERT_SUCCESS(root["value"].raw_json_token().get(raw));
  // Note the escaped quotes: they are part of the expected token.
  ASSERT_EQUAL(raw, "\"12321323213213213213213213213211223\"");
  return true;
}
simdjson_warn_unused bool test_raw_json_token(string_view json, string_view expected_token, int expected_start_index = 0) {
string title = "'";
title.append(json.data(), json.length());
@ -64,6 +89,8 @@ namespace misc_tests {
// Runs the tests of this namespace in order, stopping at the first failure.
// Returns true only when every test reported success.
bool run() {
  if (!big_integer_in_string()) { return false; }
  if (!big_integer()) { return false; }
  if (!raw_json_token()) { return false; }
  return true;
}

View File

@ -76,6 +76,29 @@ bool using_the_parsed_json_2() {
TEST_SUCCEED();
}
bool big_integer() {
TEST_START();
simdjson::ondemand::parser parser;
simdjson::padded_string docdata = R"({"value":12321323213213213213213213213211223})"_padded;
simdjson::ondemand::document doc = parser.iterate(docdata);
simdjson::ondemand::object obj = doc.get_object();
string_view token = obj["value"].raw_json_token();
std::cout << token << std::endl;
// token == "12321323213213213213213213213211223"
TEST_SUCCEED();
}
bool big_integer_in_string() {
TEST_START();
simdjson::ondemand::parser parser;
simdjson::padded_string docdata = R"({"value":"12321323213213213213213213213211223"})"_padded;
simdjson::ondemand::document doc = parser.iterate(docdata);
simdjson::ondemand::object obj = doc.get_object();
string_view token = obj["value"].raw_json_token();
std::cout << token << std::endl;
// token == "\"12321323213213213213213213213211223\""
TEST_SUCCEED();
}
bool using_the_parsed_json_3() {
TEST_START();
@ -171,6 +194,8 @@ int main() {
&& basics_2()
&& using_the_parsed_json_1()
&& using_the_parsed_json_2()
&& big_integer()
&& big_integer_in_string()
&& using_the_parsed_json_3()
&& using_the_parsed_json_4()
&& using_the_parsed_json_5()