diff --git a/doc/basics.md b/doc/basics.md index a3a91a89..b3d856b1 100644 --- a/doc/basics.md +++ b/doc/basics.md @@ -18,7 +18,7 @@ An overview of what you need to know to use simdjson, with examples. * [Error Handling](#error-handling) * [Error Handling Example](#error-handling-example) * [Exceptions](#exceptions) -* [Tree Walking and JSON Element Types](#tree-walking-and-json-element-types) +* [Direct Access to the Raw String](#direct-access-to-the-raw-string) * [Newline-Delimited JSON (ndjson) and JSON lines](#newline-delimited-json-ndjson-and-json-lines) * [Thread Safety](#thread-safety) * [Standard Compliance](#standard-compliance) @@ -725,61 +725,29 @@ int main(void) { } ``` +Direct Access to the Raw String +-------------------------------- -Tree Walking and JSON Element Types ------------------------------------ +The simdjson library makes explicit assumptions about types. For example, numbers +must be integers (up to 64-bit integers) or binary64 floating-point numbers. Some users +have different needs. For example, some users might want to support big integers. +The library makes this possible by providing a `raw_json_token` method which returns +a `std::string_view` instance containing the value as a string which you may then +parse as you see fit. -Sometimes you don't necessarily have a document with a known type, and are trying to generically -inspect or walk over JSON elements. To do that, you can use iterators and the type() method. 
For -example, here's a quick and dirty recursive function that verbosely prints the JSON document as JSON -(* ignoring nuances like trailing commas and escaping strings, for brevity's sake): - -```c++ -void print_json(dom::element element) { - switch (element.type()) { - case dom::element_type::ARRAY: - cout << "["; - for (dom::element child : dom::array(element)) { - print_json(child); - cout << ","; - } - cout << "]"; - break; - case dom::element_type::OBJECT: - cout << "{"; - for (dom::key_value_pair field : dom::object(element)) { - cout << "\"" << field.key << "\": "; - print_json(field.value); - } - cout << "}"; - break; - case dom::element_type::INT64: - cout << int64_t(element) << endl; - break; - case dom::element_type::UINT64: - cout << uint64_t(element) << endl; - break; - case dom::element_type::DOUBLE: - cout << double(element) << endl; - break; - case dom::element_type::STRING: - cout << std::string_view(element) << endl; - break; - case dom::element_type::BOOL: - cout << bool(element) << endl; - break; - case dom::element_type::NULL_VALUE: - cout << "null" << endl; - break; - } -} - -void basics_treewalk_1() { - dom::parser parser; - print_json(parser.load("twitter.json")); -} +```C++ +simdjson::ondemand::parser parser; +simdjson::padded_string docdata = R"({"value":12321323213213213213213213213211223})"_padded; +simdjson::ondemand::document doc = parser.iterate(docdata); +simdjson::ondemand::object obj = doc.get_object(); +std::string_view token = obj["value"].raw_json_token(); +// token has value "12321323213213213213213213213211223" ``` +The `raw_json_token` method even works when the JSON value is a string. In such cases, it +will return the complete string with the quotes and with any escape sequences exactly as they appear in the +source document. 
+ Newline-Delimited JSON (ndjson) and JSON lines ---------------------------------------------- diff --git a/tests/ondemand/ondemand_misc_tests.cpp b/tests/ondemand/ondemand_misc_tests.cpp index 6717caa5..b3b06308 100644 --- a/tests/ondemand/ondemand_misc_tests.cpp +++ b/tests/ondemand/ondemand_misc_tests.cpp @@ -5,7 +5,32 @@ using namespace simdjson; namespace misc_tests { using namespace std; - + /* Checks that raw_json_token() on the value field returns the digits of the number verbatim; the literal is far larger than any 64-bit integer. */ simdjson_warn_unused bool big_integer() { + TEST_START(); + simdjson::ondemand::parser parser; + simdjson::padded_string docdata = R"({"value":12321323213213213213213213213211223})"_padded; + simdjson::ondemand::document doc; + ASSERT_SUCCESS(parser.iterate(docdata).get(doc)); + simdjson::ondemand::object o; + ASSERT_SUCCESS(doc.get_object().get(o)); + string_view token; + ASSERT_SUCCESS(o["value"].raw_json_token().get(token)); + ASSERT_EQUAL(token, "12321323213213213213213213213211223"); + return true; + } + /* Same check when the value field is a JSON string holding the digits. */ simdjson_warn_unused bool big_integer_in_string() { + TEST_START(); + simdjson::ondemand::parser parser; + simdjson::padded_string docdata = R"({"value":"12321323213213213213213213213211223"})"_padded; + simdjson::ondemand::document doc; + ASSERT_SUCCESS(parser.iterate(docdata).get(doc)); + simdjson::ondemand::object o; + ASSERT_SUCCESS(doc.get_object().get(o)); + string_view token; + ASSERT_SUCCESS(o["value"].raw_json_token().get(token)); + /* The expected token keeps the surrounding double quotes of the JSON string. */ ASSERT_EQUAL(token, "\"12321323213213213213213213213211223\""); + return true; + } simdjson_warn_unused bool test_raw_json_token(string_view json, string_view expected_token, int expected_start_index = 0) { string title = "'"; title.append(json.data(), json.length()); @@ -64,6 +89,8 @@ namespace misc_tests { bool run() { return + big_integer_in_string() && + big_integer() && raw_json_token() && true; } diff --git a/tests/ondemand/ondemand_readme_examples.cpp b/tests/ondemand/ondemand_readme_examples.cpp index 5637fe46..902684d5 100644 --- a/tests/ondemand/ondemand_readme_examples.cpp +++ 
b/tests/ondemand/ondemand_readme_examples.cpp @@ -76,6 +76,29 @@ bool using_the_parsed_json_2() { TEST_SUCCEED(); } + /* Documentation example: fetches the raw token of a number far larger than any 64-bit integer and prints it. */ bool big_integer() { + TEST_START(); + simdjson::ondemand::parser parser; + simdjson::padded_string docdata = R"({"value":12321323213213213213213213213211223})"_padded; + simdjson::ondemand::document doc = parser.iterate(docdata); + simdjson::ondemand::object obj = doc.get_object(); + string_view token = obj["value"].raw_json_token(); + std::cout << token << std::endl; + // token == "12321323213213213213213213213211223" + TEST_SUCCEED(); + } + + /* Documentation example: the value is a JSON string, so the raw token that is printed includes its surrounding double quotes. */ bool big_integer_in_string() { + TEST_START(); + simdjson::ondemand::parser parser; + simdjson::padded_string docdata = R"({"value":"12321323213213213213213213213211223"})"_padded; + simdjson::ondemand::document doc = parser.iterate(docdata); + simdjson::ondemand::object obj = doc.get_object(); + string_view token = obj["value"].raw_json_token(); + std::cout << token << std::endl; + // token == "\"12321323213213213213213213213211223\"" + TEST_SUCCEED(); + } bool using_the_parsed_json_3() { TEST_START(); @@ -171,6 +194,8 @@ int main() { && basics_2() && using_the_parsed_json_1() && using_the_parsed_json_2() + && big_integer() + && big_integer_in_string() && using_the_parsed_json_3() && using_the_parsed_json_4() && using_the_parsed_json_5()