diff --git a/doc/basics.md b/doc/basics.md index e979ef4c..5002914b 100644 --- a/doc/basics.md +++ b/doc/basics.md @@ -966,11 +966,11 @@ int main(void) { ### Current location in document -Sometimes, it might be helpful to know the current location in the document during iteration. This is especially useful when -encountering errors. Using `current_location()` in combination of exception-free error handling makes it easy to identify broken JSON -and errors. Users can call the `current_location()` method on a document instance to retrieve a `const char *` pointer to the current -location in the document. This method also works even after an error has invalidated the document and the parser (e.g. `TAPE_ERROR`, -`INCOMPLETE_ARRAY_OR_OBJECT`). As an example, consider the following, +Sometimes, it might be helpful to know the current location in the document during iteration. This is especially useful when encountering errors. The `current_location()` method on a +`document` instance makes it easy to identify common JSON errors. Users can call the `current_location()` method on a valid document instance to retrieve a `const char *` pointer to the current location in the document. This method also works even after an error has invalidated the document and the parser (e.g. `TAPE_ERROR`, `INCOMPLETE_ARRAY_OR_OBJECT`). +When the input is a `padded_string` or another null-terminated source, you may +use the `const char *` pointer as a C string. As an example, consider the following +code, which uses the exception-free simdjson interface: ```c++ auto broken_json = R"( {"double": 13.06, false, "integer": -343} )"_padded; // Missing key @@ -984,9 +984,24 @@ if (error) { } ``` -In the previous example, we tried to access the `"integer"` key, but since the parser had to go through a value without a key before -(`false`), a `TAPE_ERROR` error gets thrown. 
`current_location()` will then point at the location of the error, and the user can now easily see the relevant problem. `current_location()` also has uses when the error/exception is triggered but an incorrect -call done by the user. For example, +You may also use `current_location()` with exceptions as follows: + +```c++ +auto broken_json = R"( {"double": 13.06, false, "integer": -343} )"_padded; +ondemand::parser parser; +ondemand::document doc = parser.iterate(broken_json); +try { + return int64_t(doc["integer"]); +} catch(simdjson_error& err) { + std::cerr << doc.current_location() << std::endl; + return -1; +} +``` + +In these examples, we tried to access the `"integer"` key, but since the parser +had to go through a value without a key before (`false`), a `TAPE_ERROR` error is thrown. +The pointer returned by the `current_location()` method then points at the location of the error. The `current_location()` method may also be used when the error is triggered +by a user action, even if the JSON input is valid. Consider the following example: ```c++ auto json = R"( [1,2,3] )"_padded; @@ -1000,7 +1015,8 @@ if (error) { } ``` -If the location is invalid (i.e. at the end of a document), `current_location()` will return an `OUT_OF_BOUNDS` error. Example: +If the location is invalid (i.e. at the end of a document), the `current_location()` +method returns an `OUT_OF_BOUNDS` error. For example: ```c++ auto json = R"( [1,2,3] )"_padded; @@ -1012,8 +1028,8 @@ for (auto val : doc) { std::cout << doc.current_location() << std::endl; // Throws OUT_OF_BOUNDS ``` -Finally, note that `current_location()` can also be used even when no exceptions/errors are thrown. This can be helpful for users -that want to know the current state of iteration during parsing. For example, +Finally, the `current_location()` method may also be used even when no exceptions/errors +are thrown. This can be helpful for users that want to know the current state of iteration during parsing. 
For example: ```c++ auto json = R"( [[1,2,3], -23.4, {"key": "value"}, true] )"_padded; @@ -1028,6 +1044,15 @@ for (auto val : doc) { } ``` +The `current_location()` method requires a valid `document` instance. If the +`iterate` function fails to return a valid document, then you cannot use +`current_location()` to identify the location of an error in the input string. +The errors reported by `iterate` function include EMPTY if no JSON document is detected, +UTF8_ERROR if the string is not a valid UTF-8 string, UNESCAPED_CHARS if a string +contains control characters that must be escaped and UNCLOSED_STRING if there +is an unclosed string in the document. We do not provide location information for these +errors. + Rewinding ---------- diff --git a/include/simdjson/generic/ondemand/value_iterator-inl.h b/include/simdjson/generic/ondemand/value_iterator-inl.h index eeda81d0..1a7b5c40 100644 --- a/include/simdjson/generic/ondemand/value_iterator-inl.h +++ b/include/simdjson/generic/ondemand/value_iterator-inl.h @@ -454,37 +454,37 @@ simdjson_warn_unused simdjson_really_inline simdjson_result val } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_uint64() noexcept { auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); - if(result.error() != INCORRECT_TYPE) { advance_non_root_scalar("uint64"); } + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } return result; } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); - if(result.error() != INCORRECT_TYPE) { advance_non_root_scalar("uint64"); } + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } return result; } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_int64() noexcept { auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); - 
if(result.error() != INCORRECT_TYPE) { advance_non_root_scalar("int64"); } + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } return result; } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_int64_in_string() noexcept { auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); - if(result.error() != INCORRECT_TYPE) { advance_non_root_scalar("int64"); } + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } return result; } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_double() noexcept { auto result = numberparsing::parse_double(peek_non_root_scalar("double")); - if(result.error() != INCORRECT_TYPE) { advance_non_root_scalar("double"); } + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_double_in_string() noexcept { auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); - if(result.error() != INCORRECT_TYPE) { advance_non_root_scalar("double"); } + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_bool() noexcept { auto result = parse_bool(peek_non_root_scalar("bool")); - if(result.error() != INCORRECT_TYPE) { advance_non_root_scalar("bool"); } + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } return result; } simdjson_really_inline bool value_iterator::is_null() noexcept { @@ -533,9 +533,8 @@ simdjson_really_inline simdjson_result value_iterator::get_root_number() } number num; error_code error = numberparsing::parse_number(tmpbuf, num); - if(error == INCORRECT_TYPE) { return error; } - advance_root_scalar("number"); // we consume! 
if(error) { return error; } + advance_root_scalar("number"); return num; } @@ -554,7 +553,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iter return NUMBER_ERROR; } auto result = numberparsing::parse_unsigned(tmpbuf); - if(result.error() != INCORRECT_TYPE) { advance_root_scalar("uint64"); } + if(result.error() == SUCCESS) { advance_root_scalar("uint64"); } return result; } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_uint64_in_string() noexcept { @@ -566,7 +565,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iter return NUMBER_ERROR; } auto result = numberparsing::parse_unsigned_in_string(tmpbuf); - if(result.error() != INCORRECT_TYPE) { advance_root_scalar("uint64"); } + if(result.error() == SUCCESS) { advance_root_scalar("uint64"); } return result; } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_int64() noexcept { @@ -579,7 +578,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_itera } auto result = numberparsing::parse_integer(tmpbuf); - if(result.error() != INCORRECT_TYPE) { advance_root_scalar("int64"); } + if(result.error() == SUCCESS) { advance_root_scalar("int64"); } return result; } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_int64_in_string() noexcept { @@ -592,7 +591,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_itera } auto result = numberparsing::parse_integer_in_string(tmpbuf); - if(result.error() != INCORRECT_TYPE) { advance_root_scalar("int64"); } + if(result.error() == SUCCESS) { advance_root_scalar("int64"); } return result; } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_double() noexcept { @@ -607,7 +606,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterat return NUMBER_ERROR; } auto result = numberparsing::parse_double(tmpbuf); - if(result.error() != INCORRECT_TYPE) { 
advance_root_scalar("double"); } + if(result.error() == SUCCESS) { advance_root_scalar("double"); } return result; } @@ -623,7 +622,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterat return NUMBER_ERROR; } auto result = numberparsing::parse_double_in_string(tmpbuf); - if(result.error() != INCORRECT_TYPE) { advance_root_scalar("double"); } + if(result.error() == SUCCESS) { advance_root_scalar("double"); } return result; } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_bool() noexcept { @@ -631,13 +630,14 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator auto json = peek_root_scalar("bool"); uint8_t tmpbuf[5+1]; if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { return incorrect_type_error("Not a boolean"); } - advance_root_scalar("bool"); - return parse_bool(tmpbuf); + auto result = parse_bool(tmpbuf); + if(result.error() == SUCCESS) { advance_root_scalar("bool"); } + return result; } simdjson_really_inline bool value_iterator::is_root_null() noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("null"); - auto result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[5]))); if(result) { advance_root_scalar("null"); } return result; diff --git a/tests/ondemand/ondemand_error_location_tests.cpp b/tests/ondemand/ondemand_error_location_tests.cpp index d16b8244..fb7af2ec 100644 --- a/tests/ondemand/ondemand_error_location_tests.cpp +++ b/tests/ondemand/ondemand_error_location_tests.cpp @@ -83,7 +83,7 @@ namespace error_location_tests { double d; ASSERT_ERROR(doc.at_pointer("/b/c/0").get(d), NUMBER_ERROR); ASSERT_SUCCESS(doc.current_location().get(ptr)); - ASSERT_EQUAL(ptr, ", 2.3]}} "); + ASSERT_EQUAL(ptr, "1.2., 2.3]}} "); uint64_t i; ASSERT_ERROR(doc.at_pointer("/a/2/1").get(i), TAPE_ERROR); 
ASSERT_SUCCESS(doc.current_location().get(ptr)); @@ -93,14 +93,14 @@ namespace error_location_tests { bool broken_json1() { TEST_START(); - auto json = R"( �{"a":1, 3} )"_padded; + auto json = " \xc3\x94\xc3\xb8\xe2\x84\xa6{\"a\":1, 3} "_padded; ondemand::parser parser; ondemand::document doc; const char * ptr; ASSERT_SUCCESS(parser.iterate(json).get(doc)); ASSERT_ERROR(doc["a"], INCORRECT_TYPE); ASSERT_SUCCESS(doc.current_location().get(ptr)); - ASSERT_EQUAL(ptr, "�{\"a\":1, 3} "); + ASSERT_EQUAL(ptr, "\xc3\x94\xc3\xb8\xe2\x84\xa6{\"a\":1, 3} "); TEST_SUCCEED(); } @@ -221,7 +221,20 @@ namespace error_location_tests { double d; ASSERT_ERROR(doc.at_pointer("/0").get_double().get(d), NUMBER_ERROR); ASSERT_SUCCESS(doc.current_location().get(ptr)); - ASSERT_EQUAL(ptr, "] "); + ASSERT_EQUAL(ptr, "13.34.514] "); + TEST_SUCCEED(); + } + bool number_parsing_root_error() { + TEST_START(); + auto json = R"( 13.34.514 )"_padded; + ondemand::parser parser; + ondemand::document doc; + ASSERT_SUCCESS(parser.iterate(json).get(doc)); + const char * ptr; + double d; + ASSERT_ERROR(doc.get_double().get(d), NUMBER_ERROR); + ASSERT_SUCCESS(doc.current_location().get(ptr)); + ASSERT_EQUAL(ptr, "13.34.514 "); TEST_SUCCEED(); } @@ -239,6 +252,7 @@ namespace error_location_tests { no_such_field() && object_with_no_such_field() && number_parsing_error() && + number_parsing_root_error() && true; } diff --git a/tests/ondemand/ondemand_readme_examples.cpp b/tests/ondemand/ondemand_readme_examples.cpp index 7d8b5ac3..83798d49 100644 --- a/tests/ondemand/ondemand_readme_examples.cpp +++ b/tests/ondemand/ondemand_readme_examples.cpp @@ -708,6 +708,20 @@ bool simple_error_example() { return false; } } + + int64_t current_location_tape_error_with_except() { + auto broken_json = R"( {"double": 13.06, false, "integer": -343} )"_padded; + ondemand::parser parser; + ondemand::document doc = parser.iterate(broken_json); + try { + return int64_t(doc["integer"]); + } catch(simdjson_error& err) { + 
std::cerr << err.error() << std::endl; + std::cerr << doc.current_location() << std::endl; + return -1; + } + } + #endif int load_example() { @@ -885,6 +899,7 @@ int main() { && current_location_no_error() #if SIMDJSON_EXCEPTIONS && number_tests() + && current_location_tape_error_with_except() #endif ) { return 0;