Verification and bug fix of issue 1511 (#1602)

* Verification and bug fix.

* Removing comment.

* Removing spaces.

* Guarding exceptions.

* Tweaking the test
This commit is contained in:
Daniel Lemire 2021-06-06 17:55:33 -04:00 committed by GitHub
parent 893e613faa
commit eb0ae041e3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 111 additions and 5 deletions

View File

@ -177,7 +177,8 @@ Let us illustrate the idea with code:
auto json = R"([1,2,3] {"1":1,"2":3,"4":4} [1,2,3] )"_padded;
simdjson::dom::parser parser;
simdjson::dom::document_stream stream;
ASSERT_SUCCESS( parser.parse_many(json).get(stream) );
auto error = parser.parse_many(json).get(stream);
if( error ) { /* do something */ }
auto i = stream.begin();
for(; i != stream.end(); ++i) {
auto doc = *i;

View File

@ -16,6 +16,10 @@ inline void document::rewind() noexcept {
iter.rewind();
}
/**
 * Produces a human-readable description of this document for debugging
 * by delegating to the underlying json_iterator's to_string().
 */
inline std::string document::to_debug_string() noexcept {
  std::string debug_text = iter.to_string();
  return debug_text;
}
/**
 * Builds a value_iterator bound to this document's json_iterator, using
 * depth 1 and the iterator's root checkpoint as the starting position.
 */
simdjson_really_inline value_iterator document::resume_value_iterator() noexcept {
  value_iterator resumed(&iter, 1, iter.root_checkpoint());
  return resumed;
}

View File

@ -303,7 +303,10 @@ public:
* beginning of the document, as if it had just been created.
*/
inline void rewind() noexcept;
/**
* Returns debugging information.
*/
inline std::string to_debug_string() noexcept;
protected:
simdjson_really_inline document(ondemand::json_iterator &&iter) noexcept;
simdjson_really_inline const uint8_t *text(uint32_t idx) const noexcept;
@ -319,7 +322,6 @@ protected:
json_iterator iter{}; ///< Current position in the document
static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0
friend struct simdjson_result<document>;
friend class array_iterator;
friend class value;
friend class ondemand::parser;

View File

@ -6,6 +6,7 @@ simdjson_really_inline json_iterator::json_iterator(json_iterator &&other) noexc
: token(std::forward<token_iterator>(other.token)),
parser{other.parser},
_string_buf_loc{other._string_buf_loc},
error{other.error},
_depth{other._depth}
{
other.parser = nullptr;
@ -14,6 +15,7 @@ simdjson_really_inline json_iterator &json_iterator::operator=(json_iterator &&o
token = other.token;
parser = other.parser;
_string_buf_loc = other._string_buf_loc;
error = other.error;
_depth = other._depth;
other.parser = nullptr;
return *this;
@ -142,6 +144,7 @@ inline std::string json_iterator::to_string() const noexcept {
return std::string("json_iterator [ depth : ") + std::to_string(_depth)
+ std::string(", structural : '") + std::string(current_structural,1)
+ std::string("', offset : ") + std::to_string(token.current_offset())
+ std::string("', error : ") + error_message(error)
+ std::string(" ]");
}

View File

@ -341,7 +341,6 @@ protected:
friend class field;
friend class object;
friend struct simdjson_result<value>;
friend struct simdjson_result<document>;
friend struct simdjson_result<field>;
};

View File

@ -10,6 +10,7 @@ namespace parse_api_tests {
const padded_string BASIC_NDJSON = "[1,2,3]\n[4,5,6]"_padded;
const padded_string EMPTY_NDJSON = ""_padded;
bool parser_iterate_empty() {
TEST_START();
FILE *p;
@ -31,7 +32,7 @@ namespace parse_api_tests {
std::cout << "Warning: I could not create temporary file " << tmpfilename << std::endl;
std::cout << "We omit testing the empty file case." << std::endl;
}
return true;
TEST_SUCCEED();
}
bool parser_iterate() {
@ -162,6 +163,101 @@ namespace parse_api_tests {
simdjson_unused ondemand::array array = doc;
TEST_SUCCEED();
}
/**
 * Checks that a single ondemand::document instance can be reused across
 * several parser.iterate() calls, including after iterating over documents
 * that turn out to be invalid (truncated input, unclosed string).
 *
 * Progress labels are written to std::cout so that a failure can be matched
 * to the step that triggered it.
 */
bool parser_document_reuse() {
  TEST_START();
  ondemand::document doc;
  // A document spans about 40 bytes. Nevertheless, some users
  // would rather reuse them.
  std::cout << sizeof(doc) << std::endl;
  auto json = R"({"key": "value"})"_padded;
  auto jsonbad = R"({"key": "value")"_padded; // deliberately broken (missing closing brace)
  // The key's closing quote is missing.
  auto jsonunclosedstring = "{\"coordinates:[{\"x\":1.1,\"y\":2.2,\"z\":3.3}]}"_padded;
  std::string output;
  ondemand::parser parser;

  std::cout << "correct document (1)" << std::endl;
  ASSERT_SUCCESS( parser.iterate(json).get(doc) );
  ASSERT_SUCCESS(simdjson::to_string(doc).get(output));
  std::cout << output << std::endl;

  std::cout << "correct document (2)" << std::endl;
  ASSERT_SUCCESS( parser.iterate(json).get(doc) );
  for(ondemand::field field : doc.get_object() ) {
    std::cout << "field: " << field.key() << std::endl;
  }

  std::cout << "unclosed string document " << std::endl;
  simdjson::error_code error;
  if((error = parser.iterate(jsonunclosedstring).get(doc)) == SUCCESS) {
    // fallback kernel:
    ASSERT_EQUAL( doc.get_object().find_field("coordinates").error(), TAPE_ERROR );
  } else {
    // regular kernels:
    ASSERT_EQUAL( error, UNCLOSED_STRING );
  }

  std::cout << "truncated document " << std::endl;
  ASSERT_SUCCESS( parser.iterate(jsonbad).get(doc) );
  ASSERT_EQUAL( simdjson::to_string(doc).get(output), TAPE_ERROR );

  std::cout << "correct document with new doc" << std::endl;
  ondemand::document doc2;
  ASSERT_SUCCESS( parser.iterate(json).get(doc2) );
  for(ondemand::field field : doc2.get_object() ) {
    std::cout << "field: " << field.key() << std::endl;
  }

  // Dump the state of the reused document before iterating over it again.
  std::cout << "correct document (3): " << doc.to_debug_string() << std::endl;
  std::cout << "correct document (3)" << std::endl;
  ASSERT_SUCCESS( parser.iterate(json).get(doc) );
  std::cout << doc.to_debug_string() << std::endl;
  for(ondemand::field field : doc.get_object() ) {
    std::cout << "field: " << field.key() << std::endl;
  }

  // This step parses jsonbad (the truncated document), so label it as such.
  std::cout << "truncated document " << std::endl;
  ASSERT_SUCCESS( parser.iterate(jsonbad).get(doc) );
  ASSERT_EQUAL( simdjson::to_string(doc).get(output), TAPE_ERROR );

  // next two lines are terrible code.
  // NOTE(review): assigning to an object right after explicitly invoking its
  // destructor is fragile; presumably kept to exercise this reuse pattern on
  // purpose -- confirm before copying it elsewhere.
  doc.~document();
  doc = ondemand::document();
  //
  std::cout << "correct document (4)" << std::endl;
  ASSERT_SUCCESS( parser.iterate(json).get(doc) );
  ASSERT_SUCCESS( simdjson::to_string(doc).get(output) );
  std::cout << output << std::endl;

  std::cout << "unclosed string document " << std::endl;
  if((error = parser.iterate(jsonunclosedstring).get(doc)) == SUCCESS) {
    // fallback kernel:
    ASSERT_EQUAL( doc.get_object().find_field("coordinates").error(), TAPE_ERROR );
  } else {
    // regular kernels:
    ASSERT_EQUAL( error, UNCLOSED_STRING );
  }

  // next two lines are terrible code.
  // NOTE(review): same destroy-then-assign pattern as above -- confirm intent.
  doc.~document();
  doc = ondemand::document();
  //
  std::cout << "correct document (5)" << std::endl;
  ASSERT_SUCCESS( parser.iterate(json).get(doc) );
  ASSERT_SUCCESS( simdjson::to_string(doc).get(output) );
  std::cout << output << std::endl;
  TEST_SUCCEED();
}
#endif // SIMDJSON_EXCEPTIONS
bool run() {
@ -171,6 +267,7 @@ namespace parse_api_tests {
parser_iterate_padded_string_view() &&
parser_iterate_insufficient_padding() &&
#if SIMDJSON_EXCEPTIONS
parser_document_reuse() &&
parser_iterate_exception() &&
#endif // SIMDJSON_EXCEPTIONS
true;