Fixing minor issue with document stream (DOM). (#1648)

* Fixing minor issue with document stream (DOM).

* Porting over the fix.
This commit is contained in:
Daniel Lemire 2021-07-05 17:40:04 -04:00 committed by GitHub
parent 90efd79055
commit bea1483cde
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 117 additions and 13 deletions

69
.vscode/settings.json vendored
View File

@ -8,6 +8,73 @@
"array": "cpp",
"iterator": "cpp",
"chrono": "cpp",
"optional": "cpp"
"optional": "cpp",
"__locale": "cpp",
"__bit_reference": "cpp",
"__config": "cpp",
"__debug": "cpp",
"__errc": "cpp",
"__functional_base": "cpp",
"__hash_table": "cpp",
"__mutex_base": "cpp",
"__node_handle": "cpp",
"__nullptr": "cpp",
"__split_buffer": "cpp",
"__string": "cpp",
"__threading_support": "cpp",
"__tree": "cpp",
"__tuple": "cpp",
"algorithm": "cpp",
"atomic": "cpp",
"bit": "cpp",
"bitset": "cpp",
"cctype": "cpp",
"cinttypes": "cpp",
"clocale": "cpp",
"cmath": "cpp",
"complex": "cpp",
"condition_variable": "cpp",
"cstdarg": "cpp",
"cstddef": "cpp",
"cstdint": "cpp",
"cstdio": "cpp",
"cstdlib": "cpp",
"cstring": "cpp",
"ctime": "cpp",
"cwchar": "cpp",
"cwctype": "cpp",
"deque": "cpp",
"exception": "cpp",
"fstream": "cpp",
"functional": "cpp",
"initializer_list": "cpp",
"iomanip": "cpp",
"ios": "cpp",
"iosfwd": "cpp",
"iostream": "cpp",
"istream": "cpp",
"limits": "cpp",
"locale": "cpp",
"map": "cpp",
"memory": "cpp",
"mutex": "cpp",
"new": "cpp",
"ostream": "cpp",
"ratio": "cpp",
"set": "cpp",
"sstream": "cpp",
"stack": "cpp",
"stdexcept": "cpp",
"streambuf": "cpp",
"string": "cpp",
"string_view": "cpp",
"system_error": "cpp",
"thread": "cpp",
"tuple": "cpp",
"type_traits": "cpp",
"typeinfo": "cpp",
"unordered_map": "cpp",
"utility": "cpp",
"vector": "cpp"
}
}

View File

@ -187,7 +187,7 @@ inline void document_stream::start() noexcept {
// Always run the first stage 1 parse immediately
batch_start = 0;
error = run_stage1(*parser, batch_start);
if(error == EMPTY) {
while(error == EMPTY) {
// In exceptional cases, we may start with an empty block
batch_start = next_batch_start();
if (batch_start >= len) { return; }
@ -204,7 +204,6 @@ inline void document_stream::start() noexcept {
if (error) { return; }
}
#endif // SIMDJSON_THREADS_ENABLED
next();
}
@ -226,7 +225,7 @@ simdjson_really_inline std::string_view document_stream::iterator::source() cons
inline void document_stream::next() noexcept {
// We always enter at once once in an error condition.
// We always exit at once, once in an error condition.
if (error) { return; }
// Load the next document from the batch

View File

@ -164,7 +164,16 @@ simdjson_really_inline error_code scan() {
// We truncate the input to the end of the last complete document (or zero).
auto new_structural_indexes = find_next_document_index(parser);
if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) {
return CAPACITY; // If the buffer is partial but the document is incomplete, it's too big to parse.
if(parser.structural_indexes[0] == 0) {
// If the buffer is partial and we started at index 0 but the document is
// incomplete, it's too big to parse.
return CAPACITY;
} else {
// It is possible that the document could be parsed, we just had a lot
// of white space.
parser.n_structural_indexes = 0;
return EMPTY;
}
}
parser.n_structural_indexes = new_structural_indexes;
} else if(partial == stage1_mode::streaming_final) {

View File

@ -291,7 +291,16 @@ simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_imp
// We truncate the input to the end of the last complete document (or zero).
auto new_structural_indexes = find_next_document_index(parser);
if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) {
return CAPACITY; // If the buffer is partial but the document is incomplete, it's too big to parse.
if(parser.structural_indexes[0] == 0) {
// If the buffer is partial and we started at index 0 but the document is
// incomplete, it's too big to parse.
return CAPACITY;
} else {
// It is possible that the document could be parsed, we just had a lot
// of white space.
parser.n_structural_indexes = 0;
return EMPTY;
}
}
parser.n_structural_indexes = new_structural_indexes;

View File

@ -128,24 +128,43 @@ namespace document_stream_tests {
bool test_leading_spaces() {
std::cout << "Running " << __func__ << std::endl;
const simdjson::padded_string input = R"( [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] )"_padded;;
const simdjson::padded_string input = R"( [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] )"_padded;;
size_t count = 0;
simdjson::dom::parser parser;
simdjson::dom::document_stream stream;
ASSERT_SUCCESS(parser.parse_many(input, 32).get(stream));
count = 0;
for(auto doc: stream) {
auto error = doc.error();
if(error) {
std::cout << "Expected no error but got " << error << std::endl;
return false;
}
count++;
auto error = doc.error();
if(error) {
std::cout << "Expected no error but got " << error << std::endl;
return false;
}
count++;
}
return count == 15;
}
// Regression test: runs the documents in `input` through
// dom::parser::parse_many and iterates over the resulting document stream.
// Returns true only if no document reports an error and exactly 15 documents
// are produced (the literal below holds 15 `[1,23]` arrays).
// NOTE(review): the test name suggests the input begins with a very long run
// of whitespace; as shown here the literal contains only single spaces, which
// may be an artifact of how this text was rendered -- confirm against the
// repository before relying on this copy of the literal.
bool test_crazy_leading_spaces() {
std::cout << "Running " << __func__ << std::endl;
// The second ';' at the end of the next line is an empty statement with no effect.
const simdjson::padded_string input = R"( [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] )"_padded;;
size_t count = 0;
simdjson::dom::parser parser;
simdjson::dom::document_stream stream;
// The 32 is presumably the batch size passed to parse_many -- TODO confirm.
// ASSERT_SUCCESS is a test macro defined elsewhere; presumably it makes this
// function return false when the call reports an error -- verify.
ASSERT_SUCCESS(parser.parse_many(input, 32).get(stream));
count = 0; // redundant: count is already initialized to 0 above
for(auto doc: stream) {
auto error = doc.error();
if(error) {
// Any per-document error fails the test immediately.
std::cout << "Expected no error but got " << error << std::endl;
return false;
}
count++;
}
// Pass only if every one of the 15 documents was seen.
return count == 15;
}
bool issue1307() {
std::cout << "Running " << __func__ << std::endl;
const simdjson::padded_string input = decode_base64("AgAMACA=");
@ -795,6 +814,7 @@ namespace document_stream_tests {
stress_data_race() &&
stress_data_race_with_error() &&
test_leading_spaces() &&
test_crazy_leading_spaces() &&
simple_example() &&
truncated_window() &&
truncated_window_unclosed_string_in_object() &&