Fixing minor issue with document stream (DOM). (#1648)
* Fixing minor issue with document stream (DOM).
* Porting over the fix.
This commit is contained in:
parent
90efd79055
commit
bea1483cde
|
@ -8,6 +8,73 @@
|
|||
"array": "cpp",
|
||||
"iterator": "cpp",
|
||||
"chrono": "cpp",
|
||||
"optional": "cpp"
|
||||
"optional": "cpp",
|
||||
"__locale": "cpp",
|
||||
"__bit_reference": "cpp",
|
||||
"__config": "cpp",
|
||||
"__debug": "cpp",
|
||||
"__errc": "cpp",
|
||||
"__functional_base": "cpp",
|
||||
"__hash_table": "cpp",
|
||||
"__mutex_base": "cpp",
|
||||
"__node_handle": "cpp",
|
||||
"__nullptr": "cpp",
|
||||
"__split_buffer": "cpp",
|
||||
"__string": "cpp",
|
||||
"__threading_support": "cpp",
|
||||
"__tree": "cpp",
|
||||
"__tuple": "cpp",
|
||||
"algorithm": "cpp",
|
||||
"atomic": "cpp",
|
||||
"bit": "cpp",
|
||||
"bitset": "cpp",
|
||||
"cctype": "cpp",
|
||||
"cinttypes": "cpp",
|
||||
"clocale": "cpp",
|
||||
"cmath": "cpp",
|
||||
"complex": "cpp",
|
||||
"condition_variable": "cpp",
|
||||
"cstdarg": "cpp",
|
||||
"cstddef": "cpp",
|
||||
"cstdint": "cpp",
|
||||
"cstdio": "cpp",
|
||||
"cstdlib": "cpp",
|
||||
"cstring": "cpp",
|
||||
"ctime": "cpp",
|
||||
"cwchar": "cpp",
|
||||
"cwctype": "cpp",
|
||||
"deque": "cpp",
|
||||
"exception": "cpp",
|
||||
"fstream": "cpp",
|
||||
"functional": "cpp",
|
||||
"initializer_list": "cpp",
|
||||
"iomanip": "cpp",
|
||||
"ios": "cpp",
|
||||
"iosfwd": "cpp",
|
||||
"iostream": "cpp",
|
||||
"istream": "cpp",
|
||||
"limits": "cpp",
|
||||
"locale": "cpp",
|
||||
"map": "cpp",
|
||||
"memory": "cpp",
|
||||
"mutex": "cpp",
|
||||
"new": "cpp",
|
||||
"ostream": "cpp",
|
||||
"ratio": "cpp",
|
||||
"set": "cpp",
|
||||
"sstream": "cpp",
|
||||
"stack": "cpp",
|
||||
"stdexcept": "cpp",
|
||||
"streambuf": "cpp",
|
||||
"string": "cpp",
|
||||
"string_view": "cpp",
|
||||
"system_error": "cpp",
|
||||
"thread": "cpp",
|
||||
"tuple": "cpp",
|
||||
"type_traits": "cpp",
|
||||
"typeinfo": "cpp",
|
||||
"unordered_map": "cpp",
|
||||
"utility": "cpp",
|
||||
"vector": "cpp"
|
||||
}
|
||||
}
|
|
@ -187,7 +187,7 @@ inline void document_stream::start() noexcept {
|
|||
// Always run the first stage 1 parse immediately
|
||||
batch_start = 0;
|
||||
error = run_stage1(*parser, batch_start);
|
||||
if(error == EMPTY) {
|
||||
while(error == EMPTY) {
|
||||
// In exceptional cases, we may start with an empty block
|
||||
batch_start = next_batch_start();
|
||||
if (batch_start >= len) { return; }
|
||||
|
@ -204,7 +204,6 @@ inline void document_stream::start() noexcept {
|
|||
if (error) { return; }
|
||||
}
|
||||
#endif // SIMDJSON_THREADS_ENABLED
|
||||
|
||||
next();
|
||||
}
|
||||
|
||||
|
@ -226,7 +225,7 @@ simdjson_really_inline std::string_view document_stream::iterator::source() cons
|
|||
|
||||
|
||||
inline void document_stream::next() noexcept {
|
||||
// We always enter at once once in an error condition.
|
||||
// We always exit at once, once in an error condition.
|
||||
if (error) { return; }
|
||||
|
||||
// Load the next document from the batch
|
||||
|
|
|
@ -164,7 +164,16 @@ simdjson_really_inline error_code scan() {
|
|||
// We truncate the input to the end of the last complete document (or zero).
|
||||
auto new_structural_indexes = find_next_document_index(parser);
|
||||
if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) {
|
||||
return CAPACITY; // If the buffer is partial but the document is incomplete, it's too big to parse.
|
||||
if(parser.structural_indexes[0] == 0) {
|
||||
// If the buffer is partial and we started at index 0 but the document is
|
||||
// incomplete, it's too big to parse.
|
||||
return CAPACITY;
|
||||
} else {
|
||||
// It is possible that the document could be parsed, we just had a lot
|
||||
// of white space.
|
||||
parser.n_structural_indexes = 0;
|
||||
return EMPTY;
|
||||
}
|
||||
}
|
||||
parser.n_structural_indexes = new_structural_indexes;
|
||||
} else if(partial == stage1_mode::streaming_final) {
|
||||
|
|
|
@ -291,7 +291,16 @@ simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_imp
|
|||
// We truncate the input to the end of the last complete document (or zero).
|
||||
auto new_structural_indexes = find_next_document_index(parser);
|
||||
if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) {
|
||||
return CAPACITY; // If the buffer is partial but the document is incomplete, it's too big to parse.
|
||||
if(parser.structural_indexes[0] == 0) {
|
||||
// If the buffer is partial and we started at index 0 but the document is
|
||||
// incomplete, it's too big to parse.
|
||||
return CAPACITY;
|
||||
} else {
|
||||
// It is possible that the document could be parsed, we just had a lot
|
||||
// of white space.
|
||||
parser.n_structural_indexes = 0;
|
||||
return EMPTY;
|
||||
}
|
||||
}
|
||||
|
||||
parser.n_structural_indexes = new_structural_indexes;
|
||||
|
|
|
@ -128,24 +128,43 @@ namespace document_stream_tests {
|
|||
|
||||
bool test_leading_spaces() {
|
||||
std::cout << "Running " << __func__ << std::endl;
|
||||
const simdjson::padded_string input = R"( [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] )"_padded;;
|
||||
const simdjson::padded_string input = R"( [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] )"_padded;;
|
||||
size_t count = 0;
|
||||
simdjson::dom::parser parser;
|
||||
simdjson::dom::document_stream stream;
|
||||
ASSERT_SUCCESS(parser.parse_many(input, 32).get(stream));
|
||||
count = 0;
|
||||
for(auto doc: stream) {
|
||||
auto error = doc.error();
|
||||
if(error) {
|
||||
std::cout << "Expected no error but got " << error << std::endl;
|
||||
return false;
|
||||
}
|
||||
count++;
|
||||
auto error = doc.error();
|
||||
if(error) {
|
||||
std::cout << "Expected no error but got " << error << std::endl;
|
||||
return false;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
return count == 15;
|
||||
}
|
||||
|
||||
|
||||
bool test_crazy_leading_spaces() {
|
||||
std::cout << "Running " << __func__ << std::endl;
|
||||
const simdjson::padded_string input = R"( [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] )"_padded;;
|
||||
size_t count = 0;
|
||||
simdjson::dom::parser parser;
|
||||
simdjson::dom::document_stream stream;
|
||||
ASSERT_SUCCESS(parser.parse_many(input, 32).get(stream));
|
||||
count = 0;
|
||||
for(auto doc: stream) {
|
||||
auto error = doc.error();
|
||||
if(error) {
|
||||
std::cout << "Expected no error but got " << error << std::endl;
|
||||
return false;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
return count == 15;
|
||||
}
|
||||
|
||||
bool issue1307() {
|
||||
std::cout << "Running " << __func__ << std::endl;
|
||||
const simdjson::padded_string input = decode_base64("AgAMACA=");
|
||||
|
@ -795,6 +814,7 @@ namespace document_stream_tests {
|
|||
stress_data_race() &&
|
||||
stress_data_race_with_error() &&
|
||||
test_leading_spaces() &&
|
||||
test_crazy_leading_spaces() &&
|
||||
simple_example() &&
|
||||
truncated_window() &&
|
||||
truncated_window_unclosed_string_in_object() &&
|
||||
|
|
Loading…
Reference in New Issue