Fix an issue with truncated-byte function. (#1674)
This commit is contained in:
parent
f657516a7e
commit
d83e69d977
|
@ -1046,7 +1046,12 @@ for (auto doc: stream) {
|
|||
ondemand::value val;
|
||||
error = doc.at_pointer("/4").get(val);
|
||||
// error == simdjson::CAPACITY
|
||||
if(error) { std::cerr << error << std::endl; break; }
|
||||
if(error) {
|
||||
std::cerr << error << std::endl;
|
||||
// We left 293 bytes unprocessed at the tail end of the input.
|
||||
std::cout << " unprocessed bytes at the end: " << stream.truncated_bytes() << std::endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
counter++;
|
||||
}
|
||||
|
@ -1062,6 +1067,7 @@ This example should print out:
|
|||
5 = 5
|
||||
5 = 5
|
||||
This parser can't support a document that big
|
||||
unprocessed bytes at the end: 293
|
||||
```
|
||||
|
||||
If your documents are large (e.g., larger than a megabyte), then the `iterate_many` function may be ill-suited. It is really meant to support efficiently reading streams of relatively small documents (e.g., a few kilobytes each). If you have larger documents, you should use other functions like `iterate`.
|
||||
|
|
|
@ -256,6 +256,7 @@ inline size_t document_stream::size_in_bytes() const noexcept {
|
|||
}
|
||||
|
||||
// Returns a byte count describing how much of the input was left unprocessed.
// Two cases are distinguished by the stream's current `error` value.
inline size_t document_stream::truncated_bytes() const noexcept {
|
||||
// On a CAPACITY error, report everything from the start of the current batch
// to the end of the input (`len - batch_start`).
if(error == CAPACITY) { return len - batch_start; }
|
||||
// Otherwise, return the difference between the two entries stored at
// positions n_structural_indexes and n_structural_indexes + 1 of the
// structural index array.
// NOTE(review): presumably the indexing stage stores the input length and the
// truncation position in these two trailing slots -- confirm against the
// code that fills structural_indexes.
return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1];
|
||||
}
|
||||
|
||||
|
|
|
@ -126,6 +126,7 @@ inline size_t document_stream::size_in_bytes() const noexcept {
|
|||
}
|
||||
|
||||
// Returns a byte count describing how much of the input was left unprocessed.
// The result depends on whether the stream is currently in a CAPACITY error
// state.
inline size_t document_stream::truncated_bytes() const noexcept {
|
||||
// CAPACITY error: the count is the distance from the start of the current
// batch to the end of the input.
if(error == CAPACITY) { return len - batch_start; }
|
||||
// No CAPACITY error: subtract the entry at n_structural_indexes + 1 from the
// entry at n_structural_indexes in the structural index array.
// NOTE(review): the meaning of these two trailing slots is not visible in
// this hunk -- presumably input length and truncation offset; verify.
return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1];
|
||||
}
|
||||
|
||||
|
|
|
@ -611,6 +611,43 @@ namespace document_stream_tests {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Regression test (named after issue 1668): a single 100-element array is
// streamed through dom::parser::parse_many with a third argument of 50.
// Accessing the document must yield CAPACITY, and truncated_bytes() must then
// equal the full input length.
// NOTE(review): the locals carry an `od` prefix although they are
// simdjson::dom types -- presumably carried over from the On Demand variant.
bool issue1668() {
|
||||
TEST_START();
|
||||
auto json = R"([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100])"_padded;
|
||||
simdjson::dom::parser odparser;
|
||||
simdjson::dom::document_stream odstream;
|
||||
// Third argument is 50 (presumably the batch size in bytes -- confirm); the
// document above is longer than that.
ASSERT_SUCCESS( odparser.parse_many(json.data(), json.length(), 50).get(odstream) );
|
||||
for (auto doc: odstream) {
|
||||
simdjson::dom::element val;
|
||||
// The lookup itself is expected to fail with CAPACITY.
ASSERT_ERROR(doc.at_pointer("/40").get(val), simdjson::CAPACITY);
|
||||
// Nothing was consumed, so the whole input counts as truncated.
ASSERT_EQUAL(odstream.truncated_bytes(), json.length());
|
||||
}
|
||||
TEST_SUCCEED();
|
||||
}
|
||||
|
||||
// Regression test (named after issue 1668), longer variant: six small
// five-element arrays are followed by one 100-element array, all streamed
// through dom::parser::parse_many with a third argument of 50.
// The first six documents must parse; the seventh must report CAPACITY and
// leave 293 bytes unprocessed (293 is the byte length of the final array).
// NOTE(review): the locals carry an `od` prefix although they are
// simdjson::dom types -- presumably carried over from the On Demand variant.
bool issue1668_long() {
|
||||
TEST_START();
|
||||
auto json = R"([1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100])"_padded;
|
||||
simdjson::dom::parser odparser;
|
||||
simdjson::dom::document_stream odstream;
|
||||
// Number of documents visited so far; selects which expectation applies.
size_t counter{0};
|
||||
ASSERT_SUCCESS( odparser.parse_many(json.data(), json.length(), 50).get(odstream) );
|
||||
for (auto doc: odstream) {
|
||||
// Documents 0..5 are the short arrays: element at JSON pointer "/4" is 5.
if(counter < 6) {
|
||||
int64_t val;
|
||||
ASSERT_SUCCESS(doc.at_pointer("/4").get(val));
|
||||
ASSERT_EQUAL(val, 5);
|
||||
// Any later document is the long array, which must fail with CAPACITY.
} else {
|
||||
simdjson::dom::element val;
|
||||
ASSERT_ERROR(doc.at_pointer("/4").get(val), simdjson::CAPACITY);
|
||||
// We left 293 bytes unprocessed.
|
||||
ASSERT_EQUAL(odstream.truncated_bytes(), 293);
|
||||
}
|
||||
counter++;
|
||||
}
|
||||
TEST_SUCCEED();
|
||||
}
|
||||
|
||||
bool small_window() {
|
||||
std::cout << "Running " << __func__ << std::endl;
|
||||
std::vector<char> input;
|
||||
|
|
|
@ -231,6 +231,7 @@ namespace document_stream_tests {
|
|||
ASSERT_SUCCESS( parser.iterate_many(json, window_size).get(stream) );
|
||||
auto i = stream.begin();
|
||||
ASSERT_ERROR(i.error(), CAPACITY);
|
||||
ASSERT_EQUAL(stream.truncated_bytes(), json.length());
|
||||
TEST_SUCCEED();
|
||||
}
|
||||
|
||||
|
@ -356,6 +357,7 @@ namespace document_stream_tests {
|
|||
for (auto doc: odstream) {
|
||||
ondemand::value val;
|
||||
ASSERT_ERROR(doc.at_pointer("/40").get(val), CAPACITY);
|
||||
ASSERT_EQUAL(odstream.truncated_bytes(), json.length());
|
||||
}
|
||||
TEST_SUCCEED();
|
||||
}
|
||||
|
@ -376,6 +378,8 @@ namespace document_stream_tests {
|
|||
} else {
|
||||
ondemand::value val;
|
||||
ASSERT_ERROR(doc.at_pointer("/4").get(val), CAPACITY);
|
||||
// We left 293 bytes unprocessed.
|
||||
ASSERT_EQUAL(odstream.truncated_bytes(), 293);
|
||||
}
|
||||
counter++;
|
||||
}
|
||||
|
|
|
@ -428,7 +428,12 @@ bool stream_capacity_example() {
|
|||
ondemand::value val;
|
||||
error = doc.at_pointer("/4").get(val);
|
||||
// error == simdjson::CAPACITY
|
||||
if(error) { std::cerr << error << std::endl; break; }
|
||||
if(error) {
|
||||
std::cerr << error << std::endl;
|
||||
// We left 293 bytes unprocessed at the tail end of the input.
|
||||
std::cout << " unprocessed bytes at the end: " << stream.truncated_bytes() << std::endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
counter++;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue