Merge pull request #946 from simdjson/issue937

Fixing issue 937
This commit is contained in:
Daniel Lemire 2020-06-18 18:20:44 -04:00 committed by GitHub
commit b8202dab3b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 55 additions and 3 deletions

View File

@ -15,6 +15,7 @@ namespace dom {
#ifdef SIMDJSON_THREADS_ENABLED
/** @private Custom worker class **/
struct stage1_worker {
stage1_worker() noexcept = default;
stage1_worker(const stage1_worker&) = delete;
@ -93,7 +94,23 @@ public:
* @param other the end iterator to compare to.
*/
really_inline bool operator!=(const iterator &other) const noexcept;
/**
* @private
*
* Gives the current index in the input document in bytes.
*
* The value is read from the document_stream this iterator walks over, so it
* corresponds to the document most recently loaded by that stream.
*
* Example:
*
* auto stream = parser.parse_many(json,window);
* auto i = stream.begin();
* for(; i != stream.end(); ++i) {
* auto doc = *i;
* size_t index = i.current_index();
* }
*
* This function (current_index()) is experimental and the usage
* may change in future versions of simdjson: we find the API somewhat
* awkward and we would like to offer something friendlier.
*
* @return the index, in bytes, recorded by the stream for the current document.
*/
really_inline size_t current_index() noexcept;
private:
really_inline iterator(document_stream &s, bool finished) noexcept;
/** The document_stream we're iterating through. */
@ -204,6 +221,9 @@ private:
#endif // SIMDJSON_THREADS_ENABLED
friend class dom::parser;
size_t doc_index{};
}; // class document_stream
} // namespace dom

View File

@ -143,10 +143,14 @@ inline void document_stream::start() noexcept {
next();
}
/**
 * Reports the index (in bytes) that the underlying document_stream recorded
 * for the document this iterator currently refers to.
 */
really_inline size_t document_stream::iterator::current_index() noexcept {
  // The iterator keeps no position of its own; it forwards the stream's value.
  const size_t index_in_bytes = stream.doc_index;
  return index_in_bytes;
}
inline void document_stream::next() noexcept {
if (error) { return; }
// Load the next document from the batch
doc_index = batch_start + parser.implementation->structural_indexes[parser.implementation->next_structural_index];
error = parser.implementation->stage2_next(parser.doc);
// If that was the last document in the batch, load another batch (if available)
while (error == EMPTY) {
@ -160,6 +164,7 @@ inline void document_stream::next() noexcept {
#endif
if (error) { continue; } // If the error was EMPTY, we may want to load another batch.
// Run stage 2 on the first document in the batch
doc_index = batch_start + parser.implementation->structural_indexes[parser.implementation->next_structural_index];
error = parser.implementation->stage2_next(parser.doc);
}
}

View File

@ -371,7 +371,33 @@ namespace document_stream_tests {
simdjson::padded_string str("{}",2);
simdjson::dom::document_stream s1 = parse_many_stream_return(parser, str);
}
/**
 * Streams 1000 copies of a tiny JSON document through parse_many() using a
 * window far smaller than the whole input, and checks that
 * iterator::current_index() advances by the size of one document each step.
 *
 * @return true when every document parses and every reported index matches.
 */
bool test_current_index() {
  std::cout << "Running " << __func__ << std::endl;

  // Build the input: the same single document repeated back to back.
  const std::string single_doc("1 "); // one JSON!
  std::string input;
  for (size_t repeat = 0; repeat < 1000; ++repeat) {
    input.append(single_doc);
  }

  simdjson::dom::parser parser;
  const size_t batch_size = 32; // deliberately small
  auto stream = parser.parse_many(input, batch_size);

  size_t expected_index = 0;
  for (auto it = stream.begin(); it != stream.end(); ++it) {
    auto doc = *it;
    if (doc.error()) {
      std::cerr << doc.error() << std::endl;
      return false;
    }
    if (it.current_index() == expected_index) {
      // Index matches: the next document starts one document-length later.
      expected_index += single_doc.size();
      continue;
    }
    std::cout << "index:" << it.current_index() << std::endl;
    std::cout << "expected index:" << expected_index << std::endl;
    return false;
  }
  return true;
}
bool small_window() {
std::cout << "Running " << __func__ << std::endl;
auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
@ -541,7 +567,8 @@ namespace document_stream_tests {
}
bool run() {
return small_window() &&
return test_current_index() &&
small_window() &&
large_window() &&
json_issue467() &&
document_stream_test() &&