Merge pull request #946 from simdjson/issue937

Fixing issue 937
This commit is contained in:
Daniel Lemire 2020-06-18 18:20:44 -04:00 committed by GitHub
commit b8202dab3b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 55 additions and 3 deletions

View File

@ -15,6 +15,7 @@ namespace dom {
#ifdef SIMDJSON_THREADS_ENABLED #ifdef SIMDJSON_THREADS_ENABLED
/** @private Custom worker class **/
struct stage1_worker { struct stage1_worker {
stage1_worker() noexcept = default; stage1_worker() noexcept = default;
stage1_worker(const stage1_worker&) = delete; stage1_worker(const stage1_worker&) = delete;
@ -93,7 +94,23 @@ public:
* @param other the end iterator to compare to. * @param other the end iterator to compare to.
*/ */
really_inline bool operator!=(const iterator &other) const noexcept; really_inline bool operator!=(const iterator &other) const noexcept;
/**
 * @private
 *
 * Gives the byte offset, within the input, at which the document the
 * iterator currently points to begins.
 *
 *   auto stream = parser.parse_many(json,window);
 *   auto i = stream.begin();
 *   for(; i != stream.end(); ++i) {
 *     auto doc = *i;
 *     size_t index = i.current_index();
 *   }
 *
 * This function (current_index()) is experimental and its usage
 * may change in future versions of simdjson: we find the API somewhat
 * awkward and we would like to offer something friendlier.
 *
 * @return the index, in bytes, of the current document in the input.
 */
really_inline size_t current_index() noexcept;
private: private:
really_inline iterator(document_stream &s, bool finished) noexcept; really_inline iterator(document_stream &s, bool finished) noexcept;
/** The document_stream we're iterating through. */ /** The document_stream we're iterating through. */
@ -204,6 +221,9 @@ private:
#endif // SIMDJSON_THREADS_ENABLED #endif // SIMDJSON_THREADS_ENABLED
friend class dom::parser; friend class dom::parser;
/**
 * Index in the input of the document most recently loaded by next()
 * (batch start plus the document's first structural index).
 * Value-initialized to 0.
 */
size_t doc_index{};
}; // class document_stream }; // class document_stream
} // namespace dom } // namespace dom

View File

@ -143,10 +143,14 @@ inline void document_stream::start() noexcept {
next(); next();
} }
/**
 * Reports the document index most recently recorded by the stream this
 * iterator walks over.
 */
really_inline size_t document_stream::iterator::current_index() noexcept {
  const size_t index_in_input = stream.doc_index;
  return index_in_input;
}
inline void document_stream::next() noexcept { inline void document_stream::next() noexcept {
if (error) { return; } if (error) { return; }
// Load the next document from the batch // Load the next document from the batch
doc_index = batch_start + parser.implementation->structural_indexes[parser.implementation->next_structural_index];
error = parser.implementation->stage2_next(parser.doc); error = parser.implementation->stage2_next(parser.doc);
// If that was the last document in the batch, load another batch (if available) // If that was the last document in the batch, load another batch (if available)
while (error == EMPTY) { while (error == EMPTY) {
@ -160,6 +164,7 @@ inline void document_stream::next() noexcept {
#endif #endif
if (error) { continue; } // If the error was EMPTY, we may want to load another batch. if (error) { continue; } // If the error was EMPTY, we may want to load another batch.
// Run stage 2 on the first document in the batch // Run stage 2 on the first document in the batch
doc_index = batch_start + parser.implementation->structural_indexes[parser.implementation->next_structural_index];
error = parser.implementation->stage2_next(parser.doc); error = parser.implementation->stage2_next(parser.doc);
} }
} }

View File

@ -371,7 +371,33 @@ namespace document_stream_tests {
simdjson::padded_string str("{}",2); simdjson::padded_string str("{}",2);
simdjson::dom::document_stream s1 = parse_many_stream_return(parser, str); simdjson::dom::document_stream s1 = parse_many_stream_return(parser, str);
} }
/**
 * Checks that iterator::current_index() reports, for every document produced
 * by parse_many(), the byte offset at which that document starts, and that
 * the stream walks the whole input.
 *
 * @return true on success; false (after printing a diagnostic) otherwise.
 */
bool test_current_index() {
  std::cout << "Running " << __func__ << std::endl;
  std::string base("1 ");// one JSON!
  std::string json;
  for(size_t k = 0; k < 1000; k++) {
    json += base;
  }
  simdjson::dom::parser parser;
  const size_t window = 32; // deliberately small
  auto stream = parser.parse_many(json,window);
  auto i = stream.begin();
  size_t count = 0; // expected byte offset of the document under the iterator
  for(; i != stream.end(); ++i) {
    auto doc = *i;
    if (doc.error()) {
      std::cerr << doc.error() << std::endl;
      return false;
    }
    if( i.current_index() != count) {
      std::cout << "index:" << i.current_index() << std::endl;
      std::cout << "expected index:" << count << std::endl;
      return false;
    }
    count += base.size();
  }
  // Every document is base.size() bytes long, so after a complete pass the
  // running offset must equal the input length. Without this check the test
  // would pass vacuously if the stream stopped early (or produced nothing).
  if (count != json.size()) {
    std::cout << "final index:" << count << std::endl;
    std::cout << "expected final index:" << json.size() << std::endl;
    return false;
  }
  return true;
}
bool small_window() { bool small_window() {
std::cout << "Running " << __func__ << std::endl; std::cout << "Running " << __func__ << std::endl;
auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded; auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
@ -541,7 +567,8 @@ namespace document_stream_tests {
} }
bool run() { bool run() {
return small_window() && return test_current_index() &&
small_window() &&
large_window() && large_window() &&
json_issue467() && json_issue467() &&
document_stream_test() && document_stream_test() &&