commit
b8202dab3b
|
@ -15,6 +15,7 @@ namespace dom {
|
||||||
|
|
||||||
|
|
||||||
#ifdef SIMDJSON_THREADS_ENABLED
|
#ifdef SIMDJSON_THREADS_ENABLED
|
||||||
|
/** @private Custom worker class **/
|
||||||
struct stage1_worker {
|
struct stage1_worker {
|
||||||
stage1_worker() noexcept = default;
|
stage1_worker() noexcept = default;
|
||||||
stage1_worker(const stage1_worker&) = delete;
|
stage1_worker(const stage1_worker&) = delete;
|
||||||
|
@ -93,7 +94,23 @@ public:
|
||||||
* @param other the end iterator to compare to.
|
* @param other the end iterator to compare to.
|
||||||
*/
|
*/
|
||||||
really_inline bool operator!=(const iterator &other) const noexcept;
|
really_inline bool operator!=(const iterator &other) const noexcept;
|
||||||
|
/**
|
||||||
|
* @private
|
||||||
|
*
|
||||||
|
* Gives the byte offset, within the input, at which the current document starts.
|
||||||
|
*
|
||||||
|
* auto stream = parser.parse_many(json,window);
|
||||||
|
* auto i = stream.begin();
|
||||||
|
* for(; i != stream.end(); ++i) {
|
||||||
|
* auto doc = *i;
|
||||||
|
* size_t index = i.current_index();
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* This function (current_index()) is experimental and the usage
|
||||||
|
* may change in future versions of simdjson: we find the API somewhat
|
||||||
|
* awkward and we would like to offer something friendlier.
|
||||||
|
*/
|
||||||
|
really_inline size_t current_index() noexcept;
|
||||||
private:
|
private:
|
||||||
really_inline iterator(document_stream &s, bool finished) noexcept;
|
really_inline iterator(document_stream &s, bool finished) noexcept;
|
||||||
/** The document_stream we're iterating through. */
|
/** The document_stream we're iterating through. */
|
||||||
|
@ -204,6 +221,9 @@ private:
|
||||||
#endif // SIMDJSON_THREADS_ENABLED
|
#endif // SIMDJSON_THREADS_ENABLED
|
||||||
|
|
||||||
friend class dom::parser;
|
friend class dom::parser;
|
||||||
|
|
||||||
|
size_t doc_index{};
|
||||||
|
|
||||||
}; // class document_stream
|
}; // class document_stream
|
||||||
|
|
||||||
} // namespace dom
|
} // namespace dom
|
||||||
|
|
|
@ -143,10 +143,14 @@ inline void document_stream::start() noexcept {
|
||||||
next();
|
next();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
really_inline size_t document_stream::iterator::current_index() noexcept {
|
||||||
|
return stream.doc_index; // offset stored by document_stream::next() (batch_start + structural index of the document) just before stage 2 runs on it
|
||||||
|
}
|
||||||
inline void document_stream::next() noexcept {
|
inline void document_stream::next() noexcept {
|
||||||
if (error) { return; }
|
if (error) { return; }
|
||||||
|
|
||||||
// Load the next document from the batch
|
// Load the next document from the batch
|
||||||
|
doc_index = batch_start + parser.implementation->structural_indexes[parser.implementation->next_structural_index];
|
||||||
error = parser.implementation->stage2_next(parser.doc);
|
error = parser.implementation->stage2_next(parser.doc);
|
||||||
// If that was the last document in the batch, load another batch (if available)
|
// If that was the last document in the batch, load another batch (if available)
|
||||||
while (error == EMPTY) {
|
while (error == EMPTY) {
|
||||||
|
@ -160,6 +164,7 @@ inline void document_stream::next() noexcept {
|
||||||
#endif
|
#endif
|
||||||
if (error) { continue; } // If the error was EMPTY, we may want to load another batch.
|
if (error) { continue; } // If the error was EMPTY, we may want to load another batch.
|
||||||
// Run stage 2 on the first document in the batch
|
// Run stage 2 on the first document in the batch
|
||||||
|
doc_index = batch_start + parser.implementation->structural_indexes[parser.implementation->next_structural_index];
|
||||||
error = parser.implementation->stage2_next(parser.doc);
|
error = parser.implementation->stage2_next(parser.doc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -371,7 +371,33 @@ namespace document_stream_tests {
|
||||||
simdjson::padded_string str("{}",2);
|
simdjson::padded_string str("{}",2);
|
||||||
simdjson::dom::document_stream s1 = parse_many_stream_return(parser, str);
|
simdjson::dom::document_stream s1 = parse_many_stream_return(parser, str);
|
||||||
}
|
}
|
||||||
|
bool test_current_index() { // Verifies that iterator::current_index() matches the byte offset at which each streamed document starts; returns true on success.
|
||||||
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
|
std::string base("1 ");// one JSON document ("1") followed by a single separating space
|
||||||
|
std::string json;
|
||||||
|
for(size_t k = 0; k < 1000; k++) { // concatenate 1000 copies of base (2000 bytes in total)
|
||||||
|
json += base;
|
||||||
|
}
|
||||||
|
simdjson::dom::parser parser;
|
||||||
|
const size_t window = 32; // deliberately small compared to the 2000-byte input; presumably forces the stream to load many batches (confirm against parse_many)
|
||||||
|
auto stream = parser.parse_many(json,window);
|
||||||
|
auto i = stream.begin();
|
||||||
|
size_t count = 0; // expected byte offset of the document the iterator currently points at
|
||||||
|
for(; i != stream.end(); ++i) {
|
||||||
|
auto doc = *i;
|
||||||
|
if (doc.error()) { // any error reported for a document fails the test
|
||||||
|
std::cerr << doc.error() << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if( i.current_index() != count) { // reported offset must equal the expected start offset
|
||||||
|
std::cout << "index:" << i.current_index() << std::endl;
|
||||||
|
std::cout << "expected index:" << count << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
count += base.size(); // the next document starts base.size() bytes further into the input
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
bool small_window() {
|
bool small_window() {
|
||||||
std::cout << "Running " << __func__ << std::endl;
|
std::cout << "Running " << __func__ << std::endl;
|
||||||
auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
|
auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
|
||||||
|
@ -541,7 +567,8 @@ namespace document_stream_tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool run() {
|
bool run() {
|
||||||
return small_window() &&
|
return test_current_index() &&
|
||||||
|
small_window() &&
|
||||||
large_window() &&
|
large_window() &&
|
||||||
json_issue467() &&
|
json_issue467() &&
|
||||||
document_stream_test() &&
|
document_stream_test() &&
|
||||||
|
|
Loading…
Reference in New Issue