Fixes issue 1088 (#1096)

This commit is contained in:
Daniel Lemire 2020-08-06 11:42:13 -04:00 committed by GitHub
parent 75c75ac00c
commit 83615ff351
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 60 additions and 5 deletions

View File

@ -121,7 +121,28 @@ public:
* may change in future versions of simdjson: we find the API somewhat * may change in future versions of simdjson: we find the API somewhat
* awkward and we would like to offer something friendlier. * awkward and we would like to offer something friendlier.
*/ */
really_inline size_t current_index() noexcept; really_inline size_t current_index() const noexcept;
/**
* @private
*
* Gives a view of the current document.
*
* document_stream stream = parser.parse_many(json,window);
* for(auto i = stream.begin(); i != stream.end(); ++i) {
* auto doc = *i;
* std::string_view v = i.source();
* }
*
* Note that source() is a member of the iterator itself, so it is called
* on the iterator (i.source()), not on the document it yields.
*
* The returned string_view instance is simply a non-owning view of the
* (unparsed) source string: nothing is copied or trimmed, so it may include
* white-space characters and all manner of padding.
*
* This function (source()) is experimental and the usage
* may change in future versions of simdjson: we find the API somewhat
* awkward and we would like to offer something friendlier.
*/
really_inline std::string_view source() const noexcept;
private: private:
really_inline iterator(document_stream &s, bool finished) noexcept; really_inline iterator(document_stream &s, bool finished) noexcept;
/** The document_stream we're iterating through. */ /** The document_stream we're iterating through. */

View File

@ -150,9 +150,16 @@ inline void document_stream::start() noexcept {
next(); next();
} }
really_inline size_t document_stream::iterator::current_index() noexcept { really_inline size_t document_stream::iterator::current_index() const noexcept {
return stream.doc_index; return stream.doc_index;
} }
/**
 * Returns a non-owning view of the raw (unparsed) bytes of the current document.
 *
 * The view begins at current_index() inside the stream's buffer. Its end is
 * derived from the structural index found at the parser implementation's
 * next_structural_index, shifted by the stream's batch_start, minus one byte.
 * Nothing is copied: the view points directly into stream.buf.
 */
really_inline std::string_view document_stream::iterator::source() const noexcept {
  const auto &impl = *stream.parser->implementation;
  const size_t doc_begin = current_index();
  // Offset (relative to the whole buffer) of the structural that follows this document.
  const size_t next_doc_index = stream.batch_start + impl.structural_indexes[impl.next_structural_index];
  const char *const bytes = reinterpret_cast<const char*>(stream.buf);
  return std::string_view(bytes + doc_begin, next_doc_index - doc_begin - 1);
}
inline void document_stream::next() noexcept { inline void document_stream::next() noexcept {
if (error) { return; } if (error) { return; }

View File

@ -5,6 +5,19 @@
#include "simdjson.h" #include "simdjson.h"
#include "test_macros.h" #include "test_macros.h"
/**
 * Returns a copy of `s` with leading and trailing white space removed.
 *
 * White space is whatever std::isspace reports for the current C locale.
 * An empty or all-white-space input yields an empty string.
 *
 * @param s the string to trim (left untouched)
 * @return the trimmed copy
 */
std::string trim(const std::string &s) {
  // std::isspace has undefined behavior for negative char values, so every
  // character is converted to unsigned char before the call.
  auto is_space = [](char c) {
    return std::isspace(static_cast<unsigned char>(c)) != 0;
  };
  auto start = s.begin();
  auto end = s.end();
  while (start != end && is_space(*start)) {
    ++start;
  }
  // `end` stays one-past-the-last kept character and is never moved before
  // `start`, so an empty input no longer decrements an iterator below begin().
  while (end != start && is_space(*(end - 1))) {
    --end;
  }
  return std::string(start, end);
}
namespace document_stream_tests { namespace document_stream_tests {
static simdjson::dom::document_stream parse_many_stream_return(simdjson::dom::parser &parser, simdjson::padded_string &str) { static simdjson::dom::document_stream parse_many_stream_return(simdjson::dom::parser &parser, simdjson::padded_string &str) {
simdjson::dom::document_stream stream; simdjson::dom::document_stream stream;
@ -19,10 +32,18 @@ namespace document_stream_tests {
} }
bool test_current_index() { bool test_current_index() {
std::cout << "Running " << __func__ << std::endl; std::cout << "Running " << __func__ << std::endl;
std::string base("1 ");// one JSON! std::string base1("1 ");// one JSON!
std::string base2("{\"k\":1} ");// one JSON!
std::string base3("[1,2] ");// one JSON!
assert(base1.size() == base2.size());
assert(base2.size() == base3.size());
std::vector<std::string> source_strings = {base1, base2, base3};
std::string json; std::string json;
for(size_t k = 0; k < 1000; k++) { for(size_t k = 0; k < 1000; k++) {
json += base; json += base1;
json += base2;
json += base3;
} }
simdjson::dom::parser parser; simdjson::dom::parser parser;
const size_t window = 32; // deliberately small const size_t window = 32; // deliberately small
@ -38,7 +59,13 @@ namespace document_stream_tests {
std::cout << "expected index:" << count << std::endl; std::cout << "expected index:" << count << std::endl;
return false; return false;
} }
count += base.size(); std::string answer = source_strings[(count / base1.size()) % source_strings.size()];
if(trim(std::string(i.source())) != trim(answer)) {
std::cout << "got: '" << i.source() << "'" << std::endl;
std::cout << "expected : '" << answer << "'" << std::endl;
return false;
}
count += base1.size();
} }
return true; return true;
} }