This allows the users to disable threading. (#1122)

* This allows the users to disable threading.

* This would disable bash scripts under FreeBSD. (#1118)

* This would disable bash scripts under FreeBSD.

* Let us also disable GIT.

* Let us try to just disable GIT

* Nope. We must have both bash and git disabled.

* This allows the users to disable threading.
This commit is contained in:
Daniel Lemire 2020-08-18 16:43:08 -04:00 committed by GitHub
parent 109bb505d8
commit fc15147cf5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 62 additions and 3 deletions

View File

@ -169,6 +169,11 @@ private:
/** /**
* Construct a document_stream. Does not allocate or parse anything until the iterator is * Construct a document_stream. Does not allocate or parse anything until the iterator is
* used. * used.
*
* @param parser is a reference to the parser instance used to generate this document_stream
* @param buf is the raw byte buffer we need to process
* @param len is the length of the raw byte buffer in bytes
* @param batch_size is the size of the windows (must be greater than or equal to the size of the largest JSON document)
*/ */
really_inline document_stream( really_inline document_stream(
dom::parser &parser, dom::parser &parser,
@ -231,6 +236,9 @@ private:
size_t doc_index{}; size_t doc_index{};
#ifdef SIMDJSON_THREADS_ENABLED #ifdef SIMDJSON_THREADS_ENABLED
/**
 * Indicates whether we use threads. This is a copy of the parser's `threaded`
 * attribute taken when the document_stream is constructed (the default
 * constructor sets it to false). Note that this needs to be a constant during
 * the execution of the parsing, which is why the parser's attribute is not
 * read directly.
 */
bool use_thread;
inline void load_from_stage1_thread() noexcept; inline void load_from_stage1_thread() noexcept;
/** Start a thread to run stage 1 on the next batch. */ /** Start a thread to run stage 1 on the next batch. */

View File

@ -342,6 +342,14 @@ public:
*/ */
really_inline void set_max_capacity(size_t max_capacity) noexcept; really_inline void set_max_capacity(size_t max_capacity) noexcept;
#ifdef SIMDJSON_THREADS_ENABLED
/**
 * The parser instance can use threads when they are available to speed up some
 * operations. It is enabled by default. Changing this attribute will change the
 * behavior of the parser for future operations.
 *
 * This attribute only exists when SIMDJSON_THREADS_ENABLED is defined. A
 * document_stream copies the value when it is constructed, so changing the
 * attribute afterwards does not affect a stream that already exists.
 */
bool threaded{true};
#endif
/** @private Use the new DOM API instead */ /** @private Use the new DOM API instead */
class Iterator; class Iterator;
/** @private Use simdjson_error instead */ /** @private Use simdjson_error instead */
@ -380,6 +388,7 @@ public:
/** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()` instead */ /** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()` instead */
inline bool dump_raw_tape(std::ostream &os) const noexcept; inline bool dump_raw_tape(std::ostream &os) const noexcept;
private: private:
/** /**
* The maximum document length this parser will automatically support. * The maximum document length this parser will automatically support.
@ -421,6 +430,8 @@ private:
friend class parser::Iterator; friend class parser::Iterator;
friend class document_stream; friend class document_stream;
}; // class parser }; // class parser
} // namespace dom } // namespace dom

View File

@ -75,6 +75,9 @@ really_inline document_stream::document_stream(
len{_len}, len{_len},
batch_size{_batch_size}, batch_size{_batch_size},
error{SUCCESS} error{SUCCESS}
#ifdef SIMDJSON_THREADS_ENABLED
, use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change
#endif
{ {
#ifdef SIMDJSON_THREADS_ENABLED #ifdef SIMDJSON_THREADS_ENABLED
if(worker.get() == nullptr) { if(worker.get() == nullptr) {
@ -88,7 +91,11 @@ really_inline document_stream::document_stream() noexcept
buf{nullptr}, buf{nullptr},
len{0}, len{0},
batch_size{0}, batch_size{0},
error{UNINITIALIZED} { error{UNINITIALIZED}
#ifdef SIMDJSON_THREADS_ENABLED
, use_thread(false)
#endif
{
} }
really_inline document_stream::~document_stream() noexcept { really_inline document_stream::~document_stream() noexcept {
@ -137,7 +144,7 @@ inline void document_stream::start() noexcept {
if (error) { return; } if (error) { return; }
#ifdef SIMDJSON_THREADS_ENABLED #ifdef SIMDJSON_THREADS_ENABLED
if (next_batch_start() < len) { if (use_thread && next_batch_start() < len) {
// Kick off the first thread if needed // Kick off the first thread if needed
error = stage1_thread_parser.ensure_capacity(batch_size); error = stage1_thread_parser.ensure_capacity(batch_size);
if (error) { return; } if (error) { return; }
@ -172,7 +179,11 @@ inline void document_stream::next() noexcept {
if (batch_start >= len) { break; } if (batch_start >= len) { break; }
#ifdef SIMDJSON_THREADS_ENABLED #ifdef SIMDJSON_THREADS_ENABLED
if(use_thread) {
load_from_stage1_thread(); load_from_stage1_thread();
} else {
error = run_stage1(*parser, batch_start);
}
#else #else
error = run_stage1(*parser, batch_start); error = run_stage1(*parser, batch_start);
#endif #endif

View File

@ -69,6 +69,7 @@ namespace document_stream_tests {
} }
return true; return true;
} }
bool small_window() { bool small_window() {
std::cout << "Running " << __func__ << std::endl; std::cout << "Running " << __func__ << std::endl;
auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded; auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
@ -91,6 +92,31 @@ namespace document_stream_tests {
return true; return true;
} }
#ifdef SIMDJSON_THREADS_ENABLED
/**
 * Runs parse_many with threading turned off and a window that is smaller than
 * either of the two input documents.
 *
 * The test fails if any document comes back without an error, or if exactly
 * two documents are iterated over; otherwise it passes.
 *
 * @return true when the test passes, false otherwise.
 */
bool threaded_disabled() {
  std::cout << "Running " << __func__ << std::endl;

  // Two back-to-back documents, each longer than the window used below.
  auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
  const size_t window_size = 10; // deliberately too small

  simdjson::dom::parser parser;
  parser.threaded = false; // set before parse_many so the stream is built with threading off

  simdjson::dom::document_stream stream;
  ASSERT_SUCCESS( parser.parse_many(json, window_size).get(stream) );

  size_t count = 0;
  for (auto doc : stream) {
    const bool parsed_ok = !doc.error();
    if (parsed_ok) {
      // A document that parses cleanly means the small window was not enforced.
      std::cerr << "Expected a capacity error " << doc.error() << std::endl;
      return false;
    }
    ++count;
  }

  // Seeing both documents is also treated as a failure.
  const bool saw_both_documents = (count == 2);
  if (saw_both_documents) {
    std::cerr << "Expected a capacity error " << std::endl;
    return false;
  }
  return true;
}
#endif
bool large_window() { bool large_window() {
std::cout << "Running " << __func__ << std::endl; std::cout << "Running " << __func__ << std::endl;
#if SIZE_MAX > 17179869184 #if SIZE_MAX > 17179869184
@ -222,6 +248,9 @@ namespace document_stream_tests {
bool run() { bool run() {
return test_current_index() && return test_current_index() &&
#ifdef SIMDJSON_THREADS_ENABLED
threaded_disabled() &&
#endif
small_window() && small_window() &&
large_window() && large_window() &&
json_issue467() && json_issue467() &&