Merge pull request #903 from simdjson/jkeiser/dom-parser-implementation

Move parser state to implementation-specific class
This commit is contained in:
John Keiser 2020-06-04 13:09:57 -07:00 committed by GitHub
commit ae6dddfff4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
40 changed files with 786 additions and 20865 deletions

View File

@ -84,7 +84,7 @@ struct json_stats {
bytes = json.size(); bytes = json.size();
blocks = bytes / BYTES_PER_BLOCK; blocks = bytes / BYTES_PER_BLOCK;
if (bytes % BYTES_PER_BLOCK > 0) { blocks++; } // Account for remainder block if (bytes % BYTES_PER_BLOCK > 0) { blocks++; } // Account for remainder block
structurals = parser.n_structural_indexes-1; structurals = parser.implementation->n_structural_indexes-1;
// Calculate stats on blocks that will trigger utf-8 if statements / mispredictions // Calculate stats on blocks that will trigger utf-8 if statements / mispredictions
bool last_block_has_utf8 = false; bool last_block_has_utf8 = false;
@ -141,7 +141,7 @@ struct json_stats {
for (size_t block=0; block<blocks; block++) { for (size_t block=0; block<blocks; block++) {
// Count structurals in the block // Count structurals in the block
int block_structurals=0; int block_structurals=0;
while (structural < parser.n_structural_indexes && parser.structural_indexes[structural] < (block+1)*BYTES_PER_BLOCK) { while (structural < parser.implementation->n_structural_indexes && parser.implementation->structural_indexes[structural] < (block+1)*BYTES_PER_BLOCK) {
block_structurals++; block_structurals++;
structural++; structural++;
} }
@ -320,7 +320,7 @@ struct benchmarker {
// Stage 1 (find structurals) // Stage 1 (find structurals)
collector.start(); collector.start();
error = active_implementation->stage1((const uint8_t *)json.data(), json.size(), parser, false); error = parser.implementation->stage1((const uint8_t *)json.data(), json.size(), false);
event_count stage1_count = collector.end(); event_count stage1_count = collector.end();
stage1 << stage1_count; stage1 << stage1_count;
if (error) { if (error) {
@ -334,7 +334,7 @@ struct benchmarker {
} else { } else {
event_count stage2_count; event_count stage2_count;
collector.start(); collector.start();
error = active_implementation->stage2((const uint8_t *)json.data(), json.size(), parser); error = parser.implementation->stage2(parser.doc);
if (error) { if (error) {
exit_error(string("Failed to parse ") + filename + " during stage 2 parsing " + error_message(error)); exit_error(string("Failed to parse ") + filename + " during stage 2 parsing " + error_message(error));
} }
@ -345,7 +345,7 @@ struct benchmarker {
// Calculate stats the first time we parse // Calculate stats the first time we parse
if (stats == NULL) { if (stats == NULL) {
if (stage1_only) { // we need stage 2 once if (stage1_only) { // we need stage 2 once
error = active_implementation->stage2((const uint8_t *)json.data(), json.size(), parser); error = parser.implementation->stage2(parser.doc);
if (error) { if (error) {
printf("Warning: failed to parse during stage 2. Unable to acquire statistics.\n"); printf("Warning: failed to parse during stage 2. Unable to acquire statistics.\n");
} }

View File

@ -106,7 +106,7 @@ stat_t simdjson_compute_stats(const simdjson::padded_string &p) {
answer.non_ascii_byte_count = count_nonasciibytes( answer.non_ascii_byte_count = count_nonasciibytes(
reinterpret_cast<const uint8_t *>(p.data()), p.size()); reinterpret_cast<const uint8_t *>(p.data()), p.size());
answer.byte_count = p.size(); answer.byte_count = p.size();
answer.structural_indexes_count = parser.n_structural_indexes; answer.structural_indexes_count = parser.implementation->n_structural_indexes;
simdjson_recurse(answer, doc); simdjson_recurse(answer, doc);
return answer; return answer;
} }
@ -163,7 +163,6 @@ int main(int argc, char *argv[]) {
s.true_count, s.false_count, s.byte_count, s.structural_indexes_count); s.true_count, s.false_count, s.byte_count, s.structural_indexes_count);
#ifdef __linux__ #ifdef __linux__
simdjson::dom::parser parser; simdjson::dom::parser parser;
const simdjson::implementation &stage_parser = *simdjson::active_implementation;
simdjson::error_code alloc_error = parser.allocate(p.size()); simdjson::error_code alloc_error = parser.allocate(p.size());
if (alloc_error) { if (alloc_error) {
std::cerr << alloc_error << std::endl; std::cerr << alloc_error << std::endl;
@ -181,14 +180,14 @@ int main(int argc, char *argv[]) {
for (uint32_t i = 0; i < iterations; i++) { for (uint32_t i = 0; i < iterations; i++) {
unified.start(); unified.start();
// The default template is simdjson::architecture::NATIVE. // The default template is simdjson::architecture::NATIVE.
bool isok = (stage_parser.stage1((const uint8_t *)p.data(), p.size(), parser, false) == simdjson::SUCCESS); bool isok = (parser.implementation->stage1((const uint8_t *)p.data(), p.size(), false) == simdjson::SUCCESS);
unified.end(results); unified.end(results);
cy1 += results[0]; cy1 += results[0];
cl1 += results[1]; cl1 += results[1];
unified.start(); unified.start();
isok = isok && (stage_parser.stage2((const uint8_t *)p.data(), p.size(), parser) == simdjson::SUCCESS); isok = isok && (parser.implementation->stage2(parser.doc) == simdjson::SUCCESS);
unified.end(results); unified.end(results);
cy2 += results[0]; cy2 += results[0];

View File

@ -4,6 +4,7 @@
#include "simdjson/common_defs.h" #include "simdjson/common_defs.h"
#include "simdjson/dom/document.h" #include "simdjson/dom/document.h"
#include "simdjson/error.h" #include "simdjson/error.h"
#include "simdjson/internal/dom_parser_implementation.h"
#include "simdjson/internal/tape_ref.h" #include "simdjson/internal/tape_ref.h"
#include "simdjson/minify.h" #include "simdjson/minify.h"
#include "simdjson/padded_string.h" #include "simdjson/padded_string.h"
@ -14,22 +15,6 @@
namespace simdjson { namespace simdjson {
namespace internal {

/**
 * Bookkeeping record for one open scope ([ or {).
 *
 * Expected size: 8 bytes (64/8), i.e. two 32-bit fields.
 */
struct scope_descriptor {
  uint32_t tape_index; // position on the tape where the scope begins
  uint32_t count;      // number of elements seen in the scope
}; // struct scope_descriptor

// Type of a stored return address: a pointer when computed gotos are enabled,
// a single char otherwise.
#ifdef SIMDJSON_USE_COMPUTED_GOTO
using ret_address = void*;
#else
using ret_address = char;
#endif

} // namespace internal
namespace dom { namespace dom {
class document_stream; class document_stream;
@ -67,14 +52,14 @@ public:
* *
* @param other The parser to take. Its capacity is zeroed. * @param other The parser to take. Its capacity is zeroed.
*/ */
parser(parser &&other) = default; really_inline parser(parser &&other) noexcept;
parser(const parser &) = delete; ///< @private Disallow copying parser(const parser &) = delete; ///< @private Disallow copying
/** /**
* Take another parser's buffers and state. * Take another parser's buffers and state.
* *
* @param other The parser to take. Its capacity is zeroed. * @param other The parser to take. Its capacity is zeroed.
*/ */
parser &operator=(parser &&other) = default; really_inline parser &operator=(parser &&other) noexcept;
parser &operator=(const parser &) = delete; ///< @private Disallow copying parser &operator=(const parser &) = delete; ///< @private Disallow copying
/** Deallocate the JSON parser. */ /** Deallocate the JSON parser. */
@ -334,7 +319,8 @@ public:
/** /**
* Set max_capacity. This is the largest document this parser can automatically support. * Set max_capacity. This is the largest document this parser can automatically support.
* *
* The parser may reallocate internal buffers as needed up to this amount. * The parser may reallocate internal buffers as needed up to this amount as documents are passed
* to it.
* *
* This call will not allocate or deallocate, even if capacity is currently above max_capacity. * This call will not allocate or deallocate, even if capacity is currently above max_capacity.
* *
@ -347,19 +333,8 @@ public:
/** @private Use simdjson_error instead */ /** @private Use simdjson_error instead */
using InvalidJSON [[deprecated("Use simdjson_error instead")]] = simdjson_error; using InvalidJSON [[deprecated("Use simdjson_error instead")]] = simdjson_error;
/** @private Next location to write to in the tape */ /** @private [for benchmarking access] The implementation to use */
uint32_t current_loc{0}; std::unique_ptr<internal::dom_parser_implementation> implementation{};
/** @private Number of structural indices passed from stage 1 to stage 2 */
uint32_t n_structural_indexes{0};
/** @private Structural indices passed from stage 1 to stage 2 */
std::unique_ptr<uint32_t[]> structural_indexes{};
/** @private Tape location of each open { or [ */
std::unique_ptr<internal::scope_descriptor[]> containing_scope{};
/** @private Return address of each open { or [ */
std::unique_ptr<internal::ret_address[]> ret_address{};
/** @private Use `if (parser.parse(...).error())` instead */ /** @private Use `if (parser.parse(...).error())` instead */
bool valid{false}; bool valid{false};
@ -399,20 +374,6 @@ private:
*/ */
size_t _max_capacity; size_t _max_capacity;
/**
* The maximum document length this parser supports.
*
* Buffers are large enough to handle any document up to this length.
*/
size_t _capacity{0};
/**
* The maximum depth (number of nested objects and arrays) supported by this parser.
*
* Defaults to DEFAULT_MAX_DEPTH.
*/
size_t _max_depth{0};
/** /**
* The loaded buffer (reused each time load() is called) * The loaded buffer (reused each time load() is called)
*/ */

View File

@ -2,6 +2,7 @@
#define SIMDJSON_IMPLEMENTATION_H #define SIMDJSON_IMPLEMENTATION_H
#include "simdjson/common_defs.h" #include "simdjson/common_defs.h"
#include "simdjson/internal/dom_parser_implementation.h"
#include <optional> #include <optional>
#include <string> #include <string>
#include <atomic> #include <atomic>
@ -10,8 +11,8 @@
namespace simdjson { namespace simdjson {
namespace dom { namespace dom {
class parser; class document;
} } // namespace dom
/** /**
* An implementation of simdjson for a particular CPU architecture. * An implementation of simdjson for a particular CPU architecture.
@ -54,16 +55,19 @@ public:
/** /**
* @private For internal implementation use * @private For internal implementation use
* *
* Run a full document parse (ensure_capacity, stage1 and stage2). * const implementation *impl = simdjson::active_implementation;
* cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl;
* *
* Overridden by each implementation. * @param capacity The largest document that will be passed to the parser.
* * @param max_depth The maximum JSON object/array nesting this parser is expected to handle.
* @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. * @param dst The place to put the resulting parser implementation.
* @param len the length of the json document. * @return the name of the implementation, e.g. "haswell", "westmere", "arm64"
* @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity.
* @return the error code, or SUCCESS if there was no error.
*/ */
WARN_UNUSED virtual error_code parse(const uint8_t *buf, size_t len, dom::parser &parser) const noexcept = 0; virtual error_code create_dom_parser_implementation(
size_t capacity,
size_t max_depth,
std::unique_ptr<internal::dom_parser_implementation> &dst
) const noexcept = 0;
/** /**
* @private For internal implementation use * @private For internal implementation use
@ -80,50 +84,6 @@ public:
*/ */
WARN_UNUSED virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0; WARN_UNUSED virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0;
/**
* @private For internal implementation use
*
* Stage 1 of the document parser.
*
* Overridden by each implementation.
*
* @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
* @param len the length of the json document.
* @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity.
* @param streaming whether this is being called by parser::parse_many.
* @return the error code, or SUCCESS if there was no error.
*/
WARN_UNUSED virtual error_code stage1(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) const noexcept = 0;
/**
* @private For internal implementation use
*
* Stage 2 of the document parser.
*
* Overridden by each implementation.
*
* @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
* @param len the length of the json document.
* @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity.
* @return the error code, or SUCCESS if there was no error.
*/
WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser) const noexcept = 0;
/**
* @private For internal implementation use
*
* Stage 2 of the document parser for parser::parse_many.
*
* Overridden by each implementation.
*
* @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
* @param len the length of the json document.
* @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity.
* @param next_json the next structural index. Start this at 0 the first time, and it will be updated to the next value to pass each time.
* @return the error code, SUCCESS if there was no error, or SUCCESS_AND_HAS_MORE if there was no error and stage2 can be called again.
*/
WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser, size_t &next_json) const noexcept = 0;
protected: protected:
/** @private Construct an implementation with the given name and description. For subclasses. */ /** @private Construct an implementation with the given name and description. For subclasses. */
really_inline implementation( really_inline implementation(

View File

@ -34,18 +34,18 @@ namespace internal {
* */ * */
inline uint32_t find_last_json_buf_idx(const uint8_t *buf, size_t size, const dom::parser &parser) { inline uint32_t find_last_json_buf_idx(const uint8_t *buf, size_t size, const dom::parser &parser) {
// this function can be generally useful // this function can be generally useful
if (parser.n_structural_indexes == 0) if (parser.implementation->n_structural_indexes == 0)
return 0; return 0;
auto last_i = parser.n_structural_indexes - 1; auto last_i = parser.implementation->n_structural_indexes - 1;
if (parser.structural_indexes[last_i] == size) { if (parser.implementation->structural_indexes[last_i] == size) {
if (last_i == 0) if (last_i == 0)
return 0; return 0;
last_i = parser.n_structural_indexes - 2; last_i = parser.implementation->n_structural_indexes - 2;
} }
auto arr_cnt = 0; auto arr_cnt = 0;
auto obj_cnt = 0; auto obj_cnt = 0;
for (auto i = last_i; i > 0; i--) { for (auto i = last_i; i > 0; i--) {
auto idxb = parser.structural_indexes[i]; auto idxb = parser.implementation->structural_indexes[i];
switch (buf[idxb]) { switch (buf[idxb]) {
case ':': case ':':
case ',': case ',':
@ -63,7 +63,7 @@ inline uint32_t find_last_json_buf_idx(const uint8_t *buf, size_t size, const do
arr_cnt++; arr_cnt++;
break; break;
} }
auto idxa = parser.structural_indexes[i - 1]; auto idxa = parser.implementation->structural_indexes[i - 1];
switch (buf[idxa]) { switch (buf[idxa]) {
case '{': case '{':
case '[': case '[':
@ -172,17 +172,17 @@ inline error_code document_stream::json_parse() noexcept {
if (_batch_size == 0) { if (_batch_size == 0) {
return simdjson::UTF8_ERROR; return simdjson::UTF8_ERROR;
} }
auto stage1_is_ok = error_code(simdjson::active_implementation->stage1(buf(), _batch_size, parser, true)); auto stage1_is_ok = error_code(parser.implementation->stage1(buf(), _batch_size, true));
if (stage1_is_ok != simdjson::SUCCESS) { if (stage1_is_ok != simdjson::SUCCESS) {
return stage1_is_ok; return stage1_is_ok;
} }
uint32_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser); uint32_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser);
if (last_index == 0) { if (last_index == 0) {
if (parser.n_structural_indexes == 0) { if (parser.implementation->n_structural_indexes == 0) {
return simdjson::EMPTY; return simdjson::EMPTY;
} }
} else { } else {
parser.n_structural_indexes = last_index + 1; parser.implementation->n_structural_indexes = last_index + 1;
} }
} }
// the second thread is running or done. // the second thread is running or done.
@ -191,15 +191,15 @@ inline error_code document_stream::json_parse() noexcept {
if (stage1_is_ok_thread != simdjson::SUCCESS) { if (stage1_is_ok_thread != simdjson::SUCCESS) {
return stage1_is_ok_thread; return stage1_is_ok_thread;
} }
std::swap(parser.structural_indexes, parser_thread.structural_indexes); std::swap(parser.implementation->structural_indexes, parser_thread.implementation->structural_indexes);
parser.n_structural_indexes = parser_thread.n_structural_indexes; parser.implementation->n_structural_indexes = parser_thread.implementation->n_structural_indexes;
advance(last_json_buffer_loc); advance(last_json_buffer_loc);
n_bytes_parsed += last_json_buffer_loc; n_bytes_parsed += last_json_buffer_loc;
} }
// let us decide whether we will start a new thread // let us decide whether we will start a new thread
if (remaining() - _batch_size > 0) { if (remaining() - _batch_size > 0) {
last_json_buffer_loc = last_json_buffer_loc =
parser.structural_indexes[internal::find_last_json_buf_idx(buf(), _batch_size, parser)]; parser.implementation->structural_indexes[internal::find_last_json_buf_idx(buf(), _batch_size, parser)];
_batch_size = (std::min)(_batch_size, remaining() - last_json_buffer_loc); _batch_size = (std::min)(_batch_size, remaining() - last_json_buffer_loc);
if (_batch_size > 0) { if (_batch_size > 0) {
_batch_size = internal::trimmed_length_safe_utf8( _batch_size = internal::trimmed_length_safe_utf8(
@ -214,22 +214,22 @@ inline error_code document_stream::json_parse() noexcept {
// this->stage1_is_ok_thread // this->stage1_is_ok_thread
// there is only one thread that may write to this value // there is only one thread that may write to this value
stage_1_thread = std::thread([this, b, bs] { stage_1_thread = std::thread([this, b, bs] {
this->stage1_is_ok_thread = error_code(simdjson::active_implementation->stage1(b, bs, this->parser_thread, true)); this->stage1_is_ok_thread = error_code(parser_thread.implementation->stage1(b, bs, true));
}); });
} }
} }
next_json = 0; next_json = 0;
load_next_batch = false; load_next_batch = false;
} // load_next_batch } // load_next_batch
error_code res = simdjson::active_implementation->stage2(buf(), remaining(), parser, next_json); error_code res = parser.implementation->stage2(buf(), remaining(), parser.doc, next_json);
if (res == simdjson::SUCCESS_AND_HAS_MORE) { if (res == simdjson::SUCCESS_AND_HAS_MORE) {
n_parsed_docs++; n_parsed_docs++;
current_buffer_loc = parser.structural_indexes[next_json]; current_buffer_loc = parser.implementation->structural_indexes[next_json];
load_next_batch = (current_buffer_loc == last_json_buffer_loc); load_next_batch = (current_buffer_loc == last_json_buffer_loc);
} else if (res == simdjson::SUCCESS) { } else if (res == simdjson::SUCCESS) {
n_parsed_docs++; n_parsed_docs++;
if (remaining() > _batch_size) { if (remaining() > _batch_size) {
current_buffer_loc = parser.structural_indexes[next_json - 1]; current_buffer_loc = parser.implementation->structural_indexes[next_json - 1];
load_next_batch = true; load_next_batch = true;
res = simdjson::SUCCESS_AND_HAS_MORE; res = simdjson::SUCCESS_AND_HAS_MORE;
} }
@ -249,28 +249,28 @@ inline error_code document_stream::json_parse() noexcept {
n_bytes_parsed += current_buffer_loc; n_bytes_parsed += current_buffer_loc;
_batch_size = (std::min)(_batch_size, remaining()); _batch_size = (std::min)(_batch_size, remaining());
_batch_size = internal::trimmed_length_safe_utf8((const char *)buf(), _batch_size); _batch_size = internal::trimmed_length_safe_utf8((const char *)buf(), _batch_size);
auto stage1_is_ok = (error_code)simdjson::active_implementation->stage1(buf(), _batch_size, parser, true); auto stage1_is_ok = (error_code)parser.implementation->stage1(buf(), _batch_size, true);
if (stage1_is_ok != simdjson::SUCCESS) { if (stage1_is_ok != simdjson::SUCCESS) {
return stage1_is_ok; return stage1_is_ok;
} }
uint32_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser); uint32_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser);
if (last_index == 0) { if (last_index == 0) {
if (parser.n_structural_indexes == 0) { if (parser.implementation->n_structural_indexes == 0) {
return EMPTY; return EMPTY;
} }
} else { } else {
parser.n_structural_indexes = last_index + 1; parser.implementation->n_structural_indexes = last_index + 1;
} }
load_next_batch = false; load_next_batch = false;
} // load_next_batch } // load_next_batch
error_code res = simdjson::active_implementation->stage2(buf(), remaining(), parser, next_json); error_code res = parser.implementation->stage2(buf(), remaining(), parser.doc, next_json);
if (likely(res == simdjson::SUCCESS_AND_HAS_MORE)) { if (likely(res == simdjson::SUCCESS_AND_HAS_MORE)) {
n_parsed_docs++; n_parsed_docs++;
current_buffer_loc = parser.structural_indexes[next_json]; current_buffer_loc = parser.implementation->structural_indexes[next_json];
} else if (res == simdjson::SUCCESS) { } else if (res == simdjson::SUCCESS) {
n_parsed_docs++; n_parsed_docs++;
if (remaining() > _batch_size) { if (remaining() > _batch_size) {
current_buffer_loc = parser.structural_indexes[next_json - 1]; current_buffer_loc = parser.implementation->structural_indexes[next_json - 1];
next_json = 1; next_json = 1;
load_next_batch = true; load_next_batch = true;
res = simdjson::SUCCESS_AND_HAS_MORE; res = simdjson::SUCCESS_AND_HAS_MORE;

View File

@ -17,8 +17,11 @@ namespace dom {
// //
really_inline parser::parser(size_t max_capacity) noexcept really_inline parser::parser(size_t max_capacity) noexcept
: _max_capacity{max_capacity}, : _max_capacity{max_capacity},
loaded_bytes(nullptr, &aligned_free_char) loaded_bytes(nullptr, &aligned_free_char) {
{} }
really_inline parser::parser(parser &&other) noexcept = default;
really_inline parser &parser::operator=(parser &&other) noexcept = default;
inline bool parser::is_valid() const noexcept { return valid; } inline bool parser::is_valid() const noexcept { return valid; }
inline int parser::get_error_code() const noexcept { return error; } inline int parser::get_error_code() const noexcept { return error; }
inline std::string parser::get_error_message() const noexcept { return error_message(error); } inline std::string parser::get_error_message() const noexcept { return error_message(error); }
@ -101,15 +104,12 @@ inline simdjson_result<element> parser::parse(const uint8_t *buf, size_t len, bo
memcpy((void *)buf, tmp_buf, len); memcpy((void *)buf, tmp_buf, len);
} }
code = simdjson::active_implementation->parse(buf, len, *this); code = implementation->parse(buf, len, doc);
if (realloc_if_needed) { if (realloc_if_needed) {
aligned_free((void *)buf); // must free before we exit aligned_free((void *)buf); // must free before we exit
} }
if (code) { return code; } if (code) { return code; }
// We're indicating validity via the simdjson_result<element>, so set the parse state back to invalid
valid = false;
error = UNINITIALIZED;
return doc.root(); return doc.root();
} }
really_inline simdjson_result<element> parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept { really_inline simdjson_result<element> parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept {
@ -136,81 +136,30 @@ inline document_stream parser::parse_many(const padded_string &s, size_t batch_s
} }
really_inline size_t parser::capacity() const noexcept { really_inline size_t parser::capacity() const noexcept {
return _capacity; return implementation ? implementation->capacity() : 0;
} }
really_inline size_t parser::max_capacity() const noexcept { really_inline size_t parser::max_capacity() const noexcept {
return _max_capacity; return _max_capacity;
} }
really_inline size_t parser::max_depth() const noexcept { really_inline size_t parser::max_depth() const noexcept {
return _max_depth; return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH;
} }
WARN_UNUSED WARN_UNUSED
inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept { inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept {
// //
// If capacity has changed, reallocate capacity-based buffers // Reallocate implementation and document if needed
// //
if (_capacity != capacity) { error_code err;
// Set capacity to 0 until we finish, in case there's an error if (implementation) {
_capacity = 0; err = implementation->allocate(capacity, max_depth);
} else {
// err = simdjson::active_implementation->create_dom_parser_implementation(capacity, max_depth, implementation);
// Reallocate the document
//
error_code err = doc.allocate(capacity);
if (err) { return err; }
//
// Don't allocate 0 bytes, just return.
//
if (capacity == 0) {
structural_indexes.reset();
return SUCCESS;
}
//
// Initialize stage 1 output
//
size_t max_structures = ROUNDUP_N(capacity, 64) + 2 + 7;
structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); // TODO realloc
if (!structural_indexes) {
return MEMALLOC;
}
_capacity = capacity;
//
// If capacity hasn't changed, but the document was taken, allocate a new document.
//
} else if (!doc.tape) {
error_code err = doc.allocate(capacity);
if (err) { return err; }
} }
if (err) { return err; }
// if (implementation->capacity() != capacity || !doc.tape) {
// If max_depth has changed, reallocate those buffers return doc.allocate(capacity);
//
if (max_depth != _max_depth) {
_max_depth = 0;
if (max_depth == 0) {
ret_address.reset();
containing_scope.reset();
return SUCCESS;
}
//
// Initialize stage 2 state
//
containing_scope.reset(new (std::nothrow) internal::scope_descriptor[max_depth]); // TODO realloc
ret_address.reset(new (std::nothrow) internal::ret_address[max_depth]);
if (!ret_address || !containing_scope) {
// Could not allocate memory
return MEMALLOC;
}
_max_depth = max_depth;
} }
return SUCCESS; return SUCCESS;
} }
@ -220,24 +169,24 @@ inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcep
return !allocate(capacity, max_depth); return !allocate(capacity, max_depth);
} }
really_inline void parser::set_max_capacity(size_t max_capacity) noexcept {
_max_capacity = max_capacity;
}
inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept { inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept {
// If we don't have enough capacity, (try to) automatically bump it. // If we don't have enough capacity, (try to) automatically bump it.
// If the document was taken, reallocate that too. // If the document was taken, reallocate that too.
// Both in one if statement to minimize unlikely branching. // Both in one if statement to minimize unlikely branching.
if (unlikely(desired_capacity > capacity() || !doc.tape)) { if (unlikely(capacity() < desired_capacity || !doc.tape)) {
if (desired_capacity > max_capacity()) { if (desired_capacity > max_capacity()) {
return error = CAPACITY; return error = CAPACITY;
} }
return allocate(desired_capacity, _max_depth > 0 ? _max_depth : DEFAULT_MAX_DEPTH); return allocate(desired_capacity, max_depth());
} }
return SUCCESS; return SUCCESS;
} }
really_inline void parser::set_max_capacity(size_t max_capacity) noexcept {
_max_capacity = max_capacity;
}
} // namespace dom } // namespace dom
} // namespace simdjson } // namespace simdjson

View File

@ -0,0 +1,185 @@
#ifndef SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H
#define SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H
#include "simdjson/common_defs.h"
#include "simdjson/error.h"
#include <memory>
namespace simdjson {
namespace dom {
class document;
} // namespace dom
namespace internal {
/**
* An implementation of simdjson's DOM parser for a particular CPU architecture.
*
* This class is expected to be accessed only by pointer, and never move in memory (though the
* pointer can move).
*/
class dom_parser_implementation {
public:
/**
* @private For internal implementation use
*
* Run a full JSON parse on a single document (stage1 + stage2).
*
* Guaranteed only to be called when capacity > document length.
*
* Overridden by each implementation.
*
* @param buf The json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
* @param len The length of the json document.
* @param doc The document to output to.
* @return The error code, or SUCCESS if there was no error.
*/
WARN_UNUSED virtual error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept = 0;
/**
* @private For internal implementation use
*
* Stage 1 of the document parser.
*
* Guaranteed only to be called when capacity > document length.
*
* Overridden by each implementation.
*
* @param buf The json document to parse.
* @param len The length of the json document.
* @param streaming Whether this is being called by parser::parse_many.
* @return The error code, or SUCCESS if there was no error.
*/
WARN_UNUSED virtual error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept = 0;
/**
* @private For internal implementation use
*
* Stage 2 of the document parser.
*
* Called after stage1().
*
* Overridden by each implementation.
*
* @param doc The document to output to.
* @return The error code, or SUCCESS if there was no error.
*/
WARN_UNUSED virtual error_code stage2(dom::document &doc) noexcept = 0;
/**
* @private For internal implementation use
*
* Stage 2 of the document parser for parser::parse_many.
*
* Guaranteed only to be called after stage1(), with buf and len being a subset of the total stage1 buf/len.
* Overridden by each implementation.
*
* @param buf The json document to parse.
* @param len The length of the json document.
* @param doc The document to output to.
* @param next_json The next structural index. Start this at 0 the first time, and it will be updated to the next value to pass each time.
* @return The error code, SUCCESS if there was no error, or SUCCESS_AND_HAS_MORE if there was no error and stage2 can be called again.
*/
WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, dom::document &doc, size_t &next_json) noexcept = 0;
/**
* Change the capacity of this parser.
*
* Generally used for reallocation.
*
* @param capacity The new capacity.
* @return The error code, or SUCCESS if there was no error.
*/
virtual error_code set_capacity(size_t capacity) noexcept = 0;
/**
* Change the max depth of this parser.
*
* Generally used for reallocation.
*
* @param max_depth The new max_depth.
* @return The error code, or SUCCESS if there was no error.
*/
virtual error_code set_max_depth(size_t max_depth) noexcept = 0;
/**
* Deallocate this parser.
*/
virtual ~dom_parser_implementation() = default;
/** Next location to write to in the tape */
uint32_t current_loc{0};
/** Number of structural indices passed from stage 1 to stage 2 */
uint32_t n_structural_indexes{0};
/** Structural indices passed from stage 1 to stage 2 */
std::unique_ptr<uint32_t[]> structural_indexes{};
/**
* The largest document this parser can support without reallocating.
*
* @return Current capacity, in bytes.
*/
really_inline size_t capacity() const noexcept;
/**
* The maximum level of nested object and arrays supported by this parser.
*
* @return Maximum depth, as a number of nesting levels.
*/
really_inline size_t max_depth() const noexcept;
/**
* Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
* and `max_depth` depth.
*
* @param capacity The new capacity.
* @param max_depth The new max_depth. Must be passed explicitly; no default argument is declared.
* @return The error, if there is one.
*/
WARN_UNUSED inline error_code allocate(size_t capacity, size_t max_depth) noexcept;
protected:
/**
* The maximum document length this parser supports.
*
* Buffers are large enough to handle any document up to this length.
*/
size_t _capacity{0};
/**
* The maximum depth (number of nested objects and arrays) supported by this parser.
*
* Starts at 0. NOTE(review): presumably updated by set_max_depth() implementations; confirm.
*/
size_t _max_depth{0};
}; // class dom_parser_implementation
// Reports the document length, in bytes, that this parser currently supports.
really_inline size_t dom_parser_implementation::capacity() const noexcept {
  return this->_capacity;
}
// Reports the number of nested objects and arrays this parser currently supports.
really_inline size_t dom_parser_implementation::max_depth() const noexcept {
  return this->_max_depth;
}
/**
 * Bring this parser to the requested capacity and max depth.
 *
 * The depth is handled first, then the capacity. Each setter is invoked only when the
 * requested value differs from the current one, and the first failure is returned as is.
 *
 * @param capacity The new capacity.
 * @param max_depth The new max_depth.
 * @return The error code from the failing setter, or SUCCESS.
 */
WARN_UNUSED
inline error_code dom_parser_implementation::allocate(size_t capacity, size_t max_depth) noexcept {
  if (max_depth != _max_depth) {
    const error_code depth_error = set_max_depth(max_depth);
    if (depth_error) { return depth_error; }
  }
  if (capacity != _capacity) {
    const error_code capacity_error = set_capacity(capacity);
    if (capacity_error) { return capacity_error; }
  }
  return SUCCESS;
}
} // namespace internal
} // namespace simdjson
#endif // SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H

View File

@ -1,2 +0,0 @@
Try :
c++ -O3 -std=c++17 -pthread -o amalgamate_demo amalgamate_demo.cpp && ./amalgamate_demo ../jsonexamples/twitter.json ../jsonexamples/amazon_cellphones.ndjson

View File

@ -1,42 +0,0 @@
/* auto-generated on Thu 21 May 2020 14:01:15 EDT. Do not edit! */
#include <iostream>
#include "simdjson.h"
#include "simdjson.cpp"
/**
 * Demo driver for the amalgamated build.
 *
 * Parses the JSON file named by argv[1]. When a second file name is supplied, the documents
 * in argv[2] are additionally run through load_many.
 *
 * @return EXIT_SUCCESS when every requested parse succeeds; EXIT_FAILURE when a parse fails
 *         or when no file name was given.
 */
int main(int argc, char *argv[]) {
  if(argc < 2) {
    std::cerr << "Please specify at least one file name. " << std::endl;
    // Bail out: argv[1] is not a usable file name here, so it must not reach parser.load().
    return EXIT_FAILURE;
  }
  const char * filename = argv[1];
  simdjson::dom::parser parser;
  simdjson::error_code error;
  UNUSED simdjson::dom::element elem;
  parser.load(filename).tie(elem, error); // do the parsing
  if (error) {
    std::cout << "parse failed" << std::endl;
    std::cout << "error code: " << error << std::endl;
    std::cout << error << std::endl;
    return EXIT_FAILURE;
  } else {
    std::cout << "parse valid" << std::endl;
  }
  if(argc == 2) {
    return EXIT_SUCCESS;
  }

  // parse_many
  const char * filename2 = argv[2];
  for (auto result : parser.load_many(filename2)) {
    error = result.error();
    // Keep the first failure: a later successful document must not overwrite it.
    if (error) { break; }
  }
  if (error) {
    std::cout << "parse_many failed" << std::endl;
    std::cout << "error code: " << error << std::endl;
    std::cout << error << std::endl;
    return EXIT_FAILURE;
  } else {
    std::cout << "parse_many valid" << std::endl;
  }
  return EXIT_SUCCESS;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,8 +1,13 @@
#include "simdjson.h" #include "simdjson.h"
#include "arm64/implementation.h"
#include "arm64/dom_parser_implementation.h"
//
// Stage 1
//
#include "arm64/bitmask.h" #include "arm64/bitmask.h"
#include "arm64/simd.h" #include "arm64/simd.h"
#include "arm64/bitmanipulation.h" #include "arm64/bitmanipulation.h"
#include "arm64/implementation.h"
namespace simdjson { namespace simdjson {
namespace arm64 { namespace arm64 {
@ -79,8 +84,35 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui
#include "generic/stage1/utf8_lookup2_algorithm.h" #include "generic/stage1/utf8_lookup2_algorithm.h"
#include "generic/stage1/json_structural_indexer.h" #include "generic/stage1/json_structural_indexer.h"
WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept { WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
return arm64::stage1::json_structural_indexer::index<64>(buf, len, parser, streaming); this->buf = _buf;
this->len = _len;
return arm64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming);
}
} // namespace arm64
} // namespace simdjson
//
// Stage 2
//
#include "arm64/stringparsing.h"
#include "arm64/numberparsing.h"
namespace simdjson {
namespace arm64 {
#include "generic/stage2/logger.h"
#include "generic/stage2/atomparsing.h"
#include "generic/stage2/structural_iterator.h"
#include "generic/stage2/structural_parser.h"
#include "generic/stage2/streaming_structural_parser.h"
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
error_code err = stage1(_buf, _len, false);
if (err) { return err; }
return stage2(_doc);
} }
} // namespace arm64 } // namespace arm64

View File

@ -0,0 +1,15 @@
#ifndef SIMDJSON_ARM64_DOM_PARSER_IMPLEMENTATION_H
#define SIMDJSON_ARM64_DOM_PARSER_IMPLEMENTATION_H
#include "simdjson.h"
#include "isadetection.h"
namespace simdjson {
namespace arm64 {
#include "generic/dom_parser_implementation.h"
} // namespace arm64
} // namespace simdjson
#endif // SIMDJSON_ARM64_DOM_PARSER_IMPLEMENTATION_H

View File

@ -0,0 +1,25 @@
#include "simdjson.h"
#include "arm64/implementation.h"
#include "arm64/dom_parser_implementation.h"
TARGET_HASWELL
namespace simdjson {
namespace arm64 {
/**
 * Create the arm64 DOM parser implementation and size its buffers.
 *
 * @param capacity  Document capacity, in bytes, passed to set_capacity().
 * @param max_depth Maximum nesting depth passed to set_max_depth().
 * @param dst       Receives the newly created parser implementation.
 * @return MEMALLOC when the object itself cannot be allocated, otherwise the first error
 *         reported by set_capacity() / set_max_depth(), or SUCCESS. When a setter fails,
 *         dst still holds the created object.
 */
WARN_UNUSED error_code implementation::create_dom_parser_implementation(
  size_t capacity,
  size_t max_depth,
  std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept {
  dst.reset( new (std::nothrow) dom_parser_implementation() );
  if (!dst) { return MEMALLOC; }
  // Both setters can fail; their results must be propagated instead of being dropped,
  // otherwise a parser without buffers would be reported as successfully created.
  error_code err = dst->set_capacity(capacity);
  if (err) { return err; }
  err = dst->set_max_depth(max_depth);
  if (err) { return err; }
  return SUCCESS;
}
} // namespace arm64
} // namespace simdjson
UNTARGET_REGION

View File

@ -12,11 +12,12 @@ using namespace simdjson::dom;
class implementation final : public simdjson::implementation { class implementation final : public simdjson::implementation {
public: public:
really_inline implementation() : simdjson::implementation("arm64", "ARM NEON", instruction_set::NEON) {} really_inline implementation() : simdjson::implementation("arm64", "ARM NEON", instruction_set::NEON) {}
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, parser &parser) const noexcept final; WARN_UNUSED error_code create_dom_parser_implementation(
size_t capacity,
size_t max_length,
std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept final;
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept final;
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, parser &parser) const noexcept final;
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, parser &parser, size_t &next_json) const noexcept final;
}; };
} // namespace arm64 } // namespace arm64

View File

@ -1,21 +0,0 @@
#ifndef SIMDJSON_ARM64_STAGE2_H
#define SIMDJSON_ARM64_STAGE2_H
#include "simdjson.h"
#include "arm64/implementation.h"
#include "arm64/stringparsing.h"
#include "arm64/numberparsing.h"
namespace simdjson {
namespace arm64 {
#include "generic/stage2/logger.h"
#include "generic/stage2/atomparsing.h"
#include "generic/stage2/structural_iterator.h"
#include "generic/stage2/structural_parser.h"
#include "generic/stage2/streaming_structural_parser.h"
} // namespace arm64
} // namespace simdjson
#endif // SIMDJSON_ARM64_STAGE2_H

View File

@ -1,6 +1,10 @@
#include "simdjson.h" #include "simdjson.h"
#include "fallback/implementation.h" #include "fallback/implementation.h"
#include "fallback/dom_parser_implementation.h"
//
// Stage 1
//
namespace simdjson { namespace simdjson {
namespace fallback { namespace fallback {
namespace stage1 { namespace stage1 {
@ -8,8 +12,13 @@ namespace stage1 {
class structural_scanner { class structural_scanner {
public: public:
really_inline structural_scanner(const uint8_t *_buf, uint32_t _len, parser &_doc_parser, bool _streaming) really_inline structural_scanner(dom_parser_implementation &_parser, bool _streaming)
: buf{_buf}, next_structural_index{_doc_parser.structural_indexes.get()}, doc_parser{_doc_parser}, idx{0}, len{_len}, error{SUCCESS}, streaming{_streaming} {} : buf{_parser.buf},
next_structural_index{_parser.structural_indexes.get()},
parser{_parser},
len{static_cast<uint32_t>(_parser.len)},
streaming{_streaming} {
}
really_inline void add_structural() { really_inline void add_structural() {
*next_structural_index = idx; *next_structural_index = idx;
@ -119,33 +128,32 @@ really_inline error_code scan() {
break; break;
} }
} }
if (unlikely(next_structural_index == doc_parser.structural_indexes.get())) { if (unlikely(next_structural_index == parser.structural_indexes.get())) {
return EMPTY; return EMPTY;
} }
*next_structural_index = len; *next_structural_index = len;
next_structural_index++; next_structural_index++;
doc_parser.n_structural_indexes = uint32_t(next_structural_index - doc_parser.structural_indexes.get()); parser.n_structural_indexes = uint32_t(next_structural_index - parser.structural_indexes.get());
return error; return error;
} }
private: private:
const uint8_t *buf; const uint8_t *buf;
uint32_t *next_structural_index; uint32_t *next_structural_index;
parser &doc_parser; dom_parser_implementation &parser;
uint32_t idx;
uint32_t len; uint32_t len;
error_code error; uint32_t idx{0};
error_code error{SUCCESS};
bool streaming; bool streaming;
}; // structural_scanner }; // structural_scanner
} // namespace stage1 } // namespace stage1
WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept { WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
if (unlikely(len > parser.capacity())) { this->buf = _buf;
return CAPACITY; this->len = _len;
} stage1::structural_scanner scanner(*this, streaming);
stage1::structural_scanner scanner(buf, uint32_t(len), parser, streaming);
return scanner.scan(); return scanner.scan();
} }
@ -207,3 +215,27 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui
} // namespace fallback } // namespace fallback
} // namespace simdjson } // namespace simdjson
//
// Stage 2
//
#include "fallback/stringparsing.h"
#include "fallback/numberparsing.h"
namespace simdjson {
namespace fallback {
#include "generic/stage2/logger.h"
#include "generic/stage2/atomparsing.h"
#include "generic/stage2/structural_iterator.h"
#include "generic/stage2/structural_parser.h"
#include "generic/stage2/streaming_structural_parser.h"
/**
 * Parse a whole document: run stage 1 in non-streaming mode, then stage 2 into _doc.
 *
 * @return The stage 1 error if there is one, otherwise whatever stage 2 returns.
 */
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
  const error_code stage1_error = stage1(_buf, _len, false);
  // Stage 2 is only attempted when stage 1 reported no error.
  if (!stage1_error) {
    return stage2(_doc);
  }
  return stage1_error;
}
} // namespace fallback
} // namespace simdjson

View File

@ -0,0 +1,15 @@
#ifndef SIMDJSON_FALLBACK_DOM_PARSER_IMPLEMENTATION_H
#define SIMDJSON_FALLBACK_DOM_PARSER_IMPLEMENTATION_H
#include "simdjson.h"
#include "isadetection.h"
namespace simdjson {
namespace fallback {
#include "generic/dom_parser_implementation.h"
} // namespace fallback
} // namespace simdjson
#endif // SIMDJSON_FALLBACK_DOM_PARSER_IMPLEMENTATION_H

View File

@ -0,0 +1,25 @@
#include "simdjson.h"
#include "fallback/implementation.h"
#include "fallback/dom_parser_implementation.h"
TARGET_HASWELL
namespace simdjson {
namespace fallback {
/**
 * Create the fallback DOM parser implementation and size its buffers.
 *
 * @param capacity  Document capacity, in bytes, passed to set_capacity().
 * @param max_depth Maximum nesting depth passed to set_max_depth().
 * @param dst       Receives the newly created parser implementation.
 * @return MEMALLOC when the object itself cannot be allocated, otherwise the first error
 *         reported by set_capacity() / set_max_depth(), or SUCCESS. When a setter fails,
 *         dst still holds the created object.
 */
WARN_UNUSED error_code implementation::create_dom_parser_implementation(
  size_t capacity,
  size_t max_depth,
  std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept {
  dst.reset( new (std::nothrow) dom_parser_implementation() );
  if (!dst) { return MEMALLOC; }
  // Both setters can fail; their results must be propagated instead of being dropped,
  // otherwise a parser without buffers would be reported as successfully created.
  error_code err = dst->set_capacity(capacity);
  if (err) { return err; }
  err = dst->set_max_depth(max_depth);
  if (err) { return err; }
  return SUCCESS;
}
} // namespace fallback
} // namespace simdjson
UNTARGET_REGION

View File

@ -16,11 +16,12 @@ public:
"Generic fallback implementation", "Generic fallback implementation",
0 0
) {} ) {}
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, parser &parser) const noexcept final; WARN_UNUSED error_code create_dom_parser_implementation(
size_t capacity,
size_t max_length,
std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept final;
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept final;
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, parser &parser) const noexcept final;
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, parser &parser, size_t &next_json) const noexcept final;
}; };
} // namespace fallback } // namespace fallback

View File

@ -1,17 +0,0 @@
#include "simdjson.h"
#include "fallback/implementation.h"
#include "fallback/stringparsing.h"
#include "fallback/numberparsing.h"
namespace simdjson {
namespace fallback {
#include "generic/stage2/logger.h"
#include "generic/stage2/atomparsing.h"
#include "generic/stage2/structural_iterator.h"
#include "generic/stage2/structural_parser.h"
#include "generic/stage2/streaming_structural_parser.h"
} // namespace fallback
} // namespace simdjson

View File

@ -0,0 +1,58 @@
// Bookkeeping entry for one open scope (object or array) during stage 2.
// Expectation: sizeof(scope_descriptor) = 64/8, i.e. 8 bytes.
struct scope_descriptor {
uint32_t tape_index; // where, on the tape, the scope ([ or {) begins
uint32_t count; // how many elements are in the scope
}; // struct scope_descriptor
// Type stored for the "return address" of each open scope: a pointer when computed goto is
// enabled, a plain char otherwise.
#ifdef SIMDJSON_USE_COMPUTED_GOTO
using ret_address_t = void*;
#else
using ret_address_t = char;
#endif
/**
 * Implementation-specific parser state. It holds the input recorded by stage 1, the scope
 * stacks used by stage 2, and a pointer to the document stage 2 writes to, and it declares
 * the stage1 / stage2 / parse entry points plus the buffer-sizing setters.
 */
class dom_parser_implementation final : public internal::dom_parser_implementation {
public:
/** Tape location of each open { or [ */
std::unique_ptr<scope_descriptor[]> containing_scope{};
/** Return address of each open { or [ */
std::unique_ptr<ret_address_t[]> ret_address{};
/** Buffer passed to stage 1 */
const uint8_t *buf{};
/** Length passed to stage 1 */
size_t len{0};
/** Document passed to stage 2 */
dom::document *doc{};
/** Error code (TODO remove, this is not even used, we just set it so the g++ optimizer doesn't get confused) */
error_code error{UNINITIALIZED};
/** Default constructor; every member above starts from its in-class initializer. */
really_inline dom_parser_implementation();
/** Copy construction is disabled. */
dom_parser_implementation(const dom_parser_implementation &) = delete;
/** Copy assignment is disabled. */
dom_parser_implementation & operator=(const dom_parser_implementation &) = delete;
/** Parse a whole document into doc (stage 1 followed by stage 2). */
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final;
/** Stage 1: records buf and len on this object and finds the structural indexes. */
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept final;
/** Stage 2 for a whole document, writing into doc. */
WARN_UNUSED error_code stage2(dom::document &doc) noexcept final;
/** Streaming stage 2: records buf, len and doc, then parses starting at structural index next_json. */
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::document &doc, size_t &next_json) noexcept final;
/** Reallocate the stage 1 buffers for the given capacity and update _capacity. */
WARN_UNUSED error_code set_capacity(size_t capacity) noexcept final;
/** Reallocate the stage 2 scope stacks for the given depth and update _max_depth. */
WARN_UNUSED error_code set_max_depth(size_t max_depth) noexcept final;
};
#include "generic/stage1/allocate.h"
#include "generic/stage2/allocate.h"
// Default constructor: the body is empty because every data member of the class has an
// in-class initializer.
really_inline dom_parser_implementation::dom_parser_implementation() {}
// Leaving these here so they can be inlined if so desired
/**
 * Resize the stage 1 buffers so documents of up to `capacity` bytes can be indexed.
 *
 * Declared inline because this definition lives in a header that more than one source
 * file includes; without it each including translation unit would emit its own
 * external definition.
 *
 * @param capacity Requested document capacity, in bytes.
 * @return SUCCESS, or the allocation error; on error the recorded capacity is reset to 0.
 */
WARN_UNUSED inline error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept {
  error_code err = stage1::allocate::set_capacity(*this, capacity);
  if (err) { _capacity = 0; return err; }

  _capacity = capacity;
  return SUCCESS;
}
/**
 * Resize the stage 2 scope stacks so documents nested up to `max_depth` can be parsed.
 *
 * Declared inline because this definition lives in a header that more than one source
 * file includes; without it each including translation unit would emit its own
 * external definition.
 *
 * @param max_depth Requested maximum nesting depth.
 * @return SUCCESS, or the allocation error; on error the recorded depth is reset to 0.
 */
WARN_UNUSED inline error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept {
  error_code err = stage2::allocate::set_max_depth(*this, max_depth);
  if (err) { _max_depth = 0; return err; }

  _max_depth = max_depth;
  return SUCCESS;
}

View File

@ -0,0 +1,15 @@
namespace stage1 {
namespace allocate {
//
// Allocates stage 1 internal state and outputs in the parser
//
/**
 * (Re)allocate the structural index array for documents of up to `capacity` bytes.
 *
 * The array gets the capacity rounded up to a multiple of 64, plus 2 + 7 extra slots.
 *
 * @return MEMALLOC when the allocation fails, SUCCESS otherwise.
 */
really_inline error_code set_capacity(internal::dom_parser_implementation &parser, size_t capacity) {
  const size_t index_slots = ROUNDUP_N(capacity, 64) + 2 + 7;
  uint32_t *fresh_indexes = new (std::nothrow) uint32_t[index_slots];
  parser.structural_indexes.reset( fresh_indexes );
  if (fresh_indexes == nullptr) {
    return MEMALLOC;
  }
  return SUCCESS;
}
} // namespace allocate
} // namespace stage1

View File

@ -58,7 +58,7 @@ public:
class json_structural_indexer { class json_structural_indexer {
public: public:
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
static error_code index(const uint8_t *buf, size_t len, parser &parser, bool streaming) noexcept; static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool streaming) noexcept;
private: private:
really_inline json_structural_indexer(uint32_t *structural_indexes) really_inline json_structural_indexer(uint32_t *structural_indexes)
@ -66,7 +66,7 @@ private:
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept; really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
really_inline void next(simd::simd8x64<uint8_t> in, json_block block, size_t idx); really_inline void next(simd::simd8x64<uint8_t> in, json_block block, size_t idx);
really_inline error_code finish(parser &parser, size_t idx, size_t len, bool streaming); really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool streaming);
json_scanner scanner{}; json_scanner scanner{};
utf8_checker checker{}; utf8_checker checker{};
@ -83,7 +83,7 @@ really_inline void json_structural_indexer::next(simd::simd8x64<uint8_t> in, jso
unescaped_chars_error |= block.non_quote_inside_string(unescaped); unescaped_chars_error |= block.non_quote_inside_string(unescaped);
} }
really_inline error_code json_structural_indexer::finish(parser &parser, size_t idx, size_t len, bool streaming) { really_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, bool streaming) {
// Write out the final iteration's structurals // Write out the final iteration's structurals
indexer.write(uint32_t(idx-64), prev_structurals); indexer.write(uint32_t(idx-64), prev_structurals);
@ -155,7 +155,7 @@ really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_b
// The caller should still ensure that the input is valid UTF-8. If you are processing substrings, // The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
// you may want to call on a function like trimmed_length_safe_utf8. // you may want to call on a function like trimmed_length_safe_utf8.
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
error_code json_structural_indexer::index(const uint8_t *buf, size_t len, parser &parser, bool streaming) noexcept { error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool streaming) noexcept {
if (unlikely(len > parser.capacity())) { return CAPACITY; } if (unlikely(len > parser.capacity())) { return CAPACITY; }
buf_block_reader<STEP_SIZE> reader(buf, len); buf_block_reader<STEP_SIZE> reader(buf, len);

View File

@ -0,0 +1,18 @@
namespace stage2 {
namespace allocate {
//
// Allocates stage 2 internal state and outputs in the parser
//
/**
 * (Re)allocate the two per-depth stacks (scope descriptors and return addresses) with
 * `max_depth` entries each.
 *
 * Both allocations are attempted before either result is inspected.
 *
 * @return MEMALLOC when at least one allocation fails, SUCCESS otherwise.
 */
really_inline error_code set_max_depth(dom_parser_implementation &parser, size_t max_depth) {
  parser.containing_scope.reset(new (std::nothrow) scope_descriptor[max_depth]);
  parser.ret_address.reset(new (std::nothrow) ret_address_t[max_depth]);

  const bool both_allocated = parser.containing_scope && parser.ret_address;
  if (both_allocated) {
    return SUCCESS;
  }
  return MEMALLOC;
}
} // namespace allocate
} // namespace stage2

View File

@ -1,10 +1,10 @@
namespace stage2 { namespace stage2 {
struct streaming_structural_parser: structural_parser { struct streaming_structural_parser: structural_parser {
really_inline streaming_structural_parser(const uint8_t *buf, size_t len, parser &_doc_parser, uint32_t next_structural) : structural_parser(buf, len, _doc_parser, next_structural) {} really_inline streaming_structural_parser(dom_parser_implementation &_parser, uint32_t next_structural) : structural_parser(_parser, next_structural) {}
// override to add streaming // override to add streaming
WARN_UNUSED really_inline error_code start(UNUSED size_t len, ret_address finish_parser) { WARN_UNUSED really_inline error_code start(ret_address_t finish_parser) {
log_start(); log_start();
init(); // sets is_valid to false init(); // sets is_valid to false
// Capacity ain't no thang for streaming, so we don't check it. // Capacity ain't no thang for streaming, so we don't check it.
@ -12,29 +12,29 @@ struct streaming_structural_parser: structural_parser {
advance_char(); advance_char();
// Push the root scope (there is always at least one scope) // Push the root scope (there is always at least one scope)
if (start_document(finish_parser)) { if (start_document(finish_parser)) {
return on_error(DEPTH_ERROR); return parser.error = DEPTH_ERROR;
} }
return SUCCESS; return SUCCESS;
} }
// override to add streaming // override to add streaming
WARN_UNUSED really_inline error_code finish() { WARN_UNUSED really_inline error_code finish() {
if ( structurals.past_end(doc_parser.n_structural_indexes) ) { if ( structurals.past_end(parser.n_structural_indexes) ) {
log_error("IMPOSSIBLE: past the end of the JSON!"); log_error("IMPOSSIBLE: past the end of the JSON!");
return on_error(TAPE_ERROR); return parser.error = TAPE_ERROR;
} }
end_document(); end_document();
if (depth != 0) { if (depth != 0) {
log_error("Unclosed objects or arrays!"); log_error("Unclosed objects or arrays!");
return on_error(TAPE_ERROR); return parser.error = TAPE_ERROR;
} }
if (doc_parser.containing_scope[depth].tape_index != 0) { if (parser.containing_scope[depth].tape_index != 0) {
log_error("IMPOSSIBLE: root scope tape index did not start at 0!"); log_error("IMPOSSIBLE: root scope tape index did not start at 0!");
return on_error(TAPE_ERROR); return parser.error = TAPE_ERROR;
} }
bool finished = structurals.at_end(doc_parser.n_structural_indexes); bool finished = structurals.at_end(parser.n_structural_indexes);
if (!finished) { log_value("(and has more)"); } if (!finished) { log_value("(and has more)"); }
return on_success(finished ? SUCCESS : SUCCESS_AND_HAS_MORE); return finished ? SUCCESS : SUCCESS_AND_HAS_MORE;
} }
}; };
@ -44,10 +44,13 @@ struct streaming_structural_parser: structural_parser {
* The JSON is parsed to a tape, see the accompanying tape.md file * The JSON is parsed to a tape, see the accompanying tape.md file
* for documentation. * for documentation.
***********/ ***********/
WARN_UNUSED error_code implementation::stage2(const uint8_t *buf, size_t len, parser &doc_parser, size_t &next_json) const noexcept { WARN_UNUSED error_code dom_parser_implementation::stage2(const uint8_t *_buf, size_t _len, dom::document &_doc, size_t &next_json) noexcept {
this->buf = _buf;
this->len = _len;
this->doc = &_doc;
static constexpr stage2::unified_machine_addresses addresses = INIT_ADDRESSES(); static constexpr stage2::unified_machine_addresses addresses = INIT_ADDRESSES();
stage2::streaming_structural_parser parser(buf, len, doc_parser, uint32_t(next_json)); stage2::streaming_structural_parser parser(*this, uint32_t(next_json));
error_code result = parser.start(len, addresses.finish); error_code result = parser.start(addresses.finish);
if (result) { return result; } if (result) { return result; }
// //
// Read first value // Read first value
@ -123,7 +126,7 @@ object_continue:
} }
scope_end: scope_end:
CONTINUE( parser.doc_parser.ret_address[parser.depth] ); CONTINUE( parser.parser.ret_address[parser.depth] );
// //
// Array parser parsers // Array parser parsers

View File

@ -5,8 +5,6 @@
namespace stage2 { namespace stage2 {
using internal::ret_address;
#ifdef SIMDJSON_USE_COMPUTED_GOTO #ifdef SIMDJSON_USE_COMPUTED_GOTO
#define INIT_ADDRESSES() { &&array_begin, &&array_continue, &&error, &&finish, &&object_begin, &&object_continue } #define INIT_ADDRESSES() { &&array_begin, &&array_continue, &&error, &&finish, &&object_begin, &&object_continue }
#define GOTO(address) { goto *(address); } #define GOTO(address) { goto *(address); }
@ -36,76 +34,74 @@ using internal::ret_address;
#endif // SIMDJSON_USE_COMPUTED_GOTO #endif // SIMDJSON_USE_COMPUTED_GOTO
struct unified_machine_addresses { struct unified_machine_addresses {
ret_address array_begin; ret_address_t array_begin;
ret_address array_continue; ret_address_t array_continue;
ret_address error; ret_address_t error;
ret_address finish; ret_address_t finish;
ret_address object_begin; ret_address_t object_begin;
ret_address object_continue; ret_address_t object_continue;
}; };
#undef FAIL_IF #undef FAIL_IF
#define FAIL_IF(EXPR) { if (EXPR) { return addresses.error; } } #define FAIL_IF(EXPR) { if (EXPR) { return addresses.error; } }
struct number_writer { struct number_writer {
parser &doc_parser; dom_parser_implementation &parser;
really_inline void write_s64(int64_t value) noexcept { really_inline void write_s64(int64_t value) noexcept {
append_tape(0, internal::tape_type::INT64); append_tape(0, internal::tape_type::INT64);
std::memcpy(&doc_parser.doc.tape[doc_parser.current_loc], &value, sizeof(value)); std::memcpy(&parser.doc->tape[parser.current_loc], &value, sizeof(value));
++doc_parser.current_loc; ++parser.current_loc;
} }
really_inline void write_u64(uint64_t value) noexcept { really_inline void write_u64(uint64_t value) noexcept {
append_tape(0, internal::tape_type::UINT64); append_tape(0, internal::tape_type::UINT64);
doc_parser.doc.tape[doc_parser.current_loc++] = value; parser.doc->tape[parser.current_loc++] = value;
} }
really_inline void write_double(double value) noexcept { really_inline void write_double(double value) noexcept {
append_tape(0, internal::tape_type::DOUBLE); append_tape(0, internal::tape_type::DOUBLE);
static_assert(sizeof(value) == sizeof(doc_parser.doc.tape[doc_parser.current_loc]), "mismatch size"); static_assert(sizeof(value) == sizeof(parser.doc->tape[parser.current_loc]), "mismatch size");
memcpy(&doc_parser.doc.tape[doc_parser.current_loc++], &value, sizeof(double)); memcpy(&parser.doc->tape[parser.current_loc++], &value, sizeof(double));
// doc.tape[doc.current_loc++] = *((uint64_t *)&d); // doc->tape[doc->current_loc++] = *((uint64_t *)&d);
} }
really_inline void append_tape(uint64_t val, internal::tape_type t) noexcept { really_inline void append_tape(uint64_t val, internal::tape_type t) noexcept {
doc_parser.doc.tape[doc_parser.current_loc++] = val | ((uint64_t(char(t))) << 56); parser.doc->tape[parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
} }
}; // struct number_writer }; // struct number_writer
struct structural_parser { struct structural_parser {
structural_iterator structurals; structural_iterator structurals;
parser &doc_parser; dom_parser_implementation &parser;
/** Next write location in the string buf for stage 2 parsing */ /** Next write location in the string buf for stage 2 parsing */
uint8_t *current_string_buf_loc{}; uint8_t *current_string_buf_loc{};
uint32_t depth; uint32_t depth;
really_inline structural_parser( really_inline structural_parser(
const uint8_t *buf, dom_parser_implementation &_parser,
size_t len,
parser &_doc_parser,
uint32_t next_structural = 0 uint32_t next_structural = 0
) : structurals(buf, len, _doc_parser.structural_indexes.get(), next_structural), doc_parser{_doc_parser}, depth{0} {} ) : structurals(_parser.buf, _parser.len, _parser.structural_indexes.get(), next_structural), parser{_parser}, depth{0} {}
WARN_UNUSED really_inline bool start_scope(ret_address continue_state) { WARN_UNUSED really_inline bool start_scope(ret_address_t continue_state) {
doc_parser.containing_scope[depth].tape_index = doc_parser.current_loc; parser.containing_scope[depth].tape_index = parser.current_loc;
doc_parser.containing_scope[depth].count = 0; parser.containing_scope[depth].count = 0;
doc_parser.current_loc++; // We don't actually *write* the start element until the end. parser.current_loc++; // We don't actually *write* the start element until the end.
doc_parser.ret_address[depth] = continue_state; parser.ret_address[depth] = continue_state;
depth++; depth++;
bool exceeded_max_depth = depth >= doc_parser.max_depth(); bool exceeded_max_depth = depth >= parser.max_depth();
if (exceeded_max_depth) { log_error("Exceeded max depth!"); } if (exceeded_max_depth) { log_error("Exceeded max depth!"); }
return exceeded_max_depth; return exceeded_max_depth;
} }
WARN_UNUSED really_inline bool start_document(ret_address continue_state) { WARN_UNUSED really_inline bool start_document(ret_address_t continue_state) {
log_start_value("document"); log_start_value("document");
return start_scope(continue_state); return start_scope(continue_state);
} }
WARN_UNUSED really_inline bool start_object(ret_address continue_state) { WARN_UNUSED really_inline bool start_object(ret_address_t continue_state) {
log_start_value("object"); log_start_value("object");
return start_scope(continue_state); return start_scope(continue_state);
} }
WARN_UNUSED really_inline bool start_array(ret_address continue_state) { WARN_UNUSED really_inline bool start_array(ret_address_t continue_state) {
log_start_value("array"); log_start_value("array");
return start_scope(continue_state); return start_scope(continue_state);
} }
@ -113,16 +109,16 @@ struct structural_parser {
// this function is responsible for annotating the start of the scope // this function is responsible for annotating the start of the scope
really_inline void end_scope(internal::tape_type start, internal::tape_type end) noexcept { really_inline void end_scope(internal::tape_type start, internal::tape_type end) noexcept {
depth--; depth--;
// write our doc.tape location to the header scope // write our doc->tape location to the header scope
// The root scope gets written *at* the previous location. // The root scope gets written *at* the previous location.
append_tape(doc_parser.containing_scope[depth].tape_index, end); append_tape(parser.containing_scope[depth].tape_index, end);
// count can overflow if it exceeds 24 bits... so we saturate // count can overflow if it exceeds 24 bits... so we saturate
// the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff).
const uint32_t start_tape_index = doc_parser.containing_scope[depth].tape_index; const uint32_t start_tape_index = parser.containing_scope[depth].tape_index;
const uint32_t count = doc_parser.containing_scope[depth].count; const uint32_t count = parser.containing_scope[depth].count;
const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count;
// This is a load and an OR. It would be possible to just write once at doc.tape[d.tape_index] // This is a load and an OR. It would be possible to just write once at doc->tape[d.tape_index]
write_tape(start_tape_index, doc_parser.current_loc | (uint64_t(cntsat) << 32), start); write_tape(start_tape_index, parser.current_loc | (uint64_t(cntsat) << 32), start);
} }
really_inline void end_object() { really_inline void end_object() {
@ -139,11 +135,11 @@ struct structural_parser {
} }
really_inline void append_tape(uint64_t val, internal::tape_type t) noexcept { really_inline void append_tape(uint64_t val, internal::tape_type t) noexcept {
doc_parser.doc.tape[doc_parser.current_loc++] = val | ((uint64_t(char(t))) << 56); parser.doc->tape[parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
} }
really_inline void write_tape(uint32_t loc, uint64_t val, internal::tape_type t) noexcept { really_inline void write_tape(uint32_t loc, uint64_t val, internal::tape_type t) noexcept {
doc_parser.doc.tape[loc] = val | ((uint64_t(char(t))) << 56); parser.doc->tape[loc] = val | ((uint64_t(char(t))) << 56);
} }
// increment_count increments the count of keys in an object or values in an array. // increment_count increments the count of keys in an object or values in an array.
@ -151,12 +147,12 @@ struct structural_parser {
// must be increment in the preceding depth (depth-1) where the array or // must be increment in the preceding depth (depth-1) where the array or
// the object resides. // the object resides.
really_inline void increment_count() { really_inline void increment_count() {
doc_parser.containing_scope[depth - 1].count++; // we have a key value pair in the object at parser.depth - 1 parser.containing_scope[depth - 1].count++; // we have a key value pair in the object at parser.depth - 1
} }
really_inline uint8_t *on_start_string() noexcept { really_inline uint8_t *on_start_string() noexcept {
// we advance the point, accounting for the fact that we have a NULL termination // we advance the point, accounting for the fact that we have a NULL termination
append_tape(current_string_buf_loc - doc_parser.doc.string_buf.get(), internal::tape_type::STRING); append_tape(current_string_buf_loc - parser.doc->string_buf.get(), internal::tape_type::STRING);
return current_string_buf_loc + sizeof(uint32_t); return current_string_buf_loc + sizeof(uint32_t);
} }
@ -186,7 +182,7 @@ struct structural_parser {
WARN_UNUSED really_inline bool parse_number(const uint8_t *src, bool found_minus) { WARN_UNUSED really_inline bool parse_number(const uint8_t *src, bool found_minus) {
log_value("number"); log_value("number");
number_writer writer{doc_parser}; number_writer writer{parser};
bool succeeded = numberparsing::parse_number(src, found_minus, writer); bool succeeded = numberparsing::parse_number(src, found_minus, writer);
if (!succeeded) { log_error("Invalid number"); } if (!succeeded) { log_error("Invalid number"); }
return !succeeded; return !succeeded;
@ -243,7 +239,7 @@ struct structural_parser {
return false; return false;
} }
WARN_UNUSED really_inline ret_address parse_value(const unified_machine_addresses &addresses, ret_address continue_state) { WARN_UNUSED really_inline ret_address_t parse_value(const unified_machine_addresses &addresses, ret_address_t continue_state) {
switch (structurals.current_char()) { switch (structurals.current_char()) {
case '"': case '"':
FAIL_IF( parse_string() ); FAIL_IF( parse_string() );
@ -272,37 +268,27 @@ struct structural_parser {
WARN_UNUSED really_inline error_code finish() { WARN_UNUSED really_inline error_code finish() {
// the string might not be NULL terminated. // the string might not be NULL terminated.
if ( !structurals.at_end(doc_parser.n_structural_indexes) ) { if ( !structurals.at_end(parser.n_structural_indexes) ) {
log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!");
return on_error(TAPE_ERROR); return parser.error = TAPE_ERROR;
} }
end_document(); end_document();
if (depth != 0) { if (depth != 0) {
log_error("Unclosed objects or arrays!"); log_error("Unclosed objects or arrays!");
return on_error(TAPE_ERROR); return parser.error = TAPE_ERROR;
} }
if (doc_parser.containing_scope[depth].tape_index != 0) { if (parser.containing_scope[depth].tape_index != 0) {
log_error("IMPOSSIBLE: root scope tape index did not start at 0!"); log_error("IMPOSSIBLE: root scope tape index did not start at 0!");
return on_error(TAPE_ERROR); return parser.error = TAPE_ERROR;
} }
return on_success(SUCCESS); return SUCCESS;
}
really_inline error_code on_error(error_code new_error_code) noexcept {
doc_parser.error = new_error_code;
return new_error_code;
}
really_inline error_code on_success(error_code success_code) noexcept {
doc_parser.error = success_code;
doc_parser.valid = true;
return success_code;
} }
WARN_UNUSED really_inline error_code error() { WARN_UNUSED really_inline error_code error() {
/* We do not need the next line because this is done by doc_parser.init_stage2(), /* We do not need the next line because this is done by parser.init_stage2(),
* pessimistically. * pessimistically.
* doc_parser.is_valid = false; * parser.is_valid = false;
* At this point in the code, we have all the time in the world. * At this point in the code, we have all the time in the world.
* Note that we know exactly where we are in the document so we could, * Note that we know exactly where we are in the document so we could,
* without any overhead on the processing code, report a specific * without any overhead on the processing code, report a specific
@ -310,12 +296,12 @@ struct structural_parser {
* We could even trigger special code paths to assess what happened * We could even trigger special code paths to assess what happened
* carefully, * carefully,
* all without any added cost. */ * all without any added cost. */
if (depth >= doc_parser.max_depth()) { if (depth >= parser.max_depth()) {
return on_error(DEPTH_ERROR); return parser.error = DEPTH_ERROR;
} }
switch (structurals.current_char()) { switch (structurals.current_char()) {
case '"': case '"':
return on_error(STRING_ERROR); return parser.error = STRING_ERROR;
case '0': case '0':
case '1': case '1':
case '2': case '2':
@ -327,36 +313,35 @@ struct structural_parser {
case '8': case '8':
case '9': case '9':
case '-': case '-':
return on_error(NUMBER_ERROR); return parser.error = NUMBER_ERROR;
case 't': case 't':
return on_error(T_ATOM_ERROR); return parser.error = T_ATOM_ERROR;
case 'n': case 'n':
return on_error(N_ATOM_ERROR); return parser.error = N_ATOM_ERROR;
case 'f': case 'f':
return on_error(F_ATOM_ERROR); return parser.error = F_ATOM_ERROR;
default: default:
return on_error(TAPE_ERROR); return parser.error = TAPE_ERROR;
} }
} }
really_inline void init() { really_inline void init() {
current_string_buf_loc = doc_parser.doc.string_buf.get(); current_string_buf_loc = parser.doc->string_buf.get();
doc_parser.current_loc = 0; parser.current_loc = 0;
doc_parser.valid = false; parser.error = UNINITIALIZED;
doc_parser.error = UNINITIALIZED;
} }
WARN_UNUSED really_inline error_code start(size_t len, ret_address finish_state) { WARN_UNUSED really_inline error_code start(size_t len, ret_address_t finish_state) {
log_start(); log_start();
init(); // sets is_valid to false init(); // sets is_valid to false
if (len > doc_parser.capacity()) { if (len > parser.capacity()) {
return CAPACITY; return parser.error = CAPACITY;
} }
// Advance to the first character as soon as possible // Advance to the first character as soon as possible
structurals.advance_char(); structurals.advance_char();
// Push the root scope (there is always at least one scope) // Push the root scope (there is always at least one scope)
if (start_document(finish_state)) { if (start_document(finish_state)) {
return on_error(DEPTH_ERROR); return parser.error = DEPTH_ERROR;
} }
return SUCCESS; return SUCCESS;
} }
@ -398,9 +383,10 @@ struct structural_parser {
* The JSON is parsed to a tape, see the accompanying tape.md file * The JSON is parsed to a tape, see the accompanying tape.md file
* for documentation. * for documentation.
***********/ ***********/
WARN_UNUSED error_code implementation::stage2(const uint8_t *buf, size_t len, parser &doc_parser) const noexcept { WARN_UNUSED error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept {
this->doc = &_doc;
static constexpr stage2::unified_machine_addresses addresses = INIT_ADDRESSES(); static constexpr stage2::unified_machine_addresses addresses = INIT_ADDRESSES();
stage2::structural_parser parser(buf, len, doc_parser); stage2::structural_parser parser(*this);
error_code result = parser.start(len, addresses.finish); error_code result = parser.start(len, addresses.finish);
if (result) { return result; } if (result) { return result; }
@ -479,7 +465,7 @@ object_continue:
} }
scope_end: scope_end:
CONTINUE( parser.doc_parser.ret_address[parser.depth] ); CONTINUE( parser.parser.ret_address[parser.depth] );
// //
// Array parser states // Array parser states
@ -516,11 +502,3 @@ finish:
error: error:
return parser.error(); return parser.error();
} }
WARN_UNUSED error_code implementation::parse(const uint8_t *buf, size_t len, parser &doc_parser) const noexcept {
error_code code = stage1(buf, len, doc_parser, false);
if (!code) {
code = stage2(buf, len, doc_parser);
}
return code;
}

View File

@ -1,9 +1,13 @@
#include "simdjson.h" #include "simdjson.h"
#include "haswell/implementation.h"
#include "haswell/dom_parser_implementation.h"
//
// Stage 1
//
#include "haswell/bitmask.h" #include "haswell/bitmask.h"
#include "haswell/simd.h" #include "haswell/simd.h"
#include "haswell/bitmanipulation.h" #include "haswell/bitmanipulation.h"
#include "haswell/implementation.h"
TARGET_HASWELL TARGET_HASWELL
namespace simdjson { namespace simdjson {
@ -68,11 +72,38 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui
#include "generic/stage1/utf8_lookup2_algorithm.h" #include "generic/stage1/utf8_lookup2_algorithm.h"
#include "generic/stage1/json_structural_indexer.h" #include "generic/stage1/json_structural_indexer.h"
WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept { WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
return haswell::stage1::json_structural_indexer::index<128>(buf, len, parser, streaming); this->buf = _buf;
this->len = _len;
return haswell::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming);
}
} // namespace haswell
} // namespace simdjson
UNTARGET_REGION
//
// Stage 2
//
#include "haswell/stringparsing.h"
#include "haswell/numberparsing.h"
TARGET_HASWELL
namespace simdjson {
namespace haswell {
#include "generic/stage2/logger.h"
#include "generic/stage2/atomparsing.h"
#include "generic/stage2/structural_iterator.h"
#include "generic/stage2/structural_parser.h"
#include "generic/stage2/streaming_structural_parser.h"
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
error_code err = stage1(_buf, _len, false);
if (err) { return err; }
return stage2(_doc);
} }
} // namespace haswell } // namespace haswell
} // namespace simdjson } // namespace simdjson
UNTARGET_REGION UNTARGET_REGION

View File

@ -0,0 +1,15 @@
#ifndef SIMDJSON_HASWELL_DOM_PARSER_IMPLEMENTATION_H
#define SIMDJSON_HASWELL_DOM_PARSER_IMPLEMENTATION_H
// Haswell flavour of the DOM parser implementation declarations.
#include "simdjson.h"
#include "isadetection.h"
namespace simdjson {
namespace haswell {
// The generic header is included *inside* the namespace on purpose: whatever
// it declares ends up in simdjson::haswell rather than at global scope.
// NOTE(review): presumably this is where the haswell dom_parser_implementation
// class comes from -- confirm against generic/dom_parser_implementation.h.
#include "generic/dom_parser_implementation.h"
} // namespace haswell
} // namespace simdjson
#endif // SIMDJSON_HASWELL_DOM_PARSER_IMPLEMENTATION_H

View File

@ -0,0 +1,25 @@
#include "simdjson.h"
#include "haswell/implementation.h"
#include "haswell/dom_parser_implementation.h"
TARGET_HASWELL
namespace simdjson {
namespace haswell {
/**
 * Allocates a haswell dom_parser_implementation and hands ownership to the
 * caller through dst.
 *
 * @param capacity  forwarded to set_capacity() on the new object.
 * @param max_depth forwarded to set_max_depth() on the new object.
 * @param dst       receives the new object; left empty when allocation fails.
 * @return SUCCESS once the object has been created and configured, MEMALLOC
 *         when the non-throwing allocation yields a null pointer.
 */
WARN_UNUSED error_code implementation::create_dom_parser_implementation(
  size_t capacity,
  size_t max_depth,
  std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept {
  // This function is noexcept, so allocate with nothrow and test the pointer
  // instead of relying on std::bad_alloc.
  dst.reset( new (std::nothrow) dom_parser_implementation() );
  if (dst) {
    // NOTE(review): any status these setters may return is not inspected
    // here -- confirm that is intended.
    dst->set_capacity(capacity);
    dst->set_max_depth(max_depth);
    return SUCCESS;
  }
  return MEMALLOC;
}
} // namespace haswell
} // namespace simdjson
UNTARGET_REGION

View File

@ -7,8 +7,6 @@
namespace simdjson { namespace simdjson {
namespace haswell { namespace haswell {
using namespace simdjson::dom;
class implementation final : public simdjson::implementation { class implementation final : public simdjson::implementation {
public: public:
really_inline implementation() : simdjson::implementation( really_inline implementation() : simdjson::implementation(
@ -16,11 +14,12 @@ public:
"Intel/AMD AVX2", "Intel/AMD AVX2",
instruction_set::AVX2 | instruction_set::PCLMULQDQ | instruction_set::BMI1 | instruction_set::BMI2 instruction_set::AVX2 | instruction_set::PCLMULQDQ | instruction_set::BMI1 | instruction_set::BMI2
) {} ) {}
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, parser &parser) const noexcept final; WARN_UNUSED error_code create_dom_parser_implementation(
size_t capacity,
size_t max_length,
std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept final;
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept final;
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, parser &parser) const noexcept final;
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, parser &parser, size_t &next_json) const noexcept final;
}; };
} // namespace haswell } // namespace haswell

View File

@ -1,18 +0,0 @@
#include "simdjson.h"
#include "haswell/implementation.h"
#include "haswell/stringparsing.h"
#include "haswell/numberparsing.h"
TARGET_HASWELL
namespace simdjson {
namespace haswell {
#include "generic/stage2/logger.h"
#include "generic/stage2/atomparsing.h"
#include "generic/stage2/structural_iterator.h"
#include "generic/stage2/structural_parser.h"
#include "generic/stage2/streaming_structural_parser.h"
} // namespace haswell
} // namespace simdjson
UNTARGET_REGION

View File

@ -38,21 +38,16 @@ public:
const std::string &name() const noexcept final { return set_best()->name(); } const std::string &name() const noexcept final { return set_best()->name(); }
const std::string &description() const noexcept final { return set_best()->description(); } const std::string &description() const noexcept final { return set_best()->description(); }
uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); } uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); }
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::parser &parser) const noexcept final { WARN_UNUSED error_code create_dom_parser_implementation(
return set_best()->parse(buf, len, parser); size_t capacity,
size_t max_length,
std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept final {
return set_best()->create_dom_parser_implementation(capacity, max_length, dst);
} }
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final { WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final {
return set_best()->minify(buf, len, dst, dst_len); return set_best()->minify(buf, len, dst, dst_len);
} }
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) const noexcept final {
return set_best()->stage1(buf, len, parser, streaming);
}
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser) const noexcept final {
return set_best()->stage2(buf, len, parser);
}
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser, size_t &next_json) const noexcept final {
return set_best()->stage2(buf, len, parser, next_json);
}
really_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {} really_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {}
private: private:
@ -81,21 +76,16 @@ const std::initializer_list<const implementation *> available_implementation_poi
// So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no support // So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no support
class unsupported_implementation final : public implementation { class unsupported_implementation final : public implementation {
public: public:
WARN_UNUSED error_code parse(const uint8_t *, size_t, dom::parser &) const noexcept final { WARN_UNUSED error_code create_dom_parser_implementation(
size_t,
size_t,
std::unique_ptr<internal::dom_parser_implementation>&
) const noexcept final {
return UNSUPPORTED_ARCHITECTURE; return UNSUPPORTED_ARCHITECTURE;
} }
WARN_UNUSED error_code minify(const uint8_t *, size_t, uint8_t *, size_t &) const noexcept final { WARN_UNUSED error_code minify(const uint8_t *, size_t, uint8_t *, size_t &) const noexcept final {
return UNSUPPORTED_ARCHITECTURE; return UNSUPPORTED_ARCHITECTURE;
} }
WARN_UNUSED error_code stage1(const uint8_t *, size_t, dom::parser &, bool) const noexcept final {
return UNSUPPORTED_ARCHITECTURE;
}
WARN_UNUSED error_code stage2(const uint8_t *, size_t, dom::parser &) const noexcept final {
return UNSUPPORTED_ARCHITECTURE;
}
WARN_UNUSED error_code stage2(const uint8_t *, size_t, dom::parser &, size_t &) const noexcept final {
return UNSUPPORTED_ARCHITECTURE;
}
unsupported_implementation() : implementation("unsupported", "Unsupported CPU (no detected SIMD instructions)", 0) {} unsupported_implementation() : implementation("unsupported", "Unsupported CPU (no detected SIMD instructions)", 0) {}
}; };

View File

@ -13,20 +13,20 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS
#include "simdprune_tables.h" #include "simdprune_tables.h"
#if SIMDJSON_IMPLEMENTATION_ARM64 #if SIMDJSON_IMPLEMENTATION_ARM64
#include "arm64/stage1.cpp" #include "arm64/implementation.cpp"
#include "arm64/stage2.cpp" #include "arm64/dom_parser_implementation.cpp"
#endif #endif
#if SIMDJSON_IMPLEMENTATION_FALLBACK #if SIMDJSON_IMPLEMENTATION_FALLBACK
#include "fallback/stage1.cpp" #include "fallback/implementation.cpp"
#include "fallback/stage2.cpp" #include "fallback/dom_parser_implementation.cpp"
#endif #endif
#if SIMDJSON_IMPLEMENTATION_HASWELL #if SIMDJSON_IMPLEMENTATION_HASWELL
#include "haswell/stage1.cpp" #include "haswell/implementation.cpp"
#include "haswell/stage2.cpp" #include "haswell/dom_parser_implementation.cpp"
#endif #endif
#if SIMDJSON_IMPLEMENTATION_WESTMERE #if SIMDJSON_IMPLEMENTATION_WESTMERE
#include "westmere/stage1.cpp" #include "westmere/implementation.cpp"
#include "westmere/stage2.cpp" #include "westmere/dom_parser_implementation.cpp"
#endif #endif
SIMDJSON_POP_DISABLE_WARNINGS SIMDJSON_POP_DISABLE_WARNINGS

View File

@ -1,4 +1,10 @@
#include "simdjson.h" #include "simdjson.h"
#include "westmere/implementation.h"
#include "westmere/dom_parser_implementation.h"
//
// Stage 1
//
#include "westmere/bitmask.h" #include "westmere/bitmask.h"
#include "westmere/simd.h" #include "westmere/simd.h"
#include "westmere/bitmanipulation.h" #include "westmere/bitmanipulation.h"
@ -67,11 +73,38 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui
#include "generic/stage1/utf8_lookup2_algorithm.h" #include "generic/stage1/utf8_lookup2_algorithm.h"
#include "generic/stage1/json_structural_indexer.h" #include "generic/stage1/json_structural_indexer.h"
WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept { WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
return westmere::stage1::json_structural_indexer::index<64>(buf, len, parser, streaming); this->buf = _buf;
this->len = _len;
return westmere::stage1::json_structural_indexer::index<64>(_buf, _len, *this, streaming);
}
} // namespace westmere
} // namespace simdjson
UNTARGET_REGION
//
// Stage 2
//
#include "westmere/stringparsing.h"
#include "westmere/numberparsing.h"
TARGET_WESTMERE
namespace simdjson {
namespace westmere {
#include "generic/stage2/logger.h"
#include "generic/stage2/atomparsing.h"
#include "generic/stage2/structural_iterator.h"
#include "generic/stage2/structural_parser.h"
#include "generic/stage2/streaming_structural_parser.h"
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
error_code err = stage1(_buf, _len, false);
if (err) { return err; }
return stage2(_doc);
} }
} // namespace westmere } // namespace westmere
} // namespace simdjson } // namespace simdjson
UNTARGET_REGION UNTARGET_REGION

View File

@ -0,0 +1,15 @@
#ifndef SIMDJSON_WESTMERE_DOM_PARSER_IMPLEMENTATION_H
#define SIMDJSON_WESTMERE_DOM_PARSER_IMPLEMENTATION_H
// Westmere flavour of the DOM parser implementation declarations.
#include "simdjson.h"
#include "isadetection.h"
namespace simdjson {
namespace westmere {
// The generic header is included *inside* the namespace on purpose: whatever
// it declares ends up in simdjson::westmere rather than at global scope.
// NOTE(review): presumably this is where the westmere dom_parser_implementation
// class comes from -- confirm against generic/dom_parser_implementation.h.
#include "generic/dom_parser_implementation.h"
} // namespace westmere
} // namespace simdjson
#endif // SIMDJSON_WESTMERE_DOM_PARSER_IMPLEMENTATION_H

View File

@ -0,0 +1,25 @@
#include "simdjson.h"
#include "westmere/implementation.h"
#include "westmere/dom_parser_implementation.h"
TARGET_HASWELL
namespace simdjson {
namespace westmere {
WARN_UNUSED error_code implementation::create_dom_parser_implementation(
size_t capacity,
size_t max_depth,
std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept {
dst.reset( new (std::nothrow) dom_parser_implementation() );
if (!dst) { return MEMALLOC; }
dst->set_capacity(capacity);
dst->set_max_depth(max_depth);
return SUCCESS;
}
} // namespace westmere
} // namespace simdjson
UNTARGET_REGION

View File

@ -13,11 +13,12 @@ using namespace simdjson::dom;
class implementation final : public simdjson::implementation { class implementation final : public simdjson::implementation {
public: public:
really_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", instruction_set::SSE42 | instruction_set::PCLMULQDQ) {} really_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", instruction_set::SSE42 | instruction_set::PCLMULQDQ) {}
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, parser &parser) const noexcept final; WARN_UNUSED error_code create_dom_parser_implementation(
size_t capacity,
size_t max_length,
std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept final;
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept final;
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, parser &parser) const noexcept final;
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, parser &parser, size_t &next_json) const noexcept final;
}; };
} // namespace westmere } // namespace westmere

View File

@ -1,18 +0,0 @@
#include "simdjson.h"
#include "westmere/implementation.h"
#include "westmere/stringparsing.h"
#include "westmere/numberparsing.h"
TARGET_WESTMERE
namespace simdjson {
namespace westmere {
#include "generic/stage2/logger.h"
#include "generic/stage2/atomparsing.h"
#include "generic/stage2/structural_iterator.h"
#include "generic/stage2/structural_parser.h"
#include "generic/stage2/streaming_structural_parser.h"
} // namespace westmere
} // namespace simdjson
UNTARGET_REGION

View File

@ -176,7 +176,7 @@ stat_t simdjson_compute_stats(const simdjson::padded_string &p) {
s.non_ascii_byte_count = count_nonasciibytes( s.non_ascii_byte_count = count_nonasciibytes(
reinterpret_cast<const uint8_t *>(p.data()), p.size()); reinterpret_cast<const uint8_t *>(p.data()), p.size());
s.byte_count = p.size(); s.byte_count = p.size();
s.structural_indexes_count = parser.n_structural_indexes; s.structural_indexes_count = parser.implementation->n_structural_indexes;
// simdjson::document::iterator iter(doc); // simdjson::document::iterator iter(doc);
recurse(doc, s, 0); recurse(doc, s, 0);