Store all parser state in the implementation

This commit is contained in:
John Keiser 2020-06-01 12:14:09 -07:00
parent 86f8a4a9d2
commit 1aab4752e2
25 changed files with 329 additions and 20675 deletions

View File

@ -84,7 +84,7 @@ struct json_stats {
bytes = json.size();
blocks = bytes / BYTES_PER_BLOCK;
if (bytes % BYTES_PER_BLOCK > 0) { blocks++; } // Account for remainder block
structurals = parser.n_structural_indexes-1;
structurals = parser.implementation->n_structural_indexes-1;
// Calculate stats on blocks that will trigger utf-8 if statements / mispredictions
bool last_block_has_utf8 = false;
@ -141,7 +141,7 @@ struct json_stats {
for (size_t block=0; block<blocks; block++) {
// Count structurals in the block
int block_structurals=0;
while (structural < parser.n_structural_indexes && parser.structural_indexes[structural] < (block+1)*BYTES_PER_BLOCK) {
while (structural < parser.implementation->n_structural_indexes && parser.implementation->structural_indexes[structural] < (block+1)*BYTES_PER_BLOCK) {
block_structurals++;
structural++;
}
@ -320,7 +320,7 @@ struct benchmarker {
// Stage 1 (find structurals)
collector.start();
error = parser.implementation->stage1((const uint8_t *)json.data(), json.size(), parser, false);
error = parser.implementation->stage1((const uint8_t *)json.data(), json.size(), false);
event_count stage1_count = collector.end();
stage1 << stage1_count;
if (error) {
@ -334,7 +334,7 @@ struct benchmarker {
} else {
event_count stage2_count;
collector.start();
error = parser.implementation->stage2(parser);
error = parser.implementation->stage2(parser.doc);
if (error) {
exit_error(string("Failed to parse ") + filename + " during stage 2 parsing " + error_message(error));
}
@ -345,7 +345,7 @@ struct benchmarker {
// Calculate stats the first time we parse
if (stats == NULL) {
if (stage1_only) { // we need stage 2 once
error = parser.implementation->stage2(parser);
error = parser.implementation->stage2(parser.doc);
if (error) {
printf("Warning: failed to parse during stage 2. Unable to acquire statistics.\n");
}

View File

@ -106,7 +106,7 @@ stat_t simdjson_compute_stats(const simdjson::padded_string &p) {
answer.non_ascii_byte_count = count_nonasciibytes(
reinterpret_cast<const uint8_t *>(p.data()), p.size());
answer.byte_count = p.size();
answer.structural_indexes_count = parser.n_structural_indexes;
answer.structural_indexes_count = parser.implementation->n_structural_indexes;
simdjson_recurse(answer, doc);
return answer;
}
@ -163,7 +163,6 @@ int main(int argc, char *argv[]) {
s.true_count, s.false_count, s.byte_count, s.structural_indexes_count);
#ifdef __linux__
simdjson::dom::parser parser;
const simdjson::implementation &stage_parser = *simdjson::active_implementation;
simdjson::error_code alloc_error = parser.allocate(p.size());
if (alloc_error) {
std::cerr << alloc_error << std::endl;
@ -181,14 +180,14 @@ int main(int argc, char *argv[]) {
for (uint32_t i = 0; i < iterations; i++) {
unified.start();
// The default template is simdjson::architecture::NATIVE.
bool isok = (stage_parser.stage1((const uint8_t *)p.data(), p.size(), parser, false) == simdjson::SUCCESS);
bool isok = (parser.implementation->stage1((const uint8_t *)p.data(), p.size(), false) == simdjson::SUCCESS);
unified.end(results);
cy1 += results[0];
cl1 += results[1];
unified.start();
isok = isok && (stage_parser.stage2((const uint8_t *)p.data(), p.size(), parser) == simdjson::SUCCESS);
isok = isok && (parser.implementation->stage2(parser.doc) == simdjson::SUCCESS);
unified.end(results);
cy2 += results[0];

View File

@ -15,22 +15,6 @@
namespace simdjson {
namespace internal {
// Describes one open scope ([ or {): where it starts on the tape and how many
// elements it holds.
// Expectation: sizeof(scope_descriptor) = 64/8, i.e. two uint32_t fields = 8 bytes.
struct scope_descriptor {
uint32_t tape_index; // where on the tape the scope ([ or {) begins
uint32_t count; // how many elements are in the scope
}; // struct scope_descriptor
#ifdef SIMDJSON_USE_COMPUTED_GOTO
typedef void* ret_address;
#else
typedef char ret_address;
#endif
} // namespace internal
namespace dom {
class document_stream;
@ -68,14 +52,14 @@ public:
*
* @param other The parser to take. Its capacity is zeroed.
*/
parser(parser &&other) = default;
really_inline parser(parser &&other) noexcept;
parser(const parser &) = delete; ///< @private Disallow copying
/**
* Take another parser's buffers and state.
*
* @param other The parser to take. Its capacity is zeroed.
*/
parser &operator=(parser &&other) = default;
really_inline parser &operator=(parser &&other) noexcept;
parser &operator=(const parser &) = delete; ///< @private Disallow copying
/** Deallocate the JSON parser. */
@ -352,21 +336,6 @@ public:
/** @private [for benchmarking access] The implementation to use */
std::unique_ptr<internal::dom_parser_implementation> implementation{};
public:
/** @private Next location to write to in the tape */
uint32_t current_loc{0};
/** @private Number of structural indices passed from stage 1 to stage 2 */
uint32_t n_structural_indexes{0};
/** @private Structural indices passed from stage 1 to stage 2 */
std::unique_ptr<uint32_t[]> structural_indexes{};
/** @private Tape location of each open { or [ */
std::unique_ptr<internal::scope_descriptor[]> containing_scope{};
/** @private Return address of each open { or [ */
std::unique_ptr<internal::ret_address[]> ret_address{};
/** @private Use `if (parser.parse(...).error())` instead */
bool valid{false};
/** @private Use `parser.parse(...).error()` instead */
@ -405,20 +374,6 @@ private:
*/
size_t _max_capacity;
/**
* The maximum document length this parser supports.
*
* Buffers are large enough to handle any document up to this length.
*/
size_t _capacity{0};
/**
* The maximum depth (number of nested objects and arrays) supported by this parser.
*
* Defaults to DEFAULT_MAX_DEPTH.
*/
size_t _max_depth{0};
/**
* The loaded buffer (reused each time load() is called)
*/

View File

@ -11,7 +11,7 @@
namespace simdjson {
namespace dom {
class parser;
class document;
} // namespace dom
/**

View File

@ -34,18 +34,18 @@ namespace internal {
* */
inline uint32_t find_last_json_buf_idx(const uint8_t *buf, size_t size, const dom::parser &parser) {
// this function can be generally useful
if (parser.n_structural_indexes == 0)
if (parser.implementation->n_structural_indexes == 0)
return 0;
auto last_i = parser.n_structural_indexes - 1;
if (parser.structural_indexes[last_i] == size) {
auto last_i = parser.implementation->n_structural_indexes - 1;
if (parser.implementation->structural_indexes[last_i] == size) {
if (last_i == 0)
return 0;
last_i = parser.n_structural_indexes - 2;
last_i = parser.implementation->n_structural_indexes - 2;
}
auto arr_cnt = 0;
auto obj_cnt = 0;
for (auto i = last_i; i > 0; i--) {
auto idxb = parser.structural_indexes[i];
auto idxb = parser.implementation->structural_indexes[i];
switch (buf[idxb]) {
case ':':
case ',':
@ -63,7 +63,7 @@ inline uint32_t find_last_json_buf_idx(const uint8_t *buf, size_t size, const do
arr_cnt++;
break;
}
auto idxa = parser.structural_indexes[i - 1];
auto idxa = parser.implementation->structural_indexes[i - 1];
switch (buf[idxa]) {
case '{':
case '[':
@ -172,17 +172,17 @@ inline error_code document_stream::json_parse() noexcept {
if (_batch_size == 0) {
return simdjson::UTF8_ERROR;
}
auto stage1_is_ok = error_code(parser.implementation->stage1(buf(), _batch_size, parser, true));
auto stage1_is_ok = error_code(parser.implementation->stage1(buf(), _batch_size, true));
if (stage1_is_ok != simdjson::SUCCESS) {
return stage1_is_ok;
}
uint32_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser);
if (last_index == 0) {
if (parser.n_structural_indexes == 0) {
if (parser.implementation->n_structural_indexes == 0) {
return simdjson::EMPTY;
}
} else {
parser.n_structural_indexes = last_index + 1;
parser.implementation->n_structural_indexes = last_index + 1;
}
}
// the second thread is running or done.
@ -191,15 +191,15 @@ inline error_code document_stream::json_parse() noexcept {
if (stage1_is_ok_thread != simdjson::SUCCESS) {
return stage1_is_ok_thread;
}
std::swap(parser.structural_indexes, parser_thread.structural_indexes);
parser.n_structural_indexes = parser_thread.n_structural_indexes;
std::swap(parser.implementation->structural_indexes, parser_thread.implementation->structural_indexes);
parser.implementation->n_structural_indexes = parser_thread.implementation->n_structural_indexes;
advance(last_json_buffer_loc);
n_bytes_parsed += last_json_buffer_loc;
}
// let us decide whether we will start a new thread
if (remaining() - _batch_size > 0) {
last_json_buffer_loc =
parser.structural_indexes[internal::find_last_json_buf_idx(buf(), _batch_size, parser)];
parser.implementation->structural_indexes[internal::find_last_json_buf_idx(buf(), _batch_size, parser)];
_batch_size = (std::min)(_batch_size, remaining() - last_json_buffer_loc);
if (_batch_size > 0) {
_batch_size = internal::trimmed_length_safe_utf8(
@ -214,22 +214,22 @@ inline error_code document_stream::json_parse() noexcept {
// this->stage1_is_ok_thread
// there is only one thread that may write to this value
stage_1_thread = std::thread([this, b, bs] {
this->stage1_is_ok_thread = error_code(parser_thread.implementation->stage1(b, bs, this->parser_thread, true));
this->stage1_is_ok_thread = error_code(parser_thread.implementation->stage1(b, bs, true));
});
}
}
next_json = 0;
load_next_batch = false;
} // load_next_batch
error_code res = parser.implementation->stage2(buf(), remaining(), parser, next_json);
error_code res = parser.implementation->stage2(buf(), remaining(), parser.doc, next_json);
if (res == simdjson::SUCCESS_AND_HAS_MORE) {
n_parsed_docs++;
current_buffer_loc = parser.structural_indexes[next_json];
current_buffer_loc = parser.implementation->structural_indexes[next_json];
load_next_batch = (current_buffer_loc == last_json_buffer_loc);
} else if (res == simdjson::SUCCESS) {
n_parsed_docs++;
if (remaining() > _batch_size) {
current_buffer_loc = parser.structural_indexes[next_json - 1];
current_buffer_loc = parser.implementation->structural_indexes[next_json - 1];
load_next_batch = true;
res = simdjson::SUCCESS_AND_HAS_MORE;
}
@ -249,28 +249,28 @@ inline error_code document_stream::json_parse() noexcept {
n_bytes_parsed += current_buffer_loc;
_batch_size = (std::min)(_batch_size, remaining());
_batch_size = internal::trimmed_length_safe_utf8((const char *)buf(), _batch_size);
auto stage1_is_ok = (error_code)parser.implementation->stage1(buf(), _batch_size, parser, true);
auto stage1_is_ok = (error_code)parser.implementation->stage1(buf(), _batch_size, true);
if (stage1_is_ok != simdjson::SUCCESS) {
return stage1_is_ok;
}
uint32_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser);
if (last_index == 0) {
if (parser.n_structural_indexes == 0) {
if (parser.implementation->n_structural_indexes == 0) {
return EMPTY;
}
} else {
parser.n_structural_indexes = last_index + 1;
parser.implementation->n_structural_indexes = last_index + 1;
}
load_next_batch = false;
} // load_next_batch
error_code res = parser.implementation->stage2(buf(), remaining(), parser, next_json);
error_code res = parser.implementation->stage2(buf(), remaining(), parser.doc, next_json);
if (likely(res == simdjson::SUCCESS_AND_HAS_MORE)) {
n_parsed_docs++;
current_buffer_loc = parser.structural_indexes[next_json];
current_buffer_loc = parser.implementation->structural_indexes[next_json];
} else if (res == simdjson::SUCCESS) {
n_parsed_docs++;
if (remaining() > _batch_size) {
current_buffer_loc = parser.structural_indexes[next_json - 1];
current_buffer_loc = parser.implementation->structural_indexes[next_json - 1];
next_json = 1;
load_next_batch = true;
res = simdjson::SUCCESS_AND_HAS_MORE;

View File

@ -17,8 +17,11 @@ namespace dom {
//
really_inline parser::parser(size_t max_capacity) noexcept
: _max_capacity{max_capacity},
loaded_bytes(nullptr, &aligned_free_char)
{}
loaded_bytes(nullptr, &aligned_free_char) {
}
really_inline parser::parser(parser &&other) noexcept = default;
really_inline parser &parser::operator=(parser &&other) noexcept = default;
inline bool parser::is_valid() const noexcept { return valid; }
inline int parser::get_error_code() const noexcept { return error; }
inline std::string parser::get_error_message() const noexcept { return error_message(error); }
@ -101,15 +104,12 @@ inline simdjson_result<element> parser::parse(const uint8_t *buf, size_t len, bo
memcpy((void *)buf, tmp_buf, len);
}
code = implementation->parse(buf, len, *this);
code = implementation->parse(buf, len, doc);
if (realloc_if_needed) {
aligned_free((void *)buf); // must free before we exit
}
if (code) { return code; }
// We're indicating validity via the simdjson_result<element>, so set the parse state back to invalid
valid = false;
error = UNINITIALIZED;
return doc.root();
}
really_inline simdjson_result<element> parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept {
@ -136,77 +136,31 @@ inline document_stream parser::parse_many(const padded_string &s, size_t batch_s
}
really_inline size_t parser::capacity() const noexcept {
return _capacity;
return implementation ? implementation->capacity() : 0;
}
really_inline size_t parser::max_capacity() const noexcept {
return _max_capacity;
}
really_inline size_t parser::max_depth() const noexcept {
return _max_depth;
return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH;
}
WARN_UNUSED
inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept {
//
// If capacity has changed, reallocate capacity-based buffers
// Reallocate implementation and document if needed
//
if (_capacity != capacity || _max_depth != max_depth) {
error_code err;
if (_capacity != capacity) {
//
// Reallocate the document
//
err = doc.allocate(capacity);
//
// Initialize stage 1 output
//
size_t max_structures = ROUNDUP_N(capacity, 64) + 2 + 7;
structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); // TODO realloc
if (!structural_indexes) { _capacity = _max_depth = 0; return err; }
//
// Reallocate implementation capacity
//
if (implementation && !err) { err = implementation->set_capacity(capacity); }
}
if (_max_depth != max_depth && !err) {
//
// Reallocate stage 2 state
//
containing_scope.reset(new (std::nothrow) internal::scope_descriptor[max_depth]); // TODO realloc
ret_address.reset(new (std::nothrow) internal::ret_address[max_depth]);
if (!ret_address || !containing_scope) {
err = MEMALLOC;
}
//
// Reallocate implementation max depth
//
if (implementation && !err) { err = implementation->set_max_depth(max_depth); }
}
//
// Create the implementation if it doesn't already exist
//
if (!implementation && !err) {
if (implementation) {
err = implementation->allocate(capacity, max_depth);
} else {
err = simdjson::active_implementation->create_dom_parser_implementation(capacity, max_depth, implementation);
}
if (err) { _capacity = _max_depth = 0; return err; }
_capacity = capacity;
_max_depth = max_depth;
//
// If capacity hasn't changed, but the document was taken, allocate a new document.
//
} else if (!doc.tape) {
error_code err = doc.allocate(capacity);
if (err) { return err; }
}
if (implementation->capacity() != capacity || !doc.tape) {
return doc.allocate(capacity);
}
return SUCCESS;
}
@ -215,24 +169,24 @@ inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcep
return !allocate(capacity, max_depth);
}
really_inline void parser::set_max_capacity(size_t max_capacity) noexcept {
_max_capacity = max_capacity;
}
inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept {
// If we don't have enough capacity, (try to) automatically bump it.
// If the document was taken, reallocate that too.
// Both in one if statement to minimize unlikely branching.
if (unlikely(desired_capacity > capacity() || !doc.tape)) {
if (unlikely(capacity() < desired_capacity || !doc.tape)) {
if (desired_capacity > max_capacity()) {
return error = CAPACITY;
}
return allocate(desired_capacity, _max_depth > 0 ? _max_depth : DEFAULT_MAX_DEPTH);
return allocate(desired_capacity, max_depth());
}
return SUCCESS;
}
really_inline void parser::set_max_capacity(size_t max_capacity) noexcept {
_max_capacity = max_capacity;
}
} // namespace dom
} // namespace simdjson

View File

@ -8,16 +8,32 @@
namespace simdjson {
namespace dom {
class parser;
class document;
} // namespace dom
namespace internal {
// Describes one open scope ([ or {): where it starts on the tape and how many
// elements it holds.
struct scope_descriptor {
  uint32_t tape_index; // where on the tape the scope ([ or {) begins
  uint32_t count; // how many elements are in the scope
}; // struct scope_descriptor
// The layout is expected to be exactly two 32-bit fields; enforce it at compile
// time instead of relying on a comment.
static_assert(sizeof(scope_descriptor) == 64/8, "scope_descriptor is expected to occupy exactly 8 bytes");
#ifdef SIMDJSON_USE_COMPUTED_GOTO
typedef void* ret_address;
#else
typedef char ret_address;
#endif
/**
* An implementation of simdjson's DOM parser for a particular CPU architecture.
*
* This class is expected to be accessed only by pointer, and never move in memory (though the
* pointer can move).
*/
class dom_parser_implementation {
public:
/**
* @private For internal implementation use
*
@ -29,10 +45,9 @@ public:
*
* @param buf The json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
* @param len The length of the json document.
* @param parser The parser object. TODO replace this with dom::document & when state is moved to the implementation.
* @return The error code, or SUCCESS if there was no error.
*/
WARN_UNUSED virtual error_code parse(const uint8_t *buf, size_t len, dom::parser &parser) noexcept = 0;
WARN_UNUSED virtual error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept = 0;
/**
* @private For internal implementation use
@ -45,25 +60,24 @@ public:
*
* @param buf The json document to parse.
* @param len The length of the json document.
* @param parser The parser object. TODO replace this with structural_indexes & when state is moved to the implementation.
* @param streaming Whether this is being called by parser::parse_many.
* @return The error code, or SUCCESS if there was no error.
*/
WARN_UNUSED virtual error_code stage1(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) noexcept = 0;
WARN_UNUSED virtual error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept = 0;
/**
* @private For internal implementation use
*
* Stage 2 of the document parser.
*
* Guaranteed only to be called after stage1(), with the same buf/len as stage1().
* Called after stage1().
*
* Overridden by each implementation.
*
* @param parser The parser object. TODO replace this with dom::document & when state is moved to the implementation.
* @param doc The document to output to.
* @return The error code, or SUCCESS if there was no error.
*/
WARN_UNUSED virtual error_code stage2(dom::parser &parser) noexcept = 0;
WARN_UNUSED virtual error_code stage2(dom::document &doc) noexcept = 0;
/**
* @private For internal implementation use
@ -75,11 +89,11 @@ public:
*
* @param buf The json document to parse.
* @param len The length of the json document.
* @param parser The parser object. TODO replace this with dom::document & when state is moved to the implementation.
* @param doc The document to output to.
* @param next_json The next structural index. Start this at 0 the first time, and it will be updated to the next value to pass each time.
* @return The error code, SUCCESS if there was no error, or SUCCESS_AND_HAS_MORE if there was no error and stage2 can be called again.
*/
WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser, size_t &next_json) noexcept = 0;
WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, dom::document &doc, size_t &next_json) noexcept = 0;
/**
* Change the capacity of this parser.
@ -103,10 +117,86 @@ public:
*/
virtual error_code set_max_depth(size_t max_depth) noexcept = 0;
/**
* Deallocate this parser.
*/
virtual ~dom_parser_implementation() = default;
/** Next location to write to in the tape */
uint32_t current_loc{0};
/** Number of structural indices passed from stage 1 to stage 2 */
uint32_t n_structural_indexes{0};
/** Structural indices passed from stage 1 to stage 2 */
std::unique_ptr<uint32_t[]> structural_indexes{};
/** Tape location of each open { or [ */
std::unique_ptr<internal::scope_descriptor[]> containing_scope{};
/** Return address of each open { or [ */
std::unique_ptr<internal::ret_address[]> ret_address{};
/**
* The largest document this parser can support without reallocating.
*
* @return Current capacity, in bytes.
*/
really_inline size_t capacity() const noexcept;
/**
* The maximum level of nested object and arrays supported by this parser.
*
* @return Maximum depth, in levels of nesting.
*/
really_inline size_t max_depth() const noexcept;
/**
* Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
* and `max_depth` depth.
*
* @param capacity The new capacity.
* @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH.
* @return The error, if there is one.
*/
WARN_UNUSED inline error_code allocate(size_t capacity, size_t max_depth) noexcept;
protected:
/**
* The maximum document length this parser supports.
*
* Buffers are large enough to handle any document up to this length.
*/
size_t _capacity{0};
/**
* The maximum depth (number of nested objects and arrays) supported by this parser.
*
* Defaults to DEFAULT_MAX_DEPTH.
*/
size_t _max_depth{0};
}; // class dom_parser_implementation
// Returns the stored _capacity: the maximum document length this parser supports.
really_inline size_t dom_parser_implementation::capacity() const noexcept {
return _capacity;
}
// Returns the stored _max_depth: the maximum number of nested objects and arrays
// this parser supports.
really_inline size_t dom_parser_implementation::max_depth() const noexcept {
return _max_depth;
}
WARN_UNUSED
inline error_code dom_parser_implementation::allocate(size_t capacity, size_t max_depth) noexcept {
  // Adjust the depth first, then the capacity. Each setter is only called when
  // the requested value differs from the stored one, and the first failure is
  // returned immediately.
  if (_max_depth != max_depth) {
    const error_code depth_error = set_max_depth(max_depth);
    if (depth_error) { return depth_error; }
  }
  // `capacity` the parameter shadows the accessor, hence the explicit this->.
  if (this->capacity() != capacity) {
    const error_code capacity_error = set_capacity(capacity);
    if (capacity_error) { return capacity_error; }
  }
  return SUCCESS;
}
} // namespace internal
} // namespace simdjson

View File

@ -1,2 +0,0 @@
Try :
c++ -O3 -std=c++17 -pthread -o amalgamate_demo amalgamate_demo.cpp && ./amalgamate_demo ../jsonexamples/twitter.json ../jsonexamples/amazon_cellphones.ndjson

View File

@ -1,42 +0,0 @@
/* auto-generated on Thu 21 May 2020 14:01:15 EDT. Do not edit! */
#include <iostream>
#include "simdjson.h"
#include "simdjson.cpp"
/**
 * Demo driver.
 *
 * Loads the JSON file named by argv[1]. If a second file name is given, it then
 * iterates over every document in argv[2] with load_many.
 *
 * @return EXIT_SUCCESS when parsing succeeds, EXIT_FAILURE on a missing argument
 *         or a parse error.
 */
int main(int argc, char *argv[]) {
  if(argc < 2) {
    std::cerr << "Please specify at least one file name. " << std::endl;
    // Without a file name argv[1] is not a usable path; stop here instead of
    // falling through and handing it to the parser.
    return EXIT_FAILURE;
  }
  const char * filename = argv[1];
  simdjson::dom::parser parser;
  simdjson::error_code error;
  UNUSED simdjson::dom::element elem;
  parser.load(filename).tie(elem, error); // do the parsing
  if (error) {
    std::cout << "parse failed" << std::endl;
    std::cout << "error code: " << error << std::endl;
    std::cout << error << std::endl;
    return EXIT_FAILURE;
  } else {
    std::cout << "parse valid" << std::endl;
  }
  if(argc == 2) {
    // Only one file was given: nothing more to do.
    return EXIT_SUCCESS;
  }

  // parse_many
  const char * filename2 = argv[2];
  for (auto result : parser.load_many(filename2)) {
    // `error` ends up holding the status of the last result yielded by the stream.
    error = result.error();
  }
  if (error) {
    std::cout << "parse_many failed" << std::endl;
    std::cout << "error code: " << error << std::endl;
    std::cout << error << std::endl;
    return EXIT_FAILURE;
  } else {
    std::cout << "parse_many valid" << std::endl;
  }
  return EXIT_SUCCESS;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -84,10 +84,10 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui
#include "generic/stage1/utf8_lookup2_algorithm.h"
#include "generic/stage1/json_structural_indexer.h"
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, parser &parser, bool streaming) noexcept {
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
this->buf = _buf;
this->len = _len;
return arm64::stage1::json_structural_indexer::index<64>(_buf, _len, parser, streaming);
return arm64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming);
}
} // namespace arm64
@ -109,5 +109,11 @@ namespace arm64 {
#include "generic/stage2/structural_parser.h"
#include "generic/stage2/streaming_structural_parser.h"
// Parses a whole document: stage 1 in non-streaming mode, then stage 2 into _doc.
// A stage 1 failure is returned as-is and stage 2 is not run.
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
  const error_code stage1_error = stage1(_buf, _len, false);
  return stage1_error ? stage1_error : stage2(_doc);
}
} // namespace arm64
} // namespace simdjson

View File

@ -11,38 +11,40 @@ class dom_parser_implementation final : public internal::dom_parser_implementati
public:
const uint8_t *buf{}; // Buffer passed to stage 1
size_t len{0}; // Length passed to stage 1
dom::document *doc{}; // Document passed to stage 2
really_inline dom_parser_implementation();
dom_parser_implementation(const dom_parser_implementation &) = delete;
dom_parser_implementation & operator=(const dom_parser_implementation &) = delete;
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::parser &parser) noexcept final;
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) noexcept final;
WARN_UNUSED error_code stage2(dom::parser &parser) noexcept final;
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser, size_t &next_json) noexcept final;
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final;
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept final;
WARN_UNUSED error_code stage2(dom::document &doc) noexcept final;
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::document &doc, size_t &next_json) noexcept final;
WARN_UNUSED error_code set_capacity(size_t capacity) noexcept final;
WARN_UNUSED error_code set_max_depth(size_t max_depth) noexcept final;
};
#include "generic/stage1/allocate.h"
#include "generic/stage2/allocate.h"
really_inline dom_parser_implementation::dom_parser_implementation() {}
// Leaving these here so they can be inlined if so desired
WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t) noexcept {
WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept {
error_code err = stage1::allocate::set_capacity(*this, capacity);
if (err) { _capacity = 0; return err; }
_capacity = capacity;
return SUCCESS;
}
WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t) noexcept {
WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept {
error_code err = stage2::allocate::set_max_depth(*this, max_depth);
if (err) { _max_depth = 0; return err; }
_max_depth = max_depth;
return SUCCESS;
}
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::parser &parser) noexcept {
error_code code = stage1(_buf, _len, parser, false);
if (!code) {
code = stage2(_buf, _len, parser);
}
return code;
}
} // namespace arm64
} // namespace simdjson

View File

@ -12,8 +12,13 @@ namespace stage1 {
class structural_scanner {
public:
really_inline structural_scanner(const uint8_t *_buf, uint32_t _len, dom::parser &_parser, bool _streaming)
: buf{_buf}, next_structural_index{_parser.structural_indexes.get()}, parser{_parser}, idx{0}, len{_len}, error{SUCCESS}, streaming{_streaming} {}
really_inline structural_scanner(dom_parser_implementation &_parser, bool _streaming)
: buf{_parser.buf},
next_structural_index{_parser.structural_indexes.get()},
parser{_parser},
len{static_cast<uint32_t>(_parser.len)},
streaming{_streaming} {
}
really_inline void add_structural() {
*next_structural_index = idx;
@ -135,23 +140,20 @@ really_inline error_code scan() {
private:
const uint8_t *buf;
uint32_t *next_structural_index;
dom::parser &parser;
uint32_t idx;
dom_parser_implementation &parser;
uint32_t len;
error_code error;
uint32_t idx{0};
error_code error{SUCCESS};
bool streaming;
}; // structural_scanner
} // namespace stage1
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, dom::parser &parser, bool streaming) noexcept {
if (unlikely(_len > parser.capacity())) {
return CAPACITY;
}
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
this->buf = _buf;
this->len = _len;
stage1::structural_scanner scanner(_buf, uint32_t(_len), parser, streaming);
stage1::structural_scanner scanner(*this, streaming);
return scanner.scan();
}
@ -229,5 +231,11 @@ namespace fallback {
#include "generic/stage2/structural_parser.h"
#include "generic/stage2/streaming_structural_parser.h"
// Parses a whole document: stage 1 in non-streaming mode, then stage 2 into _doc.
// A stage 1 failure is returned as-is and stage 2 is not run.
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
  const error_code stage1_error = stage1(_buf, _len, false);
  return stage1_error ? stage1_error : stage2(_doc);
}
} // namespace fallback
} // namespace simdjson

View File

@ -11,38 +11,40 @@ class dom_parser_implementation final : public internal::dom_parser_implementati
public:
const uint8_t *buf{}; // Buffer passed to stage 1
size_t len{0}; // Length passed to stage 1
dom::document *doc{}; // Document passed to stage 2
really_inline dom_parser_implementation();
dom_parser_implementation(const dom_parser_implementation &) = delete;
dom_parser_implementation & operator=(const dom_parser_implementation &) = delete;
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::parser &parser) noexcept final;
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) noexcept final;
WARN_UNUSED error_code stage2(dom::parser &parser) noexcept final;
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser, size_t &next_json) noexcept final;
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final;
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept final;
WARN_UNUSED error_code stage2(dom::document &doc) noexcept final;
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::document &doc, size_t &next_json) noexcept final;
WARN_UNUSED error_code set_capacity(size_t capacity) noexcept final;
WARN_UNUSED error_code set_max_depth(size_t max_depth) noexcept final;
};
#include "generic/stage1/allocate.h"
#include "generic/stage2/allocate.h"
really_inline dom_parser_implementation::dom_parser_implementation() {}
// Leaving these here so they can be inlined if so desired
WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t) noexcept {
WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept {
error_code err = stage1::allocate::set_capacity(*this, capacity);
if (err) { _capacity = 0; return err; }
_capacity = capacity;
return SUCCESS;
}
WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t) noexcept {
WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept {
error_code err = stage2::allocate::set_max_depth(*this, max_depth);
if (err) { _max_depth = 0; return err; }
_max_depth = max_depth;
return SUCCESS;
}
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::parser &parser) noexcept {
error_code code = stage1(_buf, _len, parser, false);
if (!code) {
code = stage2(parser);
}
return code;
}
} // namespace fallback
} // namespace simdjson

View File

@ -0,0 +1,15 @@
namespace stage1 {
namespace allocate {

//
// Allocates the stage 1 output buffer (structural_indexes) in the parser.
//
// The buffer is sized from `capacity` rounded up to a multiple of 64, plus 2 + 7
// extra slots. Returns MEMALLOC if the nothrow allocation fails, SUCCESS otherwise.
//
really_inline error_code set_capacity(internal::dom_parser_implementation &parser, size_t capacity) {
  const size_t structural_slots = ROUNDUP_N(capacity, 64) + 2 + 7;
  parser.structural_indexes.reset( new (std::nothrow) uint32_t[structural_slots] );
  return parser.structural_indexes ? SUCCESS : MEMALLOC;
}

} // namespace allocate
} // namespace stage1

View File

@ -58,7 +58,7 @@ public:
class json_structural_indexer {
public:
template<size_t STEP_SIZE>
static error_code index(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) noexcept;
static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool streaming) noexcept;
private:
really_inline json_structural_indexer(uint32_t *structural_indexes)
@ -66,7 +66,7 @@ private:
template<size_t STEP_SIZE>
really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
really_inline void next(simd::simd8x64<uint8_t> in, json_block block, size_t idx);
really_inline error_code finish(dom::parser &parser, size_t idx, size_t len, bool streaming);
really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool streaming);
json_scanner scanner{};
utf8_checker checker{};
@ -83,7 +83,7 @@ really_inline void json_structural_indexer::next(simd::simd8x64<uint8_t> in, jso
unescaped_chars_error |= block.non_quote_inside_string(unescaped);
}
really_inline error_code json_structural_indexer::finish(dom::parser &parser, size_t idx, size_t len, bool streaming) {
really_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, bool streaming) {
// Write out the final iteration's structurals
indexer.write(uint32_t(idx-64), prev_structurals);
@ -155,7 +155,7 @@ really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_b
// The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
// you may want to call on a function like trimmed_length_safe_utf8.
template<size_t STEP_SIZE>
error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) noexcept {
error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool streaming) noexcept {
if (unlikely(len > parser.capacity())) { return CAPACITY; }
buf_block_reader<STEP_SIZE> reader(buf, len);

View File

@ -0,0 +1,18 @@
namespace stage2 {
namespace allocate {

//
// Allocates the stage 2 per-depth state on the parser: one scope descriptor and one
// return address per nesting level, max_depth entries each.
//
// Both allocations are always attempted; MEMALLOC is returned if either one failed,
// SUCCESS otherwise.
//
really_inline error_code set_max_depth(dom_parser_implementation &parser, size_t max_depth) {
  parser.containing_scope.reset(new (std::nothrow) internal::scope_descriptor[max_depth]);
  parser.ret_address.reset(new (std::nothrow) internal::ret_address[max_depth]);
  const bool allocated = parser.ret_address && parser.containing_scope;
  return allocated ? SUCCESS : MEMALLOC;
}

} // namespace allocate
} // namespace stage2

View File

@ -1,7 +1,7 @@
namespace stage2 {
struct streaming_structural_parser: structural_parser {
really_inline streaming_structural_parser(const uint8_t *buf, size_t len, dom::parser &_doc_parser, uint32_t next_structural) : structural_parser(buf, len, _doc_parser, next_structural) {}
really_inline streaming_structural_parser(dom_parser_implementation &_parser, uint32_t next_structural) : structural_parser(_parser, next_structural) {}
// override to add streaming
WARN_UNUSED really_inline error_code start(ret_address finish_parser) {
@ -44,10 +44,12 @@ struct streaming_structural_parser: structural_parser {
* The JSON is parsed to a tape, see the accompanying tape.md file
* for documentation.
***********/
WARN_UNUSED error_code dom_parser_implementation::stage2(const uint8_t *_buf, size_t _len, dom::parser &doc_parser, size_t &next_json) noexcept {
WARN_UNUSED error_code dom_parser_implementation::stage2(const uint8_t *_buf, size_t _len, dom::document &_doc, size_t &next_json) noexcept {
this->buf = _buf;
this->len = _len;
this->doc = &_doc;
static constexpr stage2::unified_machine_addresses addresses = INIT_ADDRESSES();
stage2::streaming_structural_parser parser(_buf, _len, doc_parser, uint32_t(next_json));
stage2::streaming_structural_parser parser(*this, uint32_t(next_json));
error_code result = parser.start(addresses.finish);
if (result) { return result; }
//

View File

@ -48,41 +48,39 @@ struct unified_machine_addresses {
#define FAIL_IF(EXPR) { if (EXPR) { return addresses.error; } }
// Writes parsed numbers onto the document tape at parser.current_loc.
// Every number occupies two consecutive tape slots: a type-tagged word written by append_tape(),
// followed by the raw 64-bit payload.
//
// NOTE(review): most statements below appear twice, once as `parser.doc.` and once as
// `parser.doc->`, and the `parser` member is declared with two different types. This looks like
// before/after residue of a change -- confirm that only the `dom_parser_implementation` /
// `doc->` variants should remain.
struct number_writer {
dom::parser &parser;
dom_parser_implementation &parser;
// Appends an INT64 tag word, then copies the signed value bit-for-bit into the next slot.
really_inline void write_s64(int64_t value) noexcept {
append_tape(0, internal::tape_type::INT64);
std::memcpy(&parser.doc.tape[parser.current_loc], &value, sizeof(value));
std::memcpy(&parser.doc->tape[parser.current_loc], &value, sizeof(value));
++parser.current_loc;
}
// Appends a UINT64 tag word, then stores the unsigned value in the next slot.
really_inline void write_u64(uint64_t value) noexcept {
append_tape(0, internal::tape_type::UINT64);
parser.doc.tape[parser.current_loc++] = value;
parser.doc->tape[parser.current_loc++] = value;
}
// Appends a DOUBLE tag word, then copies the double's bytes into the next slot.
// The static_assert checks that a double is exactly as wide as one tape entry.
really_inline void write_double(double value) noexcept {
append_tape(0, internal::tape_type::DOUBLE);
static_assert(sizeof(value) == sizeof(parser.doc.tape[parser.current_loc]), "mismatch size");
memcpy(&parser.doc.tape[parser.current_loc++], &value, sizeof(double));
// doc.tape[doc.current_loc++] = *((uint64_t *)&d);
static_assert(sizeof(value) == sizeof(parser.doc->tape[parser.current_loc]), "mismatch size");
memcpy(&parser.doc->tape[parser.current_loc++], &value, sizeof(double));
// doc->tape[doc->current_loc++] = *((uint64_t *)&d);
}
// Writes one tape word at current_loc and advances it: `val` OR'ed with the tape type
// character shifted into the top 8 bits (bits 56..63).
really_inline void append_tape(uint64_t val, internal::tape_type t) noexcept {
parser.doc.tape[parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
parser.doc->tape[parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
}
}; // struct number_writer
struct structural_parser {
structural_iterator structurals;
dom::parser &parser;
dom_parser_implementation &parser;
/** Next write location in the string buf for stage 2 parsing */
uint8_t *current_string_buf_loc{};
uint32_t depth;
really_inline structural_parser(
const uint8_t *buf,
size_t len,
dom::parser &_parser,
dom_parser_implementation &_parser,
uint32_t next_structural = 0
) : structurals(buf, len, _parser.structural_indexes.get(), next_structural), parser{_parser}, depth{0} {}
) : structurals(_parser.buf, _parser.len, _parser.structural_indexes.get(), next_structural), parser{_parser}, depth{0} {}
WARN_UNUSED really_inline bool start_scope(ret_address continue_state) {
parser.containing_scope[depth].tape_index = parser.current_loc;
@ -113,7 +111,7 @@ struct structural_parser {
// this function is responsible for annotating the start of the scope
really_inline void end_scope(internal::tape_type start, internal::tape_type end) noexcept {
depth--;
// write our doc.tape location to the header scope
// write our doc->tape location to the header scope
// The root scope gets written *at* the previous location.
append_tape(parser.containing_scope[depth].tape_index, end);
// count can overflow if it exceeds 24 bits... so we saturate
@ -121,7 +119,7 @@ struct structural_parser {
const uint32_t start_tape_index = parser.containing_scope[depth].tape_index;
const uint32_t count = parser.containing_scope[depth].count;
const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count;
// This is a load and an OR. It would be possible to just write once at doc.tape[d.tape_index]
// This is a load and an OR. It would be possible to just write once at doc->tape[d.tape_index]
write_tape(start_tape_index, parser.current_loc | (uint64_t(cntsat) << 32), start);
}
@ -139,11 +137,11 @@ struct structural_parser {
}
really_inline void append_tape(uint64_t val, internal::tape_type t) noexcept {
parser.doc.tape[parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
parser.doc->tape[parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
}
really_inline void write_tape(uint32_t loc, uint64_t val, internal::tape_type t) noexcept {
parser.doc.tape[loc] = val | ((uint64_t(char(t))) << 56);
parser.doc->tape[loc] = val | ((uint64_t(char(t))) << 56);
}
// increment_count increments the count of keys in an object or values in an array.
@ -156,7 +154,7 @@ struct structural_parser {
really_inline uint8_t *on_start_string() noexcept {
// we advance the point, accounting for the fact that we have a NULL termination
append_tape(current_string_buf_loc - parser.doc.string_buf.get(), internal::tape_type::STRING);
append_tape(current_string_buf_loc - parser.doc->string_buf.get(), internal::tape_type::STRING);
return current_string_buf_loc + sizeof(uint32_t);
}
@ -330,7 +328,7 @@ struct structural_parser {
}
really_inline void init() {
current_string_buf_loc = parser.doc.string_buf.get();
current_string_buf_loc = parser.doc->string_buf.get();
parser.current_loc = 0;
}
@ -386,10 +384,11 @@ struct structural_parser {
* The JSON is parsed to a tape, see the accompanying tape.md file
* for documentation.
***********/
WARN_UNUSED error_code dom_parser_implementation::stage2(dom::parser &doc_parser) noexcept {
WARN_UNUSED error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept {
this->doc = &_doc;
static constexpr stage2::unified_machine_addresses addresses = INIT_ADDRESSES();
stage2::structural_parser parser(this->buf, this->len, doc_parser);
error_code result = parser.start(this->len, addresses.finish);
stage2::structural_parser parser(*this);
error_code result = parser.start(len, addresses.finish);
if (result) { return result; }
//

View File

@ -72,10 +72,10 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui
#include "generic/stage1/utf8_lookup2_algorithm.h"
#include "generic/stage1/json_structural_indexer.h"
// Stage 1 entry point for the haswell implementation.
// Records the input buffer and its length on this object, then runs the structural indexer
// with a STEP_SIZE of 128 and returns its error code.
//
// NOTE(review): the signature and the return statement each appear twice below (one variant
// taking/forwarding a `dom::parser &parser`, one forwarding `*this`). This looks like
// before/after residue of a change -- confirm that only the `*this` variant should remain.
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, dom::parser &parser, bool streaming) noexcept {
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
this->buf = _buf;
this->len = _len;
return haswell::stage1::json_structural_indexer::index<128>(_buf, _len, parser, streaming);
return haswell::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming);
}
} // namespace haswell
@ -98,6 +98,12 @@ namespace haswell {
#include "generic/stage2/structural_parser.h"
#include "generic/stage2/streaming_structural_parser.h"
// Parses the buffer into _doc by running stage 1 (non-streaming) and, only if that
// succeeded, stage 2. Returns the first error encountered, or the result of stage 2.
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
  error_code result = stage1(_buf, _len, false);
  if (!result) {
    result = stage2(_doc);
  }
  return result;
}
} // namespace haswell
} // namespace simdjson
UNTARGET_REGION

View File

@ -3,7 +3,6 @@
#include "simdjson.h"
#include "isadetection.h"
namespace simdjson {
namespace haswell {
@ -11,38 +10,40 @@ class dom_parser_implementation final : public internal::dom_parser_implementati
public:
const uint8_t *buf{}; // Buffer passed to stage 1
size_t len{0}; // Length passed to stage 1
dom::document *doc{}; // Document passed to stage 2
really_inline dom_parser_implementation();
dom_parser_implementation(const dom_parser_implementation &) = delete;
dom_parser_implementation & operator=(const dom_parser_implementation &) = delete;
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::parser &parser) noexcept final;
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) noexcept final;
WARN_UNUSED error_code stage2(dom::parser &parser) noexcept final;
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser, size_t &next_json) noexcept final;
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final;
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept final;
WARN_UNUSED error_code stage2(dom::document &doc) noexcept final;
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::document &doc, size_t &next_json) noexcept final;
WARN_UNUSED error_code set_capacity(size_t capacity) noexcept final;
WARN_UNUSED error_code set_max_depth(size_t max_depth) noexcept final;
};
#include "generic/stage1/allocate.h"
#include "generic/stage2/allocate.h"
// Default constructor. The body is intentionally empty: no work is done at construction time.
really_inline dom_parser_implementation::dom_parser_implementation() {}
// Leaving these here so they can be inlined if so desired
// Allocates the stage 1 state for the requested capacity by delegating to
// stage1::allocate::set_capacity().
//
// @param capacity  Value forwarded to the stage 1 allocator and recorded in `_capacity` on success.
// @return          The allocator's error code on failure (in which case `_capacity` is reset to 0),
//                  SUCCESS otherwise.
//
// NOTE(review): two signature lines appear back to back below (unnamed vs. named parameter).
// This looks like before/after residue of a change -- confirm that only the named one should remain.
WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t) noexcept {
WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept {
error_code err = stage1::allocate::set_capacity(*this, capacity);
// A failed allocation leaves the parser with no usable capacity.
if (err) { _capacity = 0; return err; }
_capacity = capacity;
return SUCCESS;
}
// Allocates the stage 2 state for the requested nesting depth by delegating to
// stage2::allocate::set_max_depth().
//
// @param max_depth  Value forwarded to the stage 2 allocator and recorded in `_max_depth` on success.
// @return           The allocator's error code on failure (in which case `_max_depth` is reset to 0),
//                   SUCCESS otherwise.
//
// NOTE(review): two signature lines appear back to back below (unnamed vs. named parameter).
// This looks like before/after residue of a change -- confirm that only the named one should remain.
WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t) noexcept {
WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept {
error_code err = stage2::allocate::set_max_depth(*this, max_depth);
// A failed allocation leaves the parser with no usable depth.
if (err) { _max_depth = 0; return err; }
_max_depth = max_depth;
return SUCCESS;
}
// Runs both parsing stages over the given buffer.
// Stage 1 is run in non-streaming mode; stage 2 only runs when stage 1 reported no error.
// Returns the first error encountered, or the result of stage 2.
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::parser &parser) noexcept {
  const error_code stage1_error = stage1(_buf, _len, parser, false);
  if (stage1_error) { return stage1_error; }
  return stage2(parser);
}
} // namespace haswell
} // namespace simdjson

View File

@ -73,10 +73,10 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui
#include "generic/stage1/utf8_lookup2_algorithm.h"
#include "generic/stage1/json_structural_indexer.h"
// Stage 1 entry point for the westmere implementation.
// Records the input buffer and its length on this object, then runs the structural indexer
// with a STEP_SIZE of 64 and returns its error code.
//
// NOTE(review): the signature and the return statement each appear twice below (one variant
// taking/forwarding a `parser &parser`, one forwarding `*this`). This looks like
// before/after residue of a change -- confirm that only the `*this` variant should remain.
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, parser &parser, bool streaming) noexcept {
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
this->buf = _buf;
this->len = _len;
return westmere::stage1::json_structural_indexer::index<64>(_buf, _len, parser, streaming);
return westmere::stage1::json_structural_indexer::index<64>(_buf, _len, *this, streaming);
}
} // namespace westmere
@ -99,6 +99,12 @@ namespace westmere {
#include "generic/stage2/structural_parser.h"
#include "generic/stage2/streaming_structural_parser.h"
// Parses the buffer into _doc by running stage 1 (non-streaming) and, only if that
// succeeded, stage 2. Returns the first error encountered, or the result of stage 2.
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
  error_code result = stage1(_buf, _len, false);
  if (!result) {
    result = stage2(_doc);
  }
  return result;
}
} // namespace westmere
} // namespace simdjson
UNTARGET_REGION

View File

@ -11,38 +11,40 @@ class dom_parser_implementation final : public internal::dom_parser_implementati
public:
const uint8_t *buf{}; // Buffer passed to stage 1
size_t len{0}; // Length passed to stage 1
dom::document *doc{}; // Document passed to stage 2
really_inline dom_parser_implementation();
dom_parser_implementation(const dom_parser_implementation &) = delete;
dom_parser_implementation & operator=(const dom_parser_implementation &) = delete;
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::parser &parser) noexcept final;
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) noexcept final;
WARN_UNUSED error_code stage2(dom::parser &parser) noexcept final;
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser, size_t &next_json) noexcept final;
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final;
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept final;
WARN_UNUSED error_code stage2(dom::document &doc) noexcept final;
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::document &doc, size_t &next_json) noexcept final;
WARN_UNUSED error_code set_capacity(size_t capacity) noexcept final;
WARN_UNUSED error_code set_max_depth(size_t max_depth) noexcept final;
};
#include "generic/stage1/allocate.h"
#include "generic/stage2/allocate.h"
// Default constructor. The body is intentionally empty: no work is done at construction time.
really_inline dom_parser_implementation::dom_parser_implementation() {}
// Leaving these here so they can be inlined if so desired
// Allocates the stage 1 state for the requested capacity by delegating to
// stage1::allocate::set_capacity().
//
// @param capacity  Value forwarded to the stage 1 allocator and recorded in `_capacity` on success.
// @return          The allocator's error code on failure (in which case `_capacity` is reset to 0),
//                  SUCCESS otherwise.
//
// NOTE(review): two signature lines appear back to back below (unnamed vs. named parameter).
// This looks like before/after residue of a change -- confirm that only the named one should remain.
WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t) noexcept {
WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept {
error_code err = stage1::allocate::set_capacity(*this, capacity);
// A failed allocation leaves the parser with no usable capacity.
if (err) { _capacity = 0; return err; }
_capacity = capacity;
return SUCCESS;
}
// Allocates the stage 2 state for the requested nesting depth by delegating to
// stage2::allocate::set_max_depth().
//
// @param max_depth  Value forwarded to the stage 2 allocator and recorded in `_max_depth` on success.
// @return           The allocator's error code on failure (in which case `_max_depth` is reset to 0),
//                   SUCCESS otherwise.
//
// NOTE(review): two signature lines appear back to back below (unnamed vs. named parameter).
// This looks like before/after residue of a change -- confirm that only the named one should remain.
WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t) noexcept {
WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept {
error_code err = stage2::allocate::set_max_depth(*this, max_depth);
// A failed allocation leaves the parser with no usable depth.
if (err) { _max_depth = 0; return err; }
_max_depth = max_depth;
return SUCCESS;
}
// Runs both parsing stages over the given buffer.
// Stage 1 is run in non-streaming mode; stage 2 only runs when stage 1 reported no error.
// Returns the first error encountered, or the result of stage 2.
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::parser &parser) noexcept {
  const error_code stage1_error = stage1(_buf, _len, parser, false);
  if (stage1_error) { return stage1_error; }
  return stage2(parser);
}
} // namespace westmere
} // namespace simdjson

View File

@ -176,7 +176,7 @@ stat_t simdjson_compute_stats(const simdjson::padded_string &p) {
s.non_ascii_byte_count = count_nonasciibytes(
reinterpret_cast<const uint8_t *>(p.data()), p.size());
s.byte_count = p.size();
s.structural_indexes_count = parser.n_structural_indexes;
s.structural_indexes_count = parser.implementation->n_structural_indexes;
// simdjson::document::iterator iter(doc);
recurse(doc, s, 0);