Merge pull request #903 from simdjson/jkeiser/dom-parser-implementation
Move parser state to implementation-specific class
This commit is contained in:
commit
ae6dddfff4
|
@ -84,7 +84,7 @@ struct json_stats {
|
||||||
bytes = json.size();
|
bytes = json.size();
|
||||||
blocks = bytes / BYTES_PER_BLOCK;
|
blocks = bytes / BYTES_PER_BLOCK;
|
||||||
if (bytes % BYTES_PER_BLOCK > 0) { blocks++; } // Account for remainder block
|
if (bytes % BYTES_PER_BLOCK > 0) { blocks++; } // Account for remainder block
|
||||||
structurals = parser.n_structural_indexes-1;
|
structurals = parser.implementation->n_structural_indexes-1;
|
||||||
|
|
||||||
// Calculate stats on blocks that will trigger utf-8 if statements / mispredictions
|
// Calculate stats on blocks that will trigger utf-8 if statements / mispredictions
|
||||||
bool last_block_has_utf8 = false;
|
bool last_block_has_utf8 = false;
|
||||||
|
@ -141,7 +141,7 @@ struct json_stats {
|
||||||
for (size_t block=0; block<blocks; block++) {
|
for (size_t block=0; block<blocks; block++) {
|
||||||
// Count structurals in the block
|
// Count structurals in the block
|
||||||
int block_structurals=0;
|
int block_structurals=0;
|
||||||
while (structural < parser.n_structural_indexes && parser.structural_indexes[structural] < (block+1)*BYTES_PER_BLOCK) {
|
while (structural < parser.implementation->n_structural_indexes && parser.implementation->structural_indexes[structural] < (block+1)*BYTES_PER_BLOCK) {
|
||||||
block_structurals++;
|
block_structurals++;
|
||||||
structural++;
|
structural++;
|
||||||
}
|
}
|
||||||
|
@ -320,7 +320,7 @@ struct benchmarker {
|
||||||
|
|
||||||
// Stage 1 (find structurals)
|
// Stage 1 (find structurals)
|
||||||
collector.start();
|
collector.start();
|
||||||
error = active_implementation->stage1((const uint8_t *)json.data(), json.size(), parser, false);
|
error = parser.implementation->stage1((const uint8_t *)json.data(), json.size(), false);
|
||||||
event_count stage1_count = collector.end();
|
event_count stage1_count = collector.end();
|
||||||
stage1 << stage1_count;
|
stage1 << stage1_count;
|
||||||
if (error) {
|
if (error) {
|
||||||
|
@ -334,7 +334,7 @@ struct benchmarker {
|
||||||
} else {
|
} else {
|
||||||
event_count stage2_count;
|
event_count stage2_count;
|
||||||
collector.start();
|
collector.start();
|
||||||
error = active_implementation->stage2((const uint8_t *)json.data(), json.size(), parser);
|
error = parser.implementation->stage2(parser.doc);
|
||||||
if (error) {
|
if (error) {
|
||||||
exit_error(string("Failed to parse ") + filename + " during stage 2 parsing " + error_message(error));
|
exit_error(string("Failed to parse ") + filename + " during stage 2 parsing " + error_message(error));
|
||||||
}
|
}
|
||||||
|
@ -345,7 +345,7 @@ struct benchmarker {
|
||||||
// Calculate stats the first time we parse
|
// Calculate stats the first time we parse
|
||||||
if (stats == NULL) {
|
if (stats == NULL) {
|
||||||
if (stage1_only) { // we need stage 2 once
|
if (stage1_only) { // we need stage 2 once
|
||||||
error = active_implementation->stage2((const uint8_t *)json.data(), json.size(), parser);
|
error = parser.implementation->stage2(parser.doc);
|
||||||
if (error) {
|
if (error) {
|
||||||
printf("Warning: failed to parse during stage 2. Unable to acquire statistics.\n");
|
printf("Warning: failed to parse during stage 2. Unable to acquire statistics.\n");
|
||||||
}
|
}
|
||||||
|
|
|
@ -106,7 +106,7 @@ stat_t simdjson_compute_stats(const simdjson::padded_string &p) {
|
||||||
answer.non_ascii_byte_count = count_nonasciibytes(
|
answer.non_ascii_byte_count = count_nonasciibytes(
|
||||||
reinterpret_cast<const uint8_t *>(p.data()), p.size());
|
reinterpret_cast<const uint8_t *>(p.data()), p.size());
|
||||||
answer.byte_count = p.size();
|
answer.byte_count = p.size();
|
||||||
answer.structural_indexes_count = parser.n_structural_indexes;
|
answer.structural_indexes_count = parser.implementation->n_structural_indexes;
|
||||||
simdjson_recurse(answer, doc);
|
simdjson_recurse(answer, doc);
|
||||||
return answer;
|
return answer;
|
||||||
}
|
}
|
||||||
|
@ -163,7 +163,6 @@ int main(int argc, char *argv[]) {
|
||||||
s.true_count, s.false_count, s.byte_count, s.structural_indexes_count);
|
s.true_count, s.false_count, s.byte_count, s.structural_indexes_count);
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
simdjson::dom::parser parser;
|
simdjson::dom::parser parser;
|
||||||
const simdjson::implementation &stage_parser = *simdjson::active_implementation;
|
|
||||||
simdjson::error_code alloc_error = parser.allocate(p.size());
|
simdjson::error_code alloc_error = parser.allocate(p.size());
|
||||||
if (alloc_error) {
|
if (alloc_error) {
|
||||||
std::cerr << alloc_error << std::endl;
|
std::cerr << alloc_error << std::endl;
|
||||||
|
@ -181,14 +180,14 @@ int main(int argc, char *argv[]) {
|
||||||
for (uint32_t i = 0; i < iterations; i++) {
|
for (uint32_t i = 0; i < iterations; i++) {
|
||||||
unified.start();
|
unified.start();
|
||||||
// The default template is simdjson::architecture::NATIVE.
|
// The default template is simdjson::architecture::NATIVE.
|
||||||
bool isok = (stage_parser.stage1((const uint8_t *)p.data(), p.size(), parser, false) == simdjson::SUCCESS);
|
bool isok = (parser.implementation->stage1((const uint8_t *)p.data(), p.size(), false) == simdjson::SUCCESS);
|
||||||
unified.end(results);
|
unified.end(results);
|
||||||
|
|
||||||
cy1 += results[0];
|
cy1 += results[0];
|
||||||
cl1 += results[1];
|
cl1 += results[1];
|
||||||
|
|
||||||
unified.start();
|
unified.start();
|
||||||
isok = isok && (stage_parser.stage2((const uint8_t *)p.data(), p.size(), parser) == simdjson::SUCCESS);
|
isok = isok && (parser.implementation->stage2(parser.doc) == simdjson::SUCCESS);
|
||||||
unified.end(results);
|
unified.end(results);
|
||||||
|
|
||||||
cy2 += results[0];
|
cy2 += results[0];
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
#include "simdjson/common_defs.h"
|
#include "simdjson/common_defs.h"
|
||||||
#include "simdjson/dom/document.h"
|
#include "simdjson/dom/document.h"
|
||||||
#include "simdjson/error.h"
|
#include "simdjson/error.h"
|
||||||
|
#include "simdjson/internal/dom_parser_implementation.h"
|
||||||
#include "simdjson/internal/tape_ref.h"
|
#include "simdjson/internal/tape_ref.h"
|
||||||
#include "simdjson/minify.h"
|
#include "simdjson/minify.h"
|
||||||
#include "simdjson/padded_string.h"
|
#include "simdjson/padded_string.h"
|
||||||
|
@ -14,22 +15,6 @@
|
||||||
|
|
||||||
namespace simdjson {
|
namespace simdjson {
|
||||||
|
|
||||||
namespace internal {
|
|
||||||
|
|
||||||
// expectation: sizeof(scope_descriptor) = 64/8.
|
|
||||||
struct scope_descriptor {
|
|
||||||
uint32_t tape_index; // where, on the tape, does the scope ([,{) begins
|
|
||||||
uint32_t count; // how many elements in the scope
|
|
||||||
}; // struct scope_descriptor
|
|
||||||
|
|
||||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
|
||||||
typedef void* ret_address;
|
|
||||||
#else
|
|
||||||
typedef char ret_address;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
} // namespace internal
|
|
||||||
|
|
||||||
namespace dom {
|
namespace dom {
|
||||||
|
|
||||||
class document_stream;
|
class document_stream;
|
||||||
|
@ -67,14 +52,14 @@ public:
|
||||||
*
|
*
|
||||||
* @param other The parser to take. Its capacity is zeroed.
|
* @param other The parser to take. Its capacity is zeroed.
|
||||||
*/
|
*/
|
||||||
parser(parser &&other) = default;
|
really_inline parser(parser &&other) noexcept;
|
||||||
parser(const parser &) = delete; ///< @private Disallow copying
|
parser(const parser &) = delete; ///< @private Disallow copying
|
||||||
/**
|
/**
|
||||||
* Take another parser's buffers and state.
|
* Take another parser's buffers and state.
|
||||||
*
|
*
|
||||||
* @param other The parser to take. Its capacity is zeroed.
|
* @param other The parser to take. Its capacity is zeroed.
|
||||||
*/
|
*/
|
||||||
parser &operator=(parser &&other) = default;
|
really_inline parser &operator=(parser &&other) noexcept;
|
||||||
parser &operator=(const parser &) = delete; ///< @private Disallow copying
|
parser &operator=(const parser &) = delete; ///< @private Disallow copying
|
||||||
|
|
||||||
/** Deallocate the JSON parser. */
|
/** Deallocate the JSON parser. */
|
||||||
|
@ -334,7 +319,8 @@ public:
|
||||||
/**
|
/**
|
||||||
* Set max_capacity. This is the largest document this parser can automatically support.
|
* Set max_capacity. This is the largest document this parser can automatically support.
|
||||||
*
|
*
|
||||||
* The parser may reallocate internal buffers as needed up to this amount.
|
* The parser may reallocate internal buffers as needed up to this amount as documents are passed
|
||||||
|
* to it.
|
||||||
*
|
*
|
||||||
* This call will not allocate or deallocate, even if capacity is currently above max_capacity.
|
* This call will not allocate or deallocate, even if capacity is currently above max_capacity.
|
||||||
*
|
*
|
||||||
|
@ -347,19 +333,8 @@ public:
|
||||||
/** @private Use simdjson_error instead */
|
/** @private Use simdjson_error instead */
|
||||||
using InvalidJSON [[deprecated("Use simdjson_error instead")]] = simdjson_error;
|
using InvalidJSON [[deprecated("Use simdjson_error instead")]] = simdjson_error;
|
||||||
|
|
||||||
/** @private Next location to write to in the tape */
|
/** @private [for benchmarking access] The implementation to use */
|
||||||
uint32_t current_loc{0};
|
std::unique_ptr<internal::dom_parser_implementation> implementation{};
|
||||||
|
|
||||||
/** @private Number of structural indices passed from stage 1 to stage 2 */
|
|
||||||
uint32_t n_structural_indexes{0};
|
|
||||||
/** @private Structural indices passed from stage 1 to stage 2 */
|
|
||||||
std::unique_ptr<uint32_t[]> structural_indexes{};
|
|
||||||
|
|
||||||
/** @private Tape location of each open { or [ */
|
|
||||||
std::unique_ptr<internal::scope_descriptor[]> containing_scope{};
|
|
||||||
|
|
||||||
/** @private Return address of each open { or [ */
|
|
||||||
std::unique_ptr<internal::ret_address[]> ret_address{};
|
|
||||||
|
|
||||||
/** @private Use `if (parser.parse(...).error())` instead */
|
/** @private Use `if (parser.parse(...).error())` instead */
|
||||||
bool valid{false};
|
bool valid{false};
|
||||||
|
@ -399,20 +374,6 @@ private:
|
||||||
*/
|
*/
|
||||||
size_t _max_capacity;
|
size_t _max_capacity;
|
||||||
|
|
||||||
/**
|
|
||||||
* The maximum document length this parser supports.
|
|
||||||
*
|
|
||||||
* Buffers are large enough to handle any document up to this length.
|
|
||||||
*/
|
|
||||||
size_t _capacity{0};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The maximum depth (number of nested objects and arrays) supported by this parser.
|
|
||||||
*
|
|
||||||
* Defaults to DEFAULT_MAX_DEPTH.
|
|
||||||
*/
|
|
||||||
size_t _max_depth{0};
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The loaded buffer (reused each time load() is called)
|
* The loaded buffer (reused each time load() is called)
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
#define SIMDJSON_IMPLEMENTATION_H
|
#define SIMDJSON_IMPLEMENTATION_H
|
||||||
|
|
||||||
#include "simdjson/common_defs.h"
|
#include "simdjson/common_defs.h"
|
||||||
|
#include "simdjson/internal/dom_parser_implementation.h"
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
|
@ -10,8 +11,8 @@
|
||||||
namespace simdjson {
|
namespace simdjson {
|
||||||
|
|
||||||
namespace dom {
|
namespace dom {
|
||||||
class parser;
|
class document;
|
||||||
}
|
} // namespace dom
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An implementation of simdjson for a particular CPU architecture.
|
* An implementation of simdjson for a particular CPU architecture.
|
||||||
|
@ -54,16 +55,19 @@ public:
|
||||||
/**
|
/**
|
||||||
* @private For internal implementation use
|
* @private For internal implementation use
|
||||||
*
|
*
|
||||||
* Run a full document parse (ensure_capacity, stage1 and stage2).
|
* Create a new DOM parser implementation for this CPU architecture and store it in dst.
|
||||||
|
* Overridden by each implementation.
|
||||||
*
|
*
|
||||||
* Overridden by each implementation.
|
* @param capacity The largest document that will be passed to the parser.
|
||||||
*
|
* @param max_depth The maximum JSON object/array nesting this parser is expected to handle.
|
||||||
* @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
|
* @param dst The place to put the resulting parser implementation.
|
||||||
* @param len the length of the json document.
|
* @return the error code, or SUCCESS if there was no error.
|
||||||
* @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity.
|
|
||||||
* @return the error code, or SUCCESS if there was no error.
|
|
||||||
*/
|
*/
|
||||||
WARN_UNUSED virtual error_code parse(const uint8_t *buf, size_t len, dom::parser &parser) const noexcept = 0;
|
virtual error_code create_dom_parser_implementation(
|
||||||
|
size_t capacity,
|
||||||
|
size_t max_depth,
|
||||||
|
std::unique_ptr<internal::dom_parser_implementation> &dst
|
||||||
|
) const noexcept = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @private For internal implementation use
|
* @private For internal implementation use
|
||||||
|
@ -80,50 +84,6 @@ public:
|
||||||
*/
|
*/
|
||||||
WARN_UNUSED virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0;
|
WARN_UNUSED virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0;
|
||||||
|
|
||||||
/**
|
|
||||||
* @private For internal implementation use
|
|
||||||
*
|
|
||||||
* Stage 1 of the document parser.
|
|
||||||
*
|
|
||||||
* Overridden by each implementation.
|
|
||||||
*
|
|
||||||
* @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
|
|
||||||
* @param len the length of the json document.
|
|
||||||
* @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity.
|
|
||||||
* @param streaming whether this is being called by parser::parse_many.
|
|
||||||
* @return the error code, or SUCCESS if there was no error.
|
|
||||||
*/
|
|
||||||
WARN_UNUSED virtual error_code stage1(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) const noexcept = 0;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @private For internal implementation use
|
|
||||||
*
|
|
||||||
* Stage 2 of the document parser.
|
|
||||||
*
|
|
||||||
* Overridden by each implementation.
|
|
||||||
*
|
|
||||||
* @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
|
|
||||||
* @param len the length of the json document.
|
|
||||||
* @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity.
|
|
||||||
* @return the error code, or SUCCESS if there was no error.
|
|
||||||
*/
|
|
||||||
WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser) const noexcept = 0;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @private For internal implementation use
|
|
||||||
*
|
|
||||||
* Stage 2 of the document parser for parser::parse_many.
|
|
||||||
*
|
|
||||||
* Overridden by each implementation.
|
|
||||||
*
|
|
||||||
* @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
|
|
||||||
* @param len the length of the json document.
|
|
||||||
* @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity.
|
|
||||||
* @param next_json the next structural index. Start this at 0 the first time, and it will be updated to the next value to pass each time.
|
|
||||||
* @return the error code, SUCCESS if there was no error, or SUCCESS_AND_HAS_MORE if there was no error and stage2 can be called again.
|
|
||||||
*/
|
|
||||||
WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser, size_t &next_json) const noexcept = 0;
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
/** @private Construct an implementation with the given name and description. For subclasses. */
|
/** @private Construct an implementation with the given name and description. For subclasses. */
|
||||||
really_inline implementation(
|
really_inline implementation(
|
||||||
|
|
|
@ -34,18 +34,18 @@ namespace internal {
|
||||||
* */
|
* */
|
||||||
inline uint32_t find_last_json_buf_idx(const uint8_t *buf, size_t size, const dom::parser &parser) {
|
inline uint32_t find_last_json_buf_idx(const uint8_t *buf, size_t size, const dom::parser &parser) {
|
||||||
// this function can be generally useful
|
// this function can be generally useful
|
||||||
if (parser.n_structural_indexes == 0)
|
if (parser.implementation->n_structural_indexes == 0)
|
||||||
return 0;
|
return 0;
|
||||||
auto last_i = parser.n_structural_indexes - 1;
|
auto last_i = parser.implementation->n_structural_indexes - 1;
|
||||||
if (parser.structural_indexes[last_i] == size) {
|
if (parser.implementation->structural_indexes[last_i] == size) {
|
||||||
if (last_i == 0)
|
if (last_i == 0)
|
||||||
return 0;
|
return 0;
|
||||||
last_i = parser.n_structural_indexes - 2;
|
last_i = parser.implementation->n_structural_indexes - 2;
|
||||||
}
|
}
|
||||||
auto arr_cnt = 0;
|
auto arr_cnt = 0;
|
||||||
auto obj_cnt = 0;
|
auto obj_cnt = 0;
|
||||||
for (auto i = last_i; i > 0; i--) {
|
for (auto i = last_i; i > 0; i--) {
|
||||||
auto idxb = parser.structural_indexes[i];
|
auto idxb = parser.implementation->structural_indexes[i];
|
||||||
switch (buf[idxb]) {
|
switch (buf[idxb]) {
|
||||||
case ':':
|
case ':':
|
||||||
case ',':
|
case ',':
|
||||||
|
@ -63,7 +63,7 @@ inline uint32_t find_last_json_buf_idx(const uint8_t *buf, size_t size, const do
|
||||||
arr_cnt++;
|
arr_cnt++;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
auto idxa = parser.structural_indexes[i - 1];
|
auto idxa = parser.implementation->structural_indexes[i - 1];
|
||||||
switch (buf[idxa]) {
|
switch (buf[idxa]) {
|
||||||
case '{':
|
case '{':
|
||||||
case '[':
|
case '[':
|
||||||
|
@ -172,17 +172,17 @@ inline error_code document_stream::json_parse() noexcept {
|
||||||
if (_batch_size == 0) {
|
if (_batch_size == 0) {
|
||||||
return simdjson::UTF8_ERROR;
|
return simdjson::UTF8_ERROR;
|
||||||
}
|
}
|
||||||
auto stage1_is_ok = error_code(simdjson::active_implementation->stage1(buf(), _batch_size, parser, true));
|
auto stage1_is_ok = error_code(parser.implementation->stage1(buf(), _batch_size, true));
|
||||||
if (stage1_is_ok != simdjson::SUCCESS) {
|
if (stage1_is_ok != simdjson::SUCCESS) {
|
||||||
return stage1_is_ok;
|
return stage1_is_ok;
|
||||||
}
|
}
|
||||||
uint32_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser);
|
uint32_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser);
|
||||||
if (last_index == 0) {
|
if (last_index == 0) {
|
||||||
if (parser.n_structural_indexes == 0) {
|
if (parser.implementation->n_structural_indexes == 0) {
|
||||||
return simdjson::EMPTY;
|
return simdjson::EMPTY;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
parser.n_structural_indexes = last_index + 1;
|
parser.implementation->n_structural_indexes = last_index + 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// the second thread is running or done.
|
// the second thread is running or done.
|
||||||
|
@ -191,15 +191,15 @@ inline error_code document_stream::json_parse() noexcept {
|
||||||
if (stage1_is_ok_thread != simdjson::SUCCESS) {
|
if (stage1_is_ok_thread != simdjson::SUCCESS) {
|
||||||
return stage1_is_ok_thread;
|
return stage1_is_ok_thread;
|
||||||
}
|
}
|
||||||
std::swap(parser.structural_indexes, parser_thread.structural_indexes);
|
std::swap(parser.implementation->structural_indexes, parser_thread.implementation->structural_indexes);
|
||||||
parser.n_structural_indexes = parser_thread.n_structural_indexes;
|
parser.implementation->n_structural_indexes = parser_thread.implementation->n_structural_indexes;
|
||||||
advance(last_json_buffer_loc);
|
advance(last_json_buffer_loc);
|
||||||
n_bytes_parsed += last_json_buffer_loc;
|
n_bytes_parsed += last_json_buffer_loc;
|
||||||
}
|
}
|
||||||
// let us decide whether we will start a new thread
|
// let us decide whether we will start a new thread
|
||||||
if (remaining() - _batch_size > 0) {
|
if (remaining() - _batch_size > 0) {
|
||||||
last_json_buffer_loc =
|
last_json_buffer_loc =
|
||||||
parser.structural_indexes[internal::find_last_json_buf_idx(buf(), _batch_size, parser)];
|
parser.implementation->structural_indexes[internal::find_last_json_buf_idx(buf(), _batch_size, parser)];
|
||||||
_batch_size = (std::min)(_batch_size, remaining() - last_json_buffer_loc);
|
_batch_size = (std::min)(_batch_size, remaining() - last_json_buffer_loc);
|
||||||
if (_batch_size > 0) {
|
if (_batch_size > 0) {
|
||||||
_batch_size = internal::trimmed_length_safe_utf8(
|
_batch_size = internal::trimmed_length_safe_utf8(
|
||||||
|
@ -214,22 +214,22 @@ inline error_code document_stream::json_parse() noexcept {
|
||||||
// this->stage1_is_ok_thread
|
// this->stage1_is_ok_thread
|
||||||
// there is only one thread that may write to this value
|
// there is only one thread that may write to this value
|
||||||
stage_1_thread = std::thread([this, b, bs] {
|
stage_1_thread = std::thread([this, b, bs] {
|
||||||
this->stage1_is_ok_thread = error_code(simdjson::active_implementation->stage1(b, bs, this->parser_thread, true));
|
this->stage1_is_ok_thread = error_code(parser_thread.implementation->stage1(b, bs, true));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
next_json = 0;
|
next_json = 0;
|
||||||
load_next_batch = false;
|
load_next_batch = false;
|
||||||
} // load_next_batch
|
} // load_next_batch
|
||||||
error_code res = simdjson::active_implementation->stage2(buf(), remaining(), parser, next_json);
|
error_code res = parser.implementation->stage2(buf(), remaining(), parser.doc, next_json);
|
||||||
if (res == simdjson::SUCCESS_AND_HAS_MORE) {
|
if (res == simdjson::SUCCESS_AND_HAS_MORE) {
|
||||||
n_parsed_docs++;
|
n_parsed_docs++;
|
||||||
current_buffer_loc = parser.structural_indexes[next_json];
|
current_buffer_loc = parser.implementation->structural_indexes[next_json];
|
||||||
load_next_batch = (current_buffer_loc == last_json_buffer_loc);
|
load_next_batch = (current_buffer_loc == last_json_buffer_loc);
|
||||||
} else if (res == simdjson::SUCCESS) {
|
} else if (res == simdjson::SUCCESS) {
|
||||||
n_parsed_docs++;
|
n_parsed_docs++;
|
||||||
if (remaining() > _batch_size) {
|
if (remaining() > _batch_size) {
|
||||||
current_buffer_loc = parser.structural_indexes[next_json - 1];
|
current_buffer_loc = parser.implementation->structural_indexes[next_json - 1];
|
||||||
load_next_batch = true;
|
load_next_batch = true;
|
||||||
res = simdjson::SUCCESS_AND_HAS_MORE;
|
res = simdjson::SUCCESS_AND_HAS_MORE;
|
||||||
}
|
}
|
||||||
|
@ -249,28 +249,28 @@ inline error_code document_stream::json_parse() noexcept {
|
||||||
n_bytes_parsed += current_buffer_loc;
|
n_bytes_parsed += current_buffer_loc;
|
||||||
_batch_size = (std::min)(_batch_size, remaining());
|
_batch_size = (std::min)(_batch_size, remaining());
|
||||||
_batch_size = internal::trimmed_length_safe_utf8((const char *)buf(), _batch_size);
|
_batch_size = internal::trimmed_length_safe_utf8((const char *)buf(), _batch_size);
|
||||||
auto stage1_is_ok = (error_code)simdjson::active_implementation->stage1(buf(), _batch_size, parser, true);
|
auto stage1_is_ok = (error_code)parser.implementation->stage1(buf(), _batch_size, true);
|
||||||
if (stage1_is_ok != simdjson::SUCCESS) {
|
if (stage1_is_ok != simdjson::SUCCESS) {
|
||||||
return stage1_is_ok;
|
return stage1_is_ok;
|
||||||
}
|
}
|
||||||
uint32_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser);
|
uint32_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser);
|
||||||
if (last_index == 0) {
|
if (last_index == 0) {
|
||||||
if (parser.n_structural_indexes == 0) {
|
if (parser.implementation->n_structural_indexes == 0) {
|
||||||
return EMPTY;
|
return EMPTY;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
parser.n_structural_indexes = last_index + 1;
|
parser.implementation->n_structural_indexes = last_index + 1;
|
||||||
}
|
}
|
||||||
load_next_batch = false;
|
load_next_batch = false;
|
||||||
} // load_next_batch
|
} // load_next_batch
|
||||||
error_code res = simdjson::active_implementation->stage2(buf(), remaining(), parser, next_json);
|
error_code res = parser.implementation->stage2(buf(), remaining(), parser.doc, next_json);
|
||||||
if (likely(res == simdjson::SUCCESS_AND_HAS_MORE)) {
|
if (likely(res == simdjson::SUCCESS_AND_HAS_MORE)) {
|
||||||
n_parsed_docs++;
|
n_parsed_docs++;
|
||||||
current_buffer_loc = parser.structural_indexes[next_json];
|
current_buffer_loc = parser.implementation->structural_indexes[next_json];
|
||||||
} else if (res == simdjson::SUCCESS) {
|
} else if (res == simdjson::SUCCESS) {
|
||||||
n_parsed_docs++;
|
n_parsed_docs++;
|
||||||
if (remaining() > _batch_size) {
|
if (remaining() > _batch_size) {
|
||||||
current_buffer_loc = parser.structural_indexes[next_json - 1];
|
current_buffer_loc = parser.implementation->structural_indexes[next_json - 1];
|
||||||
next_json = 1;
|
next_json = 1;
|
||||||
load_next_batch = true;
|
load_next_batch = true;
|
||||||
res = simdjson::SUCCESS_AND_HAS_MORE;
|
res = simdjson::SUCCESS_AND_HAS_MORE;
|
||||||
|
|
|
@ -17,8 +17,11 @@ namespace dom {
|
||||||
//
|
//
|
||||||
really_inline parser::parser(size_t max_capacity) noexcept
|
really_inline parser::parser(size_t max_capacity) noexcept
|
||||||
: _max_capacity{max_capacity},
|
: _max_capacity{max_capacity},
|
||||||
loaded_bytes(nullptr, &aligned_free_char)
|
loaded_bytes(nullptr, &aligned_free_char) {
|
||||||
{}
|
}
|
||||||
|
really_inline parser::parser(parser &&other) noexcept = default;
|
||||||
|
really_inline parser &parser::operator=(parser &&other) noexcept = default;
|
||||||
|
|
||||||
inline bool parser::is_valid() const noexcept { return valid; }
|
inline bool parser::is_valid() const noexcept { return valid; }
|
||||||
inline int parser::get_error_code() const noexcept { return error; }
|
inline int parser::get_error_code() const noexcept { return error; }
|
||||||
inline std::string parser::get_error_message() const noexcept { return error_message(error); }
|
inline std::string parser::get_error_message() const noexcept { return error_message(error); }
|
||||||
|
@ -101,15 +104,12 @@ inline simdjson_result<element> parser::parse(const uint8_t *buf, size_t len, bo
|
||||||
memcpy((void *)buf, tmp_buf, len);
|
memcpy((void *)buf, tmp_buf, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
code = simdjson::active_implementation->parse(buf, len, *this);
|
code = implementation->parse(buf, len, doc);
|
||||||
if (realloc_if_needed) {
|
if (realloc_if_needed) {
|
||||||
aligned_free((void *)buf); // must free before we exit
|
aligned_free((void *)buf); // must free before we exit
|
||||||
}
|
}
|
||||||
if (code) { return code; }
|
if (code) { return code; }
|
||||||
|
|
||||||
// We're indicating validity via the simdjson_result<element>, so set the parse state back to invalid
|
|
||||||
valid = false;
|
|
||||||
error = UNINITIALIZED;
|
|
||||||
return doc.root();
|
return doc.root();
|
||||||
}
|
}
|
||||||
really_inline simdjson_result<element> parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept {
|
really_inline simdjson_result<element> parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept {
|
||||||
|
@ -136,81 +136,30 @@ inline document_stream parser::parse_many(const padded_string &s, size_t batch_s
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline size_t parser::capacity() const noexcept {
|
really_inline size_t parser::capacity() const noexcept {
|
||||||
return _capacity;
|
return implementation ? implementation->capacity() : 0;
|
||||||
}
|
}
|
||||||
really_inline size_t parser::max_capacity() const noexcept {
|
really_inline size_t parser::max_capacity() const noexcept {
|
||||||
return _max_capacity;
|
return _max_capacity;
|
||||||
}
|
}
|
||||||
really_inline size_t parser::max_depth() const noexcept {
|
really_inline size_t parser::max_depth() const noexcept {
|
||||||
return _max_depth;
|
return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH;
|
||||||
}
|
}
|
||||||
|
|
||||||
WARN_UNUSED
|
WARN_UNUSED
|
||||||
inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept {
|
inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept {
|
||||||
//
|
//
|
||||||
// If capacity has changed, reallocate capacity-based buffers
|
// Reallocate implementation and document if needed
|
||||||
//
|
//
|
||||||
if (_capacity != capacity) {
|
error_code err;
|
||||||
// Set capacity to 0 until we finish, in case there's an error
|
if (implementation) {
|
||||||
_capacity = 0;
|
err = implementation->allocate(capacity, max_depth);
|
||||||
|
} else {
|
||||||
//
|
err = simdjson::active_implementation->create_dom_parser_implementation(capacity, max_depth, implementation);
|
||||||
// Reallocate the document
|
|
||||||
//
|
|
||||||
error_code err = doc.allocate(capacity);
|
|
||||||
if (err) { return err; }
|
|
||||||
|
|
||||||
//
|
|
||||||
// Don't allocate 0 bytes, just return.
|
|
||||||
//
|
|
||||||
if (capacity == 0) {
|
|
||||||
structural_indexes.reset();
|
|
||||||
return SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// Initialize stage 1 output
|
|
||||||
//
|
|
||||||
size_t max_structures = ROUNDUP_N(capacity, 64) + 2 + 7;
|
|
||||||
structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); // TODO realloc
|
|
||||||
if (!structural_indexes) {
|
|
||||||
return MEMALLOC;
|
|
||||||
}
|
|
||||||
|
|
||||||
_capacity = capacity;
|
|
||||||
|
|
||||||
//
|
|
||||||
// If capacity hasn't changed, but the document was taken, allocate a new document.
|
|
||||||
//
|
|
||||||
} else if (!doc.tape) {
|
|
||||||
error_code err = doc.allocate(capacity);
|
|
||||||
if (err) { return err; }
|
|
||||||
}
|
}
|
||||||
|
if (err) { return err; }
|
||||||
|
|
||||||
//
|
if (implementation->capacity() != capacity || !doc.tape) {
|
||||||
// If max_depth has changed, reallocate those buffers
|
return doc.allocate(capacity);
|
||||||
//
|
|
||||||
if (max_depth != _max_depth) {
|
|
||||||
_max_depth = 0;
|
|
||||||
|
|
||||||
if (max_depth == 0) {
|
|
||||||
ret_address.reset();
|
|
||||||
containing_scope.reset();
|
|
||||||
return SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// Initialize stage 2 state
|
|
||||||
//
|
|
||||||
containing_scope.reset(new (std::nothrow) internal::scope_descriptor[max_depth]); // TODO realloc
|
|
||||||
ret_address.reset(new (std::nothrow) internal::ret_address[max_depth]);
|
|
||||||
|
|
||||||
if (!ret_address || !containing_scope) {
|
|
||||||
// Could not allocate memory
|
|
||||||
return MEMALLOC;
|
|
||||||
}
|
|
||||||
|
|
||||||
_max_depth = max_depth;
|
|
||||||
}
|
}
|
||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -220,24 +169,24 @@ inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcep
|
||||||
return !allocate(capacity, max_depth);
|
return !allocate(capacity, max_depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline void parser::set_max_capacity(size_t max_capacity) noexcept {
|
|
||||||
_max_capacity = max_capacity;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept {
|
inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept {
|
||||||
// If we don't have enough capacity, (try to) automatically bump it.
|
// If we don't have enough capacity, (try to) automatically bump it.
|
||||||
// If the document was taken, reallocate that too.
|
// If the document was taken, reallocate that too.
|
||||||
// Both in one if statement to minimize unlikely branching.
|
// Both in one if statement to minimize unlikely branching.
|
||||||
if (unlikely(desired_capacity > capacity() || !doc.tape)) {
|
if (unlikely(capacity() < desired_capacity || !doc.tape)) {
|
||||||
if (desired_capacity > max_capacity()) {
|
if (desired_capacity > max_capacity()) {
|
||||||
return error = CAPACITY;
|
return error = CAPACITY;
|
||||||
}
|
}
|
||||||
return allocate(desired_capacity, _max_depth > 0 ? _max_depth : DEFAULT_MAX_DEPTH);
|
return allocate(desired_capacity, max_depth());
|
||||||
}
|
}
|
||||||
|
|
||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Record the largest capacity this parser may grow to (stored in _max_capacity). */
really_inline void parser::set_max_capacity(size_t max_capacity) noexcept {
  this->_max_capacity = max_capacity;
}
|
||||||
|
|
||||||
} // namespace dom
|
} // namespace dom
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,185 @@
|
||||||
|
#ifndef SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H
|
||||||
|
#define SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H
|
||||||
|
|
||||||
|
#include "simdjson/common_defs.h"
|
||||||
|
#include "simdjson/error.h"
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
namespace simdjson {
|
||||||
|
|
||||||
|
namespace dom {
|
||||||
|
class document;
|
||||||
|
} // namespace dom
|
||||||
|
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An implementation of simdjson's DOM parser for a particular CPU architecture.
|
||||||
|
*
|
||||||
|
* This class is expected to be accessed only by pointer, and never move in memory (though the
|
||||||
|
* pointer can move).
|
||||||
|
*/
|
||||||
|
class dom_parser_implementation {
|
||||||
|
public:
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @private For internal implementation use
|
||||||
|
*
|
||||||
|
* Run a full JSON parse on a single document (stage1 + stage2).
|
||||||
|
*
|
||||||
|
* Guaranteed only to be called when capacity > document length.
|
||||||
|
*
|
||||||
|
* Overridden by each implementation.
|
||||||
|
*
|
||||||
|
* @param buf The json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
|
||||||
|
* @param len The length of the json document.
* @param doc The document to output to.
|
||||||
|
* @return The error code, or SUCCESS if there was no error.
|
||||||
|
*/
|
||||||
|
WARN_UNUSED virtual error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @private For internal implementation use
|
||||||
|
*
|
||||||
|
* Stage 1 of the document parser.
|
||||||
|
*
|
||||||
|
* Guaranteed only to be called when capacity > document length.
|
||||||
|
*
|
||||||
|
* Overridden by each implementation.
|
||||||
|
*
|
||||||
|
* @param buf The json document to parse.
|
||||||
|
* @param len The length of the json document.
|
||||||
|
* @param streaming Whether this is being called by parser::parse_many.
|
||||||
|
* @return The error code, or SUCCESS if there was no error.
|
||||||
|
*/
|
||||||
|
WARN_UNUSED virtual error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @private For internal implementation use
|
||||||
|
*
|
||||||
|
* Stage 2 of the document parser.
|
||||||
|
*
|
||||||
|
* Called after stage1().
|
||||||
|
*
|
||||||
|
* Overridden by each implementation.
|
||||||
|
*
|
||||||
|
* @param doc The document to output to.
|
||||||
|
* @return The error code, or SUCCESS if there was no error.
|
||||||
|
*/
|
||||||
|
WARN_UNUSED virtual error_code stage2(dom::document &doc) noexcept = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @private For internal implementation use
|
||||||
|
*
|
||||||
|
* Stage 2 of the document parser for parser::parse_many.
|
||||||
|
*
|
||||||
|
* Guaranteed only to be called after stage1(), with buf and len being a subset of the total stage1 buf/len.
|
||||||
|
* Overridden by each implementation.
|
||||||
|
*
|
||||||
|
* @param buf The json document to parse.
|
||||||
|
* @param len The length of the json document.
|
||||||
|
* @param doc The document to output to.
|
||||||
|
* @param next_json The next structural index. Start this at 0 the first time, and it will be updated to the next value to pass each time.
|
||||||
|
* @return The error code, SUCCESS if there was no error, or SUCCESS_AND_HAS_MORE if there was no error and stage2 can be called again.
|
||||||
|
*/
|
||||||
|
WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, dom::document &doc, size_t &next_json) noexcept = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Change the capacity of this parser.
|
||||||
|
*
|
||||||
|
* Generally used for reallocation.
|
||||||
|
*
|
||||||
|
* @param capacity The new capacity.
|
||||||
|
*   (The max depth is not a parameter here; it is changed via set_max_depth().)
|
||||||
|
* @return The error code, or SUCCESS if there was no error.
|
||||||
|
*/
|
||||||
|
virtual error_code set_capacity(size_t capacity) noexcept = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Change the max depth of this parser.
|
||||||
|
*
|
||||||
|
* Generally used for reallocation.
|
||||||
|
*
|
||||||
|
* (The capacity is not a parameter here; it is changed via set_capacity().)
|
||||||
|
* @param max_depth The new max_depth.
|
||||||
|
* @return The error code, or SUCCESS if there was no error.
|
||||||
|
*/
|
||||||
|
virtual error_code set_max_depth(size_t max_depth) noexcept = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Deallocate this parser.
|
||||||
|
*/
|
||||||
|
virtual ~dom_parser_implementation() = default;
|
||||||
|
|
||||||
|
/** Next location to write to in the tape */
|
||||||
|
uint32_t current_loc{0};
|
||||||
|
|
||||||
|
/** Number of structural indices passed from stage 1 to stage 2 */
|
||||||
|
uint32_t n_structural_indexes{0};
|
||||||
|
/** Structural indices passed from stage 1 to stage 2 */
|
||||||
|
std::unique_ptr<uint32_t[]> structural_indexes{};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The largest document this parser can support without reallocating.
|
||||||
|
*
|
||||||
|
* @return Current capacity, in bytes.
|
||||||
|
*/
|
||||||
|
really_inline size_t capacity() const noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The maximum level of nested object and arrays supported by this parser.
|
||||||
|
*
|
||||||
|
* @return Maximum depth, in levels of nesting.
|
||||||
|
*/
|
||||||
|
really_inline size_t max_depth() const noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
|
||||||
|
* and `max_depth` depth.
|
||||||
|
*
|
||||||
|
* @param capacity The new capacity.
|
||||||
|
* @param max_depth The new max_depth.
|
||||||
|
* @return The error, if there is one.
|
||||||
|
*/
|
||||||
|
WARN_UNUSED inline error_code allocate(size_t capacity, size_t max_depth) noexcept;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
/**
|
||||||
|
* The maximum document length this parser supports.
|
||||||
|
*
|
||||||
|
* Buffers are large enough to handle any document up to this length.
|
||||||
|
*/
|
||||||
|
size_t _capacity{0};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The maximum depth (number of nested objects and arrays) supported by this parser.
|
||||||
|
*
|
||||||
|
* Starts at 0 until set_max_depth() is called.
|
||||||
|
*/
|
||||||
|
size_t _max_depth{0};
|
||||||
|
}; // class dom_parser_implementation
|
||||||
|
|
||||||
|
/** Report the document capacity currently recorded in _capacity. */
really_inline size_t dom_parser_implementation::capacity() const noexcept {
  return this->_capacity;
}
|
||||||
|
|
||||||
|
/** Report the nesting depth currently recorded in _max_depth. */
really_inline size_t dom_parser_implementation::max_depth() const noexcept {
  return this->_max_depth;
}
|
||||||
|
|
||||||
|
/**
 * Bring the parser to the requested capacity and max depth.
 *
 * Each setting is only touched when it differs from the value currently recorded, so calling
 * this with unchanged values does not reallocate. The depth is handled before the capacity.
 *
 * @param capacity The desired capacity.
 * @param max_depth The desired max depth.
 * @return The first error reported by set_max_depth() / set_capacity(), or SUCCESS.
 */
WARN_UNUSED
inline error_code dom_parser_implementation::allocate(size_t capacity, size_t max_depth) noexcept {
  if (_max_depth != max_depth) {
    if (error_code depth_error = set_max_depth(max_depth)) { return depth_error; }
  }
  if (this->capacity() != capacity) {
    if (error_code capacity_error = set_capacity(capacity)) { return capacity_error; }
  }
  return SUCCESS;
}
|
||||||
|
|
||||||
|
} // namespace internal
|
||||||
|
} // namespace simdjson
|
||||||
|
|
||||||
|
#endif // SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H
|
|
@ -1,2 +0,0 @@
|
||||||
Try :
|
|
||||||
c++ -O3 -std=c++17 -pthread -o amalgamate_demo amalgamate_demo.cpp && ./amalgamate_demo ../jsonexamples/twitter.json ../jsonexamples/amazon_cellphones.ndjson
|
|
|
@ -1,42 +0,0 @@
|
||||||
/* auto-generated on Thu 21 May 2020 14:01:15 EDT. Do not edit! */
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include "simdjson.h"
|
|
||||||
#include "simdjson.cpp"
|
|
||||||
/**
 * Demo driver for the amalgamated build.
 *
 * argv[1] is loaded as a single JSON document; if argv[2] is given it is loaded with
 * load_many. Exits with EXIT_FAILURE when no file name is given or when either load
 * reports an error.
 */
int main(int argc, char *argv[]) {
  if(argc < 2) {
    std::cerr << "Please specify at least one file name. " << std::endl;
    // Without a file name argv[1] is not usable, so stop here instead of falling through.
    return EXIT_FAILURE;
  }
  const char * filename = argv[1];
  simdjson::dom::parser parser;
  simdjson::error_code error;
  UNUSED simdjson::dom::element elem;
  parser.load(filename).tie(elem, error); // do the parsing
  if (error) {
    std::cout << "parse failed" << std::endl;
    std::cout << "error code: " << error << std::endl;
    std::cout << error << std::endl;
    return EXIT_FAILURE;
  } else {
    std::cout << "parse valid" << std::endl;
  }
  if(argc == 2) {
    return EXIT_SUCCESS;
  }

  // parse_many
  const char * filename2 = argv[2];
  for (auto result : parser.load_many(filename2)) {
    error = result.error();
    // Stop at the first failure so a later successful document cannot overwrite it.
    if (error) { break; }
  }
  if (error) {
    std::cout << "parse_many failed" << std::endl;
    std::cout << "error code: " << error << std::endl;
    std::cout << error << std::endl;
    return EXIT_FAILURE;
  } else {
    std::cout << "parse_many valid" << std::endl;
  }
  return EXIT_SUCCESS;
}
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,8 +1,13 @@
|
||||||
#include "simdjson.h"
|
#include "simdjson.h"
|
||||||
|
#include "arm64/implementation.h"
|
||||||
|
#include "arm64/dom_parser_implementation.h"
|
||||||
|
|
||||||
|
//
|
||||||
|
// Stage 1
|
||||||
|
//
|
||||||
#include "arm64/bitmask.h"
|
#include "arm64/bitmask.h"
|
||||||
#include "arm64/simd.h"
|
#include "arm64/simd.h"
|
||||||
#include "arm64/bitmanipulation.h"
|
#include "arm64/bitmanipulation.h"
|
||||||
#include "arm64/implementation.h"
|
|
||||||
|
|
||||||
namespace simdjson {
|
namespace simdjson {
|
||||||
namespace arm64 {
|
namespace arm64 {
|
||||||
|
@ -79,8 +84,35 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui
|
||||||
|
|
||||||
#include "generic/stage1/utf8_lookup2_algorithm.h"
|
#include "generic/stage1/utf8_lookup2_algorithm.h"
|
||||||
#include "generic/stage1/json_structural_indexer.h"
|
#include "generic/stage1/json_structural_indexer.h"
|
||||||
WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept {
|
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
|
||||||
return arm64::stage1::json_structural_indexer::index<64>(buf, len, parser, streaming);
|
this->buf = _buf;
|
||||||
|
this->len = _len;
|
||||||
|
return arm64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace arm64
|
||||||
|
} // namespace simdjson
|
||||||
|
|
||||||
|
//
|
||||||
|
// Stage 2
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "arm64/stringparsing.h"
|
||||||
|
#include "arm64/numberparsing.h"
|
||||||
|
|
||||||
|
namespace simdjson {
|
||||||
|
namespace arm64 {
|
||||||
|
|
||||||
|
#include "generic/stage2/logger.h"
|
||||||
|
#include "generic/stage2/atomparsing.h"
|
||||||
|
#include "generic/stage2/structural_iterator.h"
|
||||||
|
#include "generic/stage2/structural_parser.h"
|
||||||
|
#include "generic/stage2/streaming_structural_parser.h"
|
||||||
|
|
||||||
|
/**
 * Full parse of one document: stage 1 on the given buffer, then stage 2 into _doc.
 *
 * Stage 2 is only run when stage 1 reported no error.
 *
 * @param _buf The json document to parse.
 * @param _len The length of the json document.
 * @param _doc The document to output to.
 * @return The stage 1 error if there was one, otherwise whatever stage 2 returns.
 */
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
  if (error_code stage1_error = stage1(_buf, _len, false)) {
    return stage1_error;
  }
  return stage2(_doc);
}
|
||||||
|
|
||||||
} // namespace arm64
|
} // namespace arm64
|
|
@ -0,0 +1,15 @@
|
||||||
|
#ifndef SIMDJSON_ARM64_DOM_PARSER_IMPLEMENTATION_H
|
||||||
|
#define SIMDJSON_ARM64_DOM_PARSER_IMPLEMENTATION_H
|
||||||
|
|
||||||
|
#include "simdjson.h"
|
||||||
|
#include "isadetection.h"
|
||||||
|
|
||||||
|
namespace simdjson {
|
||||||
|
namespace arm64 {
|
||||||
|
|
||||||
|
#include "generic/dom_parser_implementation.h"
|
||||||
|
|
||||||
|
} // namespace arm64
|
||||||
|
} // namespace simdjson
|
||||||
|
|
||||||
|
#endif // SIMDJSON_ARM64_DOM_PARSER_IMPLEMENTATION_H
|
|
@ -0,0 +1,25 @@
|
||||||
|
#include "simdjson.h"
|
||||||
|
#include "arm64/implementation.h"
|
||||||
|
#include "arm64/dom_parser_implementation.h"
|
||||||
|
|
||||||
|
TARGET_HASWELL
|
||||||
|
|
||||||
|
namespace simdjson {
|
||||||
|
namespace arm64 {
|
||||||
|
|
||||||
|
/**
 * Create the arm64 DOM parser implementation and size its buffers.
 *
 * @param capacity The capacity to pass to set_capacity().
 * @param max_depth The max depth to pass to set_max_depth().
 * @param dst Receives the newly created parser implementation.
 * @return MEMALLOC if the object itself could not be allocated, the error from
 *         set_capacity() / set_max_depth() if either fails, otherwise SUCCESS.
 */
WARN_UNUSED error_code implementation::create_dom_parser_implementation(
  size_t capacity,
  size_t max_depth,
  std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept {
  dst.reset( new (std::nothrow) dom_parser_implementation() );
  if (!dst) { return MEMALLOC; }
  // Both setters allocate buffers and report failure through their return value;
  // propagate it instead of claiming SUCCESS for a parser with missing buffers.
  if (error_code err = dst->set_capacity(capacity)) { return err; }
  if (error_code err = dst->set_max_depth(max_depth)) { return err; }
  return SUCCESS;
}
|
||||||
|
|
||||||
|
} // namespace arm64
|
||||||
|
} // namespace simdjson
|
||||||
|
|
||||||
|
UNTARGET_REGION
|
|
@ -12,11 +12,12 @@ using namespace simdjson::dom;
|
||||||
class implementation final : public simdjson::implementation {
|
class implementation final : public simdjson::implementation {
|
||||||
public:
|
public:
|
||||||
really_inline implementation() : simdjson::implementation("arm64", "ARM NEON", instruction_set::NEON) {}
|
really_inline implementation() : simdjson::implementation("arm64", "ARM NEON", instruction_set::NEON) {}
|
||||||
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, parser &parser) const noexcept final;
|
WARN_UNUSED error_code create_dom_parser_implementation(
|
||||||
|
size_t capacity,
|
||||||
|
size_t max_length,
|
||||||
|
std::unique_ptr<internal::dom_parser_implementation>& dst
|
||||||
|
) const noexcept final;
|
||||||
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
|
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
|
||||||
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept final;
|
|
||||||
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, parser &parser) const noexcept final;
|
|
||||||
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, parser &parser, size_t &next_json) const noexcept final;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace arm64
|
} // namespace arm64
|
||||||
|
|
|
@ -1,21 +0,0 @@
|
||||||
#ifndef SIMDJSON_ARM64_STAGE2_H
|
|
||||||
#define SIMDJSON_ARM64_STAGE2_H
|
|
||||||
|
|
||||||
#include "simdjson.h"
|
|
||||||
#include "arm64/implementation.h"
|
|
||||||
#include "arm64/stringparsing.h"
|
|
||||||
#include "arm64/numberparsing.h"
|
|
||||||
|
|
||||||
namespace simdjson {
|
|
||||||
namespace arm64 {
|
|
||||||
|
|
||||||
#include "generic/stage2/logger.h"
|
|
||||||
#include "generic/stage2/atomparsing.h"
|
|
||||||
#include "generic/stage2/structural_iterator.h"
|
|
||||||
#include "generic/stage2/structural_parser.h"
|
|
||||||
#include "generic/stage2/streaming_structural_parser.h"
|
|
||||||
|
|
||||||
} // namespace arm64
|
|
||||||
} // namespace simdjson
|
|
||||||
|
|
||||||
#endif // SIMDJSON_ARM64_STAGE2_H
|
|
|
@ -1,6 +1,10 @@
|
||||||
#include "simdjson.h"
|
#include "simdjson.h"
|
||||||
#include "fallback/implementation.h"
|
#include "fallback/implementation.h"
|
||||||
|
#include "fallback/dom_parser_implementation.h"
|
||||||
|
|
||||||
|
//
|
||||||
|
// Stage 1
|
||||||
|
//
|
||||||
namespace simdjson {
|
namespace simdjson {
|
||||||
namespace fallback {
|
namespace fallback {
|
||||||
namespace stage1 {
|
namespace stage1 {
|
||||||
|
@ -8,8 +12,13 @@ namespace stage1 {
|
||||||
class structural_scanner {
|
class structural_scanner {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
really_inline structural_scanner(const uint8_t *_buf, uint32_t _len, parser &_doc_parser, bool _streaming)
|
really_inline structural_scanner(dom_parser_implementation &_parser, bool _streaming)
|
||||||
: buf{_buf}, next_structural_index{_doc_parser.structural_indexes.get()}, doc_parser{_doc_parser}, idx{0}, len{_len}, error{SUCCESS}, streaming{_streaming} {}
|
: buf{_parser.buf},
|
||||||
|
next_structural_index{_parser.structural_indexes.get()},
|
||||||
|
parser{_parser},
|
||||||
|
len{static_cast<uint32_t>(_parser.len)},
|
||||||
|
streaming{_streaming} {
|
||||||
|
}
|
||||||
|
|
||||||
really_inline void add_structural() {
|
really_inline void add_structural() {
|
||||||
*next_structural_index = idx;
|
*next_structural_index = idx;
|
||||||
|
@ -119,33 +128,32 @@ really_inline error_code scan() {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (unlikely(next_structural_index == doc_parser.structural_indexes.get())) {
|
if (unlikely(next_structural_index == parser.structural_indexes.get())) {
|
||||||
return EMPTY;
|
return EMPTY;
|
||||||
}
|
}
|
||||||
*next_structural_index = len;
|
*next_structural_index = len;
|
||||||
next_structural_index++;
|
next_structural_index++;
|
||||||
doc_parser.n_structural_indexes = uint32_t(next_structural_index - doc_parser.structural_indexes.get());
|
parser.n_structural_indexes = uint32_t(next_structural_index - parser.structural_indexes.get());
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const uint8_t *buf;
|
const uint8_t *buf;
|
||||||
uint32_t *next_structural_index;
|
uint32_t *next_structural_index;
|
||||||
parser &doc_parser;
|
dom_parser_implementation &parser;
|
||||||
uint32_t idx;
|
|
||||||
uint32_t len;
|
uint32_t len;
|
||||||
error_code error;
|
uint32_t idx{0};
|
||||||
|
error_code error{SUCCESS};
|
||||||
bool streaming;
|
bool streaming;
|
||||||
}; // structural_scanner
|
}; // structural_scanner
|
||||||
|
|
||||||
} // namespace stage1
|
} // namespace stage1
|
||||||
|
|
||||||
|
|
||||||
WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept {
|
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
|
||||||
if (unlikely(len > parser.capacity())) {
|
this->buf = _buf;
|
||||||
return CAPACITY;
|
this->len = _len;
|
||||||
}
|
stage1::structural_scanner scanner(*this, streaming);
|
||||||
stage1::structural_scanner scanner(buf, uint32_t(len), parser, streaming);
|
|
||||||
return scanner.scan();
|
return scanner.scan();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -207,3 +215,27 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui
|
||||||
|
|
||||||
} // namespace fallback
|
} // namespace fallback
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
|
//
|
||||||
|
// Stage 2
|
||||||
|
//
|
||||||
|
#include "fallback/stringparsing.h"
|
||||||
|
#include "fallback/numberparsing.h"
|
||||||
|
|
||||||
|
namespace simdjson {
|
||||||
|
namespace fallback {
|
||||||
|
|
||||||
|
#include "generic/stage2/logger.h"
|
||||||
|
#include "generic/stage2/atomparsing.h"
|
||||||
|
#include "generic/stage2/structural_iterator.h"
|
||||||
|
#include "generic/stage2/structural_parser.h"
|
||||||
|
#include "generic/stage2/streaming_structural_parser.h"
|
||||||
|
|
||||||
|
/**
 * Full parse of one document: stage 1 on the given buffer, then stage 2 into _doc.
 *
 * Stage 2 is only run when stage 1 reported no error.
 *
 * @param _buf The json document to parse.
 * @param _len The length of the json document.
 * @param _doc The document to output to.
 * @return The stage 1 error if there was one, otherwise whatever stage 2 returns.
 */
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
  if (error_code stage1_error = stage1(_buf, _len, false)) {
    return stage1_error;
  }
  return stage2(_doc);
}
|
||||||
|
|
||||||
|
} // namespace fallback
|
||||||
|
} // namespace simdjson
|
|
@ -0,0 +1,15 @@
|
||||||
|
#ifndef SIMDJSON_FALLBACK_DOM_PARSER_IMPLEMENTATION_H
|
||||||
|
#define SIMDJSON_FALLBACK_DOM_PARSER_IMPLEMENTATION_H
|
||||||
|
|
||||||
|
#include "simdjson.h"
|
||||||
|
#include "isadetection.h"
|
||||||
|
|
||||||
|
namespace simdjson {
|
||||||
|
namespace fallback {
|
||||||
|
|
||||||
|
#include "generic/dom_parser_implementation.h"
|
||||||
|
|
||||||
|
} // namespace fallback
|
||||||
|
} // namespace simdjson
|
||||||
|
|
||||||
|
#endif // SIMDJSON_FALLBACK_DOM_PARSER_IMPLEMENTATION_H
|
|
@ -0,0 +1,25 @@
|
||||||
|
#include "simdjson.h"
|
||||||
|
#include "fallback/implementation.h"
|
||||||
|
#include "fallback/dom_parser_implementation.h"
|
||||||
|
|
||||||
|
TARGET_HASWELL
|
||||||
|
|
||||||
|
namespace simdjson {
|
||||||
|
namespace fallback {
|
||||||
|
|
||||||
|
/**
 * Create the fallback DOM parser implementation and size its buffers.
 *
 * @param capacity The capacity to pass to set_capacity().
 * @param max_depth The max depth to pass to set_max_depth().
 * @param dst Receives the newly created parser implementation.
 * @return MEMALLOC if the object itself could not be allocated, the error from
 *         set_capacity() / set_max_depth() if either fails, otherwise SUCCESS.
 */
WARN_UNUSED error_code implementation::create_dom_parser_implementation(
  size_t capacity,
  size_t max_depth,
  std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept {
  dst.reset( new (std::nothrow) dom_parser_implementation() );
  if (!dst) { return MEMALLOC; }
  // Both setters allocate buffers and report failure through their return value;
  // propagate it instead of claiming SUCCESS for a parser with missing buffers.
  if (error_code err = dst->set_capacity(capacity)) { return err; }
  if (error_code err = dst->set_max_depth(max_depth)) { return err; }
  return SUCCESS;
}
|
||||||
|
|
||||||
|
} // namespace fallback
|
||||||
|
} // namespace simdjson
|
||||||
|
|
||||||
|
UNTARGET_REGION
|
|
@ -16,11 +16,12 @@ public:
|
||||||
"Generic fallback implementation",
|
"Generic fallback implementation",
|
||||||
0
|
0
|
||||||
) {}
|
) {}
|
||||||
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, parser &parser) const noexcept final;
|
WARN_UNUSED error_code create_dom_parser_implementation(
|
||||||
|
size_t capacity,
|
||||||
|
size_t max_length,
|
||||||
|
std::unique_ptr<internal::dom_parser_implementation>& dst
|
||||||
|
) const noexcept final;
|
||||||
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
|
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
|
||||||
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept final;
|
|
||||||
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, parser &parser) const noexcept final;
|
|
||||||
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, parser &parser, size_t &next_json) const noexcept final;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace fallback
|
} // namespace fallback
|
||||||
|
|
|
@ -1,17 +0,0 @@
|
||||||
#include "simdjson.h"
|
|
||||||
|
|
||||||
#include "fallback/implementation.h"
|
|
||||||
#include "fallback/stringparsing.h"
|
|
||||||
#include "fallback/numberparsing.h"
|
|
||||||
|
|
||||||
namespace simdjson {
|
|
||||||
namespace fallback {
|
|
||||||
|
|
||||||
#include "generic/stage2/logger.h"
|
|
||||||
#include "generic/stage2/atomparsing.h"
|
|
||||||
#include "generic/stage2/structural_iterator.h"
|
|
||||||
#include "generic/stage2/structural_parser.h"
|
|
||||||
#include "generic/stage2/streaming_structural_parser.h"
|
|
||||||
|
|
||||||
} // namespace fallback
|
|
||||||
} // namespace simdjson
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
/**
 * Bookkeeping for one open scope ([ or {).
 *
 * Expectation: sizeof(scope_descriptor) = 64/8 bytes; this is checked at compile time below.
 */
struct scope_descriptor {
  uint32_t tape_index; // where, on the tape, does the scope ([,{) begins
  uint32_t count; // how many elements in the scope
}; // struct scope_descriptor
static_assert(sizeof(scope_descriptor) == 64/8, "scope_descriptor is expected to be 8 bytes");
|
||||||
|
|
||||||
|
// Type stored for each open scope's return address: a pointer when
// SIMDJSON_USE_COMPUTED_GOTO is defined, a single char otherwise.
#if defined(SIMDJSON_USE_COMPUTED_GOTO)
using ret_address_t = void*;
#else
using ret_address_t = char;
#endif
|
||||||
|
|
||||||
|
class dom_parser_implementation final : public internal::dom_parser_implementation {
|
||||||
|
public:
|
||||||
|
/** Tape location of each open { or [ */
|
||||||
|
std::unique_ptr<scope_descriptor[]> containing_scope{};
|
||||||
|
/** Return address of each open { or [ */
|
||||||
|
std::unique_ptr<ret_address_t[]> ret_address{};
|
||||||
|
/** Buffer passed to stage 1 */
|
||||||
|
const uint8_t *buf{};
|
||||||
|
/** Length passed to stage 1 */
|
||||||
|
size_t len{0};
|
||||||
|
/** Document passed to stage 2 */
|
||||||
|
dom::document *doc{};
|
||||||
|
/** Error code (TODO remove, this is not even used, we just set it so the g++ optimizer doesn't get confused) */
|
||||||
|
error_code error{UNINITIALIZED};
|
||||||
|
|
||||||
|
really_inline dom_parser_implementation();
|
||||||
|
dom_parser_implementation(const dom_parser_implementation &) = delete;
|
||||||
|
dom_parser_implementation & operator=(const dom_parser_implementation &) = delete;
|
||||||
|
|
||||||
|
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final;
|
||||||
|
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept final;
|
||||||
|
WARN_UNUSED error_code stage2(dom::document &doc) noexcept final;
|
||||||
|
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::document &doc, size_t &next_json) noexcept final;
|
||||||
|
WARN_UNUSED error_code set_capacity(size_t capacity) noexcept final;
|
||||||
|
WARN_UNUSED error_code set_max_depth(size_t max_depth) noexcept final;
|
||||||
|
};
|
||||||
|
|
||||||
|
#include "generic/stage1/allocate.h"
|
||||||
|
#include "generic/stage2/allocate.h"
|
||||||
|
|
||||||
|
// Empty body: every data member is set by its default member initializer in the class definition.
really_inline dom_parser_implementation::dom_parser_implementation() {}
|
||||||
|
|
||||||
|
// Leaving these here so they can be inlined if so desired
|
||||||
|
/**
 * Reallocate the stage 1 buffers for a new capacity and record the result.
 *
 * @param capacity The new capacity.
 * @return SUCCESS with _capacity updated, or the allocation error with _capacity reset to 0.
 */
WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept {
  const error_code alloc_error = stage1::allocate::set_capacity(*this, capacity);
  if (!alloc_error) {
    _capacity = capacity;
    return SUCCESS;
  }
  // Allocation failed: record that no capacity is available.
  _capacity = 0;
  return alloc_error;
}
|
||||||
|
|
||||||
|
/**
 * Reallocate the stage 2 buffers for a new max depth and record the result.
 *
 * @param max_depth The new max depth.
 * @return SUCCESS with _max_depth updated, or the allocation error with _max_depth reset to 0.
 */
WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept {
  const error_code alloc_error = stage2::allocate::set_max_depth(*this, max_depth);
  if (!alloc_error) {
    _max_depth = max_depth;
    return SUCCESS;
  }
  // Allocation failed: record that no depth is available.
  _max_depth = 0;
  return alloc_error;
}
|
|
@ -0,0 +1,15 @@
|
||||||
|
namespace stage1 {
|
||||||
|
namespace allocate {
|
||||||
|
|
||||||
|
//
|
||||||
|
// Allocates stage 1 internal state and outputs in the parser
|
||||||
|
//
|
||||||
|
/**
 * Replace the parser's structural index buffer with one sized for `capacity` bytes of input.
 *
 * @param parser The parser whose structural_indexes buffer is replaced.
 * @param capacity The document capacity the buffer must cover.
 * @return MEMALLOC if the buffer could not be allocated, otherwise SUCCESS.
 */
really_inline error_code set_capacity(internal::dom_parser_implementation &parser, size_t capacity) {
  // NOTE(review): the extra 2 + 7 slots are presumably headroom for the indexer's trailing writes - confirm.
  const size_t slot_count = ROUNDUP_N(capacity, 64) + 2 + 7;
  uint32_t *slots = new (std::nothrow) uint32_t[slot_count];
  parser.structural_indexes.reset(slots);
  return slots ? SUCCESS : MEMALLOC;
}
|
||||||
|
|
||||||
|
} // namespace allocate
|
||||||
|
} // namespace stage1
|
|
@ -58,7 +58,7 @@ public:
|
||||||
class json_structural_indexer {
|
class json_structural_indexer {
|
||||||
public:
|
public:
|
||||||
template<size_t STEP_SIZE>
|
template<size_t STEP_SIZE>
|
||||||
static error_code index(const uint8_t *buf, size_t len, parser &parser, bool streaming) noexcept;
|
static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool streaming) noexcept;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
really_inline json_structural_indexer(uint32_t *structural_indexes)
|
really_inline json_structural_indexer(uint32_t *structural_indexes)
|
||||||
|
@ -66,7 +66,7 @@ private:
|
||||||
template<size_t STEP_SIZE>
|
template<size_t STEP_SIZE>
|
||||||
really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
||||||
really_inline void next(simd::simd8x64<uint8_t> in, json_block block, size_t idx);
|
really_inline void next(simd::simd8x64<uint8_t> in, json_block block, size_t idx);
|
||||||
really_inline error_code finish(parser &parser, size_t idx, size_t len, bool streaming);
|
really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool streaming);
|
||||||
|
|
||||||
json_scanner scanner{};
|
json_scanner scanner{};
|
||||||
utf8_checker checker{};
|
utf8_checker checker{};
|
||||||
|
@ -83,7 +83,7 @@ really_inline void json_structural_indexer::next(simd::simd8x64<uint8_t> in, jso
|
||||||
unescaped_chars_error |= block.non_quote_inside_string(unescaped);
|
unescaped_chars_error |= block.non_quote_inside_string(unescaped);
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline error_code json_structural_indexer::finish(parser &parser, size_t idx, size_t len, bool streaming) {
|
really_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, bool streaming) {
|
||||||
// Write out the final iteration's structurals
|
// Write out the final iteration's structurals
|
||||||
indexer.write(uint32_t(idx-64), prev_structurals);
|
indexer.write(uint32_t(idx-64), prev_structurals);
|
||||||
|
|
||||||
|
@ -155,7 +155,7 @@ really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_b
|
||||||
// The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
|
// The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
|
||||||
// you may want to call on a function like trimmed_length_safe_utf8.
|
// you may want to call on a function like trimmed_length_safe_utf8.
|
||||||
template<size_t STEP_SIZE>
|
template<size_t STEP_SIZE>
|
||||||
error_code json_structural_indexer::index(const uint8_t *buf, size_t len, parser &parser, bool streaming) noexcept {
|
error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool streaming) noexcept {
|
||||||
if (unlikely(len > parser.capacity())) { return CAPACITY; }
|
if (unlikely(len > parser.capacity())) { return CAPACITY; }
|
||||||
|
|
||||||
buf_block_reader<STEP_SIZE> reader(buf, len);
|
buf_block_reader<STEP_SIZE> reader(buf, len);
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
namespace stage2 {
|
||||||
|
namespace allocate {
|
||||||
|
|
||||||
|
//
|
||||||
|
// Allocates stage 2 internal state and outputs in the parser
|
||||||
|
//
|
||||||
|
/**
 * Replace the parser's per-scope buffers with ones holding `max_depth` entries each.
 *
 * Both buffers are replaced before either is checked, so on failure one of them may
 * still have been reallocated.
 *
 * @param parser The parser whose containing_scope and ret_address buffers are replaced.
 * @param max_depth The number of entries to allocate in each buffer.
 * @return MEMALLOC if either allocation failed, otherwise SUCCESS.
 */
really_inline error_code set_max_depth(dom_parser_implementation &parser, size_t max_depth) {
  parser.containing_scope.reset(new (std::nothrow) scope_descriptor[max_depth]);
  parser.ret_address.reset(new (std::nothrow) ret_address_t[max_depth]);

  const bool both_allocated = parser.ret_address && parser.containing_scope;
  return both_allocated ? SUCCESS : MEMALLOC;
}
|
||||||
|
|
||||||
|
} // namespace allocate
|
||||||
|
} // namespace stage2
|
|
@ -1,10 +1,10 @@
|
||||||
namespace stage2 {
|
namespace stage2 {
|
||||||
|
|
||||||
struct streaming_structural_parser: structural_parser {
|
struct streaming_structural_parser: structural_parser {
|
||||||
really_inline streaming_structural_parser(const uint8_t *buf, size_t len, parser &_doc_parser, uint32_t next_structural) : structural_parser(buf, len, _doc_parser, next_structural) {}
|
really_inline streaming_structural_parser(dom_parser_implementation &_parser, uint32_t next_structural) : structural_parser(_parser, next_structural) {}
|
||||||
|
|
||||||
// override to add streaming
|
// override to add streaming
|
||||||
WARN_UNUSED really_inline error_code start(UNUSED size_t len, ret_address finish_parser) {
|
WARN_UNUSED really_inline error_code start(ret_address_t finish_parser) {
|
||||||
log_start();
|
log_start();
|
||||||
init(); // sets is_valid to false
|
init(); // sets is_valid to false
|
||||||
// Capacity ain't no thang for streaming, so we don't check it.
|
// Capacity ain't no thang for streaming, so we don't check it.
|
||||||
|
@ -12,29 +12,29 @@ struct streaming_structural_parser: structural_parser {
|
||||||
advance_char();
|
advance_char();
|
||||||
// Push the root scope (there is always at least one scope)
|
// Push the root scope (there is always at least one scope)
|
||||||
if (start_document(finish_parser)) {
|
if (start_document(finish_parser)) {
|
||||||
return on_error(DEPTH_ERROR);
|
return parser.error = DEPTH_ERROR;
|
||||||
}
|
}
|
||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
// override to add streaming
|
// override to add streaming
|
||||||
WARN_UNUSED really_inline error_code finish() {
|
WARN_UNUSED really_inline error_code finish() {
|
||||||
if ( structurals.past_end(doc_parser.n_structural_indexes) ) {
|
if ( structurals.past_end(parser.n_structural_indexes) ) {
|
||||||
log_error("IMPOSSIBLE: past the end of the JSON!");
|
log_error("IMPOSSIBLE: past the end of the JSON!");
|
||||||
return on_error(TAPE_ERROR);
|
return parser.error = TAPE_ERROR;
|
||||||
}
|
}
|
||||||
end_document();
|
end_document();
|
||||||
if (depth != 0) {
|
if (depth != 0) {
|
||||||
log_error("Unclosed objects or arrays!");
|
log_error("Unclosed objects or arrays!");
|
||||||
return on_error(TAPE_ERROR);
|
return parser.error = TAPE_ERROR;
|
||||||
}
|
}
|
||||||
if (doc_parser.containing_scope[depth].tape_index != 0) {
|
if (parser.containing_scope[depth].tape_index != 0) {
|
||||||
log_error("IMPOSSIBLE: root scope tape index did not start at 0!");
|
log_error("IMPOSSIBLE: root scope tape index did not start at 0!");
|
||||||
return on_error(TAPE_ERROR);
|
return parser.error = TAPE_ERROR;
|
||||||
}
|
}
|
||||||
bool finished = structurals.at_end(doc_parser.n_structural_indexes);
|
bool finished = structurals.at_end(parser.n_structural_indexes);
|
||||||
if (!finished) { log_value("(and has more)"); }
|
if (!finished) { log_value("(and has more)"); }
|
||||||
return on_success(finished ? SUCCESS : SUCCESS_AND_HAS_MORE);
|
return finished ? SUCCESS : SUCCESS_AND_HAS_MORE;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -44,10 +44,13 @@ struct streaming_structural_parser: structural_parser {
|
||||||
* The JSON is parsed to a tape, see the accompanying tape.md file
|
* The JSON is parsed to a tape, see the accompanying tape.md file
|
||||||
* for documentation.
|
* for documentation.
|
||||||
***********/
|
***********/
|
||||||
WARN_UNUSED error_code implementation::stage2(const uint8_t *buf, size_t len, parser &doc_parser, size_t &next_json) const noexcept {
|
WARN_UNUSED error_code dom_parser_implementation::stage2(const uint8_t *_buf, size_t _len, dom::document &_doc, size_t &next_json) noexcept {
|
||||||
|
this->buf = _buf;
|
||||||
|
this->len = _len;
|
||||||
|
this->doc = &_doc;
|
||||||
static constexpr stage2::unified_machine_addresses addresses = INIT_ADDRESSES();
|
static constexpr stage2::unified_machine_addresses addresses = INIT_ADDRESSES();
|
||||||
stage2::streaming_structural_parser parser(buf, len, doc_parser, uint32_t(next_json));
|
stage2::streaming_structural_parser parser(*this, uint32_t(next_json));
|
||||||
error_code result = parser.start(len, addresses.finish);
|
error_code result = parser.start(addresses.finish);
|
||||||
if (result) { return result; }
|
if (result) { return result; }
|
||||||
//
|
//
|
||||||
// Read first value
|
// Read first value
|
||||||
|
@ -123,7 +126,7 @@ object_continue:
|
||||||
}
|
}
|
||||||
|
|
||||||
scope_end:
|
scope_end:
|
||||||
CONTINUE( parser.doc_parser.ret_address[parser.depth] );
|
CONTINUE( parser.parser.ret_address[parser.depth] );
|
||||||
|
|
||||||
//
|
//
|
||||||
// Array parser parsers
|
// Array parser parsers
|
||||||
|
|
|
@ -5,8 +5,6 @@
|
||||||
|
|
||||||
namespace stage2 {
|
namespace stage2 {
|
||||||
|
|
||||||
using internal::ret_address;
|
|
||||||
|
|
||||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
||||||
#define INIT_ADDRESSES() { &&array_begin, &&array_continue, &&error, &&finish, &&object_begin, &&object_continue }
|
#define INIT_ADDRESSES() { &&array_begin, &&array_continue, &&error, &&finish, &&object_begin, &&object_continue }
|
||||||
#define GOTO(address) { goto *(address); }
|
#define GOTO(address) { goto *(address); }
|
||||||
|
@ -36,76 +34,74 @@ using internal::ret_address;
|
||||||
#endif // SIMDJSON_USE_COMPUTED_GOTO
|
#endif // SIMDJSON_USE_COMPUTED_GOTO
|
||||||
|
|
||||||
struct unified_machine_addresses {
|
struct unified_machine_addresses {
|
||||||
ret_address array_begin;
|
ret_address_t array_begin;
|
||||||
ret_address array_continue;
|
ret_address_t array_continue;
|
||||||
ret_address error;
|
ret_address_t error;
|
||||||
ret_address finish;
|
ret_address_t finish;
|
||||||
ret_address object_begin;
|
ret_address_t object_begin;
|
||||||
ret_address object_continue;
|
ret_address_t object_continue;
|
||||||
};
|
};
|
||||||
|
|
||||||
#undef FAIL_IF
|
#undef FAIL_IF
|
||||||
#define FAIL_IF(EXPR) { if (EXPR) { return addresses.error; } }
|
#define FAIL_IF(EXPR) { if (EXPR) { return addresses.error; } }
|
||||||
|
|
||||||
struct number_writer {
|
struct number_writer {
|
||||||
parser &doc_parser;
|
dom_parser_implementation &parser;
|
||||||
|
|
||||||
really_inline void write_s64(int64_t value) noexcept {
|
really_inline void write_s64(int64_t value) noexcept {
|
||||||
append_tape(0, internal::tape_type::INT64);
|
append_tape(0, internal::tape_type::INT64);
|
||||||
std::memcpy(&doc_parser.doc.tape[doc_parser.current_loc], &value, sizeof(value));
|
std::memcpy(&parser.doc->tape[parser.current_loc], &value, sizeof(value));
|
||||||
++doc_parser.current_loc;
|
++parser.current_loc;
|
||||||
}
|
}
|
||||||
really_inline void write_u64(uint64_t value) noexcept {
|
really_inline void write_u64(uint64_t value) noexcept {
|
||||||
append_tape(0, internal::tape_type::UINT64);
|
append_tape(0, internal::tape_type::UINT64);
|
||||||
doc_parser.doc.tape[doc_parser.current_loc++] = value;
|
parser.doc->tape[parser.current_loc++] = value;
|
||||||
}
|
}
|
||||||
really_inline void write_double(double value) noexcept {
|
really_inline void write_double(double value) noexcept {
|
||||||
append_tape(0, internal::tape_type::DOUBLE);
|
append_tape(0, internal::tape_type::DOUBLE);
|
||||||
static_assert(sizeof(value) == sizeof(doc_parser.doc.tape[doc_parser.current_loc]), "mismatch size");
|
static_assert(sizeof(value) == sizeof(parser.doc->tape[parser.current_loc]), "mismatch size");
|
||||||
memcpy(&doc_parser.doc.tape[doc_parser.current_loc++], &value, sizeof(double));
|
memcpy(&parser.doc->tape[parser.current_loc++], &value, sizeof(double));
|
||||||
// doc.tape[doc.current_loc++] = *((uint64_t *)&d);
|
// doc->tape[doc->current_loc++] = *((uint64_t *)&d);
|
||||||
}
|
}
|
||||||
really_inline void append_tape(uint64_t val, internal::tape_type t) noexcept {
|
really_inline void append_tape(uint64_t val, internal::tape_type t) noexcept {
|
||||||
doc_parser.doc.tape[doc_parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
|
parser.doc->tape[parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
|
||||||
}
|
}
|
||||||
}; // struct number_writer
|
}; // struct number_writer
|
||||||
|
|
||||||
struct structural_parser {
|
struct structural_parser {
|
||||||
structural_iterator structurals;
|
structural_iterator structurals;
|
||||||
parser &doc_parser;
|
dom_parser_implementation &parser;
|
||||||
/** Next write location in the string buf for stage 2 parsing */
|
/** Next write location in the string buf for stage 2 parsing */
|
||||||
uint8_t *current_string_buf_loc{};
|
uint8_t *current_string_buf_loc{};
|
||||||
uint32_t depth;
|
uint32_t depth;
|
||||||
|
|
||||||
really_inline structural_parser(
|
really_inline structural_parser(
|
||||||
const uint8_t *buf,
|
dom_parser_implementation &_parser,
|
||||||
size_t len,
|
|
||||||
parser &_doc_parser,
|
|
||||||
uint32_t next_structural = 0
|
uint32_t next_structural = 0
|
||||||
) : structurals(buf, len, _doc_parser.structural_indexes.get(), next_structural), doc_parser{_doc_parser}, depth{0} {}
|
) : structurals(_parser.buf, _parser.len, _parser.structural_indexes.get(), next_structural), parser{_parser}, depth{0} {}
|
||||||
|
|
||||||
WARN_UNUSED really_inline bool start_scope(ret_address continue_state) {
|
WARN_UNUSED really_inline bool start_scope(ret_address_t continue_state) {
|
||||||
doc_parser.containing_scope[depth].tape_index = doc_parser.current_loc;
|
parser.containing_scope[depth].tape_index = parser.current_loc;
|
||||||
doc_parser.containing_scope[depth].count = 0;
|
parser.containing_scope[depth].count = 0;
|
||||||
doc_parser.current_loc++; // We don't actually *write* the start element until the end.
|
parser.current_loc++; // We don't actually *write* the start element until the end.
|
||||||
doc_parser.ret_address[depth] = continue_state;
|
parser.ret_address[depth] = continue_state;
|
||||||
depth++;
|
depth++;
|
||||||
bool exceeded_max_depth = depth >= doc_parser.max_depth();
|
bool exceeded_max_depth = depth >= parser.max_depth();
|
||||||
if (exceeded_max_depth) { log_error("Exceeded max depth!"); }
|
if (exceeded_max_depth) { log_error("Exceeded max depth!"); }
|
||||||
return exceeded_max_depth;
|
return exceeded_max_depth;
|
||||||
}
|
}
|
||||||
|
|
||||||
WARN_UNUSED really_inline bool start_document(ret_address continue_state) {
|
WARN_UNUSED really_inline bool start_document(ret_address_t continue_state) {
|
||||||
log_start_value("document");
|
log_start_value("document");
|
||||||
return start_scope(continue_state);
|
return start_scope(continue_state);
|
||||||
}
|
}
|
||||||
|
|
||||||
WARN_UNUSED really_inline bool start_object(ret_address continue_state) {
|
WARN_UNUSED really_inline bool start_object(ret_address_t continue_state) {
|
||||||
log_start_value("object");
|
log_start_value("object");
|
||||||
return start_scope(continue_state);
|
return start_scope(continue_state);
|
||||||
}
|
}
|
||||||
|
|
||||||
WARN_UNUSED really_inline bool start_array(ret_address continue_state) {
|
WARN_UNUSED really_inline bool start_array(ret_address_t continue_state) {
|
||||||
log_start_value("array");
|
log_start_value("array");
|
||||||
return start_scope(continue_state);
|
return start_scope(continue_state);
|
||||||
}
|
}
|
||||||
|
@ -113,16 +109,16 @@ struct structural_parser {
|
||||||
// this function is responsible for annotating the start of the scope
|
// this function is responsible for annotating the start of the scope
|
||||||
really_inline void end_scope(internal::tape_type start, internal::tape_type end) noexcept {
|
really_inline void end_scope(internal::tape_type start, internal::tape_type end) noexcept {
|
||||||
depth--;
|
depth--;
|
||||||
// write our doc.tape location to the header scope
|
// write our doc->tape location to the header scope
|
||||||
// The root scope gets written *at* the previous location.
|
// The root scope gets written *at* the previous location.
|
||||||
append_tape(doc_parser.containing_scope[depth].tape_index, end);
|
append_tape(parser.containing_scope[depth].tape_index, end);
|
||||||
// count can overflow if it exceeds 24 bits... so we saturate
|
// count can overflow if it exceeds 24 bits... so we saturate
|
||||||
// the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff).
|
// the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff).
|
||||||
const uint32_t start_tape_index = doc_parser.containing_scope[depth].tape_index;
|
const uint32_t start_tape_index = parser.containing_scope[depth].tape_index;
|
||||||
const uint32_t count = doc_parser.containing_scope[depth].count;
|
const uint32_t count = parser.containing_scope[depth].count;
|
||||||
const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count;
|
const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count;
|
||||||
// This is a load and an OR. It would be possible to just write once at doc.tape[d.tape_index]
|
// This is a load and an OR. It would be possible to just write once at doc->tape[d.tape_index]
|
||||||
write_tape(start_tape_index, doc_parser.current_loc | (uint64_t(cntsat) << 32), start);
|
write_tape(start_tape_index, parser.current_loc | (uint64_t(cntsat) << 32), start);
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline void end_object() {
|
really_inline void end_object() {
|
||||||
|
@ -139,11 +135,11 @@ struct structural_parser {
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline void append_tape(uint64_t val, internal::tape_type t) noexcept {
|
really_inline void append_tape(uint64_t val, internal::tape_type t) noexcept {
|
||||||
doc_parser.doc.tape[doc_parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
|
parser.doc->tape[parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline void write_tape(uint32_t loc, uint64_t val, internal::tape_type t) noexcept {
|
really_inline void write_tape(uint32_t loc, uint64_t val, internal::tape_type t) noexcept {
|
||||||
doc_parser.doc.tape[loc] = val | ((uint64_t(char(t))) << 56);
|
parser.doc->tape[loc] = val | ((uint64_t(char(t))) << 56);
|
||||||
}
|
}
|
||||||
|
|
||||||
// increment_count increments the count of keys in an object or values in an array.
|
// increment_count increments the count of keys in an object or values in an array.
|
||||||
|
@ -151,12 +147,12 @@ struct structural_parser {
|
||||||
// must be increment in the preceding depth (depth-1) where the array or
|
// must be increment in the preceding depth (depth-1) where the array or
|
||||||
// the object resides.
|
// the object resides.
|
||||||
really_inline void increment_count() {
|
really_inline void increment_count() {
|
||||||
doc_parser.containing_scope[depth - 1].count++; // we have a key value pair in the object at parser.depth - 1
|
parser.containing_scope[depth - 1].count++; // we have a key value pair in the object at parser.depth - 1
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline uint8_t *on_start_string() noexcept {
|
really_inline uint8_t *on_start_string() noexcept {
|
||||||
// we advance the point, accounting for the fact that we have a NULL termination
|
// we advance the point, accounting for the fact that we have a NULL termination
|
||||||
append_tape(current_string_buf_loc - doc_parser.doc.string_buf.get(), internal::tape_type::STRING);
|
append_tape(current_string_buf_loc - parser.doc->string_buf.get(), internal::tape_type::STRING);
|
||||||
return current_string_buf_loc + sizeof(uint32_t);
|
return current_string_buf_loc + sizeof(uint32_t);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -186,7 +182,7 @@ struct structural_parser {
|
||||||
|
|
||||||
WARN_UNUSED really_inline bool parse_number(const uint8_t *src, bool found_minus) {
|
WARN_UNUSED really_inline bool parse_number(const uint8_t *src, bool found_minus) {
|
||||||
log_value("number");
|
log_value("number");
|
||||||
number_writer writer{doc_parser};
|
number_writer writer{parser};
|
||||||
bool succeeded = numberparsing::parse_number(src, found_minus, writer);
|
bool succeeded = numberparsing::parse_number(src, found_minus, writer);
|
||||||
if (!succeeded) { log_error("Invalid number"); }
|
if (!succeeded) { log_error("Invalid number"); }
|
||||||
return !succeeded;
|
return !succeeded;
|
||||||
|
@ -243,7 +239,7 @@ struct structural_parser {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
WARN_UNUSED really_inline ret_address parse_value(const unified_machine_addresses &addresses, ret_address continue_state) {
|
WARN_UNUSED really_inline ret_address_t parse_value(const unified_machine_addresses &addresses, ret_address_t continue_state) {
|
||||||
switch (structurals.current_char()) {
|
switch (structurals.current_char()) {
|
||||||
case '"':
|
case '"':
|
||||||
FAIL_IF( parse_string() );
|
FAIL_IF( parse_string() );
|
||||||
|
@ -272,37 +268,27 @@ struct structural_parser {
|
||||||
|
|
||||||
WARN_UNUSED really_inline error_code finish() {
|
WARN_UNUSED really_inline error_code finish() {
|
||||||
// the string might not be NULL terminated.
|
// the string might not be NULL terminated.
|
||||||
if ( !structurals.at_end(doc_parser.n_structural_indexes) ) {
|
if ( !structurals.at_end(parser.n_structural_indexes) ) {
|
||||||
log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!");
|
log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!");
|
||||||
return on_error(TAPE_ERROR);
|
return parser.error = TAPE_ERROR;
|
||||||
}
|
}
|
||||||
end_document();
|
end_document();
|
||||||
if (depth != 0) {
|
if (depth != 0) {
|
||||||
log_error("Unclosed objects or arrays!");
|
log_error("Unclosed objects or arrays!");
|
||||||
return on_error(TAPE_ERROR);
|
return parser.error = TAPE_ERROR;
|
||||||
}
|
}
|
||||||
if (doc_parser.containing_scope[depth].tape_index != 0) {
|
if (parser.containing_scope[depth].tape_index != 0) {
|
||||||
log_error("IMPOSSIBLE: root scope tape index did not start at 0!");
|
log_error("IMPOSSIBLE: root scope tape index did not start at 0!");
|
||||||
return on_error(TAPE_ERROR);
|
return parser.error = TAPE_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
return on_success(SUCCESS);
|
return SUCCESS;
|
||||||
}
|
|
||||||
|
|
||||||
really_inline error_code on_error(error_code new_error_code) noexcept {
|
|
||||||
doc_parser.error = new_error_code;
|
|
||||||
return new_error_code;
|
|
||||||
}
|
|
||||||
really_inline error_code on_success(error_code success_code) noexcept {
|
|
||||||
doc_parser.error = success_code;
|
|
||||||
doc_parser.valid = true;
|
|
||||||
return success_code;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
WARN_UNUSED really_inline error_code error() {
|
WARN_UNUSED really_inline error_code error() {
|
||||||
/* We do not need the next line because this is done by doc_parser.init_stage2(),
|
/* We do not need the next line because this is done by parser.init_stage2(),
|
||||||
* pessimistically.
|
* pessimistically.
|
||||||
* doc_parser.is_valid = false;
|
* parser.is_valid = false;
|
||||||
* At this point in the code, we have all the time in the world.
|
* At this point in the code, we have all the time in the world.
|
||||||
* Note that we know exactly where we are in the document so we could,
|
* Note that we know exactly where we are in the document so we could,
|
||||||
* without any overhead on the processing code, report a specific
|
* without any overhead on the processing code, report a specific
|
||||||
|
@ -310,12 +296,12 @@ struct structural_parser {
|
||||||
* We could even trigger special code paths to assess what happened
|
* We could even trigger special code paths to assess what happened
|
||||||
* carefully,
|
* carefully,
|
||||||
* all without any added cost. */
|
* all without any added cost. */
|
||||||
if (depth >= doc_parser.max_depth()) {
|
if (depth >= parser.max_depth()) {
|
||||||
return on_error(DEPTH_ERROR);
|
return parser.error = DEPTH_ERROR;
|
||||||
}
|
}
|
||||||
switch (structurals.current_char()) {
|
switch (structurals.current_char()) {
|
||||||
case '"':
|
case '"':
|
||||||
return on_error(STRING_ERROR);
|
return parser.error = STRING_ERROR;
|
||||||
case '0':
|
case '0':
|
||||||
case '1':
|
case '1':
|
||||||
case '2':
|
case '2':
|
||||||
|
@ -327,36 +313,35 @@ struct structural_parser {
|
||||||
case '8':
|
case '8':
|
||||||
case '9':
|
case '9':
|
||||||
case '-':
|
case '-':
|
||||||
return on_error(NUMBER_ERROR);
|
return parser.error = NUMBER_ERROR;
|
||||||
case 't':
|
case 't':
|
||||||
return on_error(T_ATOM_ERROR);
|
return parser.error = T_ATOM_ERROR;
|
||||||
case 'n':
|
case 'n':
|
||||||
return on_error(N_ATOM_ERROR);
|
return parser.error = N_ATOM_ERROR;
|
||||||
case 'f':
|
case 'f':
|
||||||
return on_error(F_ATOM_ERROR);
|
return parser.error = F_ATOM_ERROR;
|
||||||
default:
|
default:
|
||||||
return on_error(TAPE_ERROR);
|
return parser.error = TAPE_ERROR;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline void init() {
|
really_inline void init() {
|
||||||
current_string_buf_loc = doc_parser.doc.string_buf.get();
|
current_string_buf_loc = parser.doc->string_buf.get();
|
||||||
doc_parser.current_loc = 0;
|
parser.current_loc = 0;
|
||||||
doc_parser.valid = false;
|
parser.error = UNINITIALIZED;
|
||||||
doc_parser.error = UNINITIALIZED;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
WARN_UNUSED really_inline error_code start(size_t len, ret_address finish_state) {
|
WARN_UNUSED really_inline error_code start(size_t len, ret_address_t finish_state) {
|
||||||
log_start();
|
log_start();
|
||||||
init(); // sets is_valid to false
|
init(); // sets is_valid to false
|
||||||
if (len > doc_parser.capacity()) {
|
if (len > parser.capacity()) {
|
||||||
return CAPACITY;
|
return parser.error = CAPACITY;
|
||||||
}
|
}
|
||||||
// Advance to the first character as soon as possible
|
// Advance to the first character as soon as possible
|
||||||
structurals.advance_char();
|
structurals.advance_char();
|
||||||
// Push the root scope (there is always at least one scope)
|
// Push the root scope (there is always at least one scope)
|
||||||
if (start_document(finish_state)) {
|
if (start_document(finish_state)) {
|
||||||
return on_error(DEPTH_ERROR);
|
return parser.error = DEPTH_ERROR;
|
||||||
}
|
}
|
||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -398,9 +383,10 @@ struct structural_parser {
|
||||||
* The JSON is parsed to a tape, see the accompanying tape.md file
|
* The JSON is parsed to a tape, see the accompanying tape.md file
|
||||||
* for documentation.
|
* for documentation.
|
||||||
***********/
|
***********/
|
||||||
WARN_UNUSED error_code implementation::stage2(const uint8_t *buf, size_t len, parser &doc_parser) const noexcept {
|
WARN_UNUSED error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept {
|
||||||
|
this->doc = &_doc;
|
||||||
static constexpr stage2::unified_machine_addresses addresses = INIT_ADDRESSES();
|
static constexpr stage2::unified_machine_addresses addresses = INIT_ADDRESSES();
|
||||||
stage2::structural_parser parser(buf, len, doc_parser);
|
stage2::structural_parser parser(*this);
|
||||||
error_code result = parser.start(len, addresses.finish);
|
error_code result = parser.start(len, addresses.finish);
|
||||||
if (result) { return result; }
|
if (result) { return result; }
|
||||||
|
|
||||||
|
@ -479,7 +465,7 @@ object_continue:
|
||||||
}
|
}
|
||||||
|
|
||||||
scope_end:
|
scope_end:
|
||||||
CONTINUE( parser.doc_parser.ret_address[parser.depth] );
|
CONTINUE( parser.parser.ret_address[parser.depth] );
|
||||||
|
|
||||||
//
|
//
|
||||||
// Array parser states
|
// Array parser states
|
||||||
|
@ -516,11 +502,3 @@ finish:
|
||||||
error:
|
error:
|
||||||
return parser.error();
|
return parser.error();
|
||||||
}
|
}
|
||||||
|
|
||||||
WARN_UNUSED error_code implementation::parse(const uint8_t *buf, size_t len, parser &doc_parser) const noexcept {
|
|
||||||
error_code code = stage1(buf, len, doc_parser, false);
|
|
||||||
if (!code) {
|
|
||||||
code = stage2(buf, len, doc_parser);
|
|
||||||
}
|
|
||||||
return code;
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,9 +1,13 @@
|
||||||
#include "simdjson.h"
|
#include "simdjson.h"
|
||||||
|
#include "haswell/implementation.h"
|
||||||
|
#include "haswell/dom_parser_implementation.h"
|
||||||
|
|
||||||
|
//
|
||||||
|
// Stage 1
|
||||||
|
//
|
||||||
#include "haswell/bitmask.h"
|
#include "haswell/bitmask.h"
|
||||||
#include "haswell/simd.h"
|
#include "haswell/simd.h"
|
||||||
#include "haswell/bitmanipulation.h"
|
#include "haswell/bitmanipulation.h"
|
||||||
#include "haswell/implementation.h"
|
|
||||||
|
|
||||||
TARGET_HASWELL
|
TARGET_HASWELL
|
||||||
namespace simdjson {
|
namespace simdjson {
|
||||||
|
@ -68,11 +72,38 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui
|
||||||
|
|
||||||
#include "generic/stage1/utf8_lookup2_algorithm.h"
|
#include "generic/stage1/utf8_lookup2_algorithm.h"
|
||||||
#include "generic/stage1/json_structural_indexer.h"
|
#include "generic/stage1/json_structural_indexer.h"
|
||||||
WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept {
|
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
|
||||||
return haswell::stage1::json_structural_indexer::index<128>(buf, len, parser, streaming);
|
this->buf = _buf;
|
||||||
|
this->len = _len;
|
||||||
|
return haswell::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace haswell
|
||||||
|
} // namespace simdjson
|
||||||
|
UNTARGET_REGION
|
||||||
|
|
||||||
|
//
|
||||||
|
// Stage 2
|
||||||
|
//
|
||||||
|
#include "haswell/stringparsing.h"
|
||||||
|
#include "haswell/numberparsing.h"
|
||||||
|
|
||||||
|
TARGET_HASWELL
|
||||||
|
namespace simdjson {
|
||||||
|
namespace haswell {
|
||||||
|
|
||||||
|
#include "generic/stage2/logger.h"
|
||||||
|
#include "generic/stage2/atomparsing.h"
|
||||||
|
#include "generic/stage2/structural_iterator.h"
|
||||||
|
#include "generic/stage2/structural_parser.h"
|
||||||
|
#include "generic/stage2/streaming_structural_parser.h"
|
||||||
|
|
||||||
|
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
|
||||||
|
error_code err = stage1(_buf, _len, false);
|
||||||
|
if (err) { return err; }
|
||||||
|
return stage2(_doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace haswell
|
} // namespace haswell
|
||||||
|
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
UNTARGET_REGION
|
UNTARGET_REGION
|
|
@ -0,0 +1,15 @@
|
||||||
|
#ifndef SIMDJSON_HASWELL_DOM_PARSER_IMPLEMENTATION_H
|
||||||
|
#define SIMDJSON_HASWELL_DOM_PARSER_IMPLEMENTATION_H
|
||||||
|
|
||||||
|
#include "simdjson.h"
|
||||||
|
#include "isadetection.h"
|
||||||
|
|
||||||
|
namespace simdjson {
|
||||||
|
namespace haswell {
|
||||||
|
|
||||||
|
#include "generic/dom_parser_implementation.h"
|
||||||
|
|
||||||
|
} // namespace haswell
|
||||||
|
} // namespace simdjson
|
||||||
|
|
||||||
|
#endif // SIMDJSON_HASWELL_DOM_PARSER_IMPLEMENTATION_H
|
|
@ -0,0 +1,25 @@
|
||||||
|
#include "simdjson.h"
|
||||||
|
#include "haswell/implementation.h"
|
||||||
|
#include "haswell/dom_parser_implementation.h"
|
||||||
|
|
||||||
|
TARGET_HASWELL
|
||||||
|
|
||||||
|
namespace simdjson {
|
||||||
|
namespace haswell {
|
||||||
|
|
||||||
|
WARN_UNUSED error_code implementation::create_dom_parser_implementation(
|
||||||
|
size_t capacity,
|
||||||
|
size_t max_depth,
|
||||||
|
std::unique_ptr<internal::dom_parser_implementation>& dst
|
||||||
|
) const noexcept {
|
||||||
|
dst.reset( new (std::nothrow) dom_parser_implementation() );
|
||||||
|
if (!dst) { return MEMALLOC; }
|
||||||
|
dst->set_capacity(capacity);
|
||||||
|
dst->set_max_depth(max_depth);
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace haswell
|
||||||
|
} // namespace simdjson
|
||||||
|
|
||||||
|
UNTARGET_REGION
|
|
@ -7,8 +7,6 @@
|
||||||
namespace simdjson {
|
namespace simdjson {
|
||||||
namespace haswell {
|
namespace haswell {
|
||||||
|
|
||||||
using namespace simdjson::dom;
|
|
||||||
|
|
||||||
class implementation final : public simdjson::implementation {
|
class implementation final : public simdjson::implementation {
|
||||||
public:
|
public:
|
||||||
really_inline implementation() : simdjson::implementation(
|
really_inline implementation() : simdjson::implementation(
|
||||||
|
@ -16,11 +14,12 @@ public:
|
||||||
"Intel/AMD AVX2",
|
"Intel/AMD AVX2",
|
||||||
instruction_set::AVX2 | instruction_set::PCLMULQDQ | instruction_set::BMI1 | instruction_set::BMI2
|
instruction_set::AVX2 | instruction_set::PCLMULQDQ | instruction_set::BMI1 | instruction_set::BMI2
|
||||||
) {}
|
) {}
|
||||||
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, parser &parser) const noexcept final;
|
WARN_UNUSED error_code create_dom_parser_implementation(
|
||||||
|
size_t capacity,
|
||||||
|
size_t max_length,
|
||||||
|
std::unique_ptr<internal::dom_parser_implementation>& dst
|
||||||
|
) const noexcept final;
|
||||||
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
|
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
|
||||||
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept final;
|
|
||||||
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, parser &parser) const noexcept final;
|
|
||||||
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, parser &parser, size_t &next_json) const noexcept final;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace haswell
|
} // namespace haswell
|
||||||
|
|
|
@ -1,18 +0,0 @@
|
||||||
#include "simdjson.h"
|
|
||||||
#include "haswell/implementation.h"
|
|
||||||
#include "haswell/stringparsing.h"
|
|
||||||
#include "haswell/numberparsing.h"
|
|
||||||
|
|
||||||
TARGET_HASWELL
|
|
||||||
namespace simdjson {
|
|
||||||
namespace haswell {
|
|
||||||
|
|
||||||
#include "generic/stage2/logger.h"
|
|
||||||
#include "generic/stage2/atomparsing.h"
|
|
||||||
#include "generic/stage2/structural_iterator.h"
|
|
||||||
#include "generic/stage2/structural_parser.h"
|
|
||||||
#include "generic/stage2/streaming_structural_parser.h"
|
|
||||||
|
|
||||||
} // namespace haswell
|
|
||||||
} // namespace simdjson
|
|
||||||
UNTARGET_REGION
|
|
|
@ -38,21 +38,16 @@ public:
|
||||||
const std::string &name() const noexcept final { return set_best()->name(); }
|
const std::string &name() const noexcept final { return set_best()->name(); }
|
||||||
const std::string &description() const noexcept final { return set_best()->description(); }
|
const std::string &description() const noexcept final { return set_best()->description(); }
|
||||||
uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); }
|
uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); }
|
||||||
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::parser &parser) const noexcept final {
|
WARN_UNUSED error_code create_dom_parser_implementation(
|
||||||
return set_best()->parse(buf, len, parser);
|
size_t capacity,
|
||||||
|
size_t max_length,
|
||||||
|
std::unique_ptr<internal::dom_parser_implementation>& dst
|
||||||
|
) const noexcept final {
|
||||||
|
return set_best()->create_dom_parser_implementation(capacity, max_length, dst);
|
||||||
}
|
}
|
||||||
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final {
|
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final {
|
||||||
return set_best()->minify(buf, len, dst, dst_len);
|
return set_best()->minify(buf, len, dst, dst_len);
|
||||||
}
|
}
|
||||||
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) const noexcept final {
|
|
||||||
return set_best()->stage1(buf, len, parser, streaming);
|
|
||||||
}
|
|
||||||
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser) const noexcept final {
|
|
||||||
return set_best()->stage2(buf, len, parser);
|
|
||||||
}
|
|
||||||
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser, size_t &next_json) const noexcept final {
|
|
||||||
return set_best()->stage2(buf, len, parser, next_json);
|
|
||||||
}
|
|
||||||
|
|
||||||
really_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {}
|
really_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {}
|
||||||
private:
|
private:
|
||||||
|
@ -81,21 +76,16 @@ const std::initializer_list<const implementation *> available_implementation_poi
|
||||||
// So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no support
|
// So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no support
|
||||||
class unsupported_implementation final : public implementation {
|
class unsupported_implementation final : public implementation {
|
||||||
public:
|
public:
|
||||||
WARN_UNUSED error_code parse(const uint8_t *, size_t, dom::parser &) const noexcept final {
|
WARN_UNUSED error_code create_dom_parser_implementation(
|
||||||
|
size_t,
|
||||||
|
size_t,
|
||||||
|
std::unique_ptr<internal::dom_parser_implementation>&
|
||||||
|
) const noexcept final {
|
||||||
return UNSUPPORTED_ARCHITECTURE;
|
return UNSUPPORTED_ARCHITECTURE;
|
||||||
}
|
}
|
||||||
WARN_UNUSED error_code minify(const uint8_t *, size_t, uint8_t *, size_t &) const noexcept final {
|
WARN_UNUSED error_code minify(const uint8_t *, size_t, uint8_t *, size_t &) const noexcept final {
|
||||||
return UNSUPPORTED_ARCHITECTURE;
|
return UNSUPPORTED_ARCHITECTURE;
|
||||||
}
|
}
|
||||||
WARN_UNUSED error_code stage1(const uint8_t *, size_t, dom::parser &, bool) const noexcept final {
|
|
||||||
return UNSUPPORTED_ARCHITECTURE;
|
|
||||||
}
|
|
||||||
WARN_UNUSED error_code stage2(const uint8_t *, size_t, dom::parser &) const noexcept final {
|
|
||||||
return UNSUPPORTED_ARCHITECTURE;
|
|
||||||
}
|
|
||||||
WARN_UNUSED error_code stage2(const uint8_t *, size_t, dom::parser &, size_t &) const noexcept final {
|
|
||||||
return UNSUPPORTED_ARCHITECTURE;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsupported_implementation() : implementation("unsupported", "Unsupported CPU (no detected SIMD instructions)", 0) {}
|
unsupported_implementation() : implementation("unsupported", "Unsupported CPU (no detected SIMD instructions)", 0) {}
|
||||||
};
|
};
|
||||||
|
|
|
@ -13,20 +13,20 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS
|
||||||
#include "simdprune_tables.h"
|
#include "simdprune_tables.h"
|
||||||
|
|
||||||
#if SIMDJSON_IMPLEMENTATION_ARM64
|
#if SIMDJSON_IMPLEMENTATION_ARM64
|
||||||
#include "arm64/stage1.cpp"
|
#include "arm64/implementation.cpp"
|
||||||
#include "arm64/stage2.cpp"
|
#include "arm64/dom_parser_implementation.cpp"
|
||||||
#endif
|
#endif
|
||||||
#if SIMDJSON_IMPLEMENTATION_FALLBACK
|
#if SIMDJSON_IMPLEMENTATION_FALLBACK
|
||||||
#include "fallback/stage1.cpp"
|
#include "fallback/implementation.cpp"
|
||||||
#include "fallback/stage2.cpp"
|
#include "fallback/dom_parser_implementation.cpp"
|
||||||
#endif
|
#endif
|
||||||
#if SIMDJSON_IMPLEMENTATION_HASWELL
|
#if SIMDJSON_IMPLEMENTATION_HASWELL
|
||||||
#include "haswell/stage1.cpp"
|
#include "haswell/implementation.cpp"
|
||||||
#include "haswell/stage2.cpp"
|
#include "haswell/dom_parser_implementation.cpp"
|
||||||
#endif
|
#endif
|
||||||
#if SIMDJSON_IMPLEMENTATION_WESTMERE
|
#if SIMDJSON_IMPLEMENTATION_WESTMERE
|
||||||
#include "westmere/stage1.cpp"
|
#include "westmere/implementation.cpp"
|
||||||
#include "westmere/stage2.cpp"
|
#include "westmere/dom_parser_implementation.cpp"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
SIMDJSON_POP_DISABLE_WARNINGS
|
SIMDJSON_POP_DISABLE_WARNINGS
|
||||||
|
|
|
@ -1,4 +1,10 @@
|
||||||
#include "simdjson.h"
|
#include "simdjson.h"
|
||||||
|
#include "westmere/implementation.h"
|
||||||
|
#include "westmere/dom_parser_implementation.h"
|
||||||
|
|
||||||
|
//
|
||||||
|
// Stage 1
|
||||||
|
//
|
||||||
#include "westmere/bitmask.h"
|
#include "westmere/bitmask.h"
|
||||||
#include "westmere/simd.h"
|
#include "westmere/simd.h"
|
||||||
#include "westmere/bitmanipulation.h"
|
#include "westmere/bitmanipulation.h"
|
||||||
|
@ -67,11 +73,38 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui
|
||||||
|
|
||||||
#include "generic/stage1/utf8_lookup2_algorithm.h"
|
#include "generic/stage1/utf8_lookup2_algorithm.h"
|
||||||
#include "generic/stage1/json_structural_indexer.h"
|
#include "generic/stage1/json_structural_indexer.h"
|
||||||
WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept {
|
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
|
||||||
return westmere::stage1::json_structural_indexer::index<64>(buf, len, parser, streaming);
|
this->buf = _buf;
|
||||||
|
this->len = _len;
|
||||||
|
return westmere::stage1::json_structural_indexer::index<64>(_buf, _len, *this, streaming);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace westmere
|
||||||
|
} // namespace simdjson
|
||||||
|
UNTARGET_REGION
|
||||||
|
|
||||||
|
//
|
||||||
|
// Stage 2
|
||||||
|
//
|
||||||
|
#include "westmere/stringparsing.h"
|
||||||
|
#include "westmere/numberparsing.h"
|
||||||
|
|
||||||
|
TARGET_WESTMERE
|
||||||
|
namespace simdjson {
|
||||||
|
namespace westmere {
|
||||||
|
|
||||||
|
#include "generic/stage2/logger.h"
|
||||||
|
#include "generic/stage2/atomparsing.h"
|
||||||
|
#include "generic/stage2/structural_iterator.h"
|
||||||
|
#include "generic/stage2/structural_parser.h"
|
||||||
|
#include "generic/stage2/streaming_structural_parser.h"
|
||||||
|
|
||||||
|
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
|
||||||
|
error_code err = stage1(_buf, _len, false);
|
||||||
|
if (err) { return err; }
|
||||||
|
return stage2(_doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace westmere
|
} // namespace westmere
|
||||||
|
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
UNTARGET_REGION
|
UNTARGET_REGION
|
|
@ -0,0 +1,15 @@
|
||||||
|
#ifndef SIMDJSON_WESTMERE_DOM_PARSER_IMPLEMENTATION_H
|
||||||
|
#define SIMDJSON_WESTMERE_DOM_PARSER_IMPLEMENTATION_H
|
||||||
|
|
||||||
|
#include "simdjson.h"
|
||||||
|
#include "isadetection.h"
|
||||||
|
|
||||||
|
namespace simdjson {
|
||||||
|
namespace westmere {
|
||||||
|
|
||||||
|
#include "generic/dom_parser_implementation.h"
|
||||||
|
|
||||||
|
} // namespace westmere
|
||||||
|
} // namespace simdjson
|
||||||
|
|
||||||
|
#endif // SIMDJSON_WESTMERE_DOM_PARSER_IMPLEMENTATION_H
|
|
@ -0,0 +1,25 @@
|
||||||
|
#include "simdjson.h"
|
||||||
|
#include "westmere/implementation.h"
|
||||||
|
#include "westmere/dom_parser_implementation.h"
|
||||||
|
|
||||||
|
TARGET_HASWELL
|
||||||
|
|
||||||
|
namespace simdjson {
|
||||||
|
namespace westmere {
|
||||||
|
|
||||||
|
WARN_UNUSED error_code implementation::create_dom_parser_implementation(
|
||||||
|
size_t capacity,
|
||||||
|
size_t max_depth,
|
||||||
|
std::unique_ptr<internal::dom_parser_implementation>& dst
|
||||||
|
) const noexcept {
|
||||||
|
dst.reset( new (std::nothrow) dom_parser_implementation() );
|
||||||
|
if (!dst) { return MEMALLOC; }
|
||||||
|
dst->set_capacity(capacity);
|
||||||
|
dst->set_max_depth(max_depth);
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace westmere
|
||||||
|
} // namespace simdjson
|
||||||
|
|
||||||
|
UNTARGET_REGION
|
|
@ -13,11 +13,12 @@ using namespace simdjson::dom;
|
||||||
class implementation final : public simdjson::implementation {
|
class implementation final : public simdjson::implementation {
|
||||||
public:
|
public:
|
||||||
really_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", instruction_set::SSE42 | instruction_set::PCLMULQDQ) {}
|
really_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", instruction_set::SSE42 | instruction_set::PCLMULQDQ) {}
|
||||||
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, parser &parser) const noexcept final;
|
WARN_UNUSED error_code create_dom_parser_implementation(
|
||||||
|
size_t capacity,
|
||||||
|
size_t max_depth, // named as in the out-of-line definition, which passes it to set_max_depth()
|
||||||
|
std::unique_ptr<internal::dom_parser_implementation>& dst
|
||||||
|
) const noexcept final;
|
||||||
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
|
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
|
||||||
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept final;
|
|
||||||
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, parser &parser) const noexcept final;
|
|
||||||
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, parser &parser, size_t &next_json) const noexcept final;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace westmere
|
} // namespace westmere
|
||||||
|
|
|
@ -1,18 +0,0 @@
|
||||||
#include "simdjson.h"
|
|
||||||
#include "westmere/implementation.h"
|
|
||||||
#include "westmere/stringparsing.h"
|
|
||||||
#include "westmere/numberparsing.h"
|
|
||||||
|
|
||||||
TARGET_WESTMERE
|
|
||||||
namespace simdjson {
|
|
||||||
namespace westmere {
|
|
||||||
|
|
||||||
#include "generic/stage2/logger.h"
|
|
||||||
#include "generic/stage2/atomparsing.h"
|
|
||||||
#include "generic/stage2/structural_iterator.h"
|
|
||||||
#include "generic/stage2/structural_parser.h"
|
|
||||||
#include "generic/stage2/streaming_structural_parser.h"
|
|
||||||
|
|
||||||
} // namespace westmere
|
|
||||||
} // namespace simdjson
|
|
||||||
UNTARGET_REGION
|
|
|
@ -176,7 +176,7 @@ stat_t simdjson_compute_stats(const simdjson::padded_string &p) {
|
||||||
s.non_ascii_byte_count = count_nonasciibytes(
|
s.non_ascii_byte_count = count_nonasciibytes(
|
||||||
reinterpret_cast<const uint8_t *>(p.data()), p.size());
|
reinterpret_cast<const uint8_t *>(p.data()), p.size());
|
||||||
s.byte_count = p.size();
|
s.byte_count = p.size();
|
||||||
s.structural_indexes_count = parser.n_structural_indexes;
|
s.structural_indexes_count = parser.implementation->n_structural_indexes;
|
||||||
|
|
||||||
// simdjson::document::iterator iter(doc);
|
// simdjson::document::iterator iter(doc);
|
||||||
recurse(doc, s, 0);
|
recurse(doc, s, 0);
|
||||||
|
|
Loading…
Reference in New Issue