Store all parser state in the implementation
This commit is contained in:
parent
86f8a4a9d2
commit
1aab4752e2
|
@ -84,7 +84,7 @@ struct json_stats {
|
|||
bytes = json.size();
|
||||
blocks = bytes / BYTES_PER_BLOCK;
|
||||
if (bytes % BYTES_PER_BLOCK > 0) { blocks++; } // Account for remainder block
|
||||
structurals = parser.n_structural_indexes-1;
|
||||
structurals = parser.implementation->n_structural_indexes-1;
|
||||
|
||||
// Calculate stats on blocks that will trigger utf-8 if statements / mispredictions
|
||||
bool last_block_has_utf8 = false;
|
||||
|
@ -141,7 +141,7 @@ struct json_stats {
|
|||
for (size_t block=0; block<blocks; block++) {
|
||||
// Count structurals in the block
|
||||
int block_structurals=0;
|
||||
while (structural < parser.n_structural_indexes && parser.structural_indexes[structural] < (block+1)*BYTES_PER_BLOCK) {
|
||||
while (structural < parser.implementation->n_structural_indexes && parser.implementation->structural_indexes[structural] < (block+1)*BYTES_PER_BLOCK) {
|
||||
block_structurals++;
|
||||
structural++;
|
||||
}
|
||||
|
@ -320,7 +320,7 @@ struct benchmarker {
|
|||
|
||||
// Stage 1 (find structurals)
|
||||
collector.start();
|
||||
error = parser.implementation->stage1((const uint8_t *)json.data(), json.size(), parser, false);
|
||||
error = parser.implementation->stage1((const uint8_t *)json.data(), json.size(), false);
|
||||
event_count stage1_count = collector.end();
|
||||
stage1 << stage1_count;
|
||||
if (error) {
|
||||
|
@ -334,7 +334,7 @@ struct benchmarker {
|
|||
} else {
|
||||
event_count stage2_count;
|
||||
collector.start();
|
||||
error = parser.implementation->stage2(parser);
|
||||
error = parser.implementation->stage2(parser.doc);
|
||||
if (error) {
|
||||
exit_error(string("Failed to parse ") + filename + " during stage 2 parsing " + error_message(error));
|
||||
}
|
||||
|
@ -345,7 +345,7 @@ struct benchmarker {
|
|||
// Calculate stats the first time we parse
|
||||
if (stats == NULL) {
|
||||
if (stage1_only) { // we need stage 2 once
|
||||
error = parser.implementation->stage2(parser);
|
||||
error = parser.implementation->stage2(parser.doc);
|
||||
if (error) {
|
||||
printf("Warning: failed to parse during stage 2. Unable to acquire statistics.\n");
|
||||
}
|
||||
|
|
|
@ -106,7 +106,7 @@ stat_t simdjson_compute_stats(const simdjson::padded_string &p) {
|
|||
answer.non_ascii_byte_count = count_nonasciibytes(
|
||||
reinterpret_cast<const uint8_t *>(p.data()), p.size());
|
||||
answer.byte_count = p.size();
|
||||
answer.structural_indexes_count = parser.n_structural_indexes;
|
||||
answer.structural_indexes_count = parser.implementation->n_structural_indexes;
|
||||
simdjson_recurse(answer, doc);
|
||||
return answer;
|
||||
}
|
||||
|
@ -163,7 +163,6 @@ int main(int argc, char *argv[]) {
|
|||
s.true_count, s.false_count, s.byte_count, s.structural_indexes_count);
|
||||
#ifdef __linux__
|
||||
simdjson::dom::parser parser;
|
||||
const simdjson::implementation &stage_parser = *simdjson::active_implementation;
|
||||
simdjson::error_code alloc_error = parser.allocate(p.size());
|
||||
if (alloc_error) {
|
||||
std::cerr << alloc_error << std::endl;
|
||||
|
@ -181,14 +180,14 @@ int main(int argc, char *argv[]) {
|
|||
for (uint32_t i = 0; i < iterations; i++) {
|
||||
unified.start();
|
||||
// The default template is simdjson::architecture::NATIVE.
|
||||
bool isok = (stage_parser.stage1((const uint8_t *)p.data(), p.size(), parser, false) == simdjson::SUCCESS);
|
||||
bool isok = (parser.implementation->stage1((const uint8_t *)p.data(), p.size(), false) == simdjson::SUCCESS);
|
||||
unified.end(results);
|
||||
|
||||
cy1 += results[0];
|
||||
cl1 += results[1];
|
||||
|
||||
unified.start();
|
||||
isok = isok && (stage_parser.stage2((const uint8_t *)p.data(), p.size(), parser) == simdjson::SUCCESS);
|
||||
isok = isok && (parser.implementation->stage2(parser.doc) == simdjson::SUCCESS);
|
||||
unified.end(results);
|
||||
|
||||
cy2 += results[0];
|
||||
|
|
|
@ -15,22 +15,6 @@
|
|||
|
||||
namespace simdjson {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// expectation: sizeof(scope_descriptor) = 64/8.
|
||||
struct scope_descriptor {
|
||||
uint32_t tape_index; // where, on the tape, does the scope ([,{) begins
|
||||
uint32_t count; // how many elements in the scope
|
||||
}; // struct scope_descriptor
|
||||
|
||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
||||
typedef void* ret_address;
|
||||
#else
|
||||
typedef char ret_address;
|
||||
#endif
|
||||
|
||||
} // namespace internal
|
||||
|
||||
namespace dom {
|
||||
|
||||
class document_stream;
|
||||
|
@ -68,14 +52,14 @@ public:
|
|||
*
|
||||
* @param other The parser to take. Its capacity is zeroed.
|
||||
*/
|
||||
parser(parser &&other) = default;
|
||||
really_inline parser(parser &&other) noexcept;
|
||||
parser(const parser &) = delete; ///< @private Disallow copying
|
||||
/**
|
||||
* Take another parser's buffers and state.
|
||||
*
|
||||
* @param other The parser to take. Its capacity is zeroed.
|
||||
*/
|
||||
parser &operator=(parser &&other) = default;
|
||||
really_inline parser &operator=(parser &&other) noexcept;
|
||||
parser &operator=(const parser &) = delete; ///< @private Disallow copying
|
||||
|
||||
/** Deallocate the JSON parser. */
|
||||
|
@ -352,21 +336,6 @@ public:
|
|||
/** @private [for benchmarking access] The implementation to use */
|
||||
std::unique_ptr<internal::dom_parser_implementation> implementation{};
|
||||
|
||||
public:
|
||||
/** @private Next location to write to in the tape */
|
||||
uint32_t current_loc{0};
|
||||
|
||||
/** @private Number of structural indices passed from stage 1 to stage 2 */
|
||||
uint32_t n_structural_indexes{0};
|
||||
/** @private Structural indices passed from stage 1 to stage 2 */
|
||||
std::unique_ptr<uint32_t[]> structural_indexes{};
|
||||
|
||||
/** @private Tape location of each open { or [ */
|
||||
std::unique_ptr<internal::scope_descriptor[]> containing_scope{};
|
||||
|
||||
/** @private Return address of each open { or [ */
|
||||
std::unique_ptr<internal::ret_address[]> ret_address{};
|
||||
|
||||
/** @private Use `if (parser.parse(...).error())` instead */
|
||||
bool valid{false};
|
||||
/** @private Use `parser.parse(...).error()` instead */
|
||||
|
@ -405,20 +374,6 @@ private:
|
|||
*/
|
||||
size_t _max_capacity;
|
||||
|
||||
/**
|
||||
* The maximum document length this parser supports.
|
||||
*
|
||||
* Buffers are large enough to handle any document up to this length.
|
||||
*/
|
||||
size_t _capacity{0};
|
||||
|
||||
/**
|
||||
* The maximum depth (number of nested objects and arrays) supported by this parser.
|
||||
*
|
||||
* Defaults to DEFAULT_MAX_DEPTH.
|
||||
*/
|
||||
size_t _max_depth{0};
|
||||
|
||||
/**
|
||||
* The loaded buffer (reused each time load() is called)
|
||||
*/
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
namespace simdjson {
|
||||
|
||||
namespace dom {
|
||||
class parser;
|
||||
class document;
|
||||
} // namespace dom
|
||||
|
||||
/**
|
||||
|
|
|
@ -34,18 +34,18 @@ namespace internal {
|
|||
* */
|
||||
inline uint32_t find_last_json_buf_idx(const uint8_t *buf, size_t size, const dom::parser &parser) {
|
||||
// this function can be generally useful
|
||||
if (parser.n_structural_indexes == 0)
|
||||
if (parser.implementation->n_structural_indexes == 0)
|
||||
return 0;
|
||||
auto last_i = parser.n_structural_indexes - 1;
|
||||
if (parser.structural_indexes[last_i] == size) {
|
||||
auto last_i = parser.implementation->n_structural_indexes - 1;
|
||||
if (parser.implementation->structural_indexes[last_i] == size) {
|
||||
if (last_i == 0)
|
||||
return 0;
|
||||
last_i = parser.n_structural_indexes - 2;
|
||||
last_i = parser.implementation->n_structural_indexes - 2;
|
||||
}
|
||||
auto arr_cnt = 0;
|
||||
auto obj_cnt = 0;
|
||||
for (auto i = last_i; i > 0; i--) {
|
||||
auto idxb = parser.structural_indexes[i];
|
||||
auto idxb = parser.implementation->structural_indexes[i];
|
||||
switch (buf[idxb]) {
|
||||
case ':':
|
||||
case ',':
|
||||
|
@ -63,7 +63,7 @@ inline uint32_t find_last_json_buf_idx(const uint8_t *buf, size_t size, const do
|
|||
arr_cnt++;
|
||||
break;
|
||||
}
|
||||
auto idxa = parser.structural_indexes[i - 1];
|
||||
auto idxa = parser.implementation->structural_indexes[i - 1];
|
||||
switch (buf[idxa]) {
|
||||
case '{':
|
||||
case '[':
|
||||
|
@ -172,17 +172,17 @@ inline error_code document_stream::json_parse() noexcept {
|
|||
if (_batch_size == 0) {
|
||||
return simdjson::UTF8_ERROR;
|
||||
}
|
||||
auto stage1_is_ok = error_code(parser.implementation->stage1(buf(), _batch_size, parser, true));
|
||||
auto stage1_is_ok = error_code(parser.implementation->stage1(buf(), _batch_size, true));
|
||||
if (stage1_is_ok != simdjson::SUCCESS) {
|
||||
return stage1_is_ok;
|
||||
}
|
||||
uint32_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser);
|
||||
if (last_index == 0) {
|
||||
if (parser.n_structural_indexes == 0) {
|
||||
if (parser.implementation->n_structural_indexes == 0) {
|
||||
return simdjson::EMPTY;
|
||||
}
|
||||
} else {
|
||||
parser.n_structural_indexes = last_index + 1;
|
||||
parser.implementation->n_structural_indexes = last_index + 1;
|
||||
}
|
||||
}
|
||||
// the second thread is running or done.
|
||||
|
@ -191,15 +191,15 @@ inline error_code document_stream::json_parse() noexcept {
|
|||
if (stage1_is_ok_thread != simdjson::SUCCESS) {
|
||||
return stage1_is_ok_thread;
|
||||
}
|
||||
std::swap(parser.structural_indexes, parser_thread.structural_indexes);
|
||||
parser.n_structural_indexes = parser_thread.n_structural_indexes;
|
||||
std::swap(parser.implementation->structural_indexes, parser_thread.implementation->structural_indexes);
|
||||
parser.implementation->n_structural_indexes = parser_thread.implementation->n_structural_indexes;
|
||||
advance(last_json_buffer_loc);
|
||||
n_bytes_parsed += last_json_buffer_loc;
|
||||
}
|
||||
// let us decide whether we will start a new thread
|
||||
if (remaining() - _batch_size > 0) {
|
||||
last_json_buffer_loc =
|
||||
parser.structural_indexes[internal::find_last_json_buf_idx(buf(), _batch_size, parser)];
|
||||
parser.implementation->structural_indexes[internal::find_last_json_buf_idx(buf(), _batch_size, parser)];
|
||||
_batch_size = (std::min)(_batch_size, remaining() - last_json_buffer_loc);
|
||||
if (_batch_size > 0) {
|
||||
_batch_size = internal::trimmed_length_safe_utf8(
|
||||
|
@ -214,22 +214,22 @@ inline error_code document_stream::json_parse() noexcept {
|
|||
// this->stage1_is_ok_thread
|
||||
// there is only one thread that may write to this value
|
||||
stage_1_thread = std::thread([this, b, bs] {
|
||||
this->stage1_is_ok_thread = error_code(parser_thread.implementation->stage1(b, bs, this->parser_thread, true));
|
||||
this->stage1_is_ok_thread = error_code(parser_thread.implementation->stage1(b, bs, true));
|
||||
});
|
||||
}
|
||||
}
|
||||
next_json = 0;
|
||||
load_next_batch = false;
|
||||
} // load_next_batch
|
||||
error_code res = parser.implementation->stage2(buf(), remaining(), parser, next_json);
|
||||
error_code res = parser.implementation->stage2(buf(), remaining(), parser.doc, next_json);
|
||||
if (res == simdjson::SUCCESS_AND_HAS_MORE) {
|
||||
n_parsed_docs++;
|
||||
current_buffer_loc = parser.structural_indexes[next_json];
|
||||
current_buffer_loc = parser.implementation->structural_indexes[next_json];
|
||||
load_next_batch = (current_buffer_loc == last_json_buffer_loc);
|
||||
} else if (res == simdjson::SUCCESS) {
|
||||
n_parsed_docs++;
|
||||
if (remaining() > _batch_size) {
|
||||
current_buffer_loc = parser.structural_indexes[next_json - 1];
|
||||
current_buffer_loc = parser.implementation->structural_indexes[next_json - 1];
|
||||
load_next_batch = true;
|
||||
res = simdjson::SUCCESS_AND_HAS_MORE;
|
||||
}
|
||||
|
@ -249,28 +249,28 @@ inline error_code document_stream::json_parse() noexcept {
|
|||
n_bytes_parsed += current_buffer_loc;
|
||||
_batch_size = (std::min)(_batch_size, remaining());
|
||||
_batch_size = internal::trimmed_length_safe_utf8((const char *)buf(), _batch_size);
|
||||
auto stage1_is_ok = (error_code)parser.implementation->stage1(buf(), _batch_size, parser, true);
|
||||
auto stage1_is_ok = (error_code)parser.implementation->stage1(buf(), _batch_size, true);
|
||||
if (stage1_is_ok != simdjson::SUCCESS) {
|
||||
return stage1_is_ok;
|
||||
}
|
||||
uint32_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser);
|
||||
if (last_index == 0) {
|
||||
if (parser.n_structural_indexes == 0) {
|
||||
if (parser.implementation->n_structural_indexes == 0) {
|
||||
return EMPTY;
|
||||
}
|
||||
} else {
|
||||
parser.n_structural_indexes = last_index + 1;
|
||||
parser.implementation->n_structural_indexes = last_index + 1;
|
||||
}
|
||||
load_next_batch = false;
|
||||
} // load_next_batch
|
||||
error_code res = parser.implementation->stage2(buf(), remaining(), parser, next_json);
|
||||
error_code res = parser.implementation->stage2(buf(), remaining(), parser.doc, next_json);
|
||||
if (likely(res == simdjson::SUCCESS_AND_HAS_MORE)) {
|
||||
n_parsed_docs++;
|
||||
current_buffer_loc = parser.structural_indexes[next_json];
|
||||
current_buffer_loc = parser.implementation->structural_indexes[next_json];
|
||||
} else if (res == simdjson::SUCCESS) {
|
||||
n_parsed_docs++;
|
||||
if (remaining() > _batch_size) {
|
||||
current_buffer_loc = parser.structural_indexes[next_json - 1];
|
||||
current_buffer_loc = parser.implementation->structural_indexes[next_json - 1];
|
||||
next_json = 1;
|
||||
load_next_batch = true;
|
||||
res = simdjson::SUCCESS_AND_HAS_MORE;
|
||||
|
|
|
@ -17,8 +17,11 @@ namespace dom {
|
|||
//
|
||||
really_inline parser::parser(size_t max_capacity) noexcept
|
||||
: _max_capacity{max_capacity},
|
||||
loaded_bytes(nullptr, &aligned_free_char)
|
||||
{}
|
||||
loaded_bytes(nullptr, &aligned_free_char) {
|
||||
}
|
||||
really_inline parser::parser(parser &&other) noexcept = default;
|
||||
really_inline parser &parser::operator=(parser &&other) noexcept = default;
|
||||
|
||||
inline bool parser::is_valid() const noexcept { return valid; }
|
||||
inline int parser::get_error_code() const noexcept { return error; }
|
||||
inline std::string parser::get_error_message() const noexcept { return error_message(error); }
|
||||
|
@ -101,15 +104,12 @@ inline simdjson_result<element> parser::parse(const uint8_t *buf, size_t len, bo
|
|||
memcpy((void *)buf, tmp_buf, len);
|
||||
}
|
||||
|
||||
code = implementation->parse(buf, len, *this);
|
||||
code = implementation->parse(buf, len, doc);
|
||||
if (realloc_if_needed) {
|
||||
aligned_free((void *)buf); // must free before we exit
|
||||
}
|
||||
if (code) { return code; }
|
||||
|
||||
// We're indicating validity via the simdjson_result<element>, so set the parse state back to invalid
|
||||
valid = false;
|
||||
error = UNINITIALIZED;
|
||||
return doc.root();
|
||||
}
|
||||
really_inline simdjson_result<element> parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept {
|
||||
|
@ -136,77 +136,31 @@ inline document_stream parser::parse_many(const padded_string &s, size_t batch_s
|
|||
}
|
||||
|
||||
really_inline size_t parser::capacity() const noexcept {
|
||||
return _capacity;
|
||||
return implementation ? implementation->capacity() : 0;
|
||||
}
|
||||
really_inline size_t parser::max_capacity() const noexcept {
|
||||
return _max_capacity;
|
||||
}
|
||||
really_inline size_t parser::max_depth() const noexcept {
|
||||
return _max_depth;
|
||||
return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH;
|
||||
}
|
||||
|
||||
WARN_UNUSED
|
||||
inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept {
|
||||
//
|
||||
// If capacity has changed, reallocate capacity-based buffers
|
||||
// Reallocate implementation and document if needed
|
||||
//
|
||||
if (_capacity != capacity || _max_depth != max_depth) {
|
||||
error_code err;
|
||||
if (_capacity != capacity) {
|
||||
//
|
||||
// Reallocate the document
|
||||
//
|
||||
err = doc.allocate(capacity);
|
||||
|
||||
//
|
||||
// Initialize stage 1 output
|
||||
//
|
||||
size_t max_structures = ROUNDUP_N(capacity, 64) + 2 + 7;
|
||||
structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); // TODO realloc
|
||||
if (!structural_indexes) { _capacity = _max_depth = 0; return err; }
|
||||
|
||||
//
|
||||
// Reallocate implementation capacity
|
||||
//
|
||||
if (implementation && !err) { err = implementation->set_capacity(capacity); }
|
||||
}
|
||||
|
||||
if (_max_depth != max_depth && !err) {
|
||||
//
|
||||
// Reallocate stage 2 state
|
||||
//
|
||||
containing_scope.reset(new (std::nothrow) internal::scope_descriptor[max_depth]); // TODO realloc
|
||||
ret_address.reset(new (std::nothrow) internal::ret_address[max_depth]);
|
||||
|
||||
if (!ret_address || !containing_scope) {
|
||||
err = MEMALLOC;
|
||||
}
|
||||
|
||||
//
|
||||
// Reallocate implementation max depth
|
||||
//
|
||||
if (implementation && !err) { err = implementation->set_max_depth(max_depth); }
|
||||
}
|
||||
|
||||
//
|
||||
// Create the implementation if it doesn't already exist
|
||||
//
|
||||
if (!implementation && !err) {
|
||||
if (implementation) {
|
||||
err = implementation->allocate(capacity, max_depth);
|
||||
} else {
|
||||
err = simdjson::active_implementation->create_dom_parser_implementation(capacity, max_depth, implementation);
|
||||
}
|
||||
|
||||
if (err) { _capacity = _max_depth = 0; return err; }
|
||||
_capacity = capacity;
|
||||
_max_depth = max_depth;
|
||||
|
||||
//
|
||||
// If capacity hasn't changed, but the document was taken, allocate a new document.
|
||||
//
|
||||
} else if (!doc.tape) {
|
||||
error_code err = doc.allocate(capacity);
|
||||
if (err) { return err; }
|
||||
}
|
||||
|
||||
if (implementation->capacity() != capacity || !doc.tape) {
|
||||
return doc.allocate(capacity);
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -215,24 +169,24 @@ inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcep
|
|||
return !allocate(capacity, max_depth);
|
||||
}
|
||||
|
||||
really_inline void parser::set_max_capacity(size_t max_capacity) noexcept {
|
||||
_max_capacity = max_capacity;
|
||||
}
|
||||
|
||||
inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept {
|
||||
// If we don't have enough capacity, (try to) automatically bump it.
|
||||
// If the document was taken, reallocate that too.
|
||||
// Both in one if statement to minimize unlikely branching.
|
||||
if (unlikely(desired_capacity > capacity() || !doc.tape)) {
|
||||
if (unlikely(capacity() < desired_capacity || !doc.tape)) {
|
||||
if (desired_capacity > max_capacity()) {
|
||||
return error = CAPACITY;
|
||||
}
|
||||
return allocate(desired_capacity, _max_depth > 0 ? _max_depth : DEFAULT_MAX_DEPTH);
|
||||
return allocate(desired_capacity, max_depth());
|
||||
}
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
really_inline void parser::set_max_capacity(size_t max_capacity) noexcept {
|
||||
_max_capacity = max_capacity;
|
||||
}
|
||||
|
||||
} // namespace dom
|
||||
} // namespace simdjson
|
||||
|
||||
|
|
|
@ -8,16 +8,32 @@
|
|||
namespace simdjson {
|
||||
|
||||
namespace dom {
|
||||
class parser;
|
||||
class document;
|
||||
} // namespace dom
|
||||
|
||||
namespace internal {
|
||||
|
||||
// expectation: sizeof(scope_descriptor) == 8 bytes (64 bits / 8).
|
||||
struct scope_descriptor {
|
||||
uint32_t tape_index; // where, on the tape, the scope ([,{) begins
|
||||
uint32_t count; // how many elements in the scope
|
||||
}; // struct scope_descriptor
|
||||
|
||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
||||
typedef void* ret_address;
|
||||
#else
|
||||
typedef char ret_address;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* An implementation of simdjson's DOM parser for a particular CPU architecture.
|
||||
*
|
||||
* This class is expected to be accessed only by pointer, and never move in memory (though the
|
||||
* pointer can move).
|
||||
*/
|
||||
class dom_parser_implementation {
|
||||
public:
|
||||
|
||||
/**
|
||||
* @private For internal implementation use
|
||||
*
|
||||
|
@ -29,10 +45,9 @@ public:
|
|||
*
|
||||
* @param buf The json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
|
||||
* @param len The length of the json document.
|
||||
* @param parser The parser object. TODO replace this with dom::document & when state is moved to the implementation.
|
||||
* @return The error code, or SUCCESS if there was no error.
|
||||
*/
|
||||
WARN_UNUSED virtual error_code parse(const uint8_t *buf, size_t len, dom::parser &parser) noexcept = 0;
|
||||
WARN_UNUSED virtual error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept = 0;
|
||||
|
||||
/**
|
||||
* @private For internal implementation use
|
||||
|
@ -45,25 +60,24 @@ public:
|
|||
*
|
||||
* @param buf The json document to parse.
|
||||
* @param len The length of the json document.
|
||||
* @param parser The parser object. TODO replace this with structural_indexes & when state is moved to the implementation.
|
||||
* @param streaming Whether this is being called by parser::parse_many.
|
||||
* @return The error code, or SUCCESS if there was no error.
|
||||
*/
|
||||
WARN_UNUSED virtual error_code stage1(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) noexcept = 0;
|
||||
WARN_UNUSED virtual error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept = 0;
|
||||
|
||||
/**
|
||||
* @private For internal implementation use
|
||||
*
|
||||
* Stage 2 of the document parser.
|
||||
*
|
||||
* Guaranteed only to be called after stage1(), with the same buf/len as stage1().
|
||||
* Called after stage1().
|
||||
*
|
||||
* Overridden by each implementation.
|
||||
*
|
||||
* @param parser The parser object. TODO replace this with dom::document & when state is moved to the implementation.
|
||||
* @param doc The document to output to.
|
||||
* @return The error code, or SUCCESS if there was no error.
|
||||
*/
|
||||
WARN_UNUSED virtual error_code stage2(dom::parser &parser) noexcept = 0;
|
||||
WARN_UNUSED virtual error_code stage2(dom::document &doc) noexcept = 0;
|
||||
|
||||
/**
|
||||
* @private For internal implementation use
|
||||
|
@ -75,11 +89,11 @@ public:
|
|||
*
|
||||
* @param buf The json document to parse.
|
||||
* @param len The length of the json document.
|
||||
* @param parser The parser object. TODO replace this with dom::document & when state is moved to the implementation.
|
||||
* @param doc The document to output to.
|
||||
* @param next_json The next structural index. Start this at 0 the first time, and it will be updated to the next value to pass each time.
|
||||
* @return The error code, SUCCESS if there was no error, or SUCCESS_AND_HAS_MORE if there was no error and stage2 can be called again.
|
||||
*/
|
||||
WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser, size_t &next_json) noexcept = 0;
|
||||
WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, dom::document &doc, size_t &next_json) noexcept = 0;
|
||||
|
||||
/**
|
||||
* Change the capacity of this parser.
|
||||
|
@ -103,10 +117,86 @@ public:
|
|||
*/
|
||||
virtual error_code set_max_depth(size_t max_depth) noexcept = 0;
|
||||
|
||||
/**
|
||||
* Deallocate this parser.
|
||||
*/
|
||||
virtual ~dom_parser_implementation() = default;
|
||||
|
||||
/** Next location to write to in the tape */
|
||||
uint32_t current_loc{0};
|
||||
|
||||
/** Number of structural indices passed from stage 1 to stage 2 */
|
||||
uint32_t n_structural_indexes{0};
|
||||
/** Structural indices passed from stage 1 to stage 2 */
|
||||
std::unique_ptr<uint32_t[]> structural_indexes{};
|
||||
|
||||
/** Tape location of each open { or [ */
|
||||
std::unique_ptr<internal::scope_descriptor[]> containing_scope{};
|
||||
|
||||
/** Return address of each open { or [ */
|
||||
std::unique_ptr<internal::ret_address[]> ret_address{};
|
||||
|
||||
/**
|
||||
* The largest document this parser can support without reallocating.
|
||||
*
|
||||
* @return Current capacity, in bytes.
|
||||
*/
|
||||
really_inline size_t capacity() const noexcept;
|
||||
|
||||
/**
|
||||
* The maximum level of nested objects and arrays supported by this parser.
|
||||
*
|
||||
* @return Maximum depth, as a number of nesting levels (not bytes).
|
||||
*/
|
||||
really_inline size_t max_depth() const noexcept;
|
||||
|
||||
/**
|
||||
* Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
|
||||
* and `max_depth` depth.
|
||||
*
|
||||
* @param capacity The new capacity.
|
||||
* @param max_depth The new max_depth (this declaration has no default argument; callers must pass it).
|
||||
* @return The error, if there is one.
|
||||
*/
|
||||
WARN_UNUSED inline error_code allocate(size_t capacity, size_t max_depth) noexcept;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* The maximum document length this parser supports.
|
||||
*
|
||||
* Buffers are large enough to handle any document up to this length.
|
||||
*/
|
||||
size_t _capacity{0};
|
||||
|
||||
/**
|
||||
* The maximum depth (number of nested objects and arrays) supported by this parser.
|
||||
*
|
||||
* Initialized to 0 until a depth is set (see set_max_depth()).
|
||||
*/
|
||||
size_t _max_depth{0};
|
||||
}; // class dom_parser_implementation
|
||||
|
||||
really_inline size_t dom_parser_implementation::capacity() const noexcept {
|
||||
return _capacity;
|
||||
}
|
||||
|
||||
really_inline size_t dom_parser_implementation::max_depth() const noexcept {
|
||||
return _max_depth;
|
||||
}
|
||||
|
||||
WARN_UNUSED
|
||||
inline error_code dom_parser_implementation::allocate(size_t capacity, size_t max_depth) noexcept {
|
||||
if (this->max_depth() != max_depth) {
|
||||
error_code err = set_max_depth(max_depth);
|
||||
if (err) { return err; }
|
||||
}
|
||||
if (_capacity != capacity) {
|
||||
error_code err = set_capacity(capacity);
|
||||
if (err) { return err; }
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace simdjson
|
||||
|
||||
|
|
|
@ -1,2 +0,0 @@
|
|||
Try :
|
||||
c++ -O3 -std=c++17 -pthread -o amalgamate_demo amalgamate_demo.cpp && ./amalgamate_demo ../jsonexamples/twitter.json ../jsonexamples/amazon_cellphones.ndjson
|
|
@ -1,42 +0,0 @@
|
|||
/* auto-generated on Thu 21 May 2020 14:01:15 EDT. Do not edit! */
|
||||
|
||||
#include <iostream>
|
||||
#include "simdjson.h"
|
||||
#include "simdjson.cpp"
|
||||
int main(int argc, char *argv[]) {
|
||||
if(argc < 2) {
|
||||
std::cerr << "Please specify at least one file name. " << std::endl;
|
||||
}
|
||||
const char * filename = argv[1];
|
||||
simdjson::dom::parser parser;
|
||||
simdjson::error_code error;
|
||||
UNUSED simdjson::dom::element elem;
|
||||
parser.load(filename).tie(elem, error); // do the parsing
|
||||
if (error) {
|
||||
std::cout << "parse failed" << std::endl;
|
||||
std::cout << "error code: " << error << std::endl;
|
||||
std::cout << error << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
} else {
|
||||
std::cout << "parse valid" << std::endl;
|
||||
}
|
||||
if(argc == 2) {
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
// parse_many
|
||||
const char * filename2 = argv[2];
|
||||
for (auto result : parser.load_many(filename2)) {
|
||||
error = result.error();
|
||||
}
|
||||
if (error) {
|
||||
std::cout << "parse_many failed" << std::endl;
|
||||
std::cout << "error code: " << error << std::endl;
|
||||
std::cout << error << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
} else {
|
||||
std::cout << "parse_many valid" << std::endl;
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -84,10 +84,10 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui
|
|||
|
||||
#include "generic/stage1/utf8_lookup2_algorithm.h"
|
||||
#include "generic/stage1/json_structural_indexer.h"
|
||||
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, parser &parser, bool streaming) noexcept {
|
||||
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
|
||||
this->buf = _buf;
|
||||
this->len = _len;
|
||||
return arm64::stage1::json_structural_indexer::index<64>(_buf, _len, parser, streaming);
|
||||
return arm64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming);
|
||||
}
|
||||
|
||||
} // namespace arm64
|
||||
|
@ -109,5 +109,11 @@ namespace arm64 {
|
|||
#include "generic/stage2/structural_parser.h"
|
||||
#include "generic/stage2/streaming_structural_parser.h"
|
||||
|
||||
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
|
||||
error_code err = stage1(_buf, _len, false);
|
||||
if (err) { return err; }
|
||||
return stage2(_doc);
|
||||
}
|
||||
|
||||
} // namespace arm64
|
||||
} // namespace simdjson
|
||||
|
|
|
@ -11,38 +11,40 @@ class dom_parser_implementation final : public internal::dom_parser_implementati
|
|||
public:
|
||||
const uint8_t *buf{}; // Buffer passed to stage 1
|
||||
size_t len{0}; // Length passed to stage 1
|
||||
dom::document *doc{}; // Document passed to stage 2
|
||||
|
||||
really_inline dom_parser_implementation();
|
||||
dom_parser_implementation(const dom_parser_implementation &) = delete;
|
||||
dom_parser_implementation & operator=(const dom_parser_implementation &) = delete;
|
||||
|
||||
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::parser &parser) noexcept final;
|
||||
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) noexcept final;
|
||||
WARN_UNUSED error_code stage2(dom::parser &parser) noexcept final;
|
||||
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser, size_t &next_json) noexcept final;
|
||||
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final;
|
||||
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept final;
|
||||
WARN_UNUSED error_code stage2(dom::document &doc) noexcept final;
|
||||
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::document &doc, size_t &next_json) noexcept final;
|
||||
WARN_UNUSED error_code set_capacity(size_t capacity) noexcept final;
|
||||
WARN_UNUSED error_code set_max_depth(size_t max_depth) noexcept final;
|
||||
};
|
||||
|
||||
#include "generic/stage1/allocate.h"
|
||||
#include "generic/stage2/allocate.h"
|
||||
|
||||
really_inline dom_parser_implementation::dom_parser_implementation() {}
|
||||
|
||||
// Leaving these here so they can be inlined if so desired
|
||||
WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t) noexcept {
|
||||
WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept {
|
||||
error_code err = stage1::allocate::set_capacity(*this, capacity);
|
||||
if (err) { _capacity = 0; return err; }
|
||||
_capacity = capacity;
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t) noexcept {
|
||||
WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept {
|
||||
error_code err = stage2::allocate::set_max_depth(*this, max_depth);
|
||||
if (err) { _max_depth = 0; return err; }
|
||||
_max_depth = max_depth;
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::parser &parser) noexcept {
|
||||
error_code code = stage1(_buf, _len, parser, false);
|
||||
if (!code) {
|
||||
code = stage2(_buf, _len, parser);
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
} // namespace arm64
|
||||
} // namespace simdjson
|
||||
|
||||
|
|
|
@ -12,8 +12,13 @@ namespace stage1 {
|
|||
class structural_scanner {
|
||||
public:
|
||||
|
||||
// Constructs a scanner from an explicit buffer pointer and length plus a
// dom::parser; the structural index write cursor starts at the beginning of
// _parser.structural_indexes, idx starts at 0 and error at SUCCESS.
// NOTE(review): appears to be the pre-change constructor in this rendered
// diff (the dom_parser_implementation overload follows) -- confirm.
really_inline structural_scanner(const uint8_t *_buf, uint32_t _len, dom::parser &_parser, bool _streaming)
|
||||
: buf{_buf}, next_structural_index{_parser.structural_indexes.get()}, parser{_parser}, idx{0}, len{_len}, error{SUCCESS}, streaming{_streaming} {}
|
||||
// Constructs a scanner over the buffer already stored on the parser
// implementation: buf and len are read from _parser, and the structural index
// write cursor starts at the beginning of _parser.structural_indexes.
// _parser.len is a size_t and is narrowed to uint32_t with a static_cast.
// idx and error are absent from this initializer list.
// NOTE(review): presumably they rely on the `idx{0}` / `error{SUCCESS}`
// default member initializers that appear in the member list -- confirm.
really_inline structural_scanner(dom_parser_implementation &_parser, bool _streaming)
|
||||
: buf{_parser.buf},
|
||||
next_structural_index{_parser.structural_indexes.get()},
|
||||
parser{_parser},
|
||||
len{static_cast<uint32_t>(_parser.len)},
|
||||
streaming{_streaming} {
|
||||
}
|
||||
|
||||
really_inline void add_structural() {
|
||||
*next_structural_index = idx;
|
||||
|
@ -135,23 +140,20 @@ really_inline error_code scan() {
|
|||
private:
|
||||
const uint8_t *buf;
|
||||
uint32_t *next_structural_index;
|
||||
dom::parser &parser;
|
||||
uint32_t idx;
|
||||
dom_parser_implementation &parser;
|
||||
uint32_t len;
|
||||
error_code error;
|
||||
uint32_t idx{0};
|
||||
error_code error{SUCCESS};
|
||||
bool streaming;
|
||||
}; // structural_scanner
|
||||
|
||||
} // namespace stage1
|
||||
|
||||
|
||||
// Stage 1 entry point: stores the input pointer and length on the
// implementation (this->buf / this->len), builds a stage1::structural_scanner
// and returns the result of scanner.scan().
// NOTE(review): this span interleaves two versions of the function from a
// rendered diff. The lines that use `parser` (the four-argument signature, the
// capacity guard returning CAPACITY, and the four-argument scanner
// construction) do not match the three-argument signature, which has no
// `parser` parameter; they appear to be the pre-change side -- confirm.
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, dom::parser &parser, bool streaming) noexcept {
|
||||
if (unlikely(_len > parser.capacity())) {
|
||||
return CAPACITY;
|
||||
}
|
||||
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
|
||||
this->buf = _buf;
|
||||
this->len = _len;
|
||||
stage1::structural_scanner scanner(_buf, uint32_t(_len), parser, streaming);
|
||||
stage1::structural_scanner scanner(*this, streaming);
|
||||
return scanner.scan();
|
||||
}
|
||||
|
||||
|
@ -229,5 +231,11 @@ namespace fallback {
|
|||
#include "generic/stage2/structural_parser.h"
|
||||
#include "generic/stage2/streaming_structural_parser.h"
|
||||
|
||||
// Full parse of one buffer into _doc: runs stage1 with streaming disabled,
// returns its error immediately if there is one, and otherwise returns the
// result of stage2(_doc).
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
|
||||
error_code err = stage1(_buf, _len, false);
|
||||
if (err) { return err; }
|
||||
return stage2(_doc);
|
||||
}
|
||||
|
||||
} // namespace fallback
|
||||
} // namespace simdjson
|
||||
|
|
|
@ -11,38 +11,40 @@ class dom_parser_implementation final : public internal::dom_parser_implementati
|
|||
public:
|
||||
const uint8_t *buf{}; // Buffer passed to stage 1
|
||||
size_t len{0}; // Length passed to stage 1
|
||||
dom::document *doc{}; // Document passed to stage 2
|
||||
|
||||
really_inline dom_parser_implementation();
|
||||
dom_parser_implementation(const dom_parser_implementation &) = delete;
|
||||
dom_parser_implementation & operator=(const dom_parser_implementation &) = delete;
|
||||
|
||||
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::parser &parser) noexcept final;
|
||||
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) noexcept final;
|
||||
WARN_UNUSED error_code stage2(dom::parser &parser) noexcept final;
|
||||
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser, size_t &next_json) noexcept final;
|
||||
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final;
|
||||
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept final;
|
||||
WARN_UNUSED error_code stage2(dom::document &doc) noexcept final;
|
||||
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::document &doc, size_t &next_json) noexcept final;
|
||||
WARN_UNUSED error_code set_capacity(size_t capacity) noexcept final;
|
||||
WARN_UNUSED error_code set_max_depth(size_t max_depth) noexcept final;
|
||||
};
|
||||
|
||||
#include "generic/stage1/allocate.h"
|
||||
#include "generic/stage2/allocate.h"
|
||||
|
||||
// Constructor with an empty body and no member-initializer list; buf, len and
// doc are given default member initializers in the class declaration.
really_inline dom_parser_implementation::dom_parser_implementation() {}
|
||||
|
||||
// Leaving these here so they can be inlined if so desired
|
||||
// Applies a new capacity by delegating the allocation to
// stage1::allocate::set_capacity. If that call reports an error, _capacity is
// reset to 0 and the error is returned; otherwise _capacity records the
// requested value and SUCCESS is returned.
// NOTE(review): two signature lines precede a single body here; this text
// looks like a rendered diff in which the unnamed-parameter line is the
// superseded one -- confirm against the repository.
WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t) noexcept {
|
||||
WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept {
|
||||
error_code err = stage1::allocate::set_capacity(*this, capacity);
|
||||
if (err) { _capacity = 0; return err; }
|
||||
_capacity = capacity;
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
// Applies a new maximum depth by delegating the allocation to
// stage2::allocate::set_max_depth. If that call reports an error, _max_depth
// is reset to 0 and the error is returned; otherwise _max_depth records the
// requested value and SUCCESS is returned.
// NOTE(review): two signature lines precede a single body here; this text
// looks like a rendered diff in which the unnamed-parameter line is the
// superseded one -- confirm against the repository.
WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t) noexcept {
|
||||
WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept {
|
||||
error_code err = stage2::allocate::set_max_depth(*this, max_depth);
|
||||
if (err) { _max_depth = 0; return err; }
|
||||
_max_depth = max_depth;
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
// Runs stage1 (with the streaming flag set to false) and, only when it
// returned no error, stage2; the last error code obtained is returned.
// NOTE(review): this definition takes a dom::parser&, whereas the class
// declaration above also lists parse(..., dom::document &doc); it appears to
// be the pre-change side of a rendered diff -- confirm.
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::parser &parser) noexcept {
|
||||
error_code code = stage1(_buf, _len, parser, false);
|
||||
if (!code) {
|
||||
code = stage2(parser);
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
} // namespace fallback
|
||||
} // namespace simdjson
|
||||
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
namespace stage1 {
|
||||
namespace allocate {
|
||||
|
||||
//
|
||||
// Allocates stage 1 internal state and outputs in the parser
|
||||
//
|
||||
// Replaces parser.structural_indexes with a freshly allocated uint32_t array.
// The element count is ROUNDUP_N(capacity, 64) + 2 + 7.
// NOTE(review): ROUNDUP_N presumably rounds capacity up to a multiple of 64,
// and the reason for the extra 2 + 7 slots is not shown here -- confirm.
// The allocation uses the nothrow form of new, so failure shows up as a null
// pointer and is reported as MEMALLOC; otherwise SUCCESS is returned.
really_inline error_code set_capacity(internal::dom_parser_implementation &parser, size_t capacity) {
|
||||
size_t max_structures = ROUNDUP_N(capacity, 64) + 2 + 7;
|
||||
parser.structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] );
|
||||
if (!parser.structural_indexes) { return MEMALLOC; }
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
} // namespace allocate
|
||||
} // namespace stage1
|
|
@ -58,7 +58,7 @@ public:
|
|||
class json_structural_indexer {
|
||||
public:
|
||||
template<size_t STEP_SIZE>
|
||||
static error_code index(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) noexcept;
|
||||
static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool streaming) noexcept;
|
||||
|
||||
private:
|
||||
really_inline json_structural_indexer(uint32_t *structural_indexes)
|
||||
|
@ -66,7 +66,7 @@ private:
|
|||
template<size_t STEP_SIZE>
|
||||
really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
||||
really_inline void next(simd::simd8x64<uint8_t> in, json_block block, size_t idx);
|
||||
really_inline error_code finish(dom::parser &parser, size_t idx, size_t len, bool streaming);
|
||||
really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool streaming);
|
||||
|
||||
json_scanner scanner{};
|
||||
utf8_checker checker{};
|
||||
|
@ -83,7 +83,7 @@ really_inline void json_structural_indexer::next(simd::simd8x64<uint8_t> in, jso
|
|||
unescaped_chars_error |= block.non_quote_inside_string(unescaped);
|
||||
}
|
||||
|
||||
really_inline error_code json_structural_indexer::finish(dom::parser &parser, size_t idx, size_t len, bool streaming) {
|
||||
really_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, bool streaming) {
|
||||
// Write out the final iteration's structurals
|
||||
indexer.write(uint32_t(idx-64), prev_structurals);
|
||||
|
||||
|
@ -155,7 +155,7 @@ really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_b
|
|||
// The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
|
||||
// you may want to call on a function like trimmed_length_safe_utf8.
|
||||
template<size_t STEP_SIZE>
|
||||
error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) noexcept {
|
||||
error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool streaming) noexcept {
|
||||
if (unlikely(len > parser.capacity())) { return CAPACITY; }
|
||||
|
||||
buf_block_reader<STEP_SIZE> reader(buf, len);
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
namespace stage2 {
|
||||
namespace allocate {
|
||||
|
||||
//
|
||||
// Allocates stage 2 internal state and outputs in the parser
|
||||
//
|
||||
// Replaces parser.containing_scope and parser.ret_address with freshly
// allocated arrays of max_depth entries each. Both allocations use the nothrow
// form of new and are attempted before either result is checked; if either
// pointer is null MEMALLOC is returned, otherwise SUCCESS.
really_inline error_code set_max_depth(dom_parser_implementation &parser, size_t max_depth) {
|
||||
parser.containing_scope.reset(new (std::nothrow) internal::scope_descriptor[max_depth]);
|
||||
parser.ret_address.reset(new (std::nothrow) internal::ret_address[max_depth]);
|
||||
|
||||
if (!parser.ret_address || !parser.containing_scope) {
|
||||
return MEMALLOC;
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
} // namespace allocate
|
||||
} // namespace stage2
|
|
@ -1,7 +1,7 @@
|
|||
namespace stage2 {
|
||||
|
||||
struct streaming_structural_parser: structural_parser {
|
||||
// Forwards buf, len, the dom::parser and the starting structural index to the
// structural_parser base constructor; the body is empty.
// NOTE(review): appears to be the pre-change constructor in this rendered
// diff -- confirm.
really_inline streaming_structural_parser(const uint8_t *buf, size_t len, dom::parser &_doc_parser, uint32_t next_structural) : structural_parser(buf, len, _doc_parser, next_structural) {}
|
||||
// Forwards the parser implementation and the starting structural index to the
// structural_parser base constructor; the body is empty.
really_inline streaming_structural_parser(dom_parser_implementation &_parser, uint32_t next_structural) : structural_parser(_parser, next_structural) {}
|
||||
|
||||
// override to add streaming
|
||||
WARN_UNUSED really_inline error_code start(ret_address finish_parser) {
|
||||
|
@ -44,10 +44,12 @@ struct streaming_structural_parser: structural_parser {
|
|||
* The JSON is parsed to a tape, see the accompanying tape.md file
|
||||
* for documentation.
|
||||
***********/
|
||||
WARN_UNUSED error_code dom_parser_implementation::stage2(const uint8_t *_buf, size_t _len, dom::parser &doc_parser, size_t &next_json) noexcept {
|
||||
WARN_UNUSED error_code dom_parser_implementation::stage2(const uint8_t *_buf, size_t _len, dom::document &_doc, size_t &next_json) noexcept {
|
||||
this->buf = _buf;
|
||||
this->len = _len;
|
||||
this->doc = &_doc;
|
||||
static constexpr stage2::unified_machine_addresses addresses = INIT_ADDRESSES();
|
||||
|
||||
stage2::streaming_structural_parser parser(_buf, _len, doc_parser, uint32_t(next_json));
|
||||
stage2::streaming_structural_parser parser(*this, uint32_t(next_json));
|
||||
error_code result = parser.start(addresses.finish);
|
||||
if (result) { return result; }
|
||||
//
|
||||
|
|
|
@ -48,41 +48,39 @@ struct unified_machine_addresses {
|
|||
#define FAIL_IF(EXPR) { if (EXPR) { return addresses.error; } }
|
||||
|
||||
// Writes numeric values to the document tape through a reference to the
// parser. Each write_* method first appends a tape word carrying only the
// number's type tag (payload 0) and then stores the raw 64-bit value in the
// following tape slot, advancing parser.current_loc past it.
// NOTE(review): this span interleaves two versions from a rendered diff; the
// lines using `dom::parser &parser` / `parser.doc.` appear to be the
// pre-change side and the `dom_parser_implementation` / `parser.doc->` lines
// their replacements -- confirm against the repository.
struct number_writer {
|
||||
dom::parser &parser;
|
||||
dom_parser_implementation &parser;
|
||||
|
||||
// Appends an INT64 tag word, then copies the signed value into the next slot.
really_inline void write_s64(int64_t value) noexcept {
|
||||
append_tape(0, internal::tape_type::INT64);
|
||||
std::memcpy(&parser.doc.tape[parser.current_loc], &value, sizeof(value));
|
||||
std::memcpy(&parser.doc->tape[parser.current_loc], &value, sizeof(value));
|
||||
++parser.current_loc;
|
||||
}
|
||||
// Appends a UINT64 tag word, then assigns the unsigned value to the next slot.
really_inline void write_u64(uint64_t value) noexcept {
|
||||
append_tape(0, internal::tape_type::UINT64);
|
||||
parser.doc.tape[parser.current_loc++] = value;
|
||||
parser.doc->tape[parser.current_loc++] = value;
|
||||
}
|
||||
// Appends a DOUBLE tag word, then copies the double's bytes into the next
// slot; the static_assert checks that a double and a tape slot have the same size.
really_inline void write_double(double value) noexcept {
|
||||
append_tape(0, internal::tape_type::DOUBLE);
|
||||
static_assert(sizeof(value) == sizeof(parser.doc.tape[parser.current_loc]), "mismatch size");
|
||||
memcpy(&parser.doc.tape[parser.current_loc++], &value, sizeof(double));
|
||||
// doc.tape[doc.current_loc++] = *((uint64_t *)&d);
|
||||
static_assert(sizeof(value) == sizeof(parser.doc->tape[parser.current_loc]), "mismatch size");
|
||||
memcpy(&parser.doc->tape[parser.current_loc++], &value, sizeof(double));
|
||||
// doc->tape[doc->current_loc++] = *((uint64_t *)&d);
|
||||
}
|
||||
// Writes one tape word at current_loc and advances it: val OR-ed with the
// type tag shifted into the top 8 bits (bit 56 and up).
really_inline void append_tape(uint64_t val, internal::tape_type t) noexcept {
|
||||
parser.doc.tape[parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
|
||||
parser.doc->tape[parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
|
||||
}
|
||||
}; // struct number_writer
|
||||
|
||||
struct structural_parser {
|
||||
structural_iterator structurals;
|
||||
dom::parser &parser;
|
||||
dom_parser_implementation &parser;
|
||||
/** Next write location in the string buf for stage 2 parsing */
|
||||
uint8_t *current_string_buf_loc{};
|
||||
uint32_t depth;
|
||||
|
||||
really_inline structural_parser(
|
||||
const uint8_t *buf,
|
||||
size_t len,
|
||||
dom::parser &_parser,
|
||||
dom_parser_implementation &_parser,
|
||||
uint32_t next_structural = 0
|
||||
) : structurals(buf, len, _parser.structural_indexes.get(), next_structural), parser{_parser}, depth{0} {}
|
||||
) : structurals(_parser.buf, _parser.len, _parser.structural_indexes.get(), next_structural), parser{_parser}, depth{0} {}
|
||||
|
||||
WARN_UNUSED really_inline bool start_scope(ret_address continue_state) {
|
||||
parser.containing_scope[depth].tape_index = parser.current_loc;
|
||||
|
@ -113,7 +111,7 @@ struct structural_parser {
|
|||
// this function is responsible for annotating the start of the scope
|
||||
really_inline void end_scope(internal::tape_type start, internal::tape_type end) noexcept {
|
||||
depth--;
|
||||
// write our doc.tape location to the header scope
|
||||
// write our doc->tape location to the header scope
|
||||
// The root scope gets written *at* the previous location.
|
||||
append_tape(parser.containing_scope[depth].tape_index, end);
|
||||
// count can overflow if it exceeds 24 bits... so we saturate
|
||||
|
@ -121,7 +119,7 @@ struct structural_parser {
|
|||
const uint32_t start_tape_index = parser.containing_scope[depth].tape_index;
|
||||
const uint32_t count = parser.containing_scope[depth].count;
|
||||
const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count;
|
||||
// This is a load and an OR. It would be possible to just write once at doc.tape[d.tape_index]
|
||||
// This is a load and an OR. It would be possible to just write once at doc->tape[d.tape_index]
|
||||
write_tape(start_tape_index, parser.current_loc | (uint64_t(cntsat) << 32), start);
|
||||
}
|
||||
|
||||
|
@ -139,11 +137,11 @@ struct structural_parser {
|
|||
}
|
||||
|
||||
// Writes one tape word at parser.current_loc and advances current_loc: val
// OR-ed with the type tag shifted into the top 8 bits (bit 56 and up).
// NOTE(review): the two assignment lines differ only in `doc.` vs `doc->`;
// this looks like a rendered diff with the `doc.` line superseded -- confirm.
really_inline void append_tape(uint64_t val, internal::tape_type t) noexcept {
|
||||
parser.doc.tape[parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
|
||||
parser.doc->tape[parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
|
||||
}
|
||||
|
||||
// Overwrites the tape word at the explicit index loc with val OR-ed with the
// type tag shifted into the top 8 bits; parser.current_loc is not modified.
// NOTE(review): the two assignment lines differ only in `doc.` vs `doc->`;
// this looks like a rendered diff with the `doc.` line superseded -- confirm.
really_inline void write_tape(uint32_t loc, uint64_t val, internal::tape_type t) noexcept {
|
||||
parser.doc.tape[loc] = val | ((uint64_t(char(t))) << 56);
|
||||
parser.doc->tape[loc] = val | ((uint64_t(char(t))) << 56);
|
||||
}
|
||||
|
||||
// increment_count increments the count of keys in an object or values in an array.
|
||||
|
@ -156,7 +154,7 @@ struct structural_parser {
|
|||
|
||||
// Appends a STRING tape word whose payload is the offset of
// current_string_buf_loc from the start of the document's string buffer, then
// returns a pointer sizeof(uint32_t) bytes past current_string_buf_loc.
// NOTE(review): the skipped uint32_t is presumably reserved for the string's
// length prefix -- confirm against the tape format documentation.
// NOTE(review): the two append_tape lines differ only in `doc.` vs `doc->`;
// this looks like a rendered diff with the `doc.` line superseded -- confirm.
really_inline uint8_t *on_start_string() noexcept {
|
||||
// we advance the point, accounting for the fact that we have a NULL termination
|
||||
append_tape(current_string_buf_loc - parser.doc.string_buf.get(), internal::tape_type::STRING);
|
||||
append_tape(current_string_buf_loc - parser.doc->string_buf.get(), internal::tape_type::STRING);
|
||||
return current_string_buf_loc + sizeof(uint32_t);
|
||||
}
|
||||
|
||||
|
@ -330,7 +328,7 @@ struct structural_parser {
|
|||
}
|
||||
|
||||
// Resets the write positions before parsing: the string write cursor goes back
// to the start of the document's string buffer and parser.current_loc to 0.
// NOTE(review): the two assignments to current_string_buf_loc differ only in
// `doc.` vs `doc->`; this looks like a rendered diff with the `doc.` line
// superseded -- confirm.
really_inline void init() {
|
||||
current_string_buf_loc = parser.doc.string_buf.get();
|
||||
current_string_buf_loc = parser.doc->string_buf.get();
|
||||
parser.current_loc = 0;
|
||||
}
|
||||
|
||||
|
@ -386,10 +384,11 @@ struct structural_parser {
|
|||
* The JSON is parsed to a tape, see the accompanying tape.md file
|
||||
* for documentation.
|
||||
***********/
|
||||
WARN_UNUSED error_code dom_parser_implementation::stage2(dom::parser &doc_parser) noexcept {
|
||||
WARN_UNUSED error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept {
|
||||
this->doc = &_doc;
|
||||
static constexpr stage2::unified_machine_addresses addresses = INIT_ADDRESSES();
|
||||
stage2::structural_parser parser(this->buf, this->len, doc_parser);
|
||||
error_code result = parser.start(this->len, addresses.finish);
|
||||
stage2::structural_parser parser(*this);
|
||||
error_code result = parser.start(len, addresses.finish);
|
||||
if (result) { return result; }
|
||||
|
||||
//
|
||||
|
|
|
@ -72,10 +72,10 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui
|
|||
|
||||
#include "generic/stage1/utf8_lookup2_algorithm.h"
|
||||
#include "generic/stage1/json_structural_indexer.h"
|
||||
// Stage 1 entry point for the haswell implementation: stores the input pointer
// and length on the implementation (this->buf / this->len) and returns the
// result of json_structural_indexer::index instantiated with a step size of 128.
// NOTE(review): this span interleaves two versions from a rendered diff; the
// signature taking `dom::parser &parser` and the return statement that passes
// `parser` appear to be the pre-change side (the three-argument signature has
// no `parser` parameter) -- confirm.
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, dom::parser &parser, bool streaming) noexcept {
|
||||
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
|
||||
this->buf = _buf;
|
||||
this->len = _len;
|
||||
return haswell::stage1::json_structural_indexer::index<128>(_buf, _len, parser, streaming);
|
||||
return haswell::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming);
|
||||
}
|
||||
|
||||
} // namespace haswell
|
||||
|
@ -98,6 +98,12 @@ namespace haswell {
|
|||
#include "generic/stage2/structural_parser.h"
|
||||
#include "generic/stage2/streaming_structural_parser.h"
|
||||
|
||||
// Full parse of one buffer into _doc: runs stage1 with streaming disabled,
// returns its error immediately if there is one, and otherwise returns the
// result of stage2(_doc).
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
|
||||
error_code err = stage1(_buf, _len, false);
|
||||
if (err) { return err; }
|
||||
return stage2(_doc);
|
||||
}
|
||||
|
||||
} // namespace haswell
|
||||
} // namespace simdjson
|
||||
UNTARGET_REGION
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
|
||||
#include "simdjson.h"
|
||||
#include "isadetection.h"
|
||||
|
||||
namespace simdjson {
|
||||
namespace haswell {
|
||||
|
||||
|
@ -11,38 +10,40 @@ class dom_parser_implementation final : public internal::dom_parser_implementati
|
|||
public:
|
||||
const uint8_t *buf{}; // Buffer passed to stage 1
|
||||
size_t len{0}; // Length passed to stage 1
|
||||
dom::document *doc{}; // Document passed to stage 2
|
||||
|
||||
really_inline dom_parser_implementation();
|
||||
dom_parser_implementation(const dom_parser_implementation &) = delete;
|
||||
dom_parser_implementation & operator=(const dom_parser_implementation &) = delete;
|
||||
|
||||
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::parser &parser) noexcept final;
|
||||
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) noexcept final;
|
||||
WARN_UNUSED error_code stage2(dom::parser &parser) noexcept final;
|
||||
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser, size_t &next_json) noexcept final;
|
||||
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final;
|
||||
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept final;
|
||||
WARN_UNUSED error_code stage2(dom::document &doc) noexcept final;
|
||||
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::document &doc, size_t &next_json) noexcept final;
|
||||
WARN_UNUSED error_code set_capacity(size_t capacity) noexcept final;
|
||||
WARN_UNUSED error_code set_max_depth(size_t max_depth) noexcept final;
|
||||
};
|
||||
|
||||
#include "generic/stage1/allocate.h"
|
||||
#include "generic/stage2/allocate.h"
|
||||
|
||||
// Constructor with an empty body and no member-initializer list; buf, len and
// doc are given default member initializers in the class declaration.
really_inline dom_parser_implementation::dom_parser_implementation() {}
|
||||
|
||||
// Leaving these here so they can be inlined if so desired
|
||||
// Applies a new capacity by delegating the allocation to
// stage1::allocate::set_capacity. If that call reports an error, _capacity is
// reset to 0 and the error is returned; otherwise _capacity records the
// requested value and SUCCESS is returned.
// NOTE(review): two signature lines precede a single body here; this text
// looks like a rendered diff in which the unnamed-parameter line is the
// superseded one -- confirm against the repository.
WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t) noexcept {
|
||||
WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept {
|
||||
error_code err = stage1::allocate::set_capacity(*this, capacity);
|
||||
if (err) { _capacity = 0; return err; }
|
||||
_capacity = capacity;
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
// Applies a new maximum depth by delegating the allocation to
// stage2::allocate::set_max_depth. If that call reports an error, _max_depth
// is reset to 0 and the error is returned; otherwise _max_depth records the
// requested value and SUCCESS is returned.
// NOTE(review): two signature lines precede a single body here; this text
// looks like a rendered diff in which the unnamed-parameter line is the
// superseded one -- confirm against the repository.
WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t) noexcept {
|
||||
WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept {
|
||||
error_code err = stage2::allocate::set_max_depth(*this, max_depth);
|
||||
if (err) { _max_depth = 0; return err; }
|
||||
_max_depth = max_depth;
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
// Runs stage1 (with the streaming flag set to false) and, only when it
// returned no error, stage2; the last error code obtained is returned.
// NOTE(review): this definition takes a dom::parser&, whereas the class
// declaration above also lists parse(..., dom::document &doc); it appears to
// be the pre-change side of a rendered diff -- confirm.
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::parser &parser) noexcept {
|
||||
error_code code = stage1(_buf, _len, parser, false);
|
||||
if (!code) {
|
||||
code = stage2(parser);
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
} // namespace haswell
|
||||
} // namespace simdjson
|
||||
|
||||
|
|
|
@ -73,10 +73,10 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui
|
|||
|
||||
#include "generic/stage1/utf8_lookup2_algorithm.h"
|
||||
#include "generic/stage1/json_structural_indexer.h"
|
||||
// Stage 1 entry point for the westmere implementation: stores the input
// pointer and length on the implementation (this->buf / this->len) and returns
// the result of json_structural_indexer::index instantiated with a step size of 64.
// NOTE(review): this span interleaves two versions from a rendered diff; the
// signature taking `parser &parser` and the return statement that passes
// `parser` appear to be the pre-change side (the three-argument signature has
// no `parser` parameter) -- confirm.
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, parser &parser, bool streaming) noexcept {
|
||||
WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept {
|
||||
this->buf = _buf;
|
||||
this->len = _len;
|
||||
return westmere::stage1::json_structural_indexer::index<64>(_buf, _len, parser, streaming);
|
||||
return westmere::stage1::json_structural_indexer::index<64>(_buf, _len, *this, streaming);
|
||||
}
|
||||
|
||||
} // namespace westmere
|
||||
|
@ -99,6 +99,12 @@ namespace westmere {
|
|||
#include "generic/stage2/structural_parser.h"
|
||||
#include "generic/stage2/streaming_structural_parser.h"
|
||||
|
||||
// Full parse of one buffer into _doc: runs stage1 with streaming disabled,
// returns its error immediately if there is one, and otherwise returns the
// result of stage2(_doc).
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
|
||||
error_code err = stage1(_buf, _len, false);
|
||||
if (err) { return err; }
|
||||
return stage2(_doc);
|
||||
}
|
||||
|
||||
} // namespace westmere
|
||||
} // namespace simdjson
|
||||
UNTARGET_REGION
|
||||
|
|
|
@ -11,38 +11,40 @@ class dom_parser_implementation final : public internal::dom_parser_implementati
|
|||
public:
|
||||
const uint8_t *buf{}; // Buffer passed to stage 1
|
||||
size_t len{0}; // Length passed to stage 1
|
||||
dom::document *doc{}; // Document passed to stage 2
|
||||
|
||||
really_inline dom_parser_implementation();
|
||||
dom_parser_implementation(const dom_parser_implementation &) = delete;
|
||||
dom_parser_implementation & operator=(const dom_parser_implementation &) = delete;
|
||||
|
||||
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::parser &parser) noexcept final;
|
||||
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) noexcept final;
|
||||
WARN_UNUSED error_code stage2(dom::parser &parser) noexcept final;
|
||||
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser, size_t &next_json) noexcept final;
|
||||
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final;
|
||||
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept final;
|
||||
WARN_UNUSED error_code stage2(dom::document &doc) noexcept final;
|
||||
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::document &doc, size_t &next_json) noexcept final;
|
||||
WARN_UNUSED error_code set_capacity(size_t capacity) noexcept final;
|
||||
WARN_UNUSED error_code set_max_depth(size_t max_depth) noexcept final;
|
||||
};
|
||||
|
||||
#include "generic/stage1/allocate.h"
|
||||
#include "generic/stage2/allocate.h"
|
||||
|
||||
// Constructor with an empty body and no member-initializer list; buf, len and
// doc are given default member initializers in the class declaration.
really_inline dom_parser_implementation::dom_parser_implementation() {}
|
||||
|
||||
// Leaving these here so they can be inlined if so desired
|
||||
// Applies a new capacity by delegating the allocation to
// stage1::allocate::set_capacity. If that call reports an error, _capacity is
// reset to 0 and the error is returned; otherwise _capacity records the
// requested value and SUCCESS is returned.
// NOTE(review): two signature lines precede a single body here; this text
// looks like a rendered diff in which the unnamed-parameter line is the
// superseded one -- confirm against the repository.
WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t) noexcept {
|
||||
WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept {
|
||||
error_code err = stage1::allocate::set_capacity(*this, capacity);
|
||||
if (err) { _capacity = 0; return err; }
|
||||
_capacity = capacity;
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
// Applies a new maximum depth by delegating the allocation to
// stage2::allocate::set_max_depth. If that call reports an error, _max_depth
// is reset to 0 and the error is returned; otherwise _max_depth records the
// requested value and SUCCESS is returned.
// NOTE(review): two signature lines precede a single body here; this text
// looks like a rendered diff in which the unnamed-parameter line is the
// superseded one -- confirm against the repository.
WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t) noexcept {
|
||||
WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept {
|
||||
error_code err = stage2::allocate::set_max_depth(*this, max_depth);
|
||||
if (err) { _max_depth = 0; return err; }
|
||||
_max_depth = max_depth;
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
// Runs stage1 (with the streaming flag set to false) and, only when it
// returned no error, stage2; the last error code obtained is returned.
// NOTE(review): this definition takes a dom::parser&, whereas the class
// declaration above also lists parse(..., dom::document &doc); it appears to
// be the pre-change side of a rendered diff -- confirm.
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::parser &parser) noexcept {
|
||||
error_code code = stage1(_buf, _len, parser, false);
|
||||
if (!code) {
|
||||
code = stage2(parser);
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
} // namespace westmere
|
||||
} // namespace simdjson
|
||||
|
||||
|
|
|
@ -176,7 +176,7 @@ stat_t simdjson_compute_stats(const simdjson::padded_string &p) {
|
|||
s.non_ascii_byte_count = count_nonasciibytes(
|
||||
reinterpret_cast<const uint8_t *>(p.data()), p.size());
|
||||
s.byte_count = p.size();
|
||||
s.structural_indexes_count = parser.n_structural_indexes;
|
||||
s.structural_indexes_count = parser.implementation->n_structural_indexes;
|
||||
|
||||
// simdjson::document::iterator iter(doc);
|
||||
recurse(doc, s, 0);
|
||||
|
|
Loading…
Reference in New Issue