Merge pull request #876 from simdjson/jkeiser/doc-writing-stage-2
Move tape writes into stage 2 code
This commit is contained in:
commit
c615d52cf4
|
@ -985,9 +985,6 @@ public:
|
|||
std::unique_ptr<char[]> ret_address{};
|
||||
#endif
|
||||
|
||||
/** @private Next write location in the string buf for stage 2 parsing */
|
||||
uint8_t *current_string_buf_loc{};
|
||||
|
||||
/** @private Use `if (parser.parse(...).error())` instead */
|
||||
bool valid{false};
|
||||
/** @private Use `parser.parse(...).error()` instead */
|
||||
|
@ -1018,32 +1015,6 @@ public:
|
|||
/** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()` instead */
|
||||
inline bool dump_raw_tape(std::ostream &os) const noexcept;
|
||||
|
||||
//
|
||||
// Parser callbacks: these are internal!
|
||||
//
|
||||
|
||||
/** @private this should be called when parsing (right before writing the tapes) */
|
||||
inline void init_stage2() noexcept;
|
||||
really_inline error_code on_error(error_code new_error_code) noexcept; ///< @private
|
||||
really_inline error_code on_success(error_code success_code) noexcept; ///< @private
|
||||
really_inline bool on_start_document(uint32_t depth) noexcept; ///< @private
|
||||
really_inline bool on_start_object(uint32_t depth) noexcept; ///< @private
|
||||
really_inline bool on_start_array(uint32_t depth) noexcept; ///< @private
|
||||
// TODO we're not checking this bool
|
||||
really_inline bool on_end_document(uint32_t depth) noexcept; ///< @private
|
||||
really_inline bool on_end_object(uint32_t depth) noexcept; ///< @private
|
||||
really_inline bool on_end_array(uint32_t depth) noexcept; ///< @private
|
||||
really_inline bool on_true_atom() noexcept; ///< @private
|
||||
really_inline bool on_false_atom() noexcept; ///< @private
|
||||
really_inline bool on_null_atom() noexcept; ///< @private
|
||||
really_inline uint8_t *on_start_string() noexcept; ///< @private
|
||||
really_inline bool on_end_string(uint8_t *dst) noexcept; ///< @private
|
||||
really_inline bool on_number_s64(int64_t value) noexcept; ///< @private
|
||||
really_inline bool on_number_u64(uint64_t value) noexcept; ///< @private
|
||||
really_inline bool on_number_double(double value) noexcept; ///< @private
|
||||
|
||||
really_inline void increment_count(uint32_t depth) noexcept; ///< @private
|
||||
really_inline void end_scope(uint32_t depth) noexcept; ///< @private
|
||||
private:
|
||||
/**
|
||||
* The maximum document length this parser will automatically support.
|
||||
|
@ -1088,8 +1059,6 @@ private:
|
|||
//
|
||||
//
|
||||
|
||||
inline void write_tape(uint64_t val, internal::tape_type t) noexcept;
|
||||
|
||||
/**
|
||||
* Ensure we have enough capacity to handle at least desired_capacity bytes,
|
||||
* and auto-allocate if not.
|
||||
|
|
|
@ -1,145 +0,0 @@
|
|||
#ifndef SIMDJSON_DOCUMENT_PARSER_CALLBACKS_H
|
||||
#define SIMDJSON_DOCUMENT_PARSER_CALLBACKS_H
|
||||
|
||||
#include "simdjson.h"
|
||||
|
||||
namespace simdjson {
|
||||
namespace dom {
|
||||
|
||||
//
|
||||
// Parser callbacks
|
||||
//
|
||||
|
||||
inline void parser::init_stage2() noexcept {
|
||||
current_string_buf_loc = doc.string_buf.get();
|
||||
current_loc = 0;
|
||||
valid = false;
|
||||
error = UNINITIALIZED;
|
||||
}
|
||||
|
||||
really_inline error_code parser::on_error(error_code new_error_code) noexcept {
|
||||
error = new_error_code;
|
||||
return new_error_code;
|
||||
}
|
||||
really_inline error_code parser::on_success(error_code success_code) noexcept {
|
||||
error = success_code;
|
||||
valid = true;
|
||||
return success_code;
|
||||
}
|
||||
// increment_count increments the count of keys in an object or values in an array.
|
||||
// Note that if you are at the level of the values or elements, the count
|
||||
// must be incremented in the preceding depth (depth-1) where the array or
|
||||
// the object resides.
|
||||
really_inline void parser::increment_count(uint32_t depth) noexcept {
|
||||
containing_scope[depth].count++;
|
||||
}
|
||||
|
||||
really_inline bool parser::on_start_document(uint32_t depth) noexcept {
|
||||
containing_scope[depth].tape_index = current_loc;
|
||||
containing_scope[depth].count = 0;
|
||||
write_tape(0, internal::tape_type::ROOT); // if the document is correct, this gets rewritten later
|
||||
return true;
|
||||
}
|
||||
really_inline bool parser::on_start_object(uint32_t depth) noexcept {
|
||||
containing_scope[depth].tape_index = current_loc;
|
||||
containing_scope[depth].count = 0;
|
||||
write_tape(0, internal::tape_type::START_OBJECT); // if the document is correct, this gets rewritten later
|
||||
return true;
|
||||
}
|
||||
really_inline bool parser::on_start_array(uint32_t depth) noexcept {
|
||||
containing_scope[depth].tape_index = current_loc;
|
||||
containing_scope[depth].count = 0;
|
||||
write_tape(0, internal::tape_type::START_ARRAY); // if the document is correct, this gets rewritten later
|
||||
return true;
|
||||
}
|
||||
// TODO we're not checking this bool
|
||||
really_inline bool parser::on_end_document(uint32_t depth) noexcept {
|
||||
// write our doc.tape location to the header scope
|
||||
// The root scope gets written *at* the previous location.
|
||||
write_tape(containing_scope[depth].tape_index, internal::tape_type::ROOT);
|
||||
end_scope(depth);
|
||||
return true;
|
||||
}
|
||||
really_inline bool parser::on_end_object(uint32_t depth) noexcept {
|
||||
// write our doc.tape location to the header scope
|
||||
write_tape(containing_scope[depth].tape_index, internal::tape_type::END_OBJECT);
|
||||
end_scope(depth);
|
||||
return true;
|
||||
}
|
||||
really_inline bool parser::on_end_array(uint32_t depth) noexcept {
|
||||
// write our doc.tape location to the header scope
|
||||
write_tape(containing_scope[depth].tape_index, internal::tape_type::END_ARRAY);
|
||||
end_scope(depth);
|
||||
return true;
|
||||
}
|
||||
|
||||
really_inline bool parser::on_true_atom() noexcept {
|
||||
write_tape(0, internal::tape_type::TRUE_VALUE);
|
||||
return true;
|
||||
}
|
||||
really_inline bool parser::on_false_atom() noexcept {
|
||||
write_tape(0, internal::tape_type::FALSE_VALUE);
|
||||
return true;
|
||||
}
|
||||
really_inline bool parser::on_null_atom() noexcept {
|
||||
write_tape(0, internal::tape_type::NULL_VALUE);
|
||||
return true;
|
||||
}
|
||||
|
||||
really_inline uint8_t *parser::on_start_string() noexcept {
|
||||
/* we advance the point, accounting for the fact that we have a NULL
|
||||
* termination */
|
||||
write_tape(current_string_buf_loc - doc.string_buf.get(), internal::tape_type::STRING);
|
||||
return current_string_buf_loc + sizeof(uint32_t);
|
||||
}
|
||||
|
||||
really_inline bool parser::on_end_string(uint8_t *dst) noexcept {
|
||||
uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t)));
|
||||
// TODO check for overflow in case someone has a crazy string (>=4GB?)
|
||||
// But only add the overflow check when the document itself exceeds 4GB
|
||||
// Currently unneeded because we refuse to parse docs larger or equal to 4GB.
|
||||
memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t));
|
||||
// NULL termination is still handy if you expect all your strings to
|
||||
// be NULL terminated? It comes at a small cost
|
||||
*dst = 0;
|
||||
current_string_buf_loc = dst + 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
really_inline bool parser::on_number_s64(int64_t value) noexcept {
|
||||
write_tape(0, internal::tape_type::INT64);
|
||||
std::memcpy(&doc.tape[current_loc], &value, sizeof(value));
|
||||
++current_loc;
|
||||
return true;
|
||||
}
|
||||
really_inline bool parser::on_number_u64(uint64_t value) noexcept {
|
||||
write_tape(0, internal::tape_type::UINT64);
|
||||
doc.tape[current_loc++] = value;
|
||||
return true;
|
||||
}
|
||||
really_inline bool parser::on_number_double(double value) noexcept {
|
||||
write_tape(0, internal::tape_type::DOUBLE);
|
||||
static_assert(sizeof(value) == sizeof(doc.tape[current_loc]), "mismatch size");
|
||||
memcpy(&doc.tape[current_loc++], &value, sizeof(double));
|
||||
// doc.tape[doc.current_loc++] = *((uint64_t *)&d);
|
||||
return true;
|
||||
}
|
||||
|
||||
really_inline void parser::write_tape(uint64_t val, internal::tape_type t) noexcept {
|
||||
doc.tape[current_loc++] = val | ((uint64_t(char(t))) << 56);
|
||||
}
|
||||
|
||||
// this function is responsible for annotating the start of the scope
|
||||
really_inline void parser::end_scope(uint32_t depth) noexcept {
|
||||
scope_descriptor d = containing_scope[depth];
|
||||
// count can overflow if it exceeds 24 bits... so we saturate
|
||||
// the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff).
|
||||
const uint32_t cntsat = d.count > 0xFFFFFF ? 0xFFFFFF : d.count;
|
||||
// This is a load and an OR. It would be possible to just write once at doc.tape[d.tape_index]
|
||||
doc.tape[d.tape_index] |= current_loc | (uint64_t(cntsat) << 32);
|
||||
}
|
||||
|
||||
} // namespace dom
|
||||
} // namespace simdjson
|
||||
|
||||
#endif // SIMDJSON_DOCUMENT_PARSER_CALLBACKS_H
|
|
@ -1,6 +1,5 @@
|
|||
namespace numberparsing {
|
||||
|
||||
|
||||
// Attempts to compute i * 10^(power) exactly; and if "negative" is
|
||||
// true, negate the result.
|
||||
// This function will only work in some cases, when it does not work, success is
|
||||
|
@ -261,8 +260,9 @@ really_inline bool is_made_of_eight_digits_fast(const char *chars) {
|
|||
//
|
||||
// This function will almost never be called!!!
|
||||
//
|
||||
template<typename W>
|
||||
never_inline bool parse_large_integer(const uint8_t *const src,
|
||||
parser &parser,
|
||||
W writer,
|
||||
bool found_minus) {
|
||||
const char *p = reinterpret_cast<const char *>(src);
|
||||
|
||||
|
@ -310,14 +310,14 @@ never_inline bool parse_large_integer(const uint8_t *const src,
|
|||
// as a positive signed integer, but the negative version is
|
||||
// possible.
|
||||
constexpr int64_t signed_answer = INT64_MIN;
|
||||
parser.on_number_s64(signed_answer);
|
||||
writer.write_s64(signed_answer);
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_integer(signed_answer, src);
|
||||
#endif
|
||||
} else {
|
||||
// we can negate safely
|
||||
int64_t signed_answer = -static_cast<int64_t>(i);
|
||||
parser.on_number_s64(signed_answer);
|
||||
writer.write_s64(signed_answer);
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_integer(signed_answer, src);
|
||||
#endif
|
||||
|
@ -330,21 +330,22 @@ never_inline bool parse_large_integer(const uint8_t *const src,
|
|||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_integer(i, src);
|
||||
#endif
|
||||
parser.on_number_s64(i);
|
||||
writer.write_s64(i);
|
||||
} else {
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_unsigned_integer(i, src);
|
||||
#endif
|
||||
parser.on_number_u64(i);
|
||||
writer.write_u64(i);
|
||||
}
|
||||
}
|
||||
return is_structural_or_whitespace(*p);
|
||||
}
|
||||
|
||||
bool slow_float_parsing(UNUSED const char * src, parser &parser) {
|
||||
template<typename W>
|
||||
bool slow_float_parsing(UNUSED const char * src, W writer) {
|
||||
double d;
|
||||
if (parse_float_strtod(src, &d)) {
|
||||
parser.on_number_double(d);
|
||||
writer.write_double(d);
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_float(d, (const uint8_t *)src);
|
||||
#endif
|
||||
|
@ -365,12 +366,13 @@ bool slow_float_parsing(UNUSED const char * src, parser &parser) {
|
|||
// content and append a space before calling this function.
|
||||
//
|
||||
// Our objective is accurate parsing (ULP of 0) at high speed.
|
||||
template<typename W>
|
||||
really_inline bool parse_number(UNUSED const uint8_t *const src,
|
||||
UNUSED bool found_minus,
|
||||
parser &parser) {
|
||||
W writer) {
|
||||
#ifdef SIMDJSON_SKIPNUMBERPARSING // for performance analysis, it is sometimes
|
||||
// useful to skip parsing
|
||||
parser.on_number_s64(0); // always write zero
|
||||
writer.write_s64(0); // always write zero
|
||||
return true; // always succeeds
|
||||
#else
|
||||
const char *p = reinterpret_cast<const char *>(src);
|
||||
|
@ -522,14 +524,14 @@ really_inline bool parse_number(UNUSED const uint8_t *const src,
|
|||
// 10000000000000000000000000000000000000000000e+308
|
||||
// 3.1415926535897932384626433832795028841971693993751
|
||||
//
|
||||
return slow_float_parsing((const char *) src, parser);
|
||||
return slow_float_parsing((const char *) src, writer);
|
||||
}
|
||||
}
|
||||
if (unlikely(exponent < FASTFLOAT_SMALLEST_POWER) ||
|
||||
(exponent > FASTFLOAT_LARGEST_POWER)) { // this is uncommon!!!
|
||||
// this is almost never going to get called!!!
|
||||
// we start anew, going slowly!!!
|
||||
return slow_float_parsing((const char *) src, parser);
|
||||
return slow_float_parsing((const char *) src, writer);
|
||||
}
|
||||
bool success = true;
|
||||
double d = compute_float_64(exponent, i, negative, &success);
|
||||
|
@ -538,7 +540,7 @@ really_inline bool parse_number(UNUSED const uint8_t *const src,
|
|||
success = parse_float_strtod((const char *)src, &d);
|
||||
}
|
||||
if (success) {
|
||||
parser.on_number_double(d);
|
||||
writer.write_double(d);
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_float(d, src);
|
||||
#endif
|
||||
|
@ -553,10 +555,10 @@ really_inline bool parse_number(UNUSED const uint8_t *const src,
|
|||
if (unlikely(digit_count >= 18)) { // this is uncommon!!!
|
||||
// there is a good chance that we had an overflow, so we need
|
||||
// to recover: we parse the whole thing again.
|
||||
return parse_large_integer(src, parser, found_minus);
|
||||
return parse_large_integer(src, writer, found_minus);
|
||||
}
|
||||
i = negative ? 0 - i : i;
|
||||
parser.on_number_s64(i);
|
||||
writer.write_s64(i);
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_integer(i, src);
|
||||
#endif
|
||||
|
|
|
@ -113,9 +113,34 @@ public:
|
|||
uint8_t c{0}; // used to track the (structural) character we are looking at
|
||||
};
|
||||
|
||||
struct number_writer {
|
||||
parser &doc_parser;
|
||||
|
||||
really_inline void write_s64(int64_t value) noexcept {
|
||||
write_tape(0, internal::tape_type::INT64);
|
||||
std::memcpy(&doc_parser.doc.tape[doc_parser.current_loc], &value, sizeof(value));
|
||||
++doc_parser.current_loc;
|
||||
}
|
||||
really_inline void write_u64(uint64_t value) noexcept {
|
||||
write_tape(0, internal::tape_type::UINT64);
|
||||
doc_parser.doc.tape[doc_parser.current_loc++] = value;
|
||||
}
|
||||
really_inline void write_double(double value) noexcept {
|
||||
write_tape(0, internal::tape_type::DOUBLE);
|
||||
static_assert(sizeof(value) == sizeof(doc_parser.doc.tape[doc_parser.current_loc]), "mismatch size");
|
||||
memcpy(&doc_parser.doc.tape[doc_parser.current_loc++], &value, sizeof(double));
|
||||
// doc.tape[doc.current_loc++] = *((uint64_t *)&d);
|
||||
}
|
||||
really_inline void write_tape(uint64_t val, internal::tape_type t) noexcept {
|
||||
doc_parser.doc.tape[doc_parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
|
||||
}
|
||||
}; // struct number_writer
|
||||
|
||||
struct structural_parser {
|
||||
structural_iterator structurals;
|
||||
parser &doc_parser;
|
||||
/** Next write location in the string buf for stage 2 parsing */
|
||||
uint8_t *current_string_buf_loc{};
|
||||
uint32_t depth;
|
||||
|
||||
really_inline structural_parser(
|
||||
|
@ -125,54 +150,96 @@ struct structural_parser {
|
|||
uint32_t next_structural = 0
|
||||
) : structurals(buf, len, _doc_parser.structural_indexes.get(), next_structural), doc_parser{_doc_parser}, depth{0} {}
|
||||
|
||||
WARN_UNUSED really_inline bool start_document(ret_address continue_state) {
|
||||
doc_parser.on_start_document(depth);
|
||||
WARN_UNUSED really_inline bool start_scope(internal::tape_type type, ret_address continue_state) {
|
||||
doc_parser.containing_scope[depth].tape_index = doc_parser.current_loc;
|
||||
doc_parser.containing_scope[depth].count = 0;
|
||||
write_tape(0, type); // if the document is correct, this gets rewritten later
|
||||
doc_parser.ret_address[depth] = continue_state;
|
||||
depth++;
|
||||
return depth >= doc_parser.max_depth();
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline bool start_document(ret_address continue_state) {
|
||||
return start_scope(internal::tape_type::ROOT, continue_state);
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline bool start_object(ret_address continue_state) {
|
||||
doc_parser.on_start_object(depth);
|
||||
doc_parser.ret_address[depth] = continue_state;
|
||||
depth++;
|
||||
return depth >= doc_parser.max_depth();
|
||||
return start_scope(internal::tape_type::START_OBJECT, continue_state);
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline bool start_array(ret_address continue_state) {
|
||||
doc_parser.on_start_array(depth);
|
||||
doc_parser.ret_address[depth] = continue_state;
|
||||
depth++;
|
||||
return depth >= doc_parser.max_depth();
|
||||
return start_scope(internal::tape_type::START_ARRAY, continue_state);
|
||||
}
|
||||
|
||||
really_inline bool end_object() {
|
||||
// this function is responsible for annotating the start of the scope
|
||||
really_inline void end_scope(internal::tape_type type) noexcept {
|
||||
depth--;
|
||||
doc_parser.on_end_object(depth);
|
||||
return false;
|
||||
// write our doc.tape location to the header scope
|
||||
// The root scope gets written *at* the previous location.
|
||||
write_tape(doc_parser.containing_scope[depth].tape_index, type);
|
||||
// count can overflow if it exceeds 24 bits... so we saturate
|
||||
// the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff).
|
||||
const uint32_t start_tape_index = doc_parser.containing_scope[depth].tape_index;
|
||||
const uint32_t count = doc_parser.containing_scope[depth].count;
|
||||
const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count;
|
||||
// This is a load and an OR. It would be possible to just write once at doc.tape[start_tape_index]
|
||||
doc_parser.doc.tape[start_tape_index] |= doc_parser.current_loc | (uint64_t(cntsat) << 32);
|
||||
}
|
||||
really_inline bool end_array() {
|
||||
depth--;
|
||||
doc_parser.on_end_array(depth);
|
||||
return false;
|
||||
|
||||
really_inline void end_object() {
|
||||
end_scope(internal::tape_type::END_OBJECT);
|
||||
}
|
||||
really_inline bool end_document() {
|
||||
depth--;
|
||||
doc_parser.on_end_document(depth);
|
||||
return false;
|
||||
really_inline void end_array() {
|
||||
end_scope(internal::tape_type::END_ARRAY);
|
||||
}
|
||||
really_inline void end_document() {
|
||||
end_scope(internal::tape_type::ROOT);
|
||||
}
|
||||
|
||||
really_inline void write_tape(uint64_t val, internal::tape_type t) noexcept {
|
||||
doc_parser.doc.tape[doc_parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
|
||||
}
|
||||
|
||||
// increment_count increments the count of keys in an object or values in an array.
|
||||
// Note that if you are at the level of the values or elements, the count
|
||||
// must be incremented in the preceding depth (depth-1) where the array or
|
||||
// the object resides.
|
||||
really_inline void increment_count() {
|
||||
doc_parser.containing_scope[depth - 1].count++; // we have a key value pair in the object at parser.depth - 1
|
||||
}
|
||||
|
||||
really_inline uint8_t *on_start_string() noexcept {
|
||||
/* we advance the point, accounting for the fact that we have a NULL
|
||||
* termination */
|
||||
write_tape(current_string_buf_loc - doc_parser.doc.string_buf.get(), internal::tape_type::STRING);
|
||||
return current_string_buf_loc + sizeof(uint32_t);
|
||||
}
|
||||
|
||||
really_inline bool on_end_string(uint8_t *dst) noexcept {
|
||||
uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t)));
|
||||
// TODO check for overflow in case someone has a crazy string (>=4GB?)
|
||||
// But only add the overflow check when the document itself exceeds 4GB
|
||||
// Currently unneeded because we refuse to parse docs larger or equal to 4GB.
|
||||
memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t));
|
||||
// NULL termination is still handy if you expect all your strings to
|
||||
// be NULL terminated? It comes at a small cost
|
||||
*dst = 0;
|
||||
current_string_buf_loc = dst + 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline bool parse_string() {
|
||||
uint8_t *dst = doc_parser.on_start_string();
|
||||
uint8_t *dst = on_start_string();
|
||||
dst = stringparsing::parse_string(structurals.current(), dst);
|
||||
if (dst == nullptr) {
|
||||
return true;
|
||||
}
|
||||
return !doc_parser.on_end_string(dst);
|
||||
return !on_end_string(dst);
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline bool parse_number(const uint8_t *src, bool found_minus) {
|
||||
return !numberparsing::parse_number(src, found_minus, doc_parser);
|
||||
number_writer writer{doc_parser};
|
||||
return !numberparsing::parse_number(src, found_minus, writer);
|
||||
}
|
||||
WARN_UNUSED really_inline bool parse_number(bool found_minus) {
|
||||
return parse_number(structurals.current(), found_minus);
|
||||
|
@ -182,15 +249,15 @@ struct structural_parser {
|
|||
switch (structurals.current_char()) {
|
||||
case 't':
|
||||
if (!atomparsing::is_valid_true_atom(structurals.current())) { return true; }
|
||||
doc_parser.on_true_atom();
|
||||
write_tape(0, internal::tape_type::TRUE_VALUE);
|
||||
break;
|
||||
case 'f':
|
||||
if (!atomparsing::is_valid_false_atom(structurals.current())) { return true; }
|
||||
doc_parser.on_false_atom();
|
||||
write_tape(0, internal::tape_type::FALSE_VALUE);
|
||||
break;
|
||||
case 'n':
|
||||
if (!atomparsing::is_valid_null_atom(structurals.current())) { return true; }
|
||||
doc_parser.on_null_atom();
|
||||
write_tape(0, internal::tape_type::NULL_VALUE);
|
||||
break;
|
||||
default:
|
||||
return true;
|
||||
|
@ -202,15 +269,15 @@ struct structural_parser {
|
|||
switch (structurals.current_char()) {
|
||||
case 't':
|
||||
if (!atomparsing::is_valid_true_atom(structurals.current(), structurals.remaining_len())) { return true; }
|
||||
doc_parser.on_true_atom();
|
||||
write_tape(0, internal::tape_type::TRUE_VALUE);
|
||||
break;
|
||||
case 'f':
|
||||
if (!atomparsing::is_valid_false_atom(structurals.current(), structurals.remaining_len())) { return true; }
|
||||
doc_parser.on_false_atom();
|
||||
write_tape(0, internal::tape_type::FALSE_VALUE);
|
||||
break;
|
||||
case 'n':
|
||||
if (!atomparsing::is_valid_null_atom(structurals.current(), structurals.remaining_len())) { return true; }
|
||||
doc_parser.on_null_atom();
|
||||
write_tape(0, internal::tape_type::NULL_VALUE);
|
||||
break;
|
||||
default:
|
||||
return true;
|
||||
|
@ -247,17 +314,27 @@ struct structural_parser {
|
|||
WARN_UNUSED really_inline error_code finish() {
|
||||
// the string might not be NULL terminated.
|
||||
if ( !structurals.at_end(doc_parser.n_structural_indexes) ) {
|
||||
return doc_parser.on_error(TAPE_ERROR);
|
||||
return on_error(TAPE_ERROR);
|
||||
}
|
||||
end_document();
|
||||
if (depth != 0) {
|
||||
return doc_parser.on_error(TAPE_ERROR);
|
||||
return on_error(TAPE_ERROR);
|
||||
}
|
||||
if (doc_parser.containing_scope[depth].tape_index != 0) {
|
||||
return doc_parser.on_error(TAPE_ERROR);
|
||||
return on_error(TAPE_ERROR);
|
||||
}
|
||||
|
||||
return doc_parser.on_success(SUCCESS);
|
||||
return on_success(SUCCESS);
|
||||
}
|
||||
|
||||
really_inline error_code on_error(error_code new_error_code) noexcept {
|
||||
doc_parser.error = new_error_code;
|
||||
return new_error_code;
|
||||
}
|
||||
really_inline error_code on_success(error_code success_code) noexcept {
|
||||
doc_parser.error = success_code;
|
||||
doc_parser.valid = true;
|
||||
return success_code;
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline error_code error() {
|
||||
|
@ -272,11 +349,11 @@ struct structural_parser {
|
|||
* carefully,
|
||||
* all without any added cost. */
|
||||
if (depth >= doc_parser.max_depth()) {
|
||||
return doc_parser.on_error(DEPTH_ERROR);
|
||||
return on_error(DEPTH_ERROR);
|
||||
}
|
||||
switch (structurals.current_char()) {
|
||||
case '"':
|
||||
return doc_parser.on_error(STRING_ERROR);
|
||||
return on_error(STRING_ERROR);
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
|
@ -288,20 +365,27 @@ struct structural_parser {
|
|||
case '8':
|
||||
case '9':
|
||||
case '-':
|
||||
return doc_parser.on_error(NUMBER_ERROR);
|
||||
return on_error(NUMBER_ERROR);
|
||||
case 't':
|
||||
return doc_parser.on_error(T_ATOM_ERROR);
|
||||
return on_error(T_ATOM_ERROR);
|
||||
case 'n':
|
||||
return doc_parser.on_error(N_ATOM_ERROR);
|
||||
return on_error(N_ATOM_ERROR);
|
||||
case 'f':
|
||||
return doc_parser.on_error(F_ATOM_ERROR);
|
||||
return on_error(F_ATOM_ERROR);
|
||||
default:
|
||||
return doc_parser.on_error(TAPE_ERROR);
|
||||
return on_error(TAPE_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
really_inline void init() {
|
||||
current_string_buf_loc = doc_parser.doc.string_buf.get();
|
||||
doc_parser.current_loc = 0;
|
||||
doc_parser.valid = false;
|
||||
doc_parser.error = UNINITIALIZED;
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline error_code start(size_t len, ret_address finish_state) {
|
||||
doc_parser.init_stage2(); // sets is_valid to false
|
||||
init(); // sets is_valid to false
|
||||
if (len > doc_parser.capacity()) {
|
||||
return CAPACITY;
|
||||
}
|
||||
|
@ -309,7 +393,7 @@ struct structural_parser {
|
|||
structurals.advance_char();
|
||||
// Push the root scope (there is always at least one scope)
|
||||
if (start_document(finish_state)) {
|
||||
return doc_parser.on_error(DEPTH_ERROR);
|
||||
return on_error(DEPTH_ERROR);
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
|
@ -376,7 +460,7 @@ WARN_UNUSED error_code implementation::stage2(const uint8_t *buf, size_t len, pa
|
|||
object_begin:
|
||||
switch (parser.advance_char()) {
|
||||
case '"': {
|
||||
doc_parser.increment_count(parser.depth - 1); // we have a key value pair in the object at parser.depth - 1
|
||||
parser.increment_count();
|
||||
FAIL_IF( parser.parse_string() );
|
||||
goto object_key_state;
|
||||
}
|
||||
|
@ -395,7 +479,7 @@ object_key_state:
|
|||
object_continue:
|
||||
switch (parser.advance_char()) {
|
||||
case ',':
|
||||
doc_parser.increment_count(parser.depth - 1); // we have a key value pair in the object at parser.depth - 1
|
||||
parser.increment_count();
|
||||
FAIL_IF( parser.advance_char() != '"' );
|
||||
FAIL_IF( parser.parse_string() );
|
||||
goto object_key_state;
|
||||
|
@ -417,7 +501,7 @@ array_begin:
|
|||
parser.end_array();
|
||||
goto scope_end;
|
||||
}
|
||||
doc_parser.increment_count(parser.depth - 1); // we have a new value in the array at parser.depth - 1
|
||||
parser.increment_count();
|
||||
|
||||
main_array_switch:
|
||||
/* we call update char on all paths in, so we can peek at parser.c on the
|
||||
|
@ -427,7 +511,7 @@ main_array_switch:
|
|||
array_continue:
|
||||
switch (parser.advance_char()) {
|
||||
case ',':
|
||||
doc_parser.increment_count(parser.depth - 1); // we have a new value in the array at parser.depth - 1
|
||||
parser.increment_count();
|
||||
parser.advance_char();
|
||||
goto main_array_switch;
|
||||
case ']':
|
||||
|
|
|
@ -5,13 +5,13 @@ struct streaming_structural_parser: structural_parser {
|
|||
|
||||
// override to add streaming
|
||||
WARN_UNUSED really_inline error_code start(UNUSED size_t len, ret_address finish_parser) {
|
||||
doc_parser.init_stage2(); // sets is_valid to false
|
||||
init(); // sets is_valid to false
|
||||
// Capacity ain't no thang for streaming, so we don't check it.
|
||||
// Advance to the first character as soon as possible
|
||||
advance_char();
|
||||
// Push the root scope (there is always at least one scope)
|
||||
if (start_document(finish_parser)) {
|
||||
return doc_parser.on_error(DEPTH_ERROR);
|
||||
return on_error(DEPTH_ERROR);
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
|
@ -19,17 +19,17 @@ struct streaming_structural_parser: structural_parser {
|
|||
// override to add streaming
|
||||
WARN_UNUSED really_inline error_code finish() {
|
||||
if ( structurals.past_end(doc_parser.n_structural_indexes) ) {
|
||||
return doc_parser.on_error(TAPE_ERROR);
|
||||
return on_error(TAPE_ERROR);
|
||||
}
|
||||
end_document();
|
||||
if (depth != 0) {
|
||||
return doc_parser.on_error(TAPE_ERROR);
|
||||
return on_error(TAPE_ERROR);
|
||||
}
|
||||
if (doc_parser.containing_scope[depth].tape_index != 0) {
|
||||
return doc_parser.on_error(TAPE_ERROR);
|
||||
return on_error(TAPE_ERROR);
|
||||
}
|
||||
bool finished = structurals.at_end(doc_parser.n_structural_indexes);
|
||||
return doc_parser.on_success(finished ? SUCCESS : SUCCESS_AND_HAS_MORE);
|
||||
return on_success(finished ? SUCCESS : SUCCESS_AND_HAS_MORE);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -97,7 +97,7 @@ object_begin:
|
|||
|
||||
object_key_parser:
|
||||
FAIL_IF( parser.advance_char() != ':' );
|
||||
doc_parser.increment_count(parser.depth - 1); // we have a key value pair in the object at parser.depth - 1
|
||||
parser.increment_count();
|
||||
parser.advance_char();
|
||||
GOTO( parser.parse_value(addresses, addresses.object_continue) );
|
||||
|
||||
|
@ -125,7 +125,7 @@ array_begin:
|
|||
parser.end_array();
|
||||
goto scope_end;
|
||||
}
|
||||
doc_parser.increment_count(parser.depth - 1); // we have a new value in the array at parser.depth - 1
|
||||
parser.increment_count();
|
||||
|
||||
main_array_switch:
|
||||
/* we call update char on all paths in, so we can peek at parser.c on the
|
||||
|
@ -135,7 +135,7 @@ main_array_switch:
|
|||
array_continue:
|
||||
switch (parser.advance_char()) {
|
||||
case ',':
|
||||
doc_parser.increment_count(parser.depth - 1); // we have a new value in the array at parser.depth - 1
|
||||
parser.increment_count();
|
||||
parser.advance_char();
|
||||
goto main_array_switch;
|
||||
case ']':
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include "jsoncharutils.h"
|
||||
#include "document_parser_callbacks.h"
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
|
|
Loading…
Reference in New Issue