Structural iterator
This commit is contained in:
parent
d9a9fd387d
commit
81c86d7090
|
@ -145,9 +145,8 @@ really_inline double subnormal_power10(double base, int64_t negative_exponent) {
|
|||
//
|
||||
// Note: a redesign could avoid this function entirely.
|
||||
//
|
||||
never_inline bool parse_float(const uint8_t *const buf, document::parser &parser,
|
||||
const uint32_t offset, bool found_minus) {
|
||||
const char *p = reinterpret_cast<const char *>(buf + offset);
|
||||
never_inline bool parse_float(const uint8_t *const src, document::parser &parser, bool found_minus) {
|
||||
const char *p = reinterpret_cast<const char *>(src);
|
||||
bool negative = false;
|
||||
if (found_minus) {
|
||||
++p;
|
||||
|
@ -179,7 +178,7 @@ never_inline bool parse_float(const uint8_t *const buf, document::parser &parser
|
|||
: 0);
|
||||
} else {
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_invalid_number(buf + offset);
|
||||
found_invalid_number(src);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
@ -202,7 +201,7 @@ never_inline bool parse_float(const uint8_t *const buf, document::parser &parser
|
|||
}
|
||||
if (!is_integer(*p)) {
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_invalid_number(buf + offset);
|
||||
found_invalid_number(src);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
@ -228,7 +227,7 @@ never_inline bool parse_float(const uint8_t *const buf, document::parser &parser
|
|||
if (exp_number > 0x100000000) { // we need to check for overflows
|
||||
// we refuse to parse this
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_invalid_number(buf + offset);
|
||||
found_invalid_number(src);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
@ -246,7 +245,7 @@ never_inline bool parse_float(const uint8_t *const buf, document::parser &parser
|
|||
// We know for sure that we have a number that is too large,
|
||||
// we refuse to parse this
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_invalid_number(buf + offset);
|
||||
found_invalid_number(src);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
@ -264,14 +263,14 @@ never_inline bool parse_float(const uint8_t *const buf, document::parser &parser
|
|||
// check that we can go from long double to double safely.
|
||||
if(i > std::numeric_limits<double>::max()) {
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_invalid_number(buf + offset);
|
||||
found_invalid_number(src);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
double d = negative ? -i : i;
|
||||
parser.on_number_double(d);
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_float(d, buf + offset);
|
||||
found_float(d, src);
|
||||
#endif
|
||||
return is_structural_or_whitespace(*p);
|
||||
}
|
||||
|
@ -284,11 +283,8 @@ never_inline bool parse_float(const uint8_t *const buf, document::parser &parser
|
|||
//
|
||||
// This function will almost never be called!!!
|
||||
//
|
||||
never_inline bool parse_large_integer(const uint8_t *const buf,
|
||||
document::parser &parser,
|
||||
const uint32_t offset,
|
||||
bool found_minus) {
|
||||
const char *p = reinterpret_cast<const char *>(buf + offset);
|
||||
never_inline bool parse_large_integer(const uint8_t *const src, document::parser &parser, bool found_minus) {
|
||||
const char *p = reinterpret_cast<const char *>(src);
|
||||
|
||||
bool negative = false;
|
||||
if (found_minus) {
|
||||
|
@ -309,13 +305,13 @@ never_inline bool parse_large_integer(const uint8_t *const buf,
|
|||
digit = *p - '0';
|
||||
if (mul_overflow(i, 10, &i)) {
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_invalid_number(buf + offset);
|
||||
found_invalid_number(src);
|
||||
#endif
|
||||
return false; // overflow
|
||||
}
|
||||
if (add_overflow(i, digit, &i)) {
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_invalid_number(buf + offset);
|
||||
found_invalid_number(src);
|
||||
#endif
|
||||
return false; // overflow
|
||||
}
|
||||
|
@ -326,7 +322,7 @@ never_inline bool parse_large_integer(const uint8_t *const buf,
|
|||
if (i > 0x8000000000000000) {
|
||||
// overflows!
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_invalid_number(buf + offset);
|
||||
found_invalid_number(src);
|
||||
#endif
|
||||
return false; // overflow
|
||||
} else if (i == 0x8000000000000000) {
|
||||
|
@ -336,14 +332,14 @@ never_inline bool parse_large_integer(const uint8_t *const buf,
|
|||
constexpr int64_t signed_answer = INT64_MIN;
|
||||
parser.on_number_s64(signed_answer);
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_integer(signed_answer, buf + offset);
|
||||
found_integer(signed_answer, src);
|
||||
#endif
|
||||
} else {
|
||||
// we can negate safely
|
||||
int64_t signed_answer = -static_cast<int64_t>(i);
|
||||
parser.on_number_s64(signed_answer);
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_integer(signed_answer, buf + offset);
|
||||
found_integer(signed_answer, src);
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
|
@ -352,12 +348,12 @@ never_inline bool parse_large_integer(const uint8_t *const buf,
|
|||
// fallback on unsigned integers if absolutely necessary.
|
||||
if(i < 0x8000000000000000) {
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_integer(i, buf + offset);
|
||||
found_integer(i, src);
|
||||
#endif
|
||||
parser.on_number_s64(i);
|
||||
} else {
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_unsigned_integer(i, buf + offset);
|
||||
found_unsigned_integer(i, src);
|
||||
#endif
|
||||
parser.on_number_u64(i);
|
||||
}
|
||||
|
@ -365,7 +361,7 @@ never_inline bool parse_large_integer(const uint8_t *const buf,
|
|||
return is_structural_or_whitespace(*p);
|
||||
}
|
||||
|
||||
// parse the number at buf + offset
|
||||
// parse the number at src
|
||||
// define JSON_TEST_NUMBERS for unit testing
|
||||
//
|
||||
// It is assumed that the number is followed by a structural ({,},],[) character
|
||||
|
@ -374,8 +370,7 @@ never_inline bool parse_large_integer(const uint8_t *const buf,
|
|||
// content and append a space before calling this function.
|
||||
//
|
||||
// Our objective is accurate parsing (ULP of 0 or 1) at high speed.
|
||||
really_inline bool parse_number(UNUSED const uint8_t *const buf,
|
||||
UNUSED const uint32_t offset,
|
||||
really_inline bool parse_number(UNUSED const uint8_t *const src,
|
||||
UNUSED bool found_minus,
|
||||
document::parser &parser) {
|
||||
#ifdef SIMDJSON_SKIPNUMBERPARSING // for performance analysis, it is sometimes
|
||||
|
@ -383,14 +378,14 @@ really_inline bool parse_number(UNUSED const uint8_t *const buf,
|
|||
parser.on_number_s64(0); // always write zero
|
||||
return true; // always succeeds
|
||||
#else
|
||||
const char *p = reinterpret_cast<const char *>(buf + offset);
|
||||
const char *p = reinterpret_cast<const char *>(src);
|
||||
bool negative = false;
|
||||
if (found_minus) {
|
||||
++p;
|
||||
negative = true;
|
||||
if (!is_integer(*p)) { // a negative sign must be followed by an integer
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_invalid_number(buf + offset);
|
||||
found_invalid_number(src);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
@ -402,7 +397,7 @@ really_inline bool parse_number(UNUSED const uint8_t *const buf,
|
|||
++p;
|
||||
if (is_not_structural_or_whitespace_or_exponent_or_decimal(*p)) {
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_invalid_number(buf + offset);
|
||||
found_invalid_number(src);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
@ -410,7 +405,7 @@ really_inline bool parse_number(UNUSED const uint8_t *const buf,
|
|||
} else {
|
||||
if (!(is_integer(*p))) { // must start with an integer
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_invalid_number(buf + offset);
|
||||
found_invalid_number(src);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
@ -445,7 +440,7 @@ really_inline bool parse_number(UNUSED const uint8_t *const buf,
|
|||
// we will handle the overflow later
|
||||
} else {
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_invalid_number(buf + offset);
|
||||
found_invalid_number(src);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
@ -480,7 +475,7 @@ really_inline bool parse_number(UNUSED const uint8_t *const buf,
|
|||
}
|
||||
if (!is_integer(*p)) {
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_invalid_number(buf + offset);
|
||||
found_invalid_number(src);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
@ -501,7 +496,7 @@ really_inline bool parse_number(UNUSED const uint8_t *const buf,
|
|||
if (exp_number > 0x100000000) { // we need to check for overflows
|
||||
// we refuse to parse this
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_invalid_number(buf + offset);
|
||||
found_invalid_number(src);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
@ -526,31 +521,31 @@ really_inline bool parse_number(UNUSED const uint8_t *const buf,
|
|||
// Ok, chances are good that we had an overflow!
|
||||
// this is almost never going to get called!!!
|
||||
// we start anew, going slowly!!!
|
||||
return parse_float(buf, parser, offset, found_minus);
|
||||
return parse_float(src, parser, found_minus);
|
||||
}
|
||||
}
|
||||
if (unlikely((power_index > 2 * 308))) { // this is uncommon!!!
|
||||
// this is almost never going to get called!!!
|
||||
// we start anew, going slowly!!!
|
||||
return parse_float(buf, parser, offset, found_minus);
|
||||
return parse_float(src, parser, found_minus);
|
||||
}
|
||||
double factor = power_of_ten[power_index];
|
||||
factor = negative ? -factor : factor;
|
||||
double d = i * factor;
|
||||
parser.on_number_double(d);
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_float(d, buf + offset);
|
||||
found_float(d, src);
|
||||
#endif
|
||||
} else {
|
||||
if (unlikely(digit_count >= 18)) { // this is uncommon!!!
|
||||
// there is a good chance that we had an overflow, so we need
|
||||
// to recover: we parse the whole thing again.
|
||||
return parse_large_integer(buf, parser, offset, found_minus);
|
||||
return parse_large_integer(src, parser, found_minus);
|
||||
}
|
||||
i = negative ? 0 - i : i;
|
||||
parser.on_number_s64(i);
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_integer(i, buf + offset);
|
||||
found_integer(i, src);
|
||||
#endif
|
||||
}
|
||||
return is_structural_or_whitespace(*p);
|
||||
|
|
|
@ -47,28 +47,22 @@ struct unified_machine_addresses {
|
|||
#undef FAIL_IF
|
||||
#define FAIL_IF(EXPR) { if (EXPR) { return addresses.error; } }
|
||||
|
||||
struct structural_parser {
|
||||
const uint8_t* const buf;
|
||||
const size_t len;
|
||||
document::parser &doc_parser;
|
||||
size_t i; // next structural index
|
||||
size_t idx; // location of the structural character in the input (buf)
|
||||
uint8_t c; // used to track the (structural) character we are looking at
|
||||
uint32_t depth = 0; // could have an arbitrary starting depth
|
||||
|
||||
really_inline structural_parser(
|
||||
const uint8_t *_buf,
|
||||
size_t _len,
|
||||
document::parser &_doc_parser,
|
||||
uint32_t _i = 0
|
||||
) : buf{_buf}, len{_len}, doc_parser{_doc_parser}, i{_i} {}
|
||||
|
||||
class structural_iterator {
|
||||
public:
|
||||
really_inline structural_iterator(const uint8_t* _buf, size_t _len, const uint32_t *_structural_indexes, size_t next_structural_index)
|
||||
: buf{_buf}, len{_len}, structural_indexes{_structural_indexes}, next_structural{next_structural_index} {}
|
||||
really_inline char advance_char() {
|
||||
idx = doc_parser.structural_indexes[i++];
|
||||
c = buf[idx];
|
||||
idx = structural_indexes[next_structural];
|
||||
next_structural++;
|
||||
c = *current();
|
||||
return c;
|
||||
}
|
||||
|
||||
really_inline char current_char() {
|
||||
return c;
|
||||
}
|
||||
really_inline const uint8_t* current() {
|
||||
return &buf[idx];
|
||||
}
|
||||
template<typename F>
|
||||
really_inline bool with_space_terminated_copy(const F& f) {
|
||||
/**
|
||||
|
@ -94,6 +88,36 @@ struct structural_parser {
|
|||
free(copy);
|
||||
return result;
|
||||
}
|
||||
really_inline bool past_end(uint32_t n_structural_indexes) {
|
||||
return next_structural+1 > n_structural_indexes;
|
||||
}
|
||||
really_inline bool at_end(uint32_t n_structural_indexes) {
|
||||
return next_structural+1 == n_structural_indexes;
|
||||
}
|
||||
really_inline size_t next_structural_index() {
|
||||
return next_structural;
|
||||
}
|
||||
|
||||
private:
|
||||
const uint8_t* const buf;
|
||||
const size_t len;
|
||||
const uint32_t* const structural_indexes;
|
||||
size_t next_structural; // next structural index
|
||||
size_t idx; // location of the structural character in the input (buf)
|
||||
uint8_t c; // used to track the (structural) character we are looking at
|
||||
};
|
||||
|
||||
struct structural_parser {
|
||||
structural_iterator structurals;
|
||||
document::parser &doc_parser;
|
||||
uint32_t depth;
|
||||
|
||||
really_inline structural_parser(
|
||||
const uint8_t *buf,
|
||||
size_t len,
|
||||
document::parser &_doc_parser,
|
||||
uint32_t next_structural = 0
|
||||
) : structurals(buf, len, _doc_parser.structural_indexes.get(), next_structural), doc_parser{_doc_parser}, depth{0} {}
|
||||
|
||||
WARN_UNUSED really_inline bool start_document(ret_address continue_state) {
|
||||
doc_parser.on_start_document(depth);
|
||||
|
@ -134,32 +158,32 @@ struct structural_parser {
|
|||
|
||||
WARN_UNUSED really_inline bool parse_string() {
|
||||
uint8_t *dst = doc_parser.on_start_string();
|
||||
dst = stringparsing::parse_string(buf, idx, dst);
|
||||
dst = stringparsing::parse_string(structurals.current(), dst);
|
||||
if (dst == nullptr) {
|
||||
return true;
|
||||
}
|
||||
return !doc_parser.on_end_string(dst);
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline bool parse_number(const uint8_t *copy, uint32_t offset, bool found_minus) {
|
||||
return !numberparsing::parse_number(copy, offset, found_minus, doc_parser);
|
||||
WARN_UNUSED really_inline bool parse_number(const uint8_t *src, bool found_minus) {
|
||||
return !numberparsing::parse_number(src, found_minus, doc_parser);
|
||||
}
|
||||
WARN_UNUSED really_inline bool parse_number(bool found_minus) {
|
||||
return parse_number(buf, idx, found_minus);
|
||||
return parse_number(structurals.current(), found_minus);
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline bool parse_atom(const uint8_t *copy, uint32_t offset) {
|
||||
switch (c) {
|
||||
WARN_UNUSED really_inline bool parse_atom(const uint8_t *src) {
|
||||
switch (structurals.current_char()) {
|
||||
case 't':
|
||||
if (!is_valid_true_atom(copy + offset)) { return true; }
|
||||
if (!is_valid_true_atom(src)) { return true; }
|
||||
doc_parser.on_true_atom();
|
||||
break;
|
||||
case 'f':
|
||||
if (!is_valid_false_atom(copy + offset)) { return true; }
|
||||
if (!is_valid_false_atom(src)) { return true; }
|
||||
doc_parser.on_false_atom();
|
||||
break;
|
||||
case 'n':
|
||||
if (!is_valid_null_atom(copy + offset)) { return true; }
|
||||
if (!is_valid_null_atom(src)) { return true; }
|
||||
doc_parser.on_null_atom();
|
||||
break;
|
||||
default:
|
||||
|
@ -169,11 +193,11 @@ struct structural_parser {
|
|||
}
|
||||
|
||||
WARN_UNUSED really_inline bool parse_atom() {
|
||||
return parse_atom(buf, idx);
|
||||
return parse_atom(structurals.current());
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline ret_address parse_value(const unified_machine_addresses &addresses, ret_address continue_state) {
|
||||
switch (c) {
|
||||
switch (structurals.current_char()) {
|
||||
case '"':
|
||||
FAIL_IF( parse_string() );
|
||||
return continue_state;
|
||||
|
@ -200,7 +224,7 @@ struct structural_parser {
|
|||
|
||||
WARN_UNUSED really_inline error_code finish() {
|
||||
// the string might not be NULL terminated.
|
||||
if ( i + 1 != doc_parser.n_structural_indexes ) {
|
||||
if ( !structurals.at_end(doc_parser.n_structural_indexes) ) {
|
||||
return doc_parser.on_error(TAPE_ERROR);
|
||||
}
|
||||
end_document();
|
||||
|
@ -228,7 +252,7 @@ struct structural_parser {
|
|||
if (depth >= doc_parser.max_depth()) {
|
||||
return doc_parser.on_error(DEPTH_ERROR);
|
||||
}
|
||||
switch (c) {
|
||||
switch (structurals.current_char()) {
|
||||
case '"':
|
||||
return doc_parser.on_error(STRING_ERROR);
|
||||
case '0':
|
||||
|
@ -254,19 +278,23 @@ struct structural_parser {
|
|||
}
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline error_code start(ret_address finish_state) {
|
||||
WARN_UNUSED really_inline error_code start(size_t len, ret_address finish_state) {
|
||||
doc_parser.init_stage2(); // sets is_valid to false
|
||||
if (len > doc_parser.capacity()) {
|
||||
return CAPACITY;
|
||||
}
|
||||
// Advance to the first character as soon as possible
|
||||
advance_char();
|
||||
structurals.advance_char();
|
||||
// Push the root scope (there is always at least one scope)
|
||||
if (start_document(finish_state)) {
|
||||
return doc_parser.on_error(DEPTH_ERROR);
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
really_inline char advance_char() {
|
||||
return structurals.advance_char();
|
||||
}
|
||||
};
|
||||
|
||||
// Redefine FAIL_IF to use goto since it'll be used inside the function now
|
||||
|
@ -282,13 +310,13 @@ struct structural_parser {
|
|||
WARN_UNUSED error_code implementation::stage2(const uint8_t *buf, size_t len, document::parser &doc_parser) const noexcept {
|
||||
static constexpr stage2::unified_machine_addresses addresses = INIT_ADDRESSES();
|
||||
stage2::structural_parser parser(buf, len, doc_parser);
|
||||
error_code result = parser.start(addresses.finish);
|
||||
error_code result = parser.start(len, addresses.finish);
|
||||
if (result) { return result; }
|
||||
|
||||
//
|
||||
// Read first value
|
||||
//
|
||||
switch (parser.c) {
|
||||
switch (parser.structurals.current_char()) {
|
||||
case '{':
|
||||
FAIL_IF( parser.start_object(addresses.finish) );
|
||||
goto object_begin;
|
||||
|
@ -300,23 +328,23 @@ WARN_UNUSED error_code implementation::stage2(const uint8_t *buf, size_t len, do
|
|||
goto finish;
|
||||
case 't': case 'f': case 'n':
|
||||
FAIL_IF(
|
||||
parser.with_space_terminated_copy([&](auto copy, auto idx) {
|
||||
return parser.parse_atom(copy, idx);
|
||||
parser.structurals.with_space_terminated_copy([&](auto copy, auto idx) {
|
||||
return parser.parse_atom(&copy[idx]);
|
||||
})
|
||||
);
|
||||
goto finish;
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
FAIL_IF(
|
||||
parser.with_space_terminated_copy([&](auto copy, auto idx) {
|
||||
return parser.parse_number(copy, idx, false);
|
||||
parser.structurals.with_space_terminated_copy([&](auto copy, auto idx) {
|
||||
return parser.parse_number(&copy[idx], false);
|
||||
})
|
||||
);
|
||||
goto finish;
|
||||
case '-':
|
||||
FAIL_IF(
|
||||
parser.with_space_terminated_copy([&](auto copy, auto idx) {
|
||||
return parser.parse_number(copy, idx, true);
|
||||
parser.structurals.with_space_terminated_copy([&](auto copy, auto idx) {
|
||||
return parser.parse_number(&copy[idx], true);
|
||||
})
|
||||
);
|
||||
goto finish;
|
||||
|
@ -328,8 +356,7 @@ WARN_UNUSED error_code implementation::stage2(const uint8_t *buf, size_t len, do
|
|||
// Object parser states
|
||||
//
|
||||
object_begin:
|
||||
parser.advance_char();
|
||||
switch (parser.c) {
|
||||
switch (parser.advance_char()) {
|
||||
case '"': {
|
||||
FAIL_IF( parser.parse_string() );
|
||||
goto object_key_state;
|
||||
|
|
|
@ -4,7 +4,7 @@ struct streaming_structural_parser: structural_parser {
|
|||
really_inline streaming_structural_parser(const uint8_t *_buf, size_t _len, document::parser &_doc_parser, size_t _i) : structural_parser(_buf, _len, _doc_parser, _i) {}
|
||||
|
||||
// override to add streaming
|
||||
WARN_UNUSED really_inline error_code start(ret_address finish_parser) {
|
||||
WARN_UNUSED really_inline error_code start(UNUSED size_t len, ret_address finish_parser) {
|
||||
doc_parser.init_stage2(); // sets is_valid to false
|
||||
// Capacity ain't no thang for streaming, so we don't check it.
|
||||
// Advance to the first character as soon as possible
|
||||
|
@ -18,7 +18,7 @@ struct streaming_structural_parser: structural_parser {
|
|||
|
||||
// override to add streaming
|
||||
WARN_UNUSED really_inline error_code finish() {
|
||||
if ( i + 1 > doc_parser.n_structural_indexes ) {
|
||||
if ( structurals.past_end(doc_parser.n_structural_indexes) ) {
|
||||
return doc_parser.on_error(TAPE_ERROR);
|
||||
}
|
||||
end_document();
|
||||
|
@ -28,7 +28,7 @@ struct streaming_structural_parser: structural_parser {
|
|||
if (doc_parser.containing_scope_offset[depth] != 0) {
|
||||
return doc_parser.on_error(TAPE_ERROR);
|
||||
}
|
||||
bool finished = i + 1 == doc_parser.n_structural_indexes;
|
||||
bool finished = structurals.at_end(doc_parser.n_structural_indexes);
|
||||
return doc_parser.on_success(finished ? SUCCESS : SUCCESS_AND_HAS_MORE);
|
||||
}
|
||||
};
|
||||
|
@ -42,12 +42,12 @@ struct streaming_structural_parser: structural_parser {
|
|||
WARN_UNUSED error_code implementation::stage2(const uint8_t *buf, size_t len, document::parser &doc_parser, size_t &next_json) const noexcept {
|
||||
static constexpr stage2::unified_machine_addresses addresses = INIT_ADDRESSES();
|
||||
stage2::streaming_structural_parser parser(buf, len, doc_parser, next_json);
|
||||
error_code result = parser.start(addresses.finish);
|
||||
error_code result = parser.start(len, addresses.finish);
|
||||
if (result) { return result; }
|
||||
//
|
||||
// Read first value
|
||||
//
|
||||
switch (parser.c) {
|
||||
switch (parser.structurals.current_char()) {
|
||||
case '{':
|
||||
FAIL_IF( parser.start_object(addresses.finish) );
|
||||
goto object_begin;
|
||||
|
@ -59,23 +59,23 @@ WARN_UNUSED error_code implementation::stage2(const uint8_t *buf, size_t len, do
|
|||
goto finish;
|
||||
case 't': case 'f': case 'n':
|
||||
FAIL_IF(
|
||||
parser.with_space_terminated_copy([&](auto copy, auto idx) {
|
||||
return parser.parse_atom(copy, idx);
|
||||
parser.structurals.with_space_terminated_copy([&](auto copy, auto idx) {
|
||||
return parser.parse_atom(&copy[idx]);
|
||||
})
|
||||
);
|
||||
goto finish;
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
FAIL_IF(
|
||||
parser.with_space_terminated_copy([&](auto copy, auto idx) {
|
||||
return parser.parse_number(copy, idx, false);
|
||||
parser.structurals.with_space_terminated_copy([&](auto copy, auto idx) {
|
||||
return parser.parse_number(&copy[idx], false);
|
||||
})
|
||||
);
|
||||
goto finish;
|
||||
case '-':
|
||||
FAIL_IF(
|
||||
parser.with_space_terminated_copy([&](auto copy, auto idx) {
|
||||
return parser.parse_number(copy, idx, true);
|
||||
parser.structurals.with_space_terminated_copy([&](auto copy, auto idx) {
|
||||
return parser.parse_number(&copy[idx], true);
|
||||
})
|
||||
);
|
||||
goto finish;
|
||||
|
@ -87,8 +87,7 @@ WARN_UNUSED error_code implementation::stage2(const uint8_t *buf, size_t len, do
|
|||
// Object parser parsers
|
||||
//
|
||||
object_begin:
|
||||
parser.advance_char();
|
||||
switch (parser.c) {
|
||||
switch (parser.advance_char()) {
|
||||
case '"': {
|
||||
FAIL_IF( parser.parse_string() );
|
||||
goto object_key_parser;
|
||||
|
@ -148,7 +147,7 @@ array_continue:
|
|||
}
|
||||
|
||||
finish:
|
||||
next_json = parser.i;
|
||||
next_json = parser.structurals.next_structural_index();
|
||||
return parser.finish();
|
||||
|
||||
error:
|
||||
|
|
|
@ -71,10 +71,9 @@ really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
|
|||
return offset > 0;
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline uint8_t *parse_string(const uint8_t *buf,
|
||||
uint32_t offset,
|
||||
WARN_UNUSED really_inline uint8_t *parse_string(const uint8_t *src,
|
||||
uint8_t *dst) {
|
||||
const uint8_t *src = &buf[offset + 1]; /* we know that buf at offset is a " */
|
||||
src++;
|
||||
while (1) {
|
||||
parse_string_helper helper = find_bs_bits_and_quote_bits(src, dst);
|
||||
if (((helper.bs_bits - 1) & helper.quote_bits) != 0) {
|
||||
|
|
Loading…
Reference in New Issue