Extend structural_parser from structural_iterator
This commit is contained in:
parent
a1aea4588f
commit
3636aa5522
|
@ -69,16 +69,15 @@ struct number_writer {
|
||||||
}
|
}
|
||||||
}; // struct number_writer
|
}; // struct number_writer
|
||||||
|
|
||||||
struct structural_parser {
|
struct structural_parser : structural_iterator {
|
||||||
structural_iterator structurals;
|
|
||||||
dom_parser_implementation &parser;
|
dom_parser_implementation &parser;
|
||||||
/** Next write location in the string buf for stage 2 parsing */
|
/** Next write location in the string buf for stage 2 parsing */
|
||||||
uint8_t *current_string_buf_loc{};
|
uint8_t *current_string_buf_loc{};
|
||||||
uint32_t depth;
|
uint32_t depth;
|
||||||
|
|
||||||
// For non-streaming, to pass an explicit 0 as next_structural, which enables optimizations
|
// For non-streaming, to pass an explicit 0 as next_structural, which enables optimizations
|
||||||
really_inline structural_parser(dom_parser_implementation &_parser, uint32_t next_structural)
|
really_inline structural_parser(dom_parser_implementation &_parser, uint32_t _next_structural)
|
||||||
: structurals(_parser.buf, _parser.len, _parser.structural_indexes.get(), next_structural),
|
: structural_iterator(_parser.buf, _parser.len, _parser.structural_indexes.get(), _next_structural),
|
||||||
parser{_parser},
|
parser{_parser},
|
||||||
depth{0} {
|
depth{0} {
|
||||||
}
|
}
|
||||||
|
@ -174,7 +173,7 @@ struct structural_parser {
|
||||||
WARN_UNUSED really_inline bool parse_string(bool key = false) {
|
WARN_UNUSED really_inline bool parse_string(bool key = false) {
|
||||||
log_value(key ? "key" : "string");
|
log_value(key ? "key" : "string");
|
||||||
uint8_t *dst = on_start_string();
|
uint8_t *dst = on_start_string();
|
||||||
dst = stringparsing::parse_string(structurals.current(), dst);
|
dst = stringparsing::parse_string(current(), dst);
|
||||||
if (dst == nullptr) {
|
if (dst == nullptr) {
|
||||||
log_error("Invalid escape in string");
|
log_error("Invalid escape in string");
|
||||||
return true;
|
return true;
|
||||||
|
@ -191,24 +190,24 @@ struct structural_parser {
|
||||||
return !succeeded;
|
return !succeeded;
|
||||||
}
|
}
|
||||||
WARN_UNUSED really_inline bool parse_number(bool found_minus) {
|
WARN_UNUSED really_inline bool parse_number(bool found_minus) {
|
||||||
return parse_number(structurals.current(), found_minus);
|
return parse_number(current(), found_minus);
|
||||||
}
|
}
|
||||||
|
|
||||||
WARN_UNUSED really_inline bool parse_atom() {
|
WARN_UNUSED really_inline bool parse_atom() {
|
||||||
switch (structurals.current_char()) {
|
switch (current_char()) {
|
||||||
case 't':
|
case 't':
|
||||||
log_value("true");
|
log_value("true");
|
||||||
if (!atomparsing::is_valid_true_atom(structurals.current())) { return true; }
|
if (!atomparsing::is_valid_true_atom(current())) { return true; }
|
||||||
append_tape(0, internal::tape_type::TRUE_VALUE);
|
append_tape(0, internal::tape_type::TRUE_VALUE);
|
||||||
break;
|
break;
|
||||||
case 'f':
|
case 'f':
|
||||||
log_value("false");
|
log_value("false");
|
||||||
if (!atomparsing::is_valid_false_atom(structurals.current())) { return true; }
|
if (!atomparsing::is_valid_false_atom(current())) { return true; }
|
||||||
append_tape(0, internal::tape_type::FALSE_VALUE);
|
append_tape(0, internal::tape_type::FALSE_VALUE);
|
||||||
break;
|
break;
|
||||||
case 'n':
|
case 'n':
|
||||||
log_value("null");
|
log_value("null");
|
||||||
if (!atomparsing::is_valid_null_atom(structurals.current())) { return true; }
|
if (!atomparsing::is_valid_null_atom(current())) { return true; }
|
||||||
append_tape(0, internal::tape_type::NULL_VALUE);
|
append_tape(0, internal::tape_type::NULL_VALUE);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -219,20 +218,20 @@ struct structural_parser {
|
||||||
}
|
}
|
||||||
|
|
||||||
WARN_UNUSED really_inline bool parse_single_atom() {
|
WARN_UNUSED really_inline bool parse_single_atom() {
|
||||||
switch (structurals.current_char()) {
|
switch (current_char()) {
|
||||||
case 't':
|
case 't':
|
||||||
log_value("true");
|
log_value("true");
|
||||||
if (!atomparsing::is_valid_true_atom(structurals.current(), structurals.remaining_len())) { return true; }
|
if (!atomparsing::is_valid_true_atom(current(), remaining_len())) { return true; }
|
||||||
append_tape(0, internal::tape_type::TRUE_VALUE);
|
append_tape(0, internal::tape_type::TRUE_VALUE);
|
||||||
break;
|
break;
|
||||||
case 'f':
|
case 'f':
|
||||||
log_value("false");
|
log_value("false");
|
||||||
if (!atomparsing::is_valid_false_atom(structurals.current(), structurals.remaining_len())) { return true; }
|
if (!atomparsing::is_valid_false_atom(current(), remaining_len())) { return true; }
|
||||||
append_tape(0, internal::tape_type::FALSE_VALUE);
|
append_tape(0, internal::tape_type::FALSE_VALUE);
|
||||||
break;
|
break;
|
||||||
case 'n':
|
case 'n':
|
||||||
log_value("null");
|
log_value("null");
|
||||||
if (!atomparsing::is_valid_null_atom(structurals.current(), structurals.remaining_len())) { return true; }
|
if (!atomparsing::is_valid_null_atom(current(), remaining_len())) { return true; }
|
||||||
append_tape(0, internal::tape_type::NULL_VALUE);
|
append_tape(0, internal::tape_type::NULL_VALUE);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -243,7 +242,7 @@ struct structural_parser {
|
||||||
}
|
}
|
||||||
|
|
||||||
WARN_UNUSED really_inline ret_address_t parse_value(const unified_machine_addresses &addresses, ret_address_t continue_state) {
|
WARN_UNUSED really_inline ret_address_t parse_value(const unified_machine_addresses &addresses, ret_address_t continue_state) {
|
||||||
switch (structurals.current_char()) {
|
switch (current_char()) {
|
||||||
case '"':
|
case '"':
|
||||||
FAIL_IF( parse_string() );
|
FAIL_IF( parse_string() );
|
||||||
return continue_state;
|
return continue_state;
|
||||||
|
@ -271,7 +270,7 @@ struct structural_parser {
|
||||||
|
|
||||||
WARN_UNUSED really_inline error_code finish() {
|
WARN_UNUSED really_inline error_code finish() {
|
||||||
end_document();
|
end_document();
|
||||||
parser.next_structural_index = uint32_t(structurals.next_structural_index());
|
parser.next_structural_index = uint32_t(next_structural_index());
|
||||||
|
|
||||||
if (depth != 0) {
|
if (depth != 0) {
|
||||||
log_error("Unclosed objects or arrays!");
|
log_error("Unclosed objects or arrays!");
|
||||||
|
@ -295,7 +294,7 @@ struct structural_parser {
|
||||||
if (depth >= parser.max_depth()) {
|
if (depth >= parser.max_depth()) {
|
||||||
return parser.error = DEPTH_ERROR;
|
return parser.error = DEPTH_ERROR;
|
||||||
}
|
}
|
||||||
switch (structurals.current_char()) {
|
switch (current_char()) {
|
||||||
case '"':
|
case '"':
|
||||||
return parser.error = STRING_ERROR;
|
return parser.error = STRING_ERROR;
|
||||||
case '0':
|
case '0':
|
||||||
|
@ -329,14 +328,14 @@ struct structural_parser {
|
||||||
|
|
||||||
WARN_UNUSED really_inline error_code start(ret_address_t finish_state) {
|
WARN_UNUSED really_inline error_code start(ret_address_t finish_state) {
|
||||||
// If there are no structurals left, return EMPTY
|
// If there are no structurals left, return EMPTY
|
||||||
if (structurals.at_end(parser.n_structural_indexes)) {
|
if (at_end(parser.n_structural_indexes)) {
|
||||||
return parser.error = EMPTY;
|
return parser.error = EMPTY;
|
||||||
}
|
}
|
||||||
|
|
||||||
log_start();
|
log_start();
|
||||||
init();
|
init();
|
||||||
// Advance to the first character as soon as possible
|
// Advance to the first character as soon as possible
|
||||||
structurals.advance_char();
|
advance_char();
|
||||||
// Push the root scope (there is always at least one scope)
|
// Push the root scope (there is always at least one scope)
|
||||||
if (start_document(finish_state)) {
|
if (start_document(finish_state)) {
|
||||||
return parser.error = DEPTH_ERROR;
|
return parser.error = DEPTH_ERROR;
|
||||||
|
@ -344,12 +343,8 @@ struct structural_parser {
|
||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline char advance_char() {
|
|
||||||
return structurals.advance_char();
|
|
||||||
}
|
|
||||||
|
|
||||||
really_inline void log_value(const char *type) {
|
really_inline void log_value(const char *type) {
|
||||||
logger::log_line(structurals, "", type, "");
|
logger::log_line(*this, "", type, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline void log_start() {
|
static really_inline void log_start() {
|
||||||
|
@ -357,17 +352,17 @@ struct structural_parser {
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline void log_start_value(const char *type) {
|
really_inline void log_start_value(const char *type) {
|
||||||
logger::log_line(structurals, "+", type, "");
|
logger::log_line(*this, "+", type, "");
|
||||||
if (logger::LOG_ENABLED) { logger::log_depth++; }
|
if (logger::LOG_ENABLED) { logger::log_depth++; }
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline void log_end_value(const char *type) {
|
really_inline void log_end_value(const char *type) {
|
||||||
if (logger::LOG_ENABLED) { logger::log_depth--; }
|
if (logger::LOG_ENABLED) { logger::log_depth--; }
|
||||||
logger::log_line(structurals, "-", type, "");
|
logger::log_line(*this, "-", type, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline void log_error(const char *error) {
|
really_inline void log_error(const char *error) {
|
||||||
logger::log_line(structurals, "", "ERROR", error);
|
logger::log_line(*this, "", "ERROR", error);
|
||||||
}
|
}
|
||||||
}; // struct structural_parser
|
}; // struct structural_parser
|
||||||
|
|
||||||
|
@ -386,7 +381,7 @@ WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_p
|
||||||
//
|
//
|
||||||
// Read first value
|
// Read first value
|
||||||
//
|
//
|
||||||
switch (parser.structurals.current_char()) {
|
switch (parser.current_char()) {
|
||||||
case '{':
|
case '{':
|
||||||
FAIL_IF( parser.start_object(addresses.finish) );
|
FAIL_IF( parser.start_object(addresses.finish) );
|
||||||
goto object_begin;
|
goto object_begin;
|
||||||
|
@ -395,7 +390,7 @@ WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_p
|
||||||
// Make sure the outer array is closed before continuing; otherwise, there are ways we could get
|
// Make sure the outer array is closed before continuing; otherwise, there are ways we could get
|
||||||
// into memory corruption. See https://github.com/simdjson/simdjson/issues/906
|
// into memory corruption. See https://github.com/simdjson/simdjson/issues/906
|
||||||
if (!STREAMING) {
|
if (!STREAMING) {
|
||||||
if (parser.structurals.buf[parser.structurals.structural_indexes[dom_parser.n_structural_indexes - 1]] != ']') {
|
if (parser.buf[parser.structural_indexes[dom_parser.n_structural_indexes - 1]] != ']') {
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -409,14 +404,14 @@ WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_p
|
||||||
case '0': case '1': case '2': case '3': case '4':
|
case '0': case '1': case '2': case '3': case '4':
|
||||||
case '5': case '6': case '7': case '8': case '9':
|
case '5': case '6': case '7': case '8': case '9':
|
||||||
FAIL_IF(
|
FAIL_IF(
|
||||||
parser.structurals.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) {
|
parser.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) {
|
||||||
return parser.parse_number(&copy[idx], false);
|
return parser.parse_number(&copy[idx], false);
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
goto finish;
|
goto finish;
|
||||||
case '-':
|
case '-':
|
||||||
FAIL_IF(
|
FAIL_IF(
|
||||||
parser.structurals.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) {
|
parser.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) {
|
||||||
return parser.parse_number(&copy[idx], true);
|
return parser.parse_number(&copy[idx], true);
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
|
|
Loading…
Reference in New Issue