Extend structural_parser from structural_iterator

This commit is contained in:
John Keiser 2020-06-06 12:04:29 -07:00
parent a1aea4588f
commit 3636aa5522
1 changed files with 26 additions and 31 deletions

View File

@ -69,16 +69,15 @@ struct number_writer {
} }
}; // struct number_writer }; // struct number_writer
struct structural_parser { struct structural_parser : structural_iterator {
structural_iterator structurals;
dom_parser_implementation &parser; dom_parser_implementation &parser;
/** Next write location in the string buf for stage 2 parsing */ /** Next write location in the string buf for stage 2 parsing */
uint8_t *current_string_buf_loc{}; uint8_t *current_string_buf_loc{};
uint32_t depth; uint32_t depth;
// For non-streaming, to pass an explicit 0 as next_structural, which enables optimizations // For non-streaming, to pass an explicit 0 as next_structural, which enables optimizations
really_inline structural_parser(dom_parser_implementation &_parser, uint32_t next_structural) really_inline structural_parser(dom_parser_implementation &_parser, uint32_t _next_structural)
: structurals(_parser.buf, _parser.len, _parser.structural_indexes.get(), next_structural), : structural_iterator(_parser.buf, _parser.len, _parser.structural_indexes.get(), _next_structural),
parser{_parser}, parser{_parser},
depth{0} { depth{0} {
} }
@ -174,7 +173,7 @@ struct structural_parser {
WARN_UNUSED really_inline bool parse_string(bool key = false) { WARN_UNUSED really_inline bool parse_string(bool key = false) {
log_value(key ? "key" : "string"); log_value(key ? "key" : "string");
uint8_t *dst = on_start_string(); uint8_t *dst = on_start_string();
dst = stringparsing::parse_string(structurals.current(), dst); dst = stringparsing::parse_string(current(), dst);
if (dst == nullptr) { if (dst == nullptr) {
log_error("Invalid escape in string"); log_error("Invalid escape in string");
return true; return true;
@ -191,24 +190,24 @@ struct structural_parser {
return !succeeded; return !succeeded;
} }
WARN_UNUSED really_inline bool parse_number(bool found_minus) { WARN_UNUSED really_inline bool parse_number(bool found_minus) {
return parse_number(structurals.current(), found_minus); return parse_number(current(), found_minus);
} }
WARN_UNUSED really_inline bool parse_atom() { WARN_UNUSED really_inline bool parse_atom() {
switch (structurals.current_char()) { switch (current_char()) {
case 't': case 't':
log_value("true"); log_value("true");
if (!atomparsing::is_valid_true_atom(structurals.current())) { return true; } if (!atomparsing::is_valid_true_atom(current())) { return true; }
append_tape(0, internal::tape_type::TRUE_VALUE); append_tape(0, internal::tape_type::TRUE_VALUE);
break; break;
case 'f': case 'f':
log_value("false"); log_value("false");
if (!atomparsing::is_valid_false_atom(structurals.current())) { return true; } if (!atomparsing::is_valid_false_atom(current())) { return true; }
append_tape(0, internal::tape_type::FALSE_VALUE); append_tape(0, internal::tape_type::FALSE_VALUE);
break; break;
case 'n': case 'n':
log_value("null"); log_value("null");
if (!atomparsing::is_valid_null_atom(structurals.current())) { return true; } if (!atomparsing::is_valid_null_atom(current())) { return true; }
append_tape(0, internal::tape_type::NULL_VALUE); append_tape(0, internal::tape_type::NULL_VALUE);
break; break;
default: default:
@ -219,20 +218,20 @@ struct structural_parser {
} }
WARN_UNUSED really_inline bool parse_single_atom() { WARN_UNUSED really_inline bool parse_single_atom() {
switch (structurals.current_char()) { switch (current_char()) {
case 't': case 't':
log_value("true"); log_value("true");
if (!atomparsing::is_valid_true_atom(structurals.current(), structurals.remaining_len())) { return true; } if (!atomparsing::is_valid_true_atom(current(), remaining_len())) { return true; }
append_tape(0, internal::tape_type::TRUE_VALUE); append_tape(0, internal::tape_type::TRUE_VALUE);
break; break;
case 'f': case 'f':
log_value("false"); log_value("false");
if (!atomparsing::is_valid_false_atom(structurals.current(), structurals.remaining_len())) { return true; } if (!atomparsing::is_valid_false_atom(current(), remaining_len())) { return true; }
append_tape(0, internal::tape_type::FALSE_VALUE); append_tape(0, internal::tape_type::FALSE_VALUE);
break; break;
case 'n': case 'n':
log_value("null"); log_value("null");
if (!atomparsing::is_valid_null_atom(structurals.current(), structurals.remaining_len())) { return true; } if (!atomparsing::is_valid_null_atom(current(), remaining_len())) { return true; }
append_tape(0, internal::tape_type::NULL_VALUE); append_tape(0, internal::tape_type::NULL_VALUE);
break; break;
default: default:
@ -243,7 +242,7 @@ struct structural_parser {
} }
WARN_UNUSED really_inline ret_address_t parse_value(const unified_machine_addresses &addresses, ret_address_t continue_state) { WARN_UNUSED really_inline ret_address_t parse_value(const unified_machine_addresses &addresses, ret_address_t continue_state) {
switch (structurals.current_char()) { switch (current_char()) {
case '"': case '"':
FAIL_IF( parse_string() ); FAIL_IF( parse_string() );
return continue_state; return continue_state;
@ -271,7 +270,7 @@ struct structural_parser {
WARN_UNUSED really_inline error_code finish() { WARN_UNUSED really_inline error_code finish() {
end_document(); end_document();
parser.next_structural_index = uint32_t(structurals.next_structural_index()); parser.next_structural_index = uint32_t(next_structural_index());
if (depth != 0) { if (depth != 0) {
log_error("Unclosed objects or arrays!"); log_error("Unclosed objects or arrays!");
@ -295,7 +294,7 @@ struct structural_parser {
if (depth >= parser.max_depth()) { if (depth >= parser.max_depth()) {
return parser.error = DEPTH_ERROR; return parser.error = DEPTH_ERROR;
} }
switch (structurals.current_char()) { switch (current_char()) {
case '"': case '"':
return parser.error = STRING_ERROR; return parser.error = STRING_ERROR;
case '0': case '0':
@ -329,14 +328,14 @@ struct structural_parser {
WARN_UNUSED really_inline error_code start(ret_address_t finish_state) { WARN_UNUSED really_inline error_code start(ret_address_t finish_state) {
// If there are no structurals left, return EMPTY // If there are no structurals left, return EMPTY
if (structurals.at_end(parser.n_structural_indexes)) { if (at_end(parser.n_structural_indexes)) {
return parser.error = EMPTY; return parser.error = EMPTY;
} }
log_start(); log_start();
init(); init();
// Advance to the first character as soon as possible // Advance to the first character as soon as possible
structurals.advance_char(); advance_char();
// Push the root scope (there is always at least one scope) // Push the root scope (there is always at least one scope)
if (start_document(finish_state)) { if (start_document(finish_state)) {
return parser.error = DEPTH_ERROR; return parser.error = DEPTH_ERROR;
@ -344,12 +343,8 @@ struct structural_parser {
return SUCCESS; return SUCCESS;
} }
really_inline char advance_char() {
return structurals.advance_char();
}
really_inline void log_value(const char *type) { really_inline void log_value(const char *type) {
logger::log_line(structurals, "", type, ""); logger::log_line(*this, "", type, "");
} }
static really_inline void log_start() { static really_inline void log_start() {
@ -357,17 +352,17 @@ struct structural_parser {
} }
really_inline void log_start_value(const char *type) { really_inline void log_start_value(const char *type) {
logger::log_line(structurals, "+", type, ""); logger::log_line(*this, "+", type, "");
if (logger::LOG_ENABLED) { logger::log_depth++; } if (logger::LOG_ENABLED) { logger::log_depth++; }
} }
really_inline void log_end_value(const char *type) { really_inline void log_end_value(const char *type) {
if (logger::LOG_ENABLED) { logger::log_depth--; } if (logger::LOG_ENABLED) { logger::log_depth--; }
logger::log_line(structurals, "-", type, ""); logger::log_line(*this, "-", type, "");
} }
really_inline void log_error(const char *error) { really_inline void log_error(const char *error) {
logger::log_line(structurals, "", "ERROR", error); logger::log_line(*this, "", "ERROR", error);
} }
}; // struct structural_parser }; // struct structural_parser
@ -386,7 +381,7 @@ WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_p
// //
// Read first value // Read first value
// //
switch (parser.structurals.current_char()) { switch (parser.current_char()) {
case '{': case '{':
FAIL_IF( parser.start_object(addresses.finish) ); FAIL_IF( parser.start_object(addresses.finish) );
goto object_begin; goto object_begin;
@ -395,7 +390,7 @@ WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_p
// Make sure the outer array is closed before continuing; otherwise, there are ways we could get // Make sure the outer array is closed before continuing; otherwise, there are ways we could get
// into memory corruption. See https://github.com/simdjson/simdjson/issues/906 // into memory corruption. See https://github.com/simdjson/simdjson/issues/906
if (!STREAMING) { if (!STREAMING) {
if (parser.structurals.buf[parser.structurals.structural_indexes[dom_parser.n_structural_indexes - 1]] != ']') { if (parser.buf[parser.structural_indexes[dom_parser.n_structural_indexes - 1]] != ']') {
goto error; goto error;
} }
} }
@ -409,14 +404,14 @@ WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_p
case '0': case '1': case '2': case '3': case '4': case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9': case '5': case '6': case '7': case '8': case '9':
FAIL_IF( FAIL_IF(
parser.structurals.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) { parser.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) {
return parser.parse_number(&copy[idx], false); return parser.parse_number(&copy[idx], false);
}) })
); );
goto finish; goto finish;
case '-': case '-':
FAIL_IF( FAIL_IF(
parser.structurals.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) { parser.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) {
return parser.parse_number(&copy[idx], true); return parser.parse_number(&copy[idx], true);
}) })
); );