Remove computed gotos from parse_value
This commit is contained in:
parent
c46ea0390c
commit
bb65ebd8be
|
@ -49,9 +49,6 @@ struct unified_machine_addresses {
|
|||
ret_address_t object_continue;
|
||||
};
|
||||
|
||||
#undef FAIL_IF
|
||||
#define FAIL_IF(EXPR) { if (EXPR) { return addresses.error; } }
|
||||
|
||||
struct structural_parser : structural_iterator {
|
||||
/** Lets you append to the tape */
|
||||
tape_writer tape;
|
||||
|
@ -173,7 +170,7 @@ struct structural_parser : structural_iterator {
|
|||
return parse_number(current());
|
||||
}
|
||||
|
||||
really_inline bool parse_number_with_space_terminated_copy() {
|
||||
really_inline bool parse_root_number() {
|
||||
/**
|
||||
* We need to make a copy to make sure that the string is space terminated.
|
||||
* This is not about padding the input, which should already padded up
|
||||
|
@ -198,41 +195,47 @@ struct structural_parser : structural_iterator {
|
|||
free(copy);
|
||||
return result;
|
||||
}
|
||||
WARN_UNUSED really_inline ret_address_t parse_value(const unified_machine_addresses &addresses, ret_address_t continue_state) {
|
||||
switch (advance_char()) {
|
||||
case '{':
|
||||
FAIL_IF( start_object(continue_state) );
|
||||
return addresses.object_begin;
|
||||
case '[':
|
||||
FAIL_IF( start_array(continue_state) );
|
||||
return addresses.array_begin;
|
||||
case '"':
|
||||
FAIL_IF( parse_string() );
|
||||
return continue_state;
|
||||
case 't':
|
||||
log_value("true");
|
||||
FAIL_IF( !atomparsing::is_valid_true_atom(current()) );
|
||||
tape.append(0, internal::tape_type::TRUE_VALUE);
|
||||
return continue_state;
|
||||
case 'f':
|
||||
log_value("false");
|
||||
FAIL_IF( !atomparsing::is_valid_false_atom(current()) );
|
||||
tape.append(0, internal::tape_type::FALSE_VALUE);
|
||||
return continue_state;
|
||||
case 'n':
|
||||
log_value("null");
|
||||
FAIL_IF( !atomparsing::is_valid_null_atom(current()) );
|
||||
tape.append(0, internal::tape_type::NULL_VALUE);
|
||||
return continue_state;
|
||||
case '-':
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
FAIL_IF( parse_number() );
|
||||
return continue_state;
|
||||
default:
|
||||
log_error("Non-value found when value was expected!");
|
||||
return addresses.error;
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline bool parse_true_atom() {
|
||||
log_value("true");
|
||||
if (!atomparsing::is_valid_true_atom(current())) { return true; }
|
||||
tape.append(0, internal::tape_type::TRUE_VALUE);
|
||||
return false;
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline bool parse_root_true_atom() {
|
||||
log_value("true");
|
||||
if (!atomparsing::is_valid_true_atom(current(), remaining_len())) { return true; }
|
||||
tape.append(0, internal::tape_type::TRUE_VALUE);
|
||||
return false;
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline bool parse_false_atom() {
|
||||
log_value("false");
|
||||
if (!atomparsing::is_valid_false_atom(current())) { return true; }
|
||||
tape.append(0, internal::tape_type::FALSE_VALUE);
|
||||
return false;
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline bool parse_root_false_atom() {
|
||||
log_value("false");
|
||||
if (!atomparsing::is_valid_false_atom(current(), remaining_len())) { return true; }
|
||||
tape.append(0, internal::tape_type::FALSE_VALUE);
|
||||
return false;
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline bool parse_null_atom() {
|
||||
log_value("null");
|
||||
if (!atomparsing::is_valid_null_atom(current())) { return true; }
|
||||
tape.append(0, internal::tape_type::NULL_VALUE);
|
||||
return false;
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline bool parse_root_null_atom() {
|
||||
log_value("null");
|
||||
if (!atomparsing::is_valid_null_atom(current(), remaining_len())) { return true; }
|
||||
tape.append(0, internal::tape_type::NULL_VALUE);
|
||||
return false;
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline error_code finish() {
|
||||
|
@ -248,16 +251,14 @@ struct structural_parser : structural_iterator {
|
|||
}
|
||||
|
||||
WARN_UNUSED really_inline error_code error() {
|
||||
/* We do not need the next line because this is done by parser.init_stage2(),
|
||||
* pessimistically.
|
||||
* parser.is_valid = false;
|
||||
* At this point in the code, we have all the time in the world.
|
||||
* Note that we know exactly where we are in the document so we could,
|
||||
* without any overhead on the processing code, report a specific
|
||||
* location.
|
||||
* We could even trigger special code paths to assess what happened
|
||||
* carefully,
|
||||
* all without any added cost. */
|
||||
// At this point in the code, we have all the time in the world.
|
||||
// Note that we know exactly where we are in the document so we could,
|
||||
// without any overhead on the processing code, report a specific
|
||||
// location.
|
||||
// We could even trigger special code paths to assess what happened
|
||||
// carefully,
|
||||
// all without any added cost.
|
||||
//
|
||||
if (depth >= parser.max_depth()) {
|
||||
return parser.error = DEPTH_ERROR;
|
||||
}
|
||||
|
@ -329,11 +330,6 @@ struct structural_parser : structural_iterator {
|
|||
}
|
||||
}; // struct structural_parser
|
||||
|
||||
// Redefine FAIL_IF to use goto since it'll be used inside the function now
|
||||
#undef FAIL_IF
|
||||
#define FAIL_IF(EXPR) { if (EXPR) { goto error; } }
|
||||
|
||||
|
||||
template<bool STREAMING>
|
||||
WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_parser, dom::document &doc) noexcept {
|
||||
dom_parser.doc = &doc;
|
||||
|
@ -347,10 +343,10 @@ WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_p
|
|||
//
|
||||
switch (parser.current_char()) {
|
||||
case '{':
|
||||
FAIL_IF( parser.start_object(addresses.finish) );
|
||||
if ( parser.start_object(addresses.finish) ) { goto error; };
|
||||
goto object_begin;
|
||||
case '[':
|
||||
FAIL_IF( parser.start_array(addresses.finish) );
|
||||
if ( parser.start_array(addresses.finish) ) { goto error; }
|
||||
// Make sure the outer array is closed before continuing; otherwise, there are ways we could get
|
||||
// into memory corruption. See https://github.com/simdjson/simdjson/issues/906
|
||||
if (!STREAMING) {
|
||||
|
@ -359,31 +355,17 @@ WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_p
|
|||
}
|
||||
}
|
||||
goto array_begin;
|
||||
case '"':
|
||||
FAIL_IF( parser.parse_string() );
|
||||
goto finish;
|
||||
case 't':
|
||||
parser.log_value("true");
|
||||
FAIL_IF( !atomparsing::is_valid_true_atom(parser.current(), parser.remaining_len()) );
|
||||
parser.tape.append(0, internal::tape_type::TRUE_VALUE);
|
||||
goto finish;
|
||||
case 'f':
|
||||
parser.log_value("false");
|
||||
FAIL_IF( !atomparsing::is_valid_false_atom(parser.current(), parser.remaining_len()) );
|
||||
parser.tape.append(0, internal::tape_type::FALSE_VALUE);
|
||||
goto finish;
|
||||
case 'n':
|
||||
parser.log_value("null");
|
||||
FAIL_IF( !atomparsing::is_valid_null_atom(parser.current(), parser.remaining_len()) );
|
||||
parser.tape.append(0, internal::tape_type::NULL_VALUE);
|
||||
goto finish;
|
||||
case '"': if ( parser.parse_string() ) { goto error; }; goto finish;
|
||||
case 't': if ( parser.parse_root_true_atom() ) { goto error; }; goto finish;
|
||||
case 'f': if ( parser.parse_root_false_atom() ) { goto error; }; goto finish;
|
||||
case 'n': if ( parser.parse_root_null_atom() ) { goto error; }; goto finish;
|
||||
case '-':
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
// Next line used to be an interesting functional programming exercise with
|
||||
// a lambda that gets passed to another function via a closure. This would confuse the
|
||||
// clangcl compiler under Visual Studio 2019 (recent release).
|
||||
FAIL_IF(parser.parse_number_with_space_terminated_copy());
|
||||
if ( parser.parse_root_number() ) { goto error; }
|
||||
goto finish;
|
||||
default:
|
||||
parser.log_error("Document starts with a non-value character");
|
||||
|
@ -397,7 +379,7 @@ object_begin:
|
|||
switch (parser.advance_char()) {
|
||||
case '"': {
|
||||
parser.increment_count();
|
||||
FAIL_IF( parser.parse_string(true) );
|
||||
if ( parser.parse_string(true) ) { goto error; }
|
||||
goto object_key_state;
|
||||
}
|
||||
case '}':
|
||||
|
@ -410,14 +392,28 @@ object_begin:
|
|||
|
||||
object_key_state:
|
||||
if (parser.advance_char() != ':' ) { parser.log_error("Missing colon after key in object"); goto error; }
|
||||
GOTO( parser.parse_value(addresses, addresses.object_continue) );
|
||||
switch (parser.advance_char()) {
|
||||
case '{': if ( parser.start_object(addresses.object_continue) ) { goto error; } else { goto object_begin; }
|
||||
case '[': if ( parser.start_array(addresses.object_continue) ) { goto error; } else { goto array_begin; }
|
||||
case '"': if ( parser.parse_string() ) { goto error; }; break;
|
||||
case 't': if ( parser.parse_true_atom() ) { goto error; }; break;
|
||||
case 'f': if ( parser.parse_false_atom() ) { goto error; }; break;
|
||||
case 'n': if ( parser.parse_null_atom() ) { goto error; }; break;
|
||||
case '-':
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
if ( parser.parse_number() ) { goto error; }; break;
|
||||
default:
|
||||
parser.log_error("Non-value found when value was expected!");
|
||||
goto error;
|
||||
}
|
||||
|
||||
object_continue:
|
||||
switch (parser.advance_char()) {
|
||||
case ',':
|
||||
parser.increment_count();
|
||||
if (parser.advance_char() != '"' ) { parser.log_error("Key string missing at beginning of field in object"); goto error; }
|
||||
FAIL_IF( parser.parse_string(true) );
|
||||
if ( parser.parse_string(true) ) { goto error; }
|
||||
goto object_key_state;
|
||||
case '}':
|
||||
parser.end_object();
|
||||
|
@ -442,9 +438,21 @@ array_begin:
|
|||
parser.increment_count();
|
||||
|
||||
main_array_switch:
|
||||
/* we call update char on all paths in, so we can peek at parser.c on the
|
||||
* on paths that can accept a close square brace (post-, and at start) */
|
||||
GOTO( parser.parse_value(addresses, addresses.array_continue) );
|
||||
switch (parser.advance_char()) {
|
||||
case '{': if ( parser.start_object(addresses.array_continue) ) { goto error; } else { goto object_begin; }
|
||||
case '[': if ( parser.start_array(addresses.array_continue) ) { goto error; } else { goto array_begin; }
|
||||
case '"': if ( parser.parse_string() ) { goto error; }; break;
|
||||
case 't': if ( parser.parse_true_atom() ) { goto error; }; break;
|
||||
case 'f': if ( parser.parse_false_atom() ) { goto error; }; break;
|
||||
case 'n': if ( parser.parse_null_atom() ) { goto error; }; break;
|
||||
case '-':
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
if ( parser.parse_number() ) { goto error; }; break;
|
||||
default:
|
||||
parser.log_error("Non-value found when value was expected!");
|
||||
goto error;
|
||||
}
|
||||
|
||||
array_continue:
|
||||
switch (parser.advance_char()) {
|
||||
|
|
Loading…
Reference in New Issue