Return errors immediately instead of using goto

This commit is contained in:
John Keiser 2020-07-28 10:20:59 -07:00
parent 86162aaddb
commit 66a68ce264
6 changed files with 89 additions and 145 deletions

View File

@ -144,13 +144,12 @@ WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t len) cons
}
WARN_UNUSED error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept {
error_code result = stage2::parse_structurals<false>(*this, _doc);
if (result) { return result; }
if (auto error = stage2::parse_structurals<false>(*this, _doc)) { return error; }
// If we didn't make it to the end, it's an error
if ( next_structural_index != n_structural_indexes ) {
logger::log_string("More than one JSON value at the root of the document, or extra characters at the end of the JSON!");
return error = TAPE_ERROR;
return TAPE_ERROR;
}
return SUCCESS;
@ -161,8 +160,8 @@ WARN_UNUSED error_code dom_parser_implementation::stage2_next(dom::document &_do
}
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
error_code err = stage1(_buf, _len, false);
if (err) { return err; }
auto error = stage1(_buf, _len, false);
if (error) { return error; }
return stage2(_doc);
}

View File

@ -321,13 +321,12 @@ namespace {
namespace SIMDJSON_IMPLEMENTATION {
WARN_UNUSED error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept {
error_code result = stage2::parse_structurals<false>(*this, _doc);
if (result) { return result; }
if (auto error = stage2::parse_structurals<false>(*this, _doc)) { return error; }
// If we didn't make it to the end, it's an error
if ( next_structural_index != n_structural_indexes ) {
logger::log_string("More than one JSON value at the root of the document, or extra characters at the end of the JSON!");
return error = TAPE_ERROR;
return TAPE_ERROR;
}
return SUCCESS;
@ -338,8 +337,8 @@ WARN_UNUSED error_code dom_parser_implementation::stage2_next(dom::document &_do
}
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
error_code err = stage1(_buf, _len, false);
if (err) { return err; }
auto error = stage1(_buf, _len, false);
if (error) { return error; }
return stage2(_doc);
}

View File

@ -28,8 +28,6 @@ public:
size_t len{0};
/** Document passed to stage 2 */
dom::document *doc{};
/** Error code (TODO remove, this is not even used, we just set it so the g++ optimizer doesn't get confused) */
error_code error{UNINITIALIZED};
really_inline dom_parser_implementation();
dom_parser_implementation(const dom_parser_implementation &) = delete;

View File

@ -27,28 +27,27 @@ struct structural_parser : structural_iterator {
current_string_buf_loc{parser.doc->string_buf.get()} {
}
WARN_UNUSED really_inline bool start_scope(bool parent_is_array) {
WARN_UNUSED really_inline error_code start_scope(bool parent_is_array) {
parser.containing_scope[depth].tape_index = next_tape_index();
parser.containing_scope[depth].count = 0;
tape.skip(); // We don't actually *write* the start element until the end.
parser.is_array[depth] = parent_is_array;
depth++;
bool exceeded_max_depth = depth >= parser.max_depth();
if (exceeded_max_depth) { log_error("Exceeded max depth!"); }
return exceeded_max_depth;
if (depth >= parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; }
return SUCCESS;
}
WARN_UNUSED really_inline bool start_document() {
WARN_UNUSED really_inline error_code start_document() {
log_start_value("document");
return start_scope(false);
}
WARN_UNUSED really_inline bool start_object(bool parent_is_array) {
WARN_UNUSED really_inline error_code start_object(bool parent_is_array) {
log_start_value("object");
return start_scope(parent_is_array);
}
WARN_UNUSED really_inline bool start_array(bool parent_is_array) {
WARN_UNUSED really_inline error_code start_array(bool parent_is_array) {
log_start_value("array");
return start_scope(parent_is_array);
}
@ -111,29 +110,28 @@ struct structural_parser : structural_iterator {
current_string_buf_loc = dst + 1;
}
WARN_UNUSED really_inline bool parse_string(bool key = false) {
WARN_UNUSED really_inline error_code parse_string(bool key = false) {
log_value(key ? "key" : "string");
uint8_t *dst = on_start_string();
dst = stringparsing::parse_string(current(), dst);
if (dst == nullptr) {
log_error("Invalid escape in string");
return true;
return STRING_ERROR;
}
on_end_string(dst);
return false;
return SUCCESS;
}
WARN_UNUSED really_inline bool parse_number(const uint8_t *src) {
WARN_UNUSED really_inline error_code parse_number(const uint8_t *src) {
log_value("number");
bool succeeded = numberparsing::parse_number(src, tape);
if (!succeeded) { log_error("Invalid number"); }
return !succeeded;
if (!numberparsing::parse_number(src, tape)) { log_error("Invalid number"); return NUMBER_ERROR; }
return SUCCESS;
}
WARN_UNUSED really_inline bool parse_number() {
WARN_UNUSED really_inline error_code parse_number() {
return parse_number(current());
}
really_inline bool parse_root_number() {
really_inline error_code parse_root_number() {
/**
* We need to make a copy to make sure that the string is space terminated.
* This is not about padding the input, which should already padded up
@ -149,56 +147,56 @@ struct structural_parser : structural_iterator {
*/
uint8_t *copy = static_cast<uint8_t *>(malloc(parser.len + SIMDJSON_PADDING));
if (copy == nullptr) {
return true;
return MEMALLOC;
}
memcpy(copy, buf, parser.len);
memset(copy + parser.len, ' ', SIMDJSON_PADDING);
size_t idx = *current_structural;
bool result = parse_number(&copy[idx]); // parse_number does not throw
error_code error = parse_number(&copy[idx]); // parse_number does not throw
free(copy);
return result;
return error;
}
WARN_UNUSED really_inline bool parse_true_atom() {
WARN_UNUSED really_inline error_code parse_true_atom() {
log_value("true");
if (!atomparsing::is_valid_true_atom(current())) { return true; }
if (!atomparsing::is_valid_true_atom(current())) { return T_ATOM_ERROR; }
tape.append(0, internal::tape_type::TRUE_VALUE);
return false;
return SUCCESS;
}
WARN_UNUSED really_inline bool parse_root_true_atom() {
WARN_UNUSED really_inline error_code parse_root_true_atom() {
log_value("true");
if (!atomparsing::is_valid_true_atom(current(), remaining_len())) { return true; }
if (!atomparsing::is_valid_true_atom(current(), remaining_len())) { return T_ATOM_ERROR; }
tape.append(0, internal::tape_type::TRUE_VALUE);
return false;
return SUCCESS;
}
WARN_UNUSED really_inline bool parse_false_atom() {
WARN_UNUSED really_inline error_code parse_false_atom() {
log_value("false");
if (!atomparsing::is_valid_false_atom(current())) { return true; }
if (!atomparsing::is_valid_false_atom(current())) { return F_ATOM_ERROR; }
tape.append(0, internal::tape_type::FALSE_VALUE);
return false;
return SUCCESS;
}
WARN_UNUSED really_inline bool parse_root_false_atom() {
WARN_UNUSED really_inline error_code parse_root_false_atom() {
log_value("false");
if (!atomparsing::is_valid_false_atom(current(), remaining_len())) { return true; }
if (!atomparsing::is_valid_false_atom(current(), remaining_len())) { return F_ATOM_ERROR; }
tape.append(0, internal::tape_type::FALSE_VALUE);
return false;
return SUCCESS;
}
WARN_UNUSED really_inline bool parse_null_atom() {
WARN_UNUSED really_inline error_code parse_null_atom() {
log_value("null");
if (!atomparsing::is_valid_null_atom(current())) { return true; }
if (!atomparsing::is_valid_null_atom(current())) { return N_ATOM_ERROR; }
tape.append(0, internal::tape_type::NULL_VALUE);
return false;
return SUCCESS;
}
WARN_UNUSED really_inline bool parse_root_null_atom() {
WARN_UNUSED really_inline error_code parse_root_null_atom() {
log_value("null");
if (!atomparsing::is_valid_null_atom(current(), remaining_len())) { return true; }
if (!atomparsing::is_valid_null_atom(current(), remaining_len())) { return N_ATOM_ERROR; }
tape.append(0, internal::tape_type::NULL_VALUE);
return false;
return SUCCESS;
}
WARN_UNUSED really_inline error_code finish() {
@ -207,67 +205,25 @@ struct structural_parser : structural_iterator {
if (depth != 0) {
log_error("Unclosed objects or arrays!");
return parser.error = TAPE_ERROR;
return TAPE_ERROR;
}
return SUCCESS;
}
WARN_UNUSED really_inline error_code error() {
// At this point in the code, we have all the time in the world.
// Note that we know exactly where we are in the document so we could,
// without any overhead on the processing code, report a specific
// location.
// We could even trigger special code paths to assess what happened
// carefully,
// all without any added cost.
//
if (depth >= parser.max_depth()) {
return parser.error = DEPTH_ERROR;
}
switch (current_char()) {
case '"':
return parser.error = STRING_ERROR;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
case '-':
return parser.error = NUMBER_ERROR;
case 't':
return parser.error = T_ATOM_ERROR;
case 'n':
return parser.error = N_ATOM_ERROR;
case 'f':
return parser.error = F_ATOM_ERROR;
default:
return parser.error = TAPE_ERROR;
}
}
really_inline void init() {
log_start();
parser.error = UNINITIALIZED;
}
WARN_UNUSED really_inline error_code start() {
// If there are no structurals left, return EMPTY
if (at_end(parser.n_structural_indexes)) {
return parser.error = EMPTY;
return EMPTY;
}
init();
// Push the root scope (there is always at least one scope)
if (start_document()) {
return parser.error = DEPTH_ERROR;
}
return SUCCESS;
return start_document();
}
really_inline void log_value(const char *type) {
@ -293,45 +249,42 @@ struct structural_parser : structural_iterator {
}
}; // struct structural_parser
#define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } }
template<bool STREAMING>
WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_implementation &dom_parser, dom::document &doc) noexcept {
dom_parser.doc = &doc;
stage2::structural_parser parser(dom_parser, STREAMING ? dom_parser.next_structural_index : 0);
error_code result = parser.start();
if (result) { return result; }
SIMDJSON_TRY( parser.start() );
//
// Read first value
//
switch (parser.current_char()) {
case '{':
if ( parser.start_object(false) ) { goto error; };
SIMDJSON_TRY( parser.start_object(false) );
goto object_begin;
case '[':
if ( parser.start_array(false) ) { goto error; }
SIMDJSON_TRY( parser.start_array(false) );
// Make sure the outer array is closed before continuing; otherwise, there are ways we could get
// into memory corruption. See https://github.com/simdjson/simdjson/issues/906
if (!STREAMING) {
if (parser.buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]] != ']') {
goto error;
return TAPE_ERROR;
}
}
goto array_begin;
case '"': if ( parser.parse_string() ) { goto error; }; goto finish;
case 't': if ( parser.parse_root_true_atom() ) { goto error; }; goto finish;
case 'f': if ( parser.parse_root_false_atom() ) { goto error; }; goto finish;
case 'n': if ( parser.parse_root_null_atom() ) { goto error; }; goto finish;
case '"': SIMDJSON_TRY( parser.parse_string() ); goto finish;
case 't': SIMDJSON_TRY( parser.parse_root_true_atom() ); goto finish;
case 'f': SIMDJSON_TRY( parser.parse_root_false_atom() ); goto finish;
case 'n': SIMDJSON_TRY( parser.parse_root_null_atom() ); goto finish;
case '-':
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
// Next line used to be an interesting functional programming exercise with
// a lambda that gets passed to another function via a closure. This would confuse the
// clangcl compiler under Visual Studio 2019 (recent release).
if ( parser.parse_root_number() ) { goto error; }
goto finish;
SIMDJSON_TRY( parser.parse_root_number() ); goto finish;
default:
parser.log_error("Document starts with a non-value character");
goto error;
return TAPE_ERROR;
}
//
@ -341,7 +294,7 @@ object_begin:
switch (parser.advance_char()) {
case '"': {
parser.increment_count();
if ( parser.parse_string(true) ) { goto error; }
SIMDJSON_TRY( parser.parse_string(true) );
goto object_key_state;
}
case '}':
@ -349,40 +302,40 @@ object_begin:
goto scope_end;
default:
parser.log_error("Object does not start with a key");
goto error;
return TAPE_ERROR;
}
object_key_state:
if (unlikely( parser.advance_char() != ':' )) { parser.log_error("Missing colon after key in object"); goto error; }
if (unlikely( parser.advance_char() != ':' )) { parser.log_error("Missing colon after key in object"); return TAPE_ERROR; }
switch (parser.advance_char()) {
case '{': if ( parser.start_object(false) ) { goto error; }; goto object_begin;
case '[': if ( parser.start_array(false) ) { goto error; }; goto array_begin;
case '"': if ( parser.parse_string() ) { goto error; }; break;
case 't': if ( parser.parse_true_atom() ) { goto error; }; break;
case 'f': if ( parser.parse_false_atom() ) { goto error; }; break;
case 'n': if ( parser.parse_null_atom() ) { goto error; }; break;
case '{': SIMDJSON_TRY( parser.start_object(false) ); goto object_begin;
case '[': SIMDJSON_TRY( parser.start_array(false) ); goto array_begin;
case '"': SIMDJSON_TRY( parser.parse_string() ); break;
case 't': SIMDJSON_TRY( parser.parse_true_atom() ); break;
case 'f': SIMDJSON_TRY( parser.parse_false_atom() ); break;
case 'n': SIMDJSON_TRY( parser.parse_null_atom() ); break;
case '-':
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
if ( parser.parse_number() ) { goto error; }; break;
SIMDJSON_TRY( parser.parse_number() ); break;
default:
parser.log_error("Non-value found when value was expected!");
goto error;
return TAPE_ERROR;
}
object_continue:
switch (parser.advance_char()) {
case ',':
parser.increment_count();
if (unlikely( parser.advance_char() != '"' )) { parser.log_error("Key string missing at beginning of field in object"); goto error; }
if ( parser.parse_string(true) ) { goto error; }
if (unlikely( parser.advance_char() != '"' )) { parser.log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; }
SIMDJSON_TRY( parser.parse_string(true) );
goto object_key_state;
case '}':
parser.end_object();
goto scope_end;
default:
parser.log_error("No comma between object fields");
goto error;
return TAPE_ERROR;
}
scope_end:
@ -403,19 +356,19 @@ array_begin:
main_array_switch:
switch (parser.advance_char()) {
case '{': if ( parser.start_object(true) ) { goto error; }; goto object_begin;
case '[': if ( parser.start_array(true) ) { goto error; }; goto array_begin;
case '"': if ( parser.parse_string() ) { goto error; }; break;
case 't': if ( parser.parse_true_atom() ) { goto error; }; break;
case 'f': if ( parser.parse_false_atom() ) { goto error; }; break;
case 'n': if ( parser.parse_null_atom() ) { goto error; }; break;
case '{': SIMDJSON_TRY( parser.start_object(true) ); goto object_begin;
case '[': SIMDJSON_TRY( parser.start_array(true) ); goto array_begin;
case '"': SIMDJSON_TRY( parser.parse_string() ); break;
case 't': SIMDJSON_TRY( parser.parse_true_atom() ); break;
case 'f': SIMDJSON_TRY( parser.parse_false_atom() ); break;
case 'n': SIMDJSON_TRY( parser.parse_null_atom() ); break;
case '-':
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
if ( parser.parse_number() ) { goto error; }; break;
SIMDJSON_TRY( parser.parse_number() ); break;
default:
parser.log_error("Non-value found when value was expected!");
goto error;
return TAPE_ERROR;
}
array_continue:
@ -428,14 +381,11 @@ array_continue:
goto scope_end;
default:
parser.log_error("Missing comma between array values");
goto error;
return TAPE_ERROR;
}
finish:
return parser.finish();
error:
return parser.error();
}
} // namespace stage2

View File

@ -107,13 +107,12 @@ WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t len) cons
}
WARN_UNUSED error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept {
error_code result = stage2::parse_structurals<false>(*this, _doc);
if (result) { return result; }
if (auto error = stage2::parse_structurals<false>(*this, _doc)) { return error; }
// If we didn't make it to the end, it's an error
if ( next_structural_index != n_structural_indexes ) {
logger::log_string("More than one JSON value at the root of the document, or extra characters at the end of the JSON!");
return error = TAPE_ERROR;
return TAPE_ERROR;
}
return SUCCESS;
@ -124,8 +123,8 @@ WARN_UNUSED error_code dom_parser_implementation::stage2_next(dom::document &_do
}
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
error_code err = stage1(_buf, _len, false);
if (err) { return err; }
auto error = stage1(_buf, _len, false);
if (error) { return error; }
return stage2(_doc);
}

View File

@ -113,13 +113,12 @@ WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t len) cons
}
WARN_UNUSED error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept {
error_code result = stage2::parse_structurals<false>(*this, _doc);
if (result) { return result; }
if (auto error = stage2::parse_structurals<false>(*this, _doc)) { return error; }
// If we didn't make it to the end, it's an error
if ( next_structural_index != n_structural_indexes ) {
logger::log_string("More than one JSON value at the root of the document, or extra characters at the end of the JSON!");
return error = TAPE_ERROR;
return TAPE_ERROR;
}
return SUCCESS;
@ -130,8 +129,8 @@ WARN_UNUSED error_code dom_parser_implementation::stage2_next(dom::document &_do
}
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
error_code err = stage1(_buf, _len, false);
if (err) { return err; }
auto error = stage1(_buf, _len, false);
if (error) { return error; }
return stage2(_doc);
}