Improving error codes. (#176)

* This commit adds new error codes.
This commit is contained in:
Daniel Lemire 2019-05-24 17:28:56 -04:00 committed by GitHub
parent 17ac5c0525
commit 8526387acb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 110 additions and 34 deletions

View File

@ -11,7 +11,12 @@
// Parse a document found in buf.
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
// Return 0 on success, an error code from simdjson/simdjson.h otherwise
//
// The function returns simdjson::SUCCESS (an integer = 0) in case of success, or an error code from
// simdjson/simdjson.h in case of failure, such as simdjson::CAPACITY, simdjson::MEMALLOC,
// simdjson::DEPTH_ERROR and so forth (the simdjson::errorMsg function converts these error codes
// into a string).
//
// You can also check validity by calling pj.isValid(). The same ParsedJson can be reused for other documents.
//
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
@ -24,7 +29,13 @@ int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifnee
// Parse a document found in buf.
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
//
// The function returns simdjson::SUCCESS (an integer = 0) in case of success, or an error code from
// simdjson/simdjson.h in case of failure, such as simdjson::CAPACITY, simdjson::MEMALLOC,
// simdjson::DEPTH_ERROR and so forth (the simdjson::errorMsg function converts these error codes
// into a string).
//
// You can also check validity
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
//
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
@ -42,8 +53,11 @@ int json_parse(const char * buf, ParsedJson &pj) = delete;
// Parse a document found in string s.
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
//
// The function returns simdjson::SUCCESS (an integer = 0) in case of success, or an error code from
// simdjson/simdjson.h in case of failure, such as simdjson::CAPACITY, simdjson::MEMALLOC,
// simdjson::DEPTH_ERROR and so forth (the simdjson::errorMsg function converts these error codes
// into a string).
//
// A temporary buffer is created when needed during processing
// (a copy of the input string is made).
@ -54,7 +68,13 @@ inline int json_parse(const std::string &s, ParsedJson &pj) {
// Parse a document found in string s.
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
//
// The function returns simdjson::SUCCESS (an integer = 0) in case of success, or an error code from
// simdjson/simdjson.h in case of failure, such as simdjson::CAPACITY, simdjson::MEMALLOC,
// simdjson::DEPTH_ERROR and so forth (the simdjson::errorMsg function converts these error codes
// into a string).
//
// You can also check validity
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
WARN_UNUSED
inline int json_parse(const padded_string &s, ParsedJson &pj) {
@ -69,6 +89,8 @@ inline int json_parse(const padded_string &s, ParsedJson &pj) {
//
// the input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
// all bytes at and after buf + len are ignored (can be garbage).
//
// This is a convenience function which calls json_parse.
WARN_UNUSED
ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneeded = true);
@ -79,6 +101,8 @@ WARN_UNUSED
// (a copy of the input string is made).
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
// all bytes at and after buf + len are ignored (can be garbage).
//
// This is a convenience function which calls json_parse.
inline ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded = true) {
  // Reinterpret the character buffer as raw bytes and forward to the uint8_t overload.
  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(buf);
  return build_parsed_json(bytes, len, reallocifneeded);
}
@ -90,11 +114,13 @@ ParsedJson build_parsed_json(const char *buf) = delete;
// Parse a document found in string s.
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
// Return SUCCESS (an integer = 0) in case of a success. You can also check validity
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
//
// A temporary buffer is created when needed during processing
// (a copy of the input string is made).
//
// This is a convenience function which calls json_parse.
WARN_UNUSED
inline ParsedJson build_parsed_json(const std::string &s) {
return build_parsed_json(s.data(), s.length(), true);
@ -103,8 +129,10 @@ inline ParsedJson build_parsed_json(const std::string &s) {
// Parse a document found in string s.
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
// Return SUCCESS (an integer = 0) in case of a success. You can also check validity
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
//
// This is a convenience function which calls json_parse.
WARN_UNUSED
inline ParsedJson build_parsed_json(const padded_string &s) {
return build_parsed_json(s.data(), s.length(), false);

View File

@ -10,6 +10,12 @@ struct simdjson {
MEMALLOC, // Error allocating memory, most likely out of memory
TAPE_ERROR, // Something went wrong while writing to the tape
DEPTH_ERROR, // Your document exceeds the user-specified depth limitation
STRING_ERROR, // Problem while parsing a string
T_ATOM_ERROR, // Problem while parsing an atom starting with the letter 't'
F_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'f'
N_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'n'
NUMBER_ERROR, // Problem while parsing a number
UTF8_ERROR // the input is not valid UTF-8
};
static const std::string& errorMsg(const int);
};

View File

@ -35,18 +35,18 @@ int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifnee
#else // SIMDJSON_SAFE_SAME_PAGE_READ_OVERRUN
if(true) { // if not SIMDJSON_SAFE_SAME_PAGE_READ_OVERRUN, we always reallocate
#endif
const uint8_t *tmpbuf = buf;
const uint8_t *tmpbuf = buf;
buf = (uint8_t *) allocate_padded_buffer(len);
if(buf == NULL) return simdjson::MEMALLOC;
memcpy((void*)buf,tmpbuf,len);
reallocated = true;
}
}
// find_structural_bits returns a boolean, not an int, we invert its result to keep consistent with res == 0 meaning success
int res = !find_structural_bits(buf, len, pj);
if (!res) {
res = unified_machine(buf, len, pj);
bool stage1_is_ok = find_structural_bits(buf, len, pj);
if(!stage1_is_ok) {
return simdjson::UTF8_ERROR;
}
int res = unified_machine(buf, len, pj);
if(reallocated) { aligned_free((void*)buf);}
return res;
}

View File

@ -5,7 +5,13 @@ const std::map<int, const std::string> errorStrings = {
{simdjson::SUCCESS, "No errors"},
{simdjson::CAPACITY, "This ParsedJson can't support a document that big"},
{simdjson::MEMALLOC, "Error allocating memory, we're most likely out of memory"},
{simdjson::TAPE_ERROR, "Something went wrong while writing to the tape"}
{simdjson::TAPE_ERROR, "Something went wrong while writing to the tape"},
{simdjson::STRING_ERROR, "Problem while parsing a string"},
{simdjson::T_ATOM_ERROR, "Problem while parsing an atom starting with the letter 't'"},
{simdjson::F_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'f'"},
{simdjson::N_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'n'"},
{simdjson::NUMBER_ERROR, "Problem while parsing a number"},
{simdjson::UTF8_ERROR, "The input is not valid UTF-8"}
};
const std::string& simdjson::errorMsg(const int errorCode) {

View File

@ -103,7 +103,7 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// the root is used, if nothing else, to capture the size of the tape
depth++; // everything starts at depth = 1, depth = 0 is just for the root, the root may contain an object, an array or something else.
if (depth >= pj.depthcapacity) {
return simdjson::DEPTH_ERROR;
goto fail;
}
UPDATE_CHAR();
@ -117,7 +117,7 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
#endif
depth++;
if (depth >= pj.depthcapacity) {
return simdjson::DEPTH_ERROR;
goto fail;
}
pj.write_tape(0, c); // strangely, moving this to object_begin slows things down
goto object_begin;
@ -130,7 +130,7 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
#endif
depth++;
if (depth >= pj.depthcapacity) {
return simdjson::DEPTH_ERROR;
goto fail;
}
pj.write_tape(0, c);
goto array_begin;
@ -153,8 +153,9 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// this only applies to the JSON document made solely of the true value.
// this will almost never be called in practice
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == nullptr) { goto fail;
}
if(copy == nullptr) {
goto fail;
}
memcpy(copy, buf, len);
copy[len] = '\0';
if (!is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
@ -170,8 +171,9 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// this only applies to the JSON document made solely of the false value.
// this will almost never be called in practice
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == nullptr) { goto fail;
}
if(copy == nullptr) {
goto fail;
}
memcpy(copy, buf, len);
copy[len] = '\0';
if (!is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
@ -187,8 +189,9 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// this only applies to the JSON document made solely of the null value.
// this will almost never be called in practice
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == nullptr) { goto fail;
}
if(copy == nullptr) {
goto fail;
}
memcpy(copy, buf, len);
copy[len] = '\0';
if (!is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
@ -213,8 +216,9 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// this is done only for JSON documents made of a sole number
// this will almost never be called in practice
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == nullptr) { goto fail;
}
if(copy == nullptr) {
goto fail;
}
memcpy(copy, buf, len);
copy[len] = '\0';
if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, false)) {
@ -229,8 +233,9 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// this is done only for JSON documents made of a sole number
// this will almost never be called in practice
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == nullptr) { goto fail;
}
if(copy == nullptr) {
goto fail;
}
memcpy(copy, buf, len);
copy[len] = '\0';
if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, true)) {
@ -277,7 +282,7 @@ object_key_state:
switch (c) {
case '"': {
if (!parse_string(buf, len, pj, depth, idx)) {
goto fail;
goto fail;
}
break;
}
@ -332,7 +337,7 @@ object_key_state:
// we found an object inside an object, so we need to increment the depth
depth++;
if (depth >= pj.depthcapacity) {
return simdjson::DEPTH_ERROR;
goto fail;
}
goto object_begin;
@ -349,7 +354,7 @@ object_key_state:
// we found an array inside an object, so we need to increment the depth
depth++;
if (depth >= pj.depthcapacity) {
return simdjson::DEPTH_ERROR;
goto fail;
}
goto array_begin;
}
@ -366,7 +371,7 @@ object_continue:
goto fail;
} else {
if (!parse_string(buf, len, pj, depth, idx)) {
goto fail;
goto fail;
}
goto object_key_state;
}
@ -464,7 +469,7 @@ main_array_switch:
// we found an object inside an array, so we need to increment the depth
depth++;
if (depth >= pj.depthcapacity) {
return simdjson::DEPTH_ERROR;
goto fail;
}
goto object_begin;
@ -481,7 +486,7 @@ main_array_switch:
// we found an array inside an array, so we need to increment the depth
depth++;
if (depth >= pj.depthcapacity) {
return simdjson::DEPTH_ERROR;
goto fail;
}
goto array_begin;
}
@ -521,9 +526,40 @@ succeed:
pj.isvalid = true;
return simdjson::SUCCESS;
fail:
return simdjson::TAPE_ERROR;
// At this point in the code, we have all the time in the world.
// Note that we know exactly where we are in the document so we could,
// without any overhead on the processing code, report a specific location.
// We could even trigger special code paths to assess what happened carefully,
// all without any added cost.
if (depth >= pj.depthcapacity) {
return simdjson::DEPTH_ERROR;
}
switch(c) {
case '"':
return simdjson::STRING_ERROR;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
case '-':
return simdjson::NUMBER_ERROR;
case 't':
return simdjson::T_ATOM_ERROR;
case 'n':
return simdjson::N_ATOM_ERROR;
case 'f':
return simdjson::F_ATOM_ERROR;
default:
break;
}
return simdjson::TAPE_ERROR;
}
int unified_machine(const char *buf, size_t len, ParsedJson &pj) {