parent
17ac5c0525
commit
8526387acb
|
@ -11,7 +11,12 @@
|
|||
|
||||
// Parse a document found in buf.
|
||||
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
|
||||
// Return 0 on success, an error code from simdjson/simdjson.h otherwise
|
||||
//
|
||||
// The function returns simdjson::SUCCESS (an integer = 0) in case of a success or an error code from
|
||||
// simdjson/simdjson.h in case of failure such as simdjson::CAPACITY, simdjson::MEMALLOC,
|
||||
// simdjson::DEPTH_ERROR and so forth; the simdjson::errorMsg function converts these error codes
|
||||
// into a string).
|
||||
//
|
||||
// You can also check validity by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
//
|
||||
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
|
||||
|
@ -24,7 +29,13 @@ int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifnee
|
|||
|
||||
// Parse a document found in buf.
|
||||
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
|
||||
// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
|
||||
//
|
||||
// The function returns simdjson::SUCCESS (an integer = 0) in case of a success or an error code from
|
||||
// simdjson/simdjson.h in case of failure such as simdjson::CAPACITY, simdjson::MEMALLOC,
|
||||
// simdjson::DEPTH_ERROR and so forth; the simdjson::errorMsg function converts these error codes
|
||||
// into a string).
|
||||
//
|
||||
// You can also check validity
|
||||
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
//
|
||||
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
|
||||
|
@ -42,8 +53,11 @@ int json_parse(const char * buf, ParsedJson &pj) = delete;
|
|||
|
||||
// Parse a document found in string s.
|
||||
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
|
||||
// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
|
||||
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
//
|
||||
// The function returns simdjson::SUCCESS (an integer = 0) in case of a success or an error code from
|
||||
// simdjson/simdjson.h in case of failure such as simdjson::CAPACITY, simdjson::MEMALLOC,
|
||||
// simdjson::DEPTH_ERROR and so forth; the simdjson::errorMsg function converts these error codes
|
||||
// into a string).
|
||||
//
|
||||
// A temporary buffer is created when needed during processing
|
||||
// (a copy of the input string is made).
|
||||
|
@ -54,7 +68,13 @@ inline int json_parse(const std::string &s, ParsedJson &pj) {
|
|||
|
||||
// Parse a document found in string s.
|
||||
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
|
||||
// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
|
||||
//
|
||||
// The function returns simdjson::SUCCESS (an integer = 0) in case of a success or an error code from
|
||||
// simdjson/simdjson.h in case of failure such as simdjson::CAPACITY, simdjson::MEMALLOC,
|
||||
// simdjson::DEPTH_ERROR and so forth; the simdjson::errorMsg function converts these error codes
|
||||
// into a string).
|
||||
//
|
||||
// You can also check validity
|
||||
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
WARN_UNUSED
|
||||
inline int json_parse(const padded_string &s, ParsedJson &pj) {
|
||||
|
@ -69,6 +89,8 @@ inline int json_parse(const padded_string &s, ParsedJson &pj) {
|
|||
//
|
||||
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// all bytes at and after buf + len are ignored (can be garbage).
|
||||
//
|
||||
// This is a convenience function which calls json_parse.
|
||||
WARN_UNUSED
|
||||
ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneeded = true);
|
||||
|
||||
|
@ -79,6 +101,8 @@ WARN_UNUSED
|
|||
// (a copy of the input string is made).
|
||||
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// all bytes at and after buf + len are ignored (can be garbage).
|
||||
//
|
||||
// This is a convenience function which calls json_parse.
|
||||
inline ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded = true) {
|
||||
// Forward to the uint8_t overload: the char buffer is reinterpreted as bytes, and len and reallocifneeded are passed through unchanged.
return build_parsed_json(reinterpret_cast<const uint8_t *>(buf), len, reallocifneeded);
|
||||
}
|
||||
|
@ -90,11 +114,13 @@ ParsedJson build_parsed_json(const char *buf) = delete;
|
|||
|
||||
// Parse a document found in string s.
|
||||
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
|
||||
// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
|
||||
// Return SUCCESS (an integer = 0) in case of a success. You can also check validity
|
||||
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
//
|
||||
// A temporary buffer is created when needed during processing
|
||||
// (a copy of the input string is made).
|
||||
//
|
||||
// This is a convenience function which calls json_parse.
|
||||
WARN_UNUSED
|
||||
inline ParsedJson build_parsed_json(const std::string &s) {
|
||||
return build_parsed_json(s.data(), s.length(), true);
|
||||
|
@ -103,8 +129,10 @@ inline ParsedJson build_parsed_json(const std::string &s) {
|
|||
|
||||
// Parse a document found in string s.
|
||||
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
|
||||
// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
|
||||
// Return SUCCESS (an integer = 0) in case of a success. You can also check validity
|
||||
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
//
|
||||
// This is a convenience function which calls json_parse.
|
||||
WARN_UNUSED
|
||||
inline ParsedJson build_parsed_json(const padded_string &s) {
|
||||
return build_parsed_json(s.data(), s.length(), false);
|
||||
|
|
|
@ -10,6 +10,12 @@ struct simdjson {
|
|||
MEMALLOC, // Error allocating memory, most likely out of memory
|
||||
TAPE_ERROR, // Something went wrong while writing to the tape
|
||||
DEPTH_ERROR, // Your document exceeds the user-specified depth limitation
|
||||
STRING_ERROR, // Problem while parsing a string
|
||||
T_ATOM_ERROR, // Problem while parsing an atom starting with the letter 't'
|
||||
F_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'f'
|
||||
N_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'n'
|
||||
NUMBER_ERROR, // Problem while parsing a number
|
||||
UTF8_ERROR // the input is not valid UTF-8
|
||||
};
|
||||
static const std::string& errorMsg(const int);
|
||||
};
|
||||
|
|
|
@ -35,18 +35,18 @@ int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifnee
|
|||
#else // SIMDJSON_SAFE_SAME_PAGE_READ_OVERRUN
|
||||
if(true) { // if not SIMDJSON_SAFE_SAME_PAGE_READ_OVERRUN, we always reallocate
|
||||
#endif
|
||||
const uint8_t *tmpbuf = buf;
|
||||
const uint8_t *tmpbuf = buf;
|
||||
buf = (uint8_t *) allocate_padded_buffer(len);
|
||||
if(buf == NULL) return simdjson::MEMALLOC;
|
||||
memcpy((void*)buf,tmpbuf,len);
|
||||
reallocated = true;
|
||||
}
|
||||
}
|
||||
// find_structural_bits returns a boolean, not an int, we invert its result to keep consistent with res == 0 meaning success
|
||||
int res = !find_structural_bits(buf, len, pj);
|
||||
if (!res) {
|
||||
res = unified_machine(buf, len, pj);
|
||||
bool stage1_is_ok = find_structural_bits(buf, len, pj);
|
||||
if(!stage1_is_ok) {
|
||||
return simdjson::UTF8_ERROR;
|
||||
}
|
||||
int res = unified_machine(buf, len, pj);
|
||||
if(reallocated) { aligned_free((void*)buf);}
|
||||
return res;
|
||||
}
|
||||
|
|
|
@ -5,7 +5,13 @@ const std::map<int, const std::string> errorStrings = {
|
|||
{simdjson::SUCCESS, "No errors"},
|
||||
{simdjson::CAPACITY, "This ParsedJson can't support a document that big"},
|
||||
{simdjson::MEMALLOC, "Error allocating memory, we're most likely out of memory"},
|
||||
{simdjson::TAPE_ERROR, "Something went wrong while writing to the tape"}
|
||||
{simdjson::TAPE_ERROR, "Something went wrong while writing to the tape"},
|
||||
{simdjson::STRING_ERROR, "Problem while parsing a string"},
|
||||
{simdjson::T_ATOM_ERROR, "Problem while parsing an atom starting with the letter 't'"},
|
||||
{simdjson::F_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'f'"},
|
||||
{simdjson::N_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'n'"},
|
||||
{simdjson::NUMBER_ERROR, "Problem while parsing a number"},
|
||||
{simdjson::UTF8_ERROR, "The input is not valid UTF-8"}
|
||||
};
|
||||
|
||||
const std::string& simdjson::errorMsg(const int errorCode) {
|
||||
|
|
|
@ -103,7 +103,7 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
// the root is used, if nothing else, to capture the size of the tape
|
||||
depth++; // everything starts at depth = 1, depth = 0 is just for the root, the root may contain an object, an array or something else.
|
||||
if (depth >= pj.depthcapacity) {
|
||||
return simdjson::DEPTH_ERROR;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
UPDATE_CHAR();
|
||||
|
@ -117,7 +117,7 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
#endif
|
||||
depth++;
|
||||
if (depth >= pj.depthcapacity) {
|
||||
return simdjson::DEPTH_ERROR;
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c); // strangely, moving this to object_begin slows things down
|
||||
goto object_begin;
|
||||
|
@ -130,7 +130,7 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
#endif
|
||||
depth++;
|
||||
if (depth >= pj.depthcapacity) {
|
||||
return simdjson::DEPTH_ERROR;
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
goto array_begin;
|
||||
|
@ -153,8 +153,9 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
// this only applies to the JSON document made solely of the true value.
|
||||
// this will almost never be called in practice
|
||||
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if(copy == nullptr) { goto fail;
|
||||
}
|
||||
if(copy == nullptr) {
|
||||
goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
copy[len] = '\0';
|
||||
if (!is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
|
||||
|
@ -170,8 +171,9 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
// this only applies to the JSON document made solely of the false value.
|
||||
// this will almost never be called in practice
|
||||
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if(copy == nullptr) { goto fail;
|
||||
}
|
||||
if(copy == nullptr) {
|
||||
goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
copy[len] = '\0';
|
||||
if (!is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
|
||||
|
@ -187,8 +189,9 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
// this only applies to the JSON document made solely of the null value.
|
||||
// this will almost never be called in practice
|
||||
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if(copy == nullptr) { goto fail;
|
||||
}
|
||||
if(copy == nullptr) {
|
||||
goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
copy[len] = '\0';
|
||||
if (!is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
|
||||
|
@ -213,8 +216,9 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
// this is done only for JSON documents made of a sole number
|
||||
// this will almost never be called in practice
|
||||
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if(copy == nullptr) { goto fail;
|
||||
}
|
||||
if(copy == nullptr) {
|
||||
goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
copy[len] = '\0';
|
||||
if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, false)) {
|
||||
|
@ -229,8 +233,9 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
// this is done only for JSON documents made of a sole number
|
||||
// this will almost never be called in practice
|
||||
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if(copy == nullptr) { goto fail;
|
||||
}
|
||||
if(copy == nullptr) {
|
||||
goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
copy[len] = '\0';
|
||||
if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, true)) {
|
||||
|
@ -277,7 +282,7 @@ object_key_state:
|
|||
switch (c) {
|
||||
case '"': {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -332,7 +337,7 @@ object_key_state:
|
|||
// we found an object inside an object, so we need to increment the depth
|
||||
depth++;
|
||||
if (depth >= pj.depthcapacity) {
|
||||
return simdjson::DEPTH_ERROR;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
goto object_begin;
|
||||
|
@ -349,7 +354,7 @@ object_key_state:
|
|||
// we found an array inside an object, so we need to increment the depth
|
||||
depth++;
|
||||
if (depth >= pj.depthcapacity) {
|
||||
return simdjson::DEPTH_ERROR;
|
||||
goto fail;
|
||||
}
|
||||
goto array_begin;
|
||||
}
|
||||
|
@ -366,7 +371,7 @@ object_continue:
|
|||
goto fail;
|
||||
} else {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
goto fail;
|
||||
}
|
||||
goto object_key_state;
|
||||
}
|
||||
|
@ -464,7 +469,7 @@ main_array_switch:
|
|||
// we found an object inside an array, so we need to increment the depth
|
||||
depth++;
|
||||
if (depth >= pj.depthcapacity) {
|
||||
return simdjson::DEPTH_ERROR;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
goto object_begin;
|
||||
|
@ -481,7 +486,7 @@ main_array_switch:
|
|||
// we found an array inside an array, so we need to increment the depth
|
||||
depth++;
|
||||
if (depth >= pj.depthcapacity) {
|
||||
return simdjson::DEPTH_ERROR;
|
||||
goto fail;
|
||||
}
|
||||
goto array_begin;
|
||||
}
|
||||
|
@ -521,9 +526,40 @@ succeed:
|
|||
|
||||
pj.isvalid = true;
|
||||
return simdjson::SUCCESS;
|
||||
|
||||
fail:
|
||||
return simdjson::TAPE_ERROR;
|
||||
// At this point in the code, we have all the time in the world.
|
||||
// Note that we know exactly where we are in the document so we could,
|
||||
// without any overhead on the processing code, report a specific location.
|
||||
// We could even trigger special code paths to assess what happened carefully,
|
||||
// all without any added cost.
|
||||
if (depth >= pj.depthcapacity) {
|
||||
return simdjson::DEPTH_ERROR;
|
||||
}
|
||||
switch(c) {
|
||||
case '"':
|
||||
return simdjson::STRING_ERROR;
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
case '-':
|
||||
return simdjson::NUMBER_ERROR;
|
||||
case 't':
|
||||
return simdjson::T_ATOM_ERROR;
|
||||
case 'n':
|
||||
return simdjson::N_ATOM_ERROR;
|
||||
case 'f':
|
||||
return simdjson::F_ATOM_ERROR;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return simdjson::TAPE_ERROR;
|
||||
}
|
||||
|
||||
int unified_machine(const char *buf, size_t len, ParsedJson &pj) {
|
||||
|
|
Loading…
Reference in New Issue