Merge branch 'master' of github.com:lemire/simdjson
This commit is contained in:
commit
a2d05b21ff
|
@ -64,6 +64,7 @@ jobs:
|
|||
done
|
||||
- name: Run the other fuzzer variants for $fuzzer, with sanitizers etc
|
||||
run: |
|
||||
set -x
|
||||
for fuzzer in $allfuzzers; do
|
||||
build-ossfuzz-withavx/fuzz/fuzz_$fuzzer out/$fuzzer -max_total_time=20 $artifactsprefix || touch failed
|
||||
build-ossfuzz-noavx/fuzz/fuzz_$fuzzer out/$fuzzer -max_total_time=10 $artifactsprefix || touch failed
|
||||
|
|
|
@ -14,7 +14,8 @@ WARN_UNUSED int
|
|||
unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj);
|
||||
|
||||
template <Architecture T = Architecture::NATIVE>
|
||||
int unified_machine(const char *buf, size_t len, ParsedJson &pj) {
|
||||
WARN_UNUSED int
|
||||
unified_machine(const char *buf, size_t len, ParsedJson &pj) {
|
||||
return unified_machine<T>(reinterpret_cast<const uint8_t *>(buf), len, pj);
|
||||
}
|
||||
|
||||
|
|
|
@ -21,13 +21,13 @@ namespace simdjson {
|
|||
template <>
|
||||
WARN_UNUSED int
|
||||
unified_machine<Architecture::ARM64>(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||
return arm64::unified_machine(buf, len, pj);
|
||||
return arm64::stage2::unified_machine(buf, len, pj);
|
||||
}
|
||||
|
||||
template <>
|
||||
WARN_UNUSED int
|
||||
unified_machine<Architecture::ARM64>(const uint8_t *buf, size_t len, ParsedJson &pj, size_t &next_json) {
|
||||
return arm64::unified_machine(buf, len, pj, next_json);
|
||||
return arm64::stage2::unified_machine(buf, len, pj, next_json);
|
||||
}
|
||||
|
||||
} // namespace simdjson
|
||||
|
|
|
@ -3,192 +3,86 @@
|
|||
// We assume the file in which it is included already includes
|
||||
// "simdjson/stage2_build_tape.h" (this simplifies amalgamation)
|
||||
|
||||
// this macro reads the next structural character, updating idx, i and c.
|
||||
#define UPDATE_CHAR() \
|
||||
{ \
|
||||
idx = pj.structural_indexes[i++]; \
|
||||
c = buf[idx]; \
|
||||
}
|
||||
namespace stage2 {
|
||||
|
||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
||||
#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = &&array_continue;
|
||||
#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = &&object_continue;
|
||||
#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = &&start_continue;
|
||||
#define GOTO_CONTINUE() goto *pj.ret_address[depth];
|
||||
typedef void* ret_address;
|
||||
#define INIT_ADDRESSES() { &&array_begin, &&array_continue, &&error, &&finish, &&object_begin, &&object_continue }
|
||||
#define GOTO(address) { goto *(address); }
|
||||
#define CONTINUE(address) { goto *(address); }
|
||||
#else
|
||||
#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = 'a';
|
||||
#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = 'o';
|
||||
#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = 's';
|
||||
#define GOTO_CONTINUE() \
|
||||
{ \
|
||||
if (pj.ret_address[depth] == 'a') { \
|
||||
goto array_continue; \
|
||||
} else if (pj.ret_address[depth] == 'o') { \
|
||||
goto object_continue; \
|
||||
} else { \
|
||||
goto start_continue; \
|
||||
} \
|
||||
typedef char ret_address;
|
||||
// Switch-based (no computed goto) state identifiers, listed in the member order
// of unified_machine_addresses. No trailing semicolon: the use site supplies it,
// which matches the computed-goto variant of this macro and keeps the expansion
// usable as a plain initializer expression.
#define INIT_ADDRESSES() { '[', 'a', 'e', 'f', '{', 'o' }
|
||||
#define GOTO(address) \
|
||||
{ \
|
||||
switch(address) { \
|
||||
case '[': goto array_begin; \
|
||||
case 'a': goto array_continue; \
|
||||
case 'e': goto error; \
|
||||
case 'f': goto finish; \
|
||||
case '{': goto object_begin; \
|
||||
case 'o': goto object_continue; \
|
||||
} \
|
||||
}
|
||||
// For the more constrained pop_scope() situation
|
||||
#define CONTINUE(address) \
|
||||
{ \
|
||||
switch(address) { \
|
||||
case 'a': goto array_continue; \
|
||||
case 'o': goto object_continue; \
|
||||
case 'f': goto finish; \
|
||||
} \
|
||||
}
|
||||
#endif
|
||||
|
||||
/************
|
||||
* The JSON is parsed to a tape, see the accompanying tape.md file
|
||||
* for documentation.
|
||||
***********/
|
||||
WARN_UNUSED int
|
||||
unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||
uint32_t i = 0; /* index of the structural character (0,1,2,3...) */
|
||||
uint32_t idx; /* location of the structural character in the input (buf) */
|
||||
uint8_t c; /* used to track the (structural) character we are looking at,
|
||||
updated */
|
||||
/* by UPDATE_CHAR macro */
|
||||
uint32_t depth = 0; /* could have an arbitrary starting depth */
|
||||
pj.init(); /* sets is_valid to false */
|
||||
if (pj.byte_capacity < len) {
|
||||
pj.error_code = simdjson::CAPACITY;
|
||||
return pj.error_code;
|
||||
// One resume target per parser state. Instances are aggregate-initialized from
// INIT_ADDRESSES(), so the member order below must stay in sync with that macro.
struct unified_machine_addresses {
|
||||
ret_address array_begin;
|
||||
ret_address array_continue;
|
||||
ret_address error;
|
||||
ret_address finish;
|
||||
ret_address object_begin;
|
||||
ret_address object_continue;
|
||||
};
|
||||
|
||||
// Return-based flavour of FAIL_IF: when EXPR is true, the enclosing function
// returns the error state. Expects a variable named `addresses` to be in scope.
#undef FAIL_IF
|
||||
#define FAIL_IF(EXPR) { if (EXPR) { return addresses.error; } }
|
||||
|
||||
// This is just so we can call parse_string() from parser.parse_string() without conflict.
|
||||
WARN_UNUSED really_inline bool
|
||||
really_parse_string(const uint8_t *buf, size_t len, ParsedJson &pj, uint32_t depth, uint32_t idx) {
|
||||
return parse_string(buf, len, pj, depth, idx);
|
||||
}
|
||||
// Free-function shim around parse_number(), for the same reason as
// really_parse_string(): structural_parser has members named parse_number().
// The result is passed through unchanged.
WARN_UNUSED really_inline bool
really_parse_number(const uint8_t *const buf, ParsedJson &pj, const uint32_t offset, bool found_minus) {
  const bool parsed = parse_number(buf, pj, offset, found_minus);
  return parsed;
}
|
||||
|
||||
struct structural_parser {
|
||||
const uint8_t* const buf;
|
||||
const size_t len;
|
||||
ParsedJson &pj;
|
||||
uint32_t i; // next structural index
|
||||
uint32_t idx; // location of the structural character in the input (buf)
|
||||
uint8_t c; // used to track the (structural) character we are looking at
|
||||
uint32_t depth = 0; // could have an arbitrary starting depth
|
||||
|
||||
really_inline structural_parser(const uint8_t *_buf, size_t _len, ParsedJson &_pj, uint32_t _i = 0) : buf{_buf}, len{_len}, pj{_pj}, i{_i} {}
|
||||
|
||||
// Records error_code in the ParsedJson and hands the same value back, so that
// callers can write `return set_error_code(...)`.
WARN_UNUSED really_inline int set_error_code(ErrorValues error_code) {
  const int code = error_code;
  pj.error_code = error_code;
  return code;
}
|
||||
|
||||
/*//////////////////////////// START STATE /////////////////////////////
|
||||
*/
|
||||
SET_GOTO_START_CONTINUE()
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, 'r'); /* r for root, 0 is going to get overwritten */
|
||||
/* the root is used, if nothing else, to capture the size of the tape */
|
||||
depth++; /* everything starts at depth = 1, depth = 0 is just for the
|
||||
root, the root may contain an object, an array or something
|
||||
else. */
|
||||
if (depth >= pj.depth_capacity) {
|
||||
goto fail;
|
||||
// Moves to the next structural character: loads its input offset into idx,
// bumps the structural index i, caches the byte in c and returns it.
really_inline char advance_char() {
  idx = pj.structural_indexes[i];
  i++;
  c = buf[idx];
  return c;
}
|
||||
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case '{':
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
SET_GOTO_START_CONTINUE();
|
||||
depth++;
|
||||
if (depth >= pj.depth_capacity) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(
|
||||
0, c); /* strangely, moving this to object_begin slows things down */
|
||||
goto object_begin;
|
||||
case '[':
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
SET_GOTO_START_CONTINUE();
|
||||
depth++;
|
||||
if (depth >= pj.depth_capacity) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
goto array_begin;
|
||||
template<typename F>
|
||||
really_inline bool with_space_terminated_copy(const F& f) {
|
||||
/**
|
||||
* A JSON text is a serialized value. Note that certain previous
|
||||
* specifications of JSON constrained a JSON text to be an object or an
|
||||
* array. Implementations that generate only objects or arrays where a
|
||||
* JSON text is called for will be interoperable in the sense that all
|
||||
* implementations will accept these as conforming JSON texts.
|
||||
* https://tools.ietf.org/html/rfc8259
|
||||
**/
|
||||
case '"': {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 't': {
|
||||
/* We need to make a copy to make sure that the string is space
|
||||
* terminated.
|
||||
* This only applies to the JSON document made solely of the true
|
||||
* value.
|
||||
* This is not about padding the input, which should already be padded up
|
||||
* to len + SIMDJSON_PADDING. However, we have no control at this stage
|
||||
* on how the padding was done. What if the input string was padded with nulls?
|
||||
* It is quite common for an input string to have an extra null character (C string).
|
||||
* This copy is relatively expensive, but it will almost never be called in
|
||||
* practice unless you are in the strange scenario where you have many JSON
|
||||
* documents made of single atoms.
|
||||
*/
|
||||
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if (copy == nullptr) {
|
||||
goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
memset(copy + len, ' ', sizeof(uint64_t));
|
||||
if (!is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
|
||||
free(copy);
|
||||
goto fail;
|
||||
}
|
||||
free(copy);
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
}
|
||||
case 'f': {
|
||||
/* We need to make a copy to make sure that the string is space
|
||||
* terminated.
|
||||
* This only applies to the JSON document made solely of the false
|
||||
* value.
|
||||
* This is not about padding the input, which should already be padded up
|
||||
* to len + SIMDJSON_PADDING. However, we have no control at this stage
|
||||
* on how the padding was done. What if the input string was padded with nulls?
|
||||
* It is quite common for an input string to have an extra null character (C string).
|
||||
* This copy is relatively expensive, but it will almost never be called in
|
||||
* practice unless you are in the strange scenario where you have many JSON
|
||||
* documents made of single atoms.
|
||||
*/
|
||||
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if (copy == nullptr) {
|
||||
goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
memset(copy + len, ' ', sizeof(uint64_t));
|
||||
if (!is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
|
||||
free(copy);
|
||||
goto fail;
|
||||
}
|
||||
free(copy);
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
}
|
||||
case 'n': {
|
||||
/* We need to make a copy to make sure that the string is space
|
||||
* terminated.
|
||||
* This is not about padding the input, which should already be padded up
|
||||
* to len + SIMDJSON_PADDING. However, we have no control at this stage
|
||||
* on how the padding was done. What if the input string was padded with nulls?
|
||||
* It is quite common for an input string to have an extra null character (C string).
|
||||
* This only applies to the JSON document made solely of the null value.
|
||||
* This copy is relatively expensive, but it will almost never be called in
|
||||
* practice unless you are in the strange scenario where you have many JSON
|
||||
* documents made of single atoms.
|
||||
*/
|
||||
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if (copy == nullptr) {
|
||||
goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
memset(copy + len, ' ', sizeof(uint64_t));
|
||||
if (!is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
|
||||
free(copy);
|
||||
goto fail;
|
||||
}
|
||||
free(copy);
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
}
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9': {
|
||||
/**
|
||||
* We need to make a copy to make sure that the input string is space terminated.
|
||||
* We need to make a copy to make sure that the string is space terminated.
|
||||
* This is not about padding the input, which should already be padded up
|
||||
* to len + SIMDJSON_PADDING. However, we have no control at this stage
|
||||
* on how the padding was done. What if the input string was padded with nulls?
|
||||
|
@ -202,352 +96,301 @@ unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
*/
|
||||
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if (copy == nullptr) {
|
||||
goto fail;
|
||||
return true;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
memset(copy + len, ' ', SIMDJSON_PADDING);
|
||||
if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx,
|
||||
false)) {
|
||||
free(copy);
|
||||
goto fail;
|
||||
}
|
||||
bool result = f(reinterpret_cast<const uint8_t*>(copy), idx);
|
||||
free(copy);
|
||||
break;
|
||||
return result;
|
||||
}
|
||||
case '-': {
|
||||
/**
|
||||
* We need to make a copy to make sure that the input string is space terminated.
|
||||
* This is not about padding the input, which should already be padded up
|
||||
* to len + SIMDJSON_PADDING. However, we have no control at this stage
|
||||
* on how the padding was done. What if the input string was padded with nulls?
|
||||
* It is quite common for an input string to have an extra null character (C string).
|
||||
* We do not want to allow -9\0 (where \0 is the null character) inside a JSON
|
||||
* document, but the string "-9\0" by itself is fine. So we make a copy and
|
||||
* pad the input with spaces when we know that there is just one input element.
|
||||
* This copy is relatively expensive, but it will almost never be called in
|
||||
* practice unless you are in the strange scenario where you have many JSON
|
||||
* documents made of single atoms.
|
||||
*/
|
||||
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if (copy == nullptr) {
|
||||
goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
memset(copy + len, ' ', SIMDJSON_PADDING);
|
||||
if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, true)) {
|
||||
free(copy);
|
||||
goto fail;
|
||||
}
|
||||
free(copy);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
goto fail;
|
||||
}
|
||||
start_continue:
|
||||
/* the string might not be NULL terminated. */
|
||||
if (i + 1 == pj.n_structural_indexes) {
|
||||
goto succeed;
|
||||
} else {
|
||||
goto fail;
|
||||
}
|
||||
/*//////////////////////////// OBJECT STATES ///////////////////////////*/
|
||||
|
||||
object_begin:
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case '"': {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
// Opens a scope at the current depth: remembers the current tape location and
// the state to resume once the scope closes, then writes a placeholder tape
// entry tagged with `type`.
// Returns true when the new depth has reached pj.depth_capacity (failure).
WARN_UNUSED really_inline bool push_start_scope(ret_address continue_state, char type) {
  const uint32_t scope = depth;
  pj.containing_scope_offset[scope] = pj.get_current_loc();
  pj.ret_address[scope] = continue_state;
  depth = scope + 1;
  pj.write_tape(0, type);
  return !(depth < pj.depth_capacity);
}
|
||||
|
||||
// Convenience overload: opens a scope tagged with the current structural
// character c. Returns true on failure, like the two-argument overload.
WARN_UNUSED really_inline bool push_start_scope(ret_address continue_state) {
  const char type = c;
  return push_start_scope(continue_state, type);
}
|
||||
|
||||
// Opens a nested scope for the current structural character c. Compared with
// push_start_scope(), the placeholder tape entry is written before the resume
// state is stored.
// Returns true when the new depth has reached pj.depth_capacity (failure).
WARN_UNUSED really_inline bool push_scope(ret_address continue_state) {
  const uint32_t scope = depth;
  pj.containing_scope_offset[scope] = pj.get_current_loc();
  pj.write_tape(0, c); // written as early as possible
  pj.ret_address[scope] = continue_state;
  depth = scope + 1;
  return !(depth < pj.depth_capacity);
}
|
||||
|
||||
// Closes the innermost scope: writes the closing tape entry (pointing back at
// the scope's opening location), annotates the opening entry with the tape
// location after the close, and returns the state saved when the scope opened.
WARN_UNUSED really_inline ret_address pop_scope() {
  --depth;
  const uint32_t scope = depth;
  pj.write_tape(pj.containing_scope_offset[scope], c);
  pj.annotate_previous_loc(pj.containing_scope_offset[scope], pj.get_current_loc());
  return pj.ret_address[scope];
}
|
||||
// Closes the root scope. Unlike pop_scope(), the opening entry is annotated
// first (with the location *before* the closing entry is appended), and the
// closing entry is always tagged 'r'.
really_inline void pop_root_scope() {
  --depth;
  const uint32_t scope = depth;
  pj.annotate_previous_loc(pj.containing_scope_offset[scope], pj.get_current_loc());
  pj.write_tape(pj.containing_scope_offset[scope], 'r');
}
|
||||
|
||||
// Parses the string at the current structural position.
// Returns true on FAILURE (the inverse of really_parse_string()).
WARN_UNUSED really_inline bool parse_string() {
  const bool parsed = really_parse_string(buf, len, pj, depth, idx);
  return !parsed;
}
|
||||
|
||||
// Parses the number located at copy + offset.
// Returns true on FAILURE (the inverse of really_parse_number()).
WARN_UNUSED really_inline bool parse_number(const uint8_t *copy, uint32_t offset, bool found_minus) {
  const bool parsed = really_parse_number(copy, pj, offset, found_minus);
  return !parsed;
}
|
||||
// Parses the number at the current structural position of the input buffer.
// Returns true on FAILURE.
WARN_UNUSED really_inline bool parse_number(bool found_minus) {
  const bool failed = parse_number(buf, idx, found_minus);
  return failed;
}
|
||||
|
||||
// Validates the true/false/null atom found at copy + offset, selected by the
// current character c, and appends it to the tape when valid.
// Returns true on FAILURE. Any other value of c returns false without
// touching the tape.
WARN_UNUSED really_inline bool parse_atom(const uint8_t *copy, uint32_t offset) {
  const uint8_t *const atom = copy + offset;
  bool valid;
  switch (c) {
    case 't':
      valid = is_valid_true_atom(atom);
      break;
    case 'f':
      valid = is_valid_false_atom(atom);
      break;
    case 'n':
      valid = is_valid_null_atom(atom);
      break;
    default:
      return false;
  }
  if (!valid) {
    return true;
  }
  pj.write_tape(0, c);
  return false;
}
|
||||
|
||||
// Validates the atom at the current structural position of the input buffer.
// Returns true on FAILURE.
WARN_UNUSED really_inline bool parse_atom() {
  const bool failed = parse_atom(buf, idx);
  return failed;
}
|
||||
|
||||
// Handles the value that starts at the current character c and returns the
// state to jump to next:
//  - scalars (string, atom, number): continue_state on success;
//  - '{' / '[': a new scope is pushed (resuming at continue_state when it
//    closes) and the object/array begin state is returned;
//  - any failure or unexpected character: addresses.error.
WARN_UNUSED really_inline ret_address parse_value(const unified_machine_addresses &addresses, ret_address continue_state) {
  switch (c) {
    case '"':
      if (parse_string()) { return addresses.error; }
      return continue_state;
    case 't':
    case 'f':
    case 'n':
      if (parse_atom()) { return addresses.error; }
      return continue_state;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
      if (parse_number(false)) { return addresses.error; }
      return continue_state;
    case '-':
      if (parse_number(true)) { return addresses.error; }
      return continue_state;
    case '{':
      if (push_scope(continue_state)) { return addresses.error; }
      return addresses.object_begin;
    case '[':
      if (push_scope(continue_state)) { return addresses.error; }
      return addresses.array_begin;
    default:
      return addresses.error;
  }
}
|
||||
|
||||
// Final state: checks that the structural indexes were consumed up to the
// expected position, closes the root scope, and marks the document valid.
// Every exit path records its result code in pj.error_code.
WARN_UNUSED really_inline int finish() {
  // the string might not be NULL terminated.
  const bool consumed_everything = (i + 1 == pj.n_structural_indexes);
  if (!consumed_everything) {
    return set_error_code(TAPE_ERROR);
  }

  pop_root_scope();

  // After popping the root we must be back at depth 0, and the root scope
  // must start at tape location 0.
  if (depth != 0 || pj.containing_scope_offset[depth] != 0) {
    return set_error_code(TAPE_ERROR);
  }

  pj.valid = true;
  return set_error_code(SUCCESS);
}
|
||||
|
||||
// Error state: picks the most specific error code available, stores it in
// pj.error_code and returns it.
// Validity does not need resetting here; the original comment notes that
// pj.init() already marks the document invalid. This path is not
// performance-sensitive, so more detailed diagnostics (for example the exact
// location) could be added here at no cost to the parsing code.
WARN_UNUSED really_inline int error() {
  if (depth >= pj.depth_capacity) {
    return set_error_code(DEPTH_ERROR);
  }
  // Otherwise classify by the character we were looking at when parsing failed.
  if (c == '"') {
    return set_error_code(STRING_ERROR);
  }
  if (c == '-' || (c >= '0' && c <= '9')) {
    return set_error_code(NUMBER_ERROR);
  }
  if (c == 't') {
    return set_error_code(T_ATOM_ERROR);
  }
  if (c == 'n') {
    return set_error_code(N_ATOM_ERROR);
  }
  if (c == 'f') {
    return set_error_code(F_ATOM_ERROR);
  }
  return set_error_code(TAPE_ERROR);
}
|
||||
|
||||
// Prepares the parser for a new document.
//
// Resets the ParsedJson, checks that it has capacity for `len` input bytes,
// loads the first structural character, and pushes the root scope, which
// resumes at finish_state once the top-level value is complete.
//
// Returns SUCCESS (zero), or CAPACITY / DEPTH_ERROR. The failure codes are
// also stored in pj.error_code, as finish() and error() do, so a caller that
// inspects the ParsedJson does not see a stale code.
WARN_UNUSED really_inline int start(ret_address finish_state) {
  pj.init(); // sets is_valid to false
  if (len > pj.byte_capacity) {
    return set_error_code(CAPACITY);
  }
  // Advance to the first character as soon as possible
  advance_char();
  // Push the root scope (there is always at least one scope)
  if (push_start_scope(finish_state, 'r')) {
    return set_error_code(DEPTH_ERROR);
  }
  return SUCCESS;
}
|
||||
};
|
||||
|
||||
// Redefine FAIL_IF to use goto since it'll be used inside the function now
|
||||
#undef FAIL_IF
|
||||
#define FAIL_IF(EXPR) { if (EXPR) { goto error; } }
|
||||
|
||||
/************
|
||||
* The JSON is parsed to a tape, see the accompanying tape.md file
|
||||
* for documentation.
|
||||
***********/
|
||||
WARN_UNUSED int
|
||||
unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||
static constexpr unified_machine_addresses addresses = INIT_ADDRESSES();
|
||||
structural_parser parser(buf, len, pj);
|
||||
int result = parser.start(addresses.finish);
|
||||
if (result) { return result; }
|
||||
|
||||
//
|
||||
// Read first value
|
||||
//
|
||||
switch (parser.c) {
|
||||
case '{':
|
||||
FAIL_IF( parser.push_start_scope(addresses.finish) );
|
||||
goto object_begin;
|
||||
case '[':
|
||||
FAIL_IF( parser.push_start_scope(addresses.finish) );
|
||||
goto array_begin;
|
||||
case '"':
|
||||
FAIL_IF( parser.parse_string() );
|
||||
goto finish;
|
||||
case 't': case 'f': case 'n':
|
||||
FAIL_IF(
|
||||
parser.with_space_terminated_copy([&](auto copy, auto idx) {
|
||||
return parser.parse_atom(copy, idx);
|
||||
})
|
||||
);
|
||||
goto finish;
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
FAIL_IF(
|
||||
parser.with_space_terminated_copy([&](auto copy, auto idx) {
|
||||
return parser.parse_number(copy, idx, false);
|
||||
})
|
||||
);
|
||||
goto finish;
|
||||
case '-':
|
||||
FAIL_IF(
|
||||
parser.with_space_terminated_copy([&](auto copy, auto idx) {
|
||||
return parser.parse_number(copy, idx, true);
|
||||
})
|
||||
);
|
||||
goto finish;
|
||||
default:
|
||||
goto error;
|
||||
}
|
||||
|
||||
//
|
||||
// Object parser states
|
||||
//
|
||||
object_begin:
|
||||
parser.advance_char();
|
||||
switch (parser.c) {
|
||||
case '"': {
|
||||
FAIL_IF( parser.parse_string() );
|
||||
goto object_key_state;
|
||||
}
|
||||
case '}':
|
||||
goto scope_end; /* could also go to object_continue */
|
||||
goto scope_end; // could also go to object_continue
|
||||
default:
|
||||
goto fail;
|
||||
goto error;
|
||||
}
|
||||
|
||||
object_key_state:
|
||||
UPDATE_CHAR();
|
||||
if (c != ':') {
|
||||
goto fail;
|
||||
}
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case '"': {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 't':
|
||||
if (!is_valid_true_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case 'f':
|
||||
if (!is_valid_false_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case 'n':
|
||||
if (!is_valid_null_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9': {
|
||||
if (!parse_number(buf, pj, idx, false)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '-': {
|
||||
if (!parse_number(buf, pj, idx, true)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '{': {
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, c); /* here the compilers knows what c is so this gets
|
||||
optimized */
|
||||
/* we have not yet encountered } so we need to come back for it */
|
||||
SET_GOTO_OBJECT_CONTINUE()
|
||||
/* we found an object inside an object, so we need to increment the
|
||||
* depth */
|
||||
depth++;
|
||||
if (depth >= pj.depth_capacity) {
|
||||
goto fail;
|
||||
}
|
||||
FAIL_IF( parser.advance_char() != ':' );
|
||||
|
||||
goto object_begin;
|
||||
}
|
||||
case '[': {
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, c); /* here the compilers knows what c is so this gets
|
||||
optimized */
|
||||
/* we have not yet encountered } so we need to come back for it */
|
||||
SET_GOTO_OBJECT_CONTINUE()
|
||||
/* we found an array inside an object, so we need to increment the depth
|
||||
*/
|
||||
depth++;
|
||||
if (depth >= pj.depth_capacity) {
|
||||
goto fail;
|
||||
}
|
||||
goto array_begin;
|
||||
}
|
||||
default:
|
||||
goto fail;
|
||||
}
|
||||
parser.advance_char();
|
||||
GOTO( parser.parse_value(addresses, addresses.object_continue) );
|
||||
|
||||
object_continue:
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
switch (parser.advance_char()) {
|
||||
case ',':
|
||||
UPDATE_CHAR();
|
||||
if (c != '"') {
|
||||
goto fail;
|
||||
} else {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
goto object_key_state;
|
||||
}
|
||||
FAIL_IF( parser.advance_char() != '"' );
|
||||
FAIL_IF( parser.parse_string() );
|
||||
goto object_key_state;
|
||||
case '}':
|
||||
goto scope_end;
|
||||
default:
|
||||
goto fail;
|
||||
goto error;
|
||||
}
|
||||
|
||||
/*//////////////////////////// COMMON STATE ///////////////////////////*/
|
||||
|
||||
scope_end:
|
||||
/* write our tape location to the header scope */
|
||||
depth--;
|
||||
pj.write_tape(pj.containing_scope_offset[depth], c);
|
||||
pj.annotate_previous_loc(pj.containing_scope_offset[depth],
|
||||
pj.get_current_loc());
|
||||
/* goto saved_state */
|
||||
GOTO_CONTINUE()
|
||||
CONTINUE( parser.pop_scope() );
|
||||
|
||||
/*//////////////////////////// ARRAY STATES ///////////////////////////*/
|
||||
//
|
||||
// Array parser states
|
||||
//
|
||||
array_begin:
|
||||
UPDATE_CHAR();
|
||||
if (c == ']') {
|
||||
goto scope_end; /* could also go to array_continue */
|
||||
if (parser.advance_char() == ']') {
|
||||
goto scope_end; // could also go to array_continue
|
||||
}
|
||||
|
||||
main_array_switch:
|
||||
/* we call update char on all paths in, so we can peek at c on the
|
||||
/* we call update char on all paths in, so we can peek at parser.c on the
|
||||
* paths that can accept a close square brace (post-, and at start) */
|
||||
switch (c) {
|
||||
case '"': {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 't':
|
||||
if (!is_valid_true_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case 'f':
|
||||
if (!is_valid_false_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case 'n':
|
||||
if (!is_valid_null_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break; /* goto array_continue; */
|
||||
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9': {
|
||||
if (!parse_number(buf, pj, idx, false)) {
|
||||
goto fail;
|
||||
}
|
||||
break; /* goto array_continue; */
|
||||
}
|
||||
case '-': {
|
||||
if (!parse_number(buf, pj, idx, true)) {
|
||||
goto fail;
|
||||
}
|
||||
break; /* goto array_continue; */
|
||||
}
|
||||
case '{': {
|
||||
/* we have not yet encountered ] so we need to come back for it */
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, c); /* here the compilers knows what c is so this gets
|
||||
optimized */
|
||||
SET_GOTO_ARRAY_CONTINUE()
|
||||
/* we found an object inside an array, so we need to increment the depth
|
||||
*/
|
||||
depth++;
|
||||
if (depth >= pj.depth_capacity) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
goto object_begin;
|
||||
}
|
||||
case '[': {
|
||||
/* we have not yet encountered ] so we need to come back for it */
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, c); /* here the compilers knows what c is so this gets
|
||||
optimized */
|
||||
SET_GOTO_ARRAY_CONTINUE()
|
||||
/* we found an array inside an array, so we need to increment the depth
|
||||
*/
|
||||
depth++;
|
||||
if (depth >= pj.depth_capacity) {
|
||||
goto fail;
|
||||
}
|
||||
goto array_begin;
|
||||
}
|
||||
default:
|
||||
goto fail;
|
||||
}
|
||||
GOTO( parser.parse_value(addresses, addresses.array_continue) );
|
||||
|
||||
array_continue:
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
switch (parser.advance_char()) {
|
||||
case ',':
|
||||
UPDATE_CHAR();
|
||||
parser.advance_char();
|
||||
goto main_array_switch;
|
||||
case ']':
|
||||
goto scope_end;
|
||||
default:
|
||||
goto fail;
|
||||
goto error;
|
||||
}
|
||||
|
||||
/*//////////////////////////// FINAL STATES ///////////////////////////*/
|
||||
finish:
|
||||
return parser.finish();
|
||||
|
||||
succeed:
|
||||
depth--;
|
||||
if (depth != 0) {
|
||||
fprintf(stderr, "internal bug\n");
|
||||
abort();
|
||||
}
|
||||
if (pj.containing_scope_offset[depth] != 0) {
|
||||
fprintf(stderr, "internal bug\n");
|
||||
abort();
|
||||
}
|
||||
pj.annotate_previous_loc(pj.containing_scope_offset[depth],
|
||||
pj.get_current_loc());
|
||||
pj.write_tape(pj.containing_scope_offset[depth], 'r'); /* r is root */
|
||||
|
||||
pj.valid = true;
|
||||
pj.error_code = simdjson::SUCCESS;
|
||||
return pj.error_code;
|
||||
fail:
|
||||
/* we do not need the next line because this is done by pj.init(),
|
||||
* pessimistically.
|
||||
* pj.is_valid = false;
|
||||
* At this point in the code, we have all the time in the world.
|
||||
* Note that we know exactly where we are in the document so we could,
|
||||
* without any overhead on the processing code, report a specific
|
||||
* location.
|
||||
* We could even trigger special code paths to assess what happened
|
||||
* carefully,
|
||||
* all without any added cost. */
|
||||
if (depth >= pj.depth_capacity) {
|
||||
pj.error_code = simdjson::DEPTH_ERROR;
|
||||
return pj.error_code;
|
||||
}
|
||||
switch (c) {
|
||||
case '"':
|
||||
pj.error_code = simdjson::STRING_ERROR;
|
||||
return pj.error_code;
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
case '-':
|
||||
pj.error_code = simdjson::NUMBER_ERROR;
|
||||
return pj.error_code;
|
||||
case 't':
|
||||
pj.error_code = simdjson::T_ATOM_ERROR;
|
||||
return pj.error_code;
|
||||
case 'n':
|
||||
pj.error_code = simdjson::N_ATOM_ERROR;
|
||||
return pj.error_code;
|
||||
case 'f':
|
||||
pj.error_code = simdjson::F_ATOM_ERROR;
|
||||
return pj.error_code;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
pj.error_code = simdjson::TAPE_ERROR;
|
||||
return pj.error_code;
|
||||
error:
|
||||
return parser.error();
|
||||
}
|
||||
|
||||
} // namespace stage2
|
|
@ -1,497 +1,161 @@
|
|||
namespace stage2 {
|
||||
|
||||
// Structural parser variant used by the streaming unified_machine overload
// (the one that takes a next_json index). It replaces start() and finish() so
// that a parse may end with structural indexes still remaining, which is
// reported as SUCCESS_AND_HAS_MORE instead of an error.
struct streaming_structural_parser: structural_parser {
|
||||
really_inline streaming_structural_parser(const uint8_t *_buf, size_t _len, ParsedJson &_pj, size_t _i) : structural_parser(_buf, _len, _pj, _i) {}
|
||||
|
||||
// Streaming version of start(): resets pj, advances to the first character and pushes the root scope. Returns DEPTH_ERROR if push_start_scope() reports failure, SUCCESS otherwise.
|
||||
WARN_UNUSED really_inline int start(ret_address finish_parser) {
|
||||
pj.init(); // sets is_valid to false
|
||||
// Capacity is deliberately not checked in streaming mode.
|
||||
// Advance to the first character as soon as possible
|
||||
advance_char();
|
||||
// Push the root scope (there is always at least one scope); 'r' is the root tape marker
|
||||
if (push_start_scope(finish_parser, 'r')) {
|
||||
return DEPTH_ERROR;
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
// Streaming version of finish(): validates the end state, marks pj valid and returns SUCCESS when i + 1 equals pj.n_structural_indexes, or SUCCESS_AND_HAS_MORE when indexes remain. Every failed check returns TAPE_ERROR through set_error_code().
|
||||
WARN_UNUSED really_inline int finish() {
|
||||
/* the string might not be NULL terminated. Fail if i + 1 is already past the number of structural indexes. */
|
||||
if ( i + 1 > pj.n_structural_indexes ) {
|
||||
return set_error_code(TAPE_ERROR);
|
||||
}
|
||||
bool finished = i + 1 == pj.n_structural_indexes; // true when no structural indexes remain after this document
|
||||
if (finished && buf[idx+2] != '\0') { // NOTE(review): presumably relies on readable bytes after idx (padding) -- confirm
|
||||
return set_error_code(TAPE_ERROR);
|
||||
}
|
||||
pop_root_scope();
|
||||
if (depth != 0) { // after popping the root scope the depth must be back to 0
|
||||
return set_error_code(TAPE_ERROR);
|
||||
}
|
||||
if (pj.containing_scope_offset[depth] != 0) { // the scope recorded at depth 0 must have offset 0
|
||||
return set_error_code(TAPE_ERROR);
|
||||
}
|
||||
|
||||
pj.valid = true;
|
||||
return set_error_code(finished ? SUCCESS : SUCCESS_AND_HAS_MORE);
|
||||
}
|
||||
};
|
||||
|
||||
/************
|
||||
* The JSON is parsed to a tape, see the accompanying tape.md file
|
||||
* for documentation.
|
||||
***********/
|
||||
WARN_UNUSED int
|
||||
unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj, size_t &next_json) {
|
||||
size_t i{next_json}; /* index of the structural character (0,1,2,3...) */
|
||||
size_t idx; /* location of the structural character in the input (buf) */
|
||||
uint8_t c; /* used to track the (structural) character we are looking at,
|
||||
updated */
|
||||
/* by UPDATE_CHAR macro */
|
||||
size_t depth = 0; /* could have an arbitrary starting depth */
|
||||
pj.init(); /* sets is_valid to false */
|
||||
/*//////////////////////////// START STATE /////////////////////////////
|
||||
*/
|
||||
SET_GOTO_START_CONTINUE()
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, 'r'); /* r for root, 0 is going to get overwritten */
|
||||
/* the root is used, if nothing else, to capture the size of the tape */
|
||||
depth++; /* everything starts at depth = 1, depth = 0 is just for the
|
||||
root, the root may contain an object, an array or something
|
||||
else. */
|
||||
if (depth >= pj.depth_capacity) {
|
||||
goto fail;
|
||||
}
|
||||
static constexpr unified_machine_addresses addresses = INIT_ADDRESSES();
|
||||
streaming_structural_parser parser(buf, len, pj, next_json);
|
||||
int result = parser.start(addresses.finish);
|
||||
if (result) { return result; }
|
||||
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case '{':
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
SET_GOTO_START_CONTINUE();
|
||||
depth++;
|
||||
if (depth >= pj.depth_capacity) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(
|
||||
0, c); /* strangely, moving this to object_begin slows things down */
|
||||
goto object_begin;
|
||||
case '[':
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
SET_GOTO_START_CONTINUE();
|
||||
depth++;
|
||||
if (depth >= pj.depth_capacity) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
goto array_begin;
|
||||
/* #define SIMDJSON_ALLOWANYTHINGINROOT
|
||||
* A JSON text is a serialized value. Note that certain previous
|
||||
* specifications of JSON constrained a JSON text to be an object or an
|
||||
* array. Implementations that generate only objects or arrays where a
|
||||
* JSON text is called for will be interoperable in the sense that all
|
||||
* implementations will accept these as conforming JSON texts.
|
||||
* https://tools.ietf.org/html/rfc8259
|
||||
* #ifdef SIMDJSON_ALLOWANYTHINGINROOT */
|
||||
case '"': {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 't': {
|
||||
/* we need to make a copy to make sure that the string is space
|
||||
* terminated.
|
||||
* this only applies to the JSON document made solely of the true value.
|
||||
* this will almost never be called in practice */
|
||||
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if (copy == nullptr) {
|
||||
goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
copy[len] = ' ';
|
||||
if (!is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
|
||||
free(copy);
|
||||
goto fail;
|
||||
}
|
||||
free(copy);
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
}
|
||||
case 'f': {
|
||||
/* we need to make a copy to make sure that the string is space
|
||||
* terminated.
|
||||
* this only applies to the JSON document made solely of the false
|
||||
* value.
|
||||
* this will almost never be called in practice */
|
||||
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if (copy == nullptr) {
|
||||
goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
copy[len] = ' ';
|
||||
if (!is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
|
||||
free(copy);
|
||||
goto fail;
|
||||
}
|
||||
free(copy);
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
}
|
||||
case 'n': {
|
||||
/* we need to make a copy to make sure that the string is space
|
||||
* terminated.
|
||||
* this only applies to the JSON document made solely of the null value.
|
||||
* this will almost never be called in practice */
|
||||
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if (copy == nullptr) {
|
||||
goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
copy[len] = ' ';
|
||||
if (!is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
|
||||
free(copy);
|
||||
goto fail;
|
||||
}
|
||||
free(copy);
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
}
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9': {
|
||||
/* we need to make a copy to make sure that the string is space
|
||||
* terminated.
|
||||
* this is done only for JSON documents made of a sole number
|
||||
* this will almost never be called in practice. We terminate with a
|
||||
* space
|
||||
* because we do not want to allow NULLs in the middle of a number
|
||||
* (whereas a
|
||||
* space in the middle of a number would be identified in stage 1). */
|
||||
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if (copy == nullptr) {
|
||||
goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
copy[len] = ' ';
|
||||
if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx,
|
||||
false)) {
|
||||
free(copy);
|
||||
goto fail;
|
||||
}
|
||||
free(copy);
|
||||
break;
|
||||
}
|
||||
case '-': {
|
||||
/* we need to make a copy to make sure that the string is NULL
|
||||
* terminated.
|
||||
* this is done only for JSON documents made of a sole number
|
||||
* this will almost never be called in practice */
|
||||
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if (copy == nullptr) {
|
||||
goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
copy[len] = ' ';
|
||||
if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, true)) {
|
||||
free(copy);
|
||||
goto fail;
|
||||
}
|
||||
free(copy);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
goto fail;
|
||||
}
|
||||
start_continue:
|
||||
/* the string might not be NULL terminated. */
|
||||
if (i + 1 == pj.n_structural_indexes && buf[idx+2] == '\0') {
|
||||
goto succeed;
|
||||
} else if(depth == 1 && i<=pj.n_structural_indexes) {
|
||||
goto succeedAndHasMore;
|
||||
} else {
|
||||
goto fail;
|
||||
}
|
||||
/*//////////////////////////// OBJECT STATES ///////////////////////////*/
|
||||
//
|
||||
// Read first value
|
||||
//
|
||||
switch (parser.c) {
|
||||
case '{':
|
||||
FAIL_IF( parser.push_start_scope(addresses.finish) );
|
||||
goto object_begin;
|
||||
case '[':
|
||||
FAIL_IF( parser.push_start_scope(addresses.finish) );
|
||||
goto array_begin;
|
||||
case '"':
|
||||
FAIL_IF( parser.parse_string() );
|
||||
goto finish;
|
||||
case 't': case 'f': case 'n':
|
||||
FAIL_IF(
|
||||
parser.with_space_terminated_copy([&](auto copy, auto idx) {
|
||||
return parser.parse_atom(copy, idx);
|
||||
})
|
||||
);
|
||||
goto finish;
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
FAIL_IF(
|
||||
parser.with_space_terminated_copy([&](auto copy, auto idx) {
|
||||
return parser.parse_number(copy, idx, false);
|
||||
})
|
||||
);
|
||||
goto finish;
|
||||
case '-':
|
||||
FAIL_IF(
|
||||
parser.with_space_terminated_copy([&](auto copy, auto idx) {
|
||||
return parser.parse_number(copy, idx, true);
|
||||
})
|
||||
);
|
||||
goto finish;
|
||||
default:
|
||||
goto error;
|
||||
}
|
||||
|
||||
object_begin:
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case '"': {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
goto object_key_state;
|
||||
}
|
||||
case '}':
|
||||
goto scope_end; /* could also go to object_continue */
|
||||
default:
|
||||
goto fail;
|
||||
}
|
||||
//
|
||||
// Object parser states
|
||||
//
|
||||
object_begin:
|
||||
parser.advance_char();
|
||||
switch (parser.c) {
|
||||
case '"': {
|
||||
FAIL_IF( parser.parse_string() );
|
||||
goto object_key_parser;
|
||||
}
|
||||
case '}':
|
||||
goto scope_end; // could also go to object_continue
|
||||
default:
|
||||
goto error;
|
||||
}
|
||||
|
||||
object_key_state:
|
||||
UPDATE_CHAR();
|
||||
if (c != ':') {
|
||||
goto fail;
|
||||
}
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case '"': {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 't':
|
||||
if (!is_valid_true_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case 'f':
|
||||
if (!is_valid_false_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case 'n':
|
||||
if (!is_valid_null_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9': {
|
||||
if (!parse_number(buf, pj, idx, false)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '-': {
|
||||
if (!parse_number(buf, pj, idx, true)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '{': {
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, c); /* here the compilers knows what c is so this gets
|
||||
optimized */
|
||||
/* we have not yet encountered } so we need to come back for it */
|
||||
SET_GOTO_OBJECT_CONTINUE()
|
||||
/* we found an object inside an object, so we need to increment the
|
||||
* depth */
|
||||
depth++;
|
||||
if (depth >= pj.depth_capacity) {
|
||||
goto fail;
|
||||
}
|
||||
object_key_parser:
|
||||
FAIL_IF( parser.advance_char() != ':' );
|
||||
|
||||
goto object_begin;
|
||||
}
|
||||
case '[': {
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, c); /* here the compilers knows what c is so this gets
|
||||
optimized */
|
||||
/* we have not yet encountered } so we need to come back for it */
|
||||
SET_GOTO_OBJECT_CONTINUE()
|
||||
/* we found an array inside an object, so we need to increment the depth
|
||||
*/
|
||||
depth++;
|
||||
if (depth >= pj.depth_capacity) {
|
||||
goto fail;
|
||||
}
|
||||
goto array_begin;
|
||||
}
|
||||
default:
|
||||
goto fail;
|
||||
}
|
||||
parser.advance_char();
|
||||
GOTO( parser.parse_value(addresses, addresses.object_continue) );
|
||||
|
||||
object_continue:
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case ',':
|
||||
UPDATE_CHAR();
|
||||
if (c != '"') {
|
||||
goto fail;
|
||||
} else {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
goto object_key_state;
|
||||
}
|
||||
case '}':
|
||||
goto scope_end;
|
||||
default:
|
||||
goto fail;
|
||||
}
|
||||
object_continue:
|
||||
switch (parser.advance_char()) {
|
||||
case ',':
|
||||
FAIL_IF( parser.advance_char() != '"' );
|
||||
FAIL_IF( parser.parse_string() );
|
||||
goto object_key_parser;
|
||||
case '}':
|
||||
goto scope_end;
|
||||
default:
|
||||
goto error;
|
||||
}
|
||||
|
||||
/*//////////////////////////// COMMON STATE ///////////////////////////*/
|
||||
scope_end:
|
||||
CONTINUE( parser.pop_scope() );
|
||||
|
||||
scope_end:
|
||||
/* write our tape location to the header scope */
|
||||
depth--;
|
||||
pj.write_tape(pj.containing_scope_offset[depth], c);
|
||||
pj.annotate_previous_loc(pj.containing_scope_offset[depth],
|
||||
pj.get_current_loc());
|
||||
/* goto saved_state */
|
||||
GOTO_CONTINUE()
|
||||
//
|
||||
// Array parser states
|
||||
//
|
||||
array_begin:
|
||||
if (parser.advance_char() == ']') {
|
||||
goto scope_end; // could also go to array_continue
|
||||
}
|
||||
|
||||
/*//////////////////////////// ARRAY STATES ///////////////////////////*/
|
||||
array_begin:
|
||||
UPDATE_CHAR();
|
||||
if (c == ']') {
|
||||
goto scope_end; /* could also go to array_continue */
|
||||
}
|
||||
main_array_switch:
|
||||
/* we call update char on all paths in, so we can peek at parser.c
|
||||
* on paths that can accept a close square brace (post-, and at start) */
|
||||
GOTO( parser.parse_value(addresses, addresses.array_continue) );
|
||||
|
||||
main_array_switch:
|
||||
/* we call update char on all paths in, so we can peek at c on the
|
||||
* on paths that can accept a close square brace (post-, and at start) */
|
||||
switch (c) {
|
||||
case '"': {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 't':
|
||||
if (!is_valid_true_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case 'f':
|
||||
if (!is_valid_false_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case 'n':
|
||||
if (!is_valid_null_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break; /* goto array_continue; */
|
||||
array_continue:
|
||||
switch (parser.advance_char()) {
|
||||
case ',':
|
||||
parser.advance_char();
|
||||
goto main_array_switch;
|
||||
case ']':
|
||||
goto scope_end;
|
||||
default:
|
||||
goto error;
|
||||
}
|
||||
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9': {
|
||||
if (!parse_number(buf, pj, idx, false)) {
|
||||
goto fail;
|
||||
}
|
||||
break; /* goto array_continue; */
|
||||
}
|
||||
case '-': {
|
||||
if (!parse_number(buf, pj, idx, true)) {
|
||||
goto fail;
|
||||
}
|
||||
break; /* goto array_continue; */
|
||||
}
|
||||
case '{': {
|
||||
/* we have not yet encountered ] so we need to come back for it */
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, c); /* here the compilers knows what c is so this gets
|
||||
optimized */
|
||||
SET_GOTO_ARRAY_CONTINUE()
|
||||
/* we found an object inside an array, so we need to increment the depth
|
||||
*/
|
||||
depth++;
|
||||
if (depth >= pj.depth_capacity) {
|
||||
goto fail;
|
||||
}
|
||||
finish:
|
||||
next_json = parser.i;
|
||||
return parser.finish();
|
||||
|
||||
goto object_begin;
|
||||
}
|
||||
case '[': {
|
||||
/* we have not yet encountered ] so we need to come back for it */
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, c); /* here the compilers knows what c is so this gets
|
||||
optimized */
|
||||
SET_GOTO_ARRAY_CONTINUE()
|
||||
/* we found an array inside an array, so we need to increment the depth
|
||||
*/
|
||||
depth++;
|
||||
if (depth >= pj.depth_capacity) {
|
||||
goto fail;
|
||||
}
|
||||
goto array_begin;
|
||||
}
|
||||
default:
|
||||
goto fail;
|
||||
}
|
||||
|
||||
array_continue:
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case ',':
|
||||
UPDATE_CHAR();
|
||||
goto main_array_switch;
|
||||
case ']':
|
||||
goto scope_end;
|
||||
default:
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/*//////////////////////////// FINAL STATES ///////////////////////////*/
|
||||
succeedAndHasMore:
|
||||
depth--;
|
||||
if (pj.containing_scope_offset[depth] != 0) {
|
||||
fprintf(stderr, "internal bug\n");
|
||||
abort();
|
||||
}
|
||||
pj.annotate_previous_loc(pj.containing_scope_offset[depth],
|
||||
pj.get_current_loc());
|
||||
pj.write_tape(pj.containing_scope_offset[depth], 'r'); /* r is root */
|
||||
|
||||
|
||||
next_json = i;
|
||||
|
||||
pj.valid = true;
|
||||
pj.error_code = simdjson::SUCCESS_AND_HAS_MORE;
|
||||
return pj.error_code;
|
||||
|
||||
succeed:
|
||||
depth--;
|
||||
if (depth != 0) {
|
||||
fprintf(stderr, "internal bug\n");
|
||||
abort();
|
||||
}
|
||||
if (pj.containing_scope_offset[depth] != 0) {
|
||||
fprintf(stderr, "internal bug\n");
|
||||
abort();
|
||||
}
|
||||
pj.annotate_previous_loc(pj.containing_scope_offset[depth],
|
||||
pj.get_current_loc());
|
||||
pj.write_tape(pj.containing_scope_offset[depth], 'r'); /* r is root */
|
||||
|
||||
pj.valid = true;
|
||||
pj.error_code = simdjson::SUCCESS;
|
||||
return pj.error_code;
|
||||
fail:
|
||||
/* we do not need the next line because this is done by pj.init(),
|
||||
* pessimistically.
|
||||
* pj.is_valid = false;
|
||||
* At this point in the code, we have all the time in the world.
|
||||
* Note that we know exactly where we are in the document so we could,
|
||||
* without any overhead on the processing code, report a specific
|
||||
* location.
|
||||
* We could even trigger special code paths to assess what happened
|
||||
* carefully,
|
||||
* all without any added cost. */
|
||||
if (depth >= pj.depth_capacity) {
|
||||
pj.error_code = simdjson::DEPTH_ERROR;
|
||||
return pj.error_code;
|
||||
}
|
||||
switch (c) {
|
||||
case '"':
|
||||
pj.error_code = simdjson::STRING_ERROR;
|
||||
return pj.error_code;
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
case '-':
|
||||
pj.error_code = simdjson::NUMBER_ERROR;
|
||||
return pj.error_code;
|
||||
case 't':
|
||||
pj.error_code = simdjson::T_ATOM_ERROR;
|
||||
return pj.error_code;
|
||||
case 'n':
|
||||
pj.error_code = simdjson::N_ATOM_ERROR;
|
||||
return pj.error_code;
|
||||
case 'f':
|
||||
pj.error_code = simdjson::F_ATOM_ERROR;
|
||||
return pj.error_code;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
pj.error_code = simdjson::TAPE_ERROR;
|
||||
return pj.error_code;
|
||||
error:
|
||||
return parser.error();
|
||||
}
|
||||
|
||||
} // namespace stage2
|
|
@ -24,13 +24,13 @@ namespace simdjson {
|
|||
template <>
|
||||
WARN_UNUSED int
|
||||
unified_machine<Architecture::HASWELL>(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||
return haswell::unified_machine(buf, len, pj);
|
||||
return haswell::stage2::unified_machine(buf, len, pj);
|
||||
}
|
||||
|
||||
template <>
|
||||
WARN_UNUSED int
|
||||
unified_machine<Architecture::HASWELL>(const uint8_t *buf, size_t len, ParsedJson &pj, size_t &next_json) {
|
||||
return haswell::unified_machine(buf, len, pj, next_json);
|
||||
return haswell::stage2::unified_machine(buf, len, pj, next_json);
|
||||
}
|
||||
|
||||
} // namespace simdjson
|
||||
|
|
|
@ -24,13 +24,13 @@ namespace simdjson {
|
|||
template <>
|
||||
WARN_UNUSED int
|
||||
unified_machine<Architecture::WESTMERE>(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||
return westmere::unified_machine(buf, len, pj);
|
||||
return westmere::stage2::unified_machine(buf, len, pj);
|
||||
}
|
||||
|
||||
template <>
|
||||
WARN_UNUSED int
|
||||
unified_machine<Architecture::WESTMERE>(const uint8_t *buf, size_t len, ParsedJson &pj, size_t &next_json) {
|
||||
return westmere::unified_machine(buf, len, pj, next_json);
|
||||
return westmere::stage2::unified_machine(buf, len, pj, next_json);
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue