Fixing issue. (#193)
This commit is contained in:
parent
8914b12db5
commit
d7f7f1b200
|
@ -92,6 +92,7 @@ padded_string p = get_corpus(filename);
|
|||
ParsedJson pj = build_parsed_json(p); // do the parsing
|
||||
if( ! pj.isValid() ) {
|
||||
// something went wrong
|
||||
std::cout << pj.getErrorMsg() << std::endl;
|
||||
}
|
||||
```
|
||||
|
||||
|
@ -127,6 +128,7 @@ std::string mystring = ... //
|
|||
ParsedJson pj = build_parsed_json(mystring); // do the parsing
|
||||
if( ! pj.isValid() ) {
|
||||
// something went wrong
|
||||
std::cout << pj.getErrorMsg() << std::endl;
|
||||
}
|
||||
```
|
||||
|
||||
|
@ -148,6 +150,7 @@ int main(int argc, char *argv[]) {
|
|||
ParsedJson pj = build_parsed_json(p); // do the parsing
|
||||
if( ! pj.isValid() ) {
|
||||
std::cout << "not valid" << std::endl;
|
||||
std::cout << pj.getErrorMsg() << std::endl;
|
||||
} else {
|
||||
std::cout << "valid" << std::endl;
|
||||
}
|
||||
|
|
|
@ -144,7 +144,7 @@ int main(int argc, char *argv[]) {
|
|||
std::cout << "[verbose] allocated memory for parsed JSON " << std::endl;
|
||||
}
|
||||
unified.start();
|
||||
isok = find_structural_bits(p.data(), p.size(), pj);
|
||||
isok = (find_structural_bits(p.data(), p.size(), pj) == simdjson::SUCCESS);
|
||||
unified.end(results);
|
||||
cy1 += results[0];
|
||||
cl1 += results[1];
|
||||
|
@ -185,18 +185,20 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
|
||||
auto start = std::chrono::steady_clock::now();
|
||||
isok = find_structural_bits(p.data(), p.size(), pj);
|
||||
isok = (find_structural_bits(p.data(), p.size(), pj) == simdjson::SUCCESS);
|
||||
isok = isok && (simdjson::SUCCESS == unified_machine(p.data(), p.size(), pj));
|
||||
auto end = std::chrono::steady_clock::now();
|
||||
std::chrono::duration<double> secs = end - start;
|
||||
res[i] = secs.count();
|
||||
if(! isok) {
|
||||
std::cerr << pj.getErrorMsg() << std::endl;
|
||||
std::cerr << "Could not parse. " << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
ParsedJson pj = build_parsed_json(p); // do the parsing again to get the stats
|
||||
if (!pj.isValid()) {
|
||||
std::cerr << pj.getErrorMsg() << std::endl;
|
||||
std::cerr << "Could not parse. " << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
|
|
@ -180,7 +180,7 @@ int main(int argc, char *argv[]) {
|
|||
results.resize(evts.size());
|
||||
for (uint32_t i = 0; i < iterations; i++) {
|
||||
unified.start();
|
||||
bool isok = find_structural_bits(p.data(), p.size(), pj);
|
||||
bool isok = (find_structural_bits(p.data(), p.size(), pj) == simdjson::SUCCESS);
|
||||
unified.end(results);
|
||||
|
||||
cy1 += results[0];
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
|
||||
// these are the chars that can follow a true/false/null or number atom
|
||||
// and nothing else
|
||||
const uint32_t structural_or_whitespace_negated[256] = {
|
||||
const uint32_t structural_or_whitespace_or_null_negated[256] = {
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
|
||||
|
@ -28,13 +28,37 @@ const uint32_t structural_or_whitespace_negated[256] = {
|
|||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
||||
|
||||
// return non-zero if not a structural or whitespace char and not a NULL (0) byte
|
||||
// zero otherwise
|
||||
really_inline uint32_t is_not_structural_or_whitespace_or_null(uint8_t c) {
|
||||
return structural_or_whitespace_or_null_negated[c];
|
||||
}
|
||||
|
||||
|
||||
const uint32_t structural_or_whitespace_negated[256] = {
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
|
||||
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
|
||||
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
||||
|
||||
// return non-zero if not a structural or whitespace char
|
||||
// zero otherwise
|
||||
really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) {
|
||||
return structural_or_whitespace_negated[c];
|
||||
}
|
||||
|
||||
const uint32_t structural_or_whitespace[256] = {
|
||||
const uint32_t structural_or_whitespace_or_null[256] = {
|
||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
@ -47,6 +71,24 @@ const uint32_t structural_or_whitespace[256] = {
|
|||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
really_inline uint32_t is_structural_or_whitespace_or_null(uint8_t c) {
|
||||
return structural_or_whitespace_or_null[c];
|
||||
}
|
||||
|
||||
|
||||
const uint32_t structural_or_whitespace[256] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
really_inline uint32_t is_structural_or_whitespace(uint8_t c) {
|
||||
return structural_or_whitespace[c];
|
||||
}
|
||||
|
|
|
@ -90,7 +90,7 @@ static inline bool is_integer(char c) {
|
|||
// probably frequent and it is harder than it looks. We are building all of this
|
||||
// just to differentiate between 0x1 (invalid), 0,1 (valid) 0e1 (valid)...
|
||||
const bool structural_or_whitespace_or_exponent_or_decimal_negated[256] = {
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
|
||||
|
@ -103,7 +103,7 @@ const bool structural_or_whitespace_or_exponent_or_decimal_negated[256] = {
|
|||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
||||
|
||||
really_inline bool
|
||||
is_not_structural_or_whitespace_or_exponent_or_decimal_or_null(unsigned char c) {
|
||||
is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) {
|
||||
return structural_or_whitespace_or_exponent_or_decimal_negated[c];
|
||||
}
|
||||
|
||||
|
@ -380,6 +380,12 @@ static never_inline bool parse_large_integer(const uint8_t *const buf,
|
|||
|
||||
// parse the number at buf + offset
|
||||
// define JSON_TEST_NUMBERS for unit testing
|
||||
//
|
||||
// It is assumed that the number is followed by a structural (',', ':', '{', '}', '[', ']') character
|
||||
// or a white space character. If that is not the case (e.g., when the JSON document
|
||||
// is made of a single number), then it is necessary to copy the content and append
|
||||
// a space before calling this function.
|
||||
//
|
||||
static really_inline bool parse_number(const uint8_t *const buf,
|
||||
ParsedJson &pj,
|
||||
const uint32_t offset,
|
||||
|
@ -405,7 +411,7 @@ static really_inline bool parse_number(const uint8_t *const buf,
|
|||
uint64_t i; // an unsigned int avoids signed overflows (which are bad)
|
||||
if (*p == '0') { // 0 cannot be followed by an integer
|
||||
++p;
|
||||
if (is_not_structural_or_whitespace_or_exponent_or_decimal_or_null(*p)) {
|
||||
if (is_not_structural_or_whitespace_or_exponent_or_decimal(*p)) {
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
foundInvalidNumber(buf + offset);
|
||||
#endif
|
||||
|
@ -430,7 +436,6 @@ static really_inline bool parse_number(const uint8_t *const buf,
|
|||
++p;
|
||||
}
|
||||
}
|
||||
|
||||
int64_t exponent = 0;
|
||||
bool is_float = false;
|
||||
if ('.' == *p) {
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#include <cstring>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
|
||||
#include "simdjson/simdjson.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/jsonformatutils.h"
|
||||
#include "simdjson/portability.h"
|
||||
|
@ -34,8 +34,16 @@ public:
|
|||
WARN_UNUSED
|
||||
bool allocateCapacity(size_t len, size_t maxdepth = DEFAULTMAXDEPTH);
|
||||
|
||||
// returns true if the document parsed was valid
|
||||
bool isValid() const;
|
||||
|
||||
// return an error code corresponding to the last parsing attempt, see simdjson.h
|
||||
// will return simdjson::UNITIALIZED if no parsing was attempted
|
||||
int getErrorCode() const;
|
||||
|
||||
// return the string equivalent of "getErrorCode"
|
||||
std::string getErrorMsg() const;
|
||||
|
||||
// deallocate memory and set capacity to zero, called automatically by the
|
||||
// destructor
|
||||
void deallocate();
|
||||
|
@ -297,6 +305,7 @@ private:
|
|||
uint8_t *string_buf; // should be at least bytecapacity
|
||||
uint8_t *current_string_buf_loc;
|
||||
bool isvalid{false};
|
||||
int errorcode{simdjson::UNITIALIZED};
|
||||
|
||||
private :
|
||||
|
||||
|
|
|
@ -15,7 +15,12 @@ struct simdjson {
|
|||
F_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'f'
|
||||
N_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'n'
|
||||
NUMBER_ERROR, // Problem while parsing a number
|
||||
UTF8_ERROR // the input is not valid UTF-8
|
||||
UTF8_ERROR, // the input is not valid UTF-8
|
||||
UNITIALIZED, // unknown error, or uninitialized document
|
||||
EMPTY, // no structural document found
|
||||
UNESCAPED_CHARS, // found unescaped characters in a string.
|
||||
UNCLOSED_STRING, // missing quote at the end
|
||||
UNEXPECTED_ERROR // indicative of a bug in simdjson
|
||||
};
|
||||
static const std::string& errorMsg(const int);
|
||||
};
|
||||
|
|
|
@ -6,9 +6,9 @@
|
|||
struct ParsedJson;
|
||||
|
||||
WARN_UNUSED
|
||||
bool find_structural_bits(const uint8_t *buf, size_t len, ParsedJson &pj);
|
||||
int find_structural_bits(const uint8_t *buf, size_t len, ParsedJson &pj);
|
||||
|
||||
WARN_UNUSED
|
||||
bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj);
|
||||
int find_structural_bits(const char *buf, size_t len, ParsedJson &pj);
|
||||
|
||||
#endif
|
||||
|
|
Binary file not shown.
|
@ -0,0 +1 @@
|
|||
44
|
Binary file not shown.
Binary file not shown.
|
@ -42,10 +42,11 @@ int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifnee
|
|||
reallocated = true;
|
||||
}
|
||||
}
|
||||
bool stage1_is_ok = find_structural_bits(buf, len, pj);
|
||||
if(!stage1_is_ok) {
|
||||
return simdjson::UTF8_ERROR;
|
||||
}
|
||||
int stage1_is_ok = find_structural_bits(buf, len, pj);
|
||||
if(stage1_is_ok != simdjson::SUCCESS) {
|
||||
pj.errorcode = stage1_is_ok;
|
||||
return pj.errorcode;
|
||||
}
|
||||
int res = unified_machine(buf, len, pj);
|
||||
if(reallocated) { aligned_free((void*)buf);}
|
||||
return res;
|
||||
|
@ -56,9 +57,7 @@ ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneede
|
|||
ParsedJson pj;
|
||||
bool ok = pj.allocateCapacity(len);
|
||||
if(ok) {
|
||||
int res = json_parse(buf, len, pj, reallocifneeded);
|
||||
ok = res == simdjson::SUCCESS;
|
||||
assert(ok == pj.isValid());
|
||||
(void)json_parse(buf, len, pj, reallocifneeded);
|
||||
} else {
|
||||
std::cerr << "failure during memory allocation " << std::endl;
|
||||
}
|
||||
|
|
|
@ -92,6 +92,14 @@ bool ParsedJson::isValid() const {
|
|||
return isvalid;
|
||||
}
|
||||
|
||||
int ParsedJson::getErrorCode() const {
|
||||
return errorcode;
|
||||
}
|
||||
|
||||
std::string ParsedJson::getErrorMsg() const {
|
||||
return simdjson::errorMsg(errorcode);
|
||||
}
|
||||
|
||||
void ParsedJson::deallocate() {
|
||||
bytecapacity = 0;
|
||||
depthcapacity = 0;
|
||||
|
|
|
@ -11,7 +11,11 @@ const std::map<int, const std::string> errorStrings = {
|
|||
{simdjson::F_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'f'"},
|
||||
{simdjson::N_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'n'"},
|
||||
{simdjson::NUMBER_ERROR, "Problem while parsing a number"},
|
||||
{simdjson::UTF8_ERROR, "The input is not valid UTF-8"}
|
||||
{simdjson::UTF8_ERROR, "The input is not valid UTF-8"},
|
||||
{simdjson::UNITIALIZED, "Unitialized"},
|
||||
{simdjson::EMPTY, "Empty"},
|
||||
{simdjson::UNESCAPED_CHARS, "Within strings, some characters must be escapted, we found unescapted characters"},
|
||||
{simdjson::UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson"},
|
||||
};
|
||||
|
||||
const std::string& simdjson::errorMsg(const int errorCode) {
|
||||
|
|
|
@ -609,7 +609,7 @@ really_inline uint64_t finalize_structurals(
|
|||
// following it.
|
||||
|
||||
// a qualified predecessor is something that can happen 1 position before a
|
||||
// psuedo-structural character
|
||||
// pseudo-structural character
|
||||
uint64_t pseudo_pred = structurals | whitespace;
|
||||
|
||||
uint64_t shifted_pseudo_pred =
|
||||
|
@ -626,13 +626,13 @@ really_inline uint64_t finalize_structurals(
|
|||
}
|
||||
|
||||
WARN_UNUSED
|
||||
/*never_inline*/ bool find_structural_bits(const uint8_t *buf, size_t len,
|
||||
/*never_inline*/ int find_structural_bits(const uint8_t *buf, size_t len,
|
||||
ParsedJson &pj) {
|
||||
if (len > pj.bytecapacity) {
|
||||
std::cerr << "Your ParsedJson object only supports documents up to "
|
||||
<< pj.bytecapacity << " bytes but you are trying to process " << len
|
||||
<< " bytes" << std::endl;
|
||||
return false;
|
||||
return simdjson::CAPACITY;
|
||||
}
|
||||
uint32_t *base_ptr = pj.structural_indexes;
|
||||
uint32_t base = 0;
|
||||
|
@ -740,7 +740,7 @@ WARN_UNUSED
|
|||
|
||||
// is last string quote closed?
|
||||
if (prev_iter_inside_quote) {
|
||||
return false;
|
||||
return simdjson::UNCLOSED_STRING;
|
||||
}
|
||||
|
||||
// finally, flatten out the remaining structurals from the last iteration
|
||||
|
@ -750,12 +750,12 @@ WARN_UNUSED
|
|||
// a valid JSON file cannot have zero structural indexes - we should have
|
||||
// found something
|
||||
if (pj.n_structural_indexes == 0u) {
|
||||
printf("wacky exit\n");
|
||||
return false;
|
||||
fprintf(stderr, "Empty document?\n");
|
||||
return simdjson::EMPTY;
|
||||
}
|
||||
if (base_ptr[pj.n_structural_indexes - 1] > len) {
|
||||
fprintf(stderr, "Internal bug\n");
|
||||
return false;
|
||||
return simdjson::UNEXPECTED_ERROR;
|
||||
}
|
||||
if (len != base_ptr[pj.n_structural_indexes - 1]) {
|
||||
// the string might not be NULL terminated, but we add a virtual NULL ending
|
||||
|
@ -765,16 +765,16 @@ printf("wacky exit\n");
|
|||
// make it safe to dereference one beyond this array
|
||||
base_ptr[pj.n_structural_indexes] = 0;
|
||||
if (error_mask) {
|
||||
printf("had error mask\n");
|
||||
return false;
|
||||
fprintf(stderr, "Unescaped characters\n");
|
||||
return simdjson::UNESCAPED_CHARS;
|
||||
}
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
return _mm256_testz_si256(has_error, has_error) != 0;
|
||||
return _mm256_testz_si256(has_error, has_error) == 0 ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
|
||||
#else
|
||||
return true;
|
||||
return simdjson::SUCCESS;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
|
||||
int find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
|
||||
return find_structural_bits(reinterpret_cast<const uint8_t *>(buf), len, pj);
|
||||
}
|
||||
|
|
|
@ -80,9 +80,10 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
uint8_t c; // used to track the (structural) character we are looking at, updated
|
||||
// by UPDATE_CHAR macro
|
||||
uint32_t depth = 0; // could have an arbitrary starting depth
|
||||
pj.init();
|
||||
pj.init(); // sets isvalid to false
|
||||
if(pj.bytecapacity < len) {
|
||||
return simdjson::CAPACITY;
|
||||
pj.errorcode = simdjson::CAPACITY;
|
||||
return pj.errorcode;
|
||||
}
|
||||
// this macro reads the next structural character, updating idx, i and c.
|
||||
#define UPDATE_CHAR() \
|
||||
|
@ -149,7 +150,7 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
break;
|
||||
}
|
||||
case 't': {
|
||||
// we need to make a copy to make sure that the string is NULL terminated.
|
||||
// we need to make a copy to make sure that the string is space terminated.
|
||||
// this only applies to the JSON document made solely of the true value.
|
||||
// this will almost never be called in practice
|
||||
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
|
@ -157,7 +158,7 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
copy[len] = '\0';
|
||||
copy[len] = ' ';
|
||||
if (!is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
|
||||
free(copy);
|
||||
goto fail;
|
||||
|
@ -167,7 +168,7 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
break;
|
||||
}
|
||||
case 'f': {
|
||||
// we need to make a copy to make sure that the string is NULL terminated.
|
||||
// we need to make a copy to make sure that the string is space terminated.
|
||||
// this only applies to the JSON document made solely of the false value.
|
||||
// this will almost never be called in practice
|
||||
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
|
@ -175,7 +176,7 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
copy[len] = '\0';
|
||||
copy[len] = ' ';
|
||||
if (!is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
|
||||
free(copy);
|
||||
goto fail;
|
||||
|
@ -185,7 +186,7 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
break;
|
||||
}
|
||||
case 'n': {
|
||||
// we need to make a copy to make sure that the string is NULL terminated.
|
||||
// we need to make a copy to make sure that the string is space terminated.
|
||||
// this only applies to the JSON document made solely of the null value.
|
||||
// this will almost never be called in practice
|
||||
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
|
@ -193,7 +194,7 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
copy[len] = '\0';
|
||||
copy[len] = ' ';
|
||||
if (!is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
|
||||
free(copy);
|
||||
goto fail;
|
||||
|
@ -212,15 +213,17 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
case '7':
|
||||
case '8':
|
||||
case '9': {
|
||||
// we need to make a copy to make sure that the string is NULL terminated.
|
||||
// we need to make a copy to make sure that the string is space terminated.
|
||||
// this is done only for JSON documents made of a sole number
|
||||
// this will almost never be called in practice
|
||||
// this will almost never be called in practice. We terminate with a space
|
||||
// because we do not want to allow NULLs in the middle of a number (whereas a
|
||||
// space in the middle of a number would be identified in stage 1).
|
||||
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if(copy == nullptr) {
|
||||
goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
copy[len] = '\0';
|
||||
copy[len] = ' ';
|
||||
if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, false)) {
|
||||
free(copy);
|
||||
goto fail;
|
||||
|
@ -522,22 +525,25 @@ succeed:
|
|||
pj.get_current_loc());
|
||||
pj.write_tape(pj.containing_scope_offset[depth], 'r'); // r is root
|
||||
|
||||
|
||||
|
||||
pj.isvalid = true;
|
||||
return simdjson::SUCCESS;
|
||||
pj.errorcode = simdjson::SUCCESS;
|
||||
return pj.errorcode;
|
||||
fail:
|
||||
// we do not need the next line because this is done by pj.init(), pessimistically.
|
||||
// pj.isvalid = false;
|
||||
// At this point in the code, we have all the time in the world.
|
||||
// Note that we know exactly where we are in the document so we could,
|
||||
// without any overhead on the processing code, report a specific location.
|
||||
// We could even trigger special code paths to assess what happened carefully,
|
||||
// all without any added cost.
|
||||
if (depth >= pj.depthcapacity) {
|
||||
return simdjson::DEPTH_ERROR;
|
||||
pj.errorcode = simdjson::DEPTH_ERROR;
|
||||
return pj.errorcode;
|
||||
}
|
||||
switch(c) {
|
||||
case '"':
|
||||
return simdjson::STRING_ERROR;
|
||||
case '"':
|
||||
pj.errorcode = simdjson::STRING_ERROR;
|
||||
return pj.errorcode;
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
|
@ -549,17 +555,22 @@ fail:
|
|||
case '8':
|
||||
case '9':
|
||||
case '-':
|
||||
return simdjson::NUMBER_ERROR;
|
||||
pj.errorcode = simdjson::NUMBER_ERROR;
|
||||
return pj.errorcode;
|
||||
case 't':
|
||||
return simdjson::T_ATOM_ERROR;
|
||||
pj.errorcode = simdjson::T_ATOM_ERROR;
|
||||
return pj.errorcode;
|
||||
case 'n':
|
||||
return simdjson::N_ATOM_ERROR;
|
||||
pj.errorcode = simdjson::N_ATOM_ERROR;
|
||||
return pj.errorcode;
|
||||
case 'f':
|
||||
return simdjson::F_ATOM_ERROR;
|
||||
pj.errorcode = simdjson::F_ATOM_ERROR;
|
||||
return pj.errorcode;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return simdjson::TAPE_ERROR;
|
||||
pj.errorcode = simdjson::TAPE_ERROR;
|
||||
return pj.errorcode;
|
||||
}
|
||||
|
||||
int unified_machine(const char *buf, size_t len, ParsedJson &pj) {
|
||||
|
|
Loading…
Reference in New Issue