From bf9b1b14576c5708549ec74a7a047be1d1c7c562 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 13 Mar 2019 21:02:39 -0400 Subject: [PATCH] New version (mostly setting the singleheader version in sync). --- CMakeLists.txt | 4 +- include/simdjson/simdjson_version.h | 4 +- singleheader/amalgamation_demo.cpp | 2 +- singleheader/simdjson.cpp | 99 +++++--- singleheader/simdjson.h | 355 +++++++++++++++++----------- 5 files changed, 297 insertions(+), 167 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ffd047a5..cc8930fa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,8 +11,8 @@ project(simdjson) set(SIMDJSON_LIB_NAME simdjson) set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 1) -set(PROJECT_VERSION_PATCH 0) -set(SIMDJSON_LIB_VERSION "0.1.0" CACHE STRING "simdjson library version") +set(PROJECT_VERSION_PATCH 1) +set(SIMDJSON_LIB_VERSION "0.1.1" CACHE STRING "simdjson library version") set(SIMDJSON_LIB_SOVERSION "0" CACHE STRING "simdjson library soversion") if(NOT MSVC) diff --git a/include/simdjson/simdjson_version.h b/include/simdjson/simdjson_version.h index 639ba72b..effee1ff 100644 --- a/include/simdjson/simdjson_version.h +++ b/include/simdjson/simdjson_version.h @@ -1,10 +1,10 @@ // /include/simdjson/simdjson_version.h automatically generated by release.py, do not change by hand #ifndef SIMDJSON_INCLUDE_SIMDJSON_VERSION #define SIMDJSON_INCLUDE_SIMDJSON_VERSION -#define SIMDJSON_VERSION 0.1.0 +#define SIMDJSON_VERSION 0.1.1 enum { SIMDJSON_VERSION_MAJOR = 0, SIMDJSON_VERSION_MINOR = 1, - SIMDJSON_VERSION_REVISION = 0 + SIMDJSON_VERSION_REVISION = 1 }; #endif // SIMDJSON_INCLUDE_SIMDJSON_VERSION diff --git a/singleheader/amalgamation_demo.cpp b/singleheader/amalgamation_demo.cpp index 64a8421a..24d37b45 100644 --- a/singleheader/amalgamation_demo.cpp +++ b/singleheader/amalgamation_demo.cpp @@ -1,4 +1,4 @@ -/* auto-generated on Wed 13 Mar 2019 20:02:04 EDT. Do not edit! */ +/* auto-generated on Wed 13 Mar 2019 21:02:37 EDT. Do not edit! */ #include #include "simdjson.h" diff --git a/singleheader/simdjson.cpp b/singleheader/simdjson.cpp index dca49ed2..2a85f24d 100644 --- a/singleheader/simdjson.cpp +++ b/singleheader/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on Wed 13 Mar 2019 20:02:04 EDT. Do not edit! */ +/* auto-generated on Wed 13 Mar 2019 21:02:37 EDT. Do not edit! */ #include "simdjson.h" /* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */ @@ -391,6 +391,16 @@ really_inline uint64_t cmp_mask_against_input(__m256i input_lo, return res_0 | (res_1 << 32); } +// find all values less than or equal than the content of maxval (using unsigned arithmetic) +really_inline uint64_t unsigned_lteq_against_input(__m256i input_lo, + __m256i input_hi, __m256i maxval) { + __m256i cmp_res_0 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval,input_lo),maxval); + uint64_t res_0 = static_cast(_mm256_movemask_epi8(cmp_res_0)); + __m256i cmp_res_1 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval,input_hi),maxval); + uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); + return res_0 | (res_1 << 32); +} + // return a bitvector indicating where we have characters that end an odd-length // sequence of backslashes (and thus change the behavior of the next character // to follow). A even-length sequence of backslashes, and, for that matter, the @@ -449,13 +459,21 @@ find_odd_backslash_sequences(__m256i input_lo, __m256i input_hi, // backslash sequences (of any length) will be detected elsewhere. really_inline uint64_t find_quote_mask_and_bits( __m256i input_lo, __m256i input_hi, uint64_t odd_ends, - uint64_t &prev_iter_inside_quote, uint64_t "e_bits) { + uint64_t &prev_iter_inside_quote, uint64_t "e_bits, uint64_t &error_mask) { quote_bits = cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('"')); quote_bits = quote_bits & ~odd_ends; + // remove from the valid quoted region the unescapted characters. uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128( _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0)); quote_mask ^= prev_iter_inside_quote; + // All Unicode characters may be placed within the + // quotation marks, except for the characters that MUST be escaped: + // quotation mark, reverse solidus, and the control characters (U+0000 + //through U+001F). + // https://tools.ietf.org/html/rfc8259 + uint64_t unescaped = unsigned_lteq_against_input(input_lo, input_hi, _mm256_set1_epi8(0x1F)); + error_mask |= quote_mask & unescaped; // right shift of a signed value expected to be well-defined and standard // compliant as of C++20, // John Regher from Utah U. says this is fine code @@ -558,11 +576,9 @@ really_inline uint64_t finalize_structurals( uint64_t quote_bits, uint64_t &prev_iter_ends_pseudo_pred) { // mask off anything inside quotes structurals &= ~quote_mask; - // add the real quote bits back into our bitmask as well, so we can // quickly traverse the strings we've spent all this trouble gathering structurals |= quote_bits; - // Now, establish "pseudo-structural characters". These are non-whitespace // characters that are (a) outside quotes and (b) have a predecessor that's // either whitespace or a structural character. This means that subsequent @@ -574,6 +590,7 @@ really_inline uint64_t finalize_structurals( // a qualified predecessor is something that can happen 1 position before an // psuedo-structural character uint64_t pseudo_pred = structurals | whitespace; + uint64_t shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred; prev_iter_ends_pseudo_pred = pseudo_pred >> 63; @@ -631,6 +648,7 @@ WARN_UNUSED size_t lenminus64 = len < 64 ? 0 : len - 64; size_t idx = 0; + uint64_t error_mask = 0; // for unescaped characters within strings (ASCII code points < 0x20) for (; idx < lenminus64; idx += 64) { #ifndef _MSC_VER @@ -653,7 +671,7 @@ WARN_UNUSED // themselves uint64_t quote_bits; uint64_t quote_mask = find_quote_mask_and_bits( - input_lo, input_hi, odd_ends, prev_iter_inside_quote, quote_bits); + input_lo, input_hi, odd_ends, prev_iter_inside_quote, quote_bits, error_mask); // take the previous iterations structural bits, not our current iteration, // and flatten @@ -694,7 +712,7 @@ WARN_UNUSED // themselves uint64_t quote_bits; uint64_t quote_mask = find_quote_mask_and_bits( - input_lo, input_hi, odd_ends, prev_iter_inside_quote, quote_bits); + input_lo, input_hi, odd_ends, prev_iter_inside_quote, quote_bits, error_mask); // take the previous iterations structural bits, not our current iteration, // and flatten @@ -729,7 +747,9 @@ WARN_UNUSED } // make it safe to dereference one beyond this array base_ptr[pj.n_structural_indexes] = 0; - + if (error_mask) { + return false; + } #ifdef SIMDJSON_UTF8VALIDATE return _mm256_testz_si256(has_error, has_error) != 0; #else @@ -1297,8 +1317,12 @@ bool ParsedJson::allocateCapacity(size_t len, size_t maxdepth) { std::cerr << "capacities must be non-zero " << std::endl; return false; } - if ((len <= bytecapacity) && (depthcapacity < maxdepth)) + if(len > SIMDJSON_MAXSIZE_BYTES) { + return false; + } + if ((len <= bytecapacity) && (depthcapacity < maxdepth)) { return true; + } deallocate(); isvalid = false; bytecapacity = 0; // will only set it to len after allocations are a success @@ -1306,7 +1330,9 @@ bool ParsedJson::allocateCapacity(size_t len, size_t maxdepth) { uint32_t max_structures = ROUNDUP_N(len, 64) + 2 + 7; structural_indexes = new (std::nothrow) uint32_t[max_structures]; size_t localtapecapacity = ROUNDUP_N(len, 64); - size_t localstringcapacity = ROUNDUP_N(len + 32, 64); + // a document with only zero-length strings... could have len/3 string + // and we would need len/3 * 5 bytes on the string buffer + size_t localstringcapacity = ROUNDUP_N(5 * len / 3 + 32, 64); string_buf = new (std::nothrow) uint8_t[localstringcapacity]; tape = new (std::nothrow) uint64_t[localtapecapacity]; containing_scope_offset = new (std::nothrow) uint32_t[maxdepth]; @@ -1362,6 +1388,7 @@ bool ParsedJson::printjson(std::ostream &os) { if(!isvalid) { return false; } + uint32_t string_length; size_t tapeidx = 0; uint64_t tape_val = tape[tapeidx]; uint8_t type = (tape_val >> 56); @@ -1405,7 +1432,8 @@ bool ParsedJson::printjson(std::ostream &os) { switch (type) { case '"': // we have a string os << '"'; - print_with_escapes((const unsigned char *)(string_buf + payload)); + memcpy(&string_length,string_buf + payload, sizeof(uint32_t)); + print_with_escapes((const unsigned char *)(string_buf + payload + sizeof(uint32_t)), string_length); os << '"'; break; case 'l': // we have a long int @@ -1474,8 +1502,10 @@ bool ParsedJson::printjson(std::ostream &os) { WARN_UNUSED bool ParsedJson::dump_raw_tape(std::ostream &os) { - if(!isvalid) { return false; -} + if(!isvalid) { + return false; + } + uint32_t string_length; size_t tapeidx = 0; uint64_t tape_val = tape[tapeidx]; uint8_t type = (tape_val >> 56); @@ -1498,7 +1528,8 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) { switch (type) { case '"': // we have a string os << "string \""; - print_with_escapes((const unsigned char *)(string_buf + payload)); + memcpy(&string_length,string_buf + payload, sizeof(uint32_t)); + print_with_escapes((const unsigned char *)(string_buf + payload + sizeof(uint32_t)), string_length); os << '"'; os << '\n'; break; @@ -1553,6 +1584,7 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) { } /* end file src/parsedjson.cpp */ /* begin file src/parsedjsoniterator.cpp */ +#include ParsedJson::iterator::iterator(ParsedJson &pj_) : pj(pj_), depth(0), location(0), tape_length(0), depthindex(nullptr) { if(pj.isValid()) { @@ -1659,24 +1691,32 @@ uint8_t ParsedJson::iterator::get_type() const { int64_t ParsedJson::iterator::get_integer() const { - if(location + 1 >= tape_length) { return 0;// default value in case of error -} + if(location + 1 >= tape_length) { + return 0;// default value in case of error + } return static_cast(pj.tape[location + 1]); } double ParsedJson::iterator::get_double() const { - if(location + 1 >= tape_length) { return NAN;// default value in case of error -} + if(location + 1 >= tape_length) { + return NAN;// default value in case of error + } double answer; memcpy(&answer, & pj.tape[location + 1], sizeof(answer)); return answer; } const char * ParsedJson::iterator::get_string() const { - return reinterpret_cast(pj.string_buf + (current_val & JSONVALUEMASK)) ; + return reinterpret_cast(pj.string_buf + (current_val & JSONVALUEMASK) + sizeof(uint32_t)) ; } +uint32_t ParsedJson::iterator::get_string_length() const { + uint32_t answer; + memcpy(&answer, reinterpret_cast(pj.string_buf + (current_val & JSONVALUEMASK)), sizeof(uint32_t)); + return answer; +} + bool ParsedJson::iterator::is_object_or_array() const { return is_object_or_array(get_type()); } @@ -1707,14 +1747,15 @@ bool ParsedJson::iterator::is_object_or_array(uint8_t type) { bool ParsedJson::iterator::move_to_key(const char * key) { if(down()) { - do { + do { assert(is_string()); - bool rightkey = (strcmp(get_string(),key)==0); + bool rightkey = (strcmp(get_string(),key)==0);// null chars would fool this next(); - if(rightkey) { return true; -} - } while(next()); - assert(up());// not found + if(rightkey) { + return true; + } + } while(next()); + assert(up());// not found } return false; } @@ -1813,15 +1854,17 @@ void ParsedJson::iterator::to_start_scope() { } bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const { - if(!isOk()) { return false; -} + if(!isOk()) { + return false; + } switch (current_type) { case '"': // we have a string os << '"'; if(escape_strings) { - print_with_escapes(get_string(), os); + print_with_escapes(get_string(), os, get_string_length()); } else { - os << get_string(); + // was: os << get_string();, but given that we can include null chars, we have to do something crazier: + std::copy(get_string(), get_string() + get_string_length(), std::ostream_iterator(os)); } os << '"'; break; diff --git a/singleheader/simdjson.h b/singleheader/simdjson.h index 96d4f222..28ff0f36 100644 --- a/singleheader/simdjson.h +++ b/singleheader/simdjson.h @@ -1,13 +1,13 @@ -/* auto-generated on Wed 13 Mar 2019 20:02:04 EDT. Do not edit! */ +/* auto-generated on Wed 13 Mar 2019 21:02:37 EDT. Do not edit! */ /* begin file include/simdjson/simdjson_version.h */ // /include/simdjson/simdjson_version.h automatically generated by release.py, do not change by hand #ifndef SIMDJSON_INCLUDE_SIMDJSON_VERSION #define SIMDJSON_INCLUDE_SIMDJSON_VERSION -#define SIMDJSON_VERSION 0.1.0 +#define SIMDJSON_VERSION 0.1.1 enum { SIMDJSON_VERSION_MAJOR = 0, SIMDJSON_VERSION_MINOR = 1, - SIMDJSON_VERSION_REVISION = 0 + SIMDJSON_VERSION_REVISION = 1 }; #endif // SIMDJSON_INCLUDE_SIMDJSON_VERSION /* end file include/simdjson/simdjson_version.h */ @@ -165,6 +165,9 @@ static inline void aligned_free(void *memblock) { #include +// we support documents up to 4GB +#define SIMDJSON_MAXSIZE_BYTES 0xFFFFFFFF + // the input buf should be readable up to buf + SIMDJSON_PADDING #define SIMDJSON_PADDING sizeof(__m256i) @@ -349,87 +352,183 @@ inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { #include #include +// ends with zero char static inline void print_with_escapes(const unsigned char *src) { - while (*src != 0u) { + while (*src) { switch (*src) { - case '\b': - putchar('\\'); - putchar('b'); - break; - case '\f': - putchar('\\'); - putchar('f'); - break; - case '\n': - putchar('\\'); - putchar('n'); - break; - case '\r': - putchar('\\'); - putchar('r'); - break; - case '\"': - putchar('\\'); - putchar('"'); - break; - case '\t': - putchar('\\'); - putchar('t'); - break; - case '\\': - putchar('\\'); - putchar('\\'); - break; - default: - if (*src <= 0x1F) { - printf("\\u%04x", *src); - } else { - putchar(*src); -} + case '\b': + putchar('\\'); + putchar('b'); + break; + case '\f': + putchar('\\'); + putchar('f'); + break; + case '\n': + putchar('\\'); + putchar('n'); + break; + case '\r': + putchar('\\'); + putchar('r'); + break; + case '\"': + putchar('\\'); + putchar('"'); + break; + case '\t': + putchar('\\'); + putchar('t'); + break; + case '\\': + putchar('\\'); + putchar('\\'); + break; + default: + if (*src <= 0x1F) { + printf("\\u%04x", *src); + } else { + putchar(*src); + } } src++; } } -static inline void print_with_escapes(const unsigned char *src, std::ostream &os) { - while (*src != 0u) { +// ends with zero char +static inline void print_with_escapes(const unsigned char *src, + std::ostream &os) { + while (*src) { switch (*src) { - case '\b': - os << '\\'; - os << 'b'; - break; - case '\f': - os << '\\'; - os << 'f'; - break; - case '\n': - os << '\\'; - os << 'n'; - break; - case '\r': - os << '\\'; - os << 'r'; - break; - case '\"': - os << '\\'; - os << '"'; - break; - case '\t': - os << '\\'; - os << 't'; - break; - case '\\': - os << '\\'; - os << '\\'; - break; - default: - if (*src <= 0x1F) { - std::ios::fmtflags f(os.flags()); - os << std::hex << std::setw(4) << std::setfill('0') << static_cast(*src); - os.flags(f); - } else { - os << *src; + case '\b': + os << '\\'; + os << 'b'; + break; + case '\f': + os << '\\'; + os << 'f'; + break; + case '\n': + os << '\\'; + os << 'n'; + break; + case '\r': + os << '\\'; + os << 'r'; + break; + case '\"': + os << '\\'; + os << '"'; + break; + case '\t': + os << '\\'; + os << 't'; + break; + case '\\': + os << '\\'; + os << '\\'; + break; + default: + if (*src <= 0x1F) { + std::ios::fmtflags f(os.flags()); + os << std::hex << std::setw(4) << std::setfill('0') + << static_cast(*src); + os.flags(f); + } else { + os << *src; + } + } + src++; + } } + +// print len chars +static inline void print_with_escapes(const unsigned char *src, size_t len) { + const unsigned char *finalsrc = src + len; + while (src < finalsrc) { + switch (*src) { + case '\b': + putchar('\\'); + putchar('b'); + break; + case '\f': + putchar('\\'); + putchar('f'); + break; + case '\n': + putchar('\\'); + putchar('n'); + break; + case '\r': + putchar('\\'); + putchar('r'); + break; + case '\"': + putchar('\\'); + putchar('"'); + break; + case '\t': + putchar('\\'); + putchar('t'); + break; + case '\\': + putchar('\\'); + putchar('\\'); + break; + default: + if (*src <= 0x1F) { + printf("\\u%04x", *src); + } else { + putchar(*src); + } + } + src++; + } +} + +// print len chars +static inline void print_with_escapes(const unsigned char *src, + std::ostream &os, size_t len) { + const unsigned char *finalsrc = src + len; + while (src < finalsrc) { + switch (*src) { + case '\b': + os << '\\'; + os << 'b'; + break; + case '\f': + os << '\\'; + os << 'f'; + break; + case '\n': + os << '\\'; + os << 'n'; + break; + case '\r': + os << '\\'; + os << 'r'; + break; + case '\"': + os << '\\'; + os << '"'; + break; + case '\t': + os << '\\'; + os << 't'; + break; + case '\\': + os << '\\'; + os << '\\'; + break; + default: + if (*src <= 0x1F) { + std::ios::fmtflags f(os.flags()); + os << std::hex << std::setw(4) << std::setfill('0') + << static_cast(*src); + os.flags(f); + } else { + os << *src; + } } src++; } @@ -439,6 +538,12 @@ static inline void print_with_escapes(const char *src, std::ostream &os) { print_with_escapes(reinterpret_cast(src), os); } +static inline void print_with_escapes(const char *src, std::ostream &os, + size_t len) { + print_with_escapes(reinterpret_cast(src), os, len); +} + +# #endif /* end file include/simdjson/jsonformatutils.h */ /* begin file include/simdjson/jsonioutil.h */ @@ -35907,8 +36012,12 @@ public: // get the string value at this node (NULL ended); valid only if we're at " // note that tabs, and line endings are escaped in the returned value (see print_with_escapes) // return value is valid UTF-8 + // It may contain NULL chars within the string: get_string_length determines the true + // string length. const char * get_string() const; + uint32_t get_string_length() const; + // get the double value at this node; valid only if // we're at "d" double get_double() const; @@ -35931,6 +36040,9 @@ public: // if successful, we are left pointing at the value, // if not, we are still pointing at the object ({) // (in case of repeated keys, this only finds the first one) + // We seek the key using C's strcmp so if your JSON strings contain + // NULL chars, this would trigger a false positive: if you expect that + // to be the case, take extra precautions. bool move_to_key(const char * key); // throughout return true if we can do the navigation, false @@ -36129,67 +36241,51 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len, pj.write_tape(0, '"');// don't bother with the string parsing at all return true; // always succeeds #else + pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"'); const uint8_t *src = &buf[offset + 1]; // we know that buf at offset is a " - uint8_t *dst = pj.current_string_buf_loc; -#ifdef JSON_TEST_STRINGS // for unit testing - uint8_t *const start_of_string = dst; -#endif + uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t); + const uint8_t *const start_of_string = dst; while (1) { __m256i v = _mm256_loadu_si256(reinterpret_cast(src)); - auto bs_bits = - static_cast(_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\')))); - auto quote_bits = - static_cast(_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('"')))); -#define CHECKUNESCAPED - // All Unicode characters may be placed within the - // quotation marks, except for the characters that MUST be escaped: - // quotation mark, reverse solidus, and the control characters (U+0000 - //through U+001F). - // https://tools.ietf.org/html/rfc8259 -#ifdef CHECKUNESCAPED - __m256i unitsep = _mm256_set1_epi8(0x1F); - __m256i unescaped_vec = _mm256_cmpeq_epi8(_mm256_max_epu8(unitsep,v),unitsep);// could do it with saturated subtraction -#endif // CHECKUNESCAPED - - uint32_t quote_dist = trailingzeroes(quote_bits); - uint32_t bs_dist = trailingzeroes(bs_bits); // store to dest unconditionally - we can overwrite the bits we don't like // later _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), v); - if (quote_dist < bs_dist) { + auto bs_bits = + static_cast(_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\')))); + auto quote_mask = _mm256_cmpeq_epi8(v, _mm256_set1_epi8('"')); + auto quote_bits = + static_cast(_mm256_movemask_epi8(quote_mask)); + if(((bs_bits - 1) & quote_bits) != 0 ) { // we encountered quotes first. Move dst to point to quotes and exit - dst[quote_dist] = 0; // null terminate and get out - pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"'); + // find out where the quote is... + uint32_t quote_dist = trailingzeroes(quote_bits); + + // NULL termination is still handy if you expect all your strings to be NULL terminated? + // It comes at a small cost + dst[quote_dist] = 0; + + uint32_t str_length = (dst - start_of_string) + quote_dist; + memcpy(pj.current_string_buf_loc,&str_length, sizeof(uint32_t)); + /////////////////////// + // Above, check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + //////////////////////// + + + // we advance the point, accounting for the fact that we have a NULl termination + pj.current_string_buf_loc = dst + quote_dist + 1; - pj.current_string_buf_loc = dst + quote_dist + 1; // the +1 is due to the 0 value -#ifdef CHECKUNESCAPED - // check that there is no unescaped char before the quote - auto unescaped_bits = static_cast(_mm256_movemask_epi8(unescaped_vec)); - bool is_ok = ((quote_bits - 1) & (~ quote_bits) & unescaped_bits) == 0; #ifdef JSON_TEST_STRINGS // for unit testing - if(is_ok) foundString(buf + offset,start_of_string,pj.current_string_buf_loc - 1); - else foundBadString(buf + offset); -#endif // JSON_TEST_STRINGS - return is_ok; -#else //CHECKUNESCAPED -#ifdef JSON_TEST_STRINGS // for unit testing - foundString(buf + offset,start_of_string,pj.current_string_buf_loc - 1); + foundString(buf + offset,start_of_string,pj.current_string_buf_loc - 1); #endif // JSON_TEST_STRINGS return true; -#endif //CHECKUNESCAPED - } if (quote_dist > bs_dist) { + } + if(((quote_bits - 1) & bs_bits ) != 0 ) { + // find out where the backspace is + uint32_t bs_dist = trailingzeroes(bs_bits); uint8_t escape_char = src[bs_dist + 1]; -#ifdef CHECKUNESCAPED - // we are going to need the unescaped_bits to check for unescaped chars - auto unescaped_bits = static_cast(_mm256_movemask_epi8(unescaped_vec)); - if(((bs_bits - 1) & (~ bs_bits) & unescaped_bits) != 0) { -#ifdef JSON_TEST_STRINGS // for unit testing - foundBadString(buf + offset); -#endif // JSON_TEST_STRINGS - return false; - } -#endif //CHECKUNESCAPED // we encountered backslash first. Handle backslash if (escape_char == 'u') { // move src/dst up to the start; they will be further adjusted @@ -36223,15 +36319,6 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len, // neither. src += 32; dst += 32; -#ifdef CHECKUNESCAPED - // check for unescaped chars - if(_mm256_testz_si256(unescaped_vec,unescaped_vec) != 1) { -#ifdef JSON_TEST_STRINGS // for unit testing - foundBadString(buf + offset); -#endif // JSON_TEST_STRINGS - return false; - } -#endif // CHECKUNESCAPED } } // can't be reached @@ -36789,7 +36876,7 @@ WARN_UNUSED int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded = true); // Parse a document found in buf, need to preallocate ParsedJson. -// Return false in case of a failure. You can also check validity +// Return SUCCESS (an integer = 1) in case of a success. You can also check validity // by calling pj.isValid(). The same ParsedJson can be reused for other documents. // // If reallocifneeded is true (default) then a temporary buffer is created when needed during processing @@ -36802,7 +36889,7 @@ inline int json_parse(const char * buf, size_t len, ParsedJson &pj, bool realloc } // Parse a document found in buf, need to preallocate ParsedJson. -// Return false in case of a failure. You can also check validity +// Return SUCCESS (an integer = 1) in case of a success. You can also check validity // by calling pj.isValid(). The same ParsedJson can be reused for other documents. // // If reallocifneeded is true (default) then a temporary buffer is created when needed during processing