diff --git a/src/generic/stage1/json_string_scanner.h b/src/generic/stage1/json_string_scanner.h index 0e7fc1ca..834943bf 100644 --- a/src/generic/stage1/json_string_scanner.h +++ b/src/generic/stage1/json_string_scanner.h @@ -13,6 +13,8 @@ struct json_string_block { really_inline uint64_t string_start() const { return _quote & ~_in_string; } // Only characters inside the string (not including the quotes) really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // True only when every bit of _in_string is set, i.e. the whole block is flagged as inside a string + really_inline bool all_string() const { return _in_string == 0xFFFFFFFFFFFFFFFFULL; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) @@ -35,6 +37,7 @@ class json_string_scanner { public: really_inline json_string_block next(const simd::simd8x64 in); really_inline error_code finish(bool streaming); + really_inline bool in_unclosed_string() const { return prev_in_string; } private: really_inline uint64_t find_escaped(uint64_t escape); @@ -73,6 +76,8 @@ private: // text | \\\ | \\\"\\\" \\\" \\"\\" | // really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { + if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } + // If there was overflow, pretend the first character isn't a backslash backslash &= ~prev_escaped; uint64_t follows_escape = backslash << 1 | prev_escaped; @@ -101,13 +106,23 @@ really_inline json_string_block json_string_scanner::next(const simd::simd8x64(in_string) >> 63); + // Use ^ to turn the beginning quote off, and the end quote on. return { backslash, @@ -118,7 +133,7 @@ really_inline json_string_block json_string_scanner::next(const simd::simd8x64