From bf9b1b14576c5708549ec74a7a047be1d1c7c562 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Wed, 13 Mar 2019 21:02:39 -0400
Subject: [PATCH] New version (mostly setting the singleheader version in
 sync).

---
 CMakeLists.txt                      |   4 +-
 include/simdjson/simdjson_version.h |   4 +-
 singleheader/amalgamation_demo.cpp  |   2 +-
 singleheader/simdjson.cpp           |  99 +++++---
 singleheader/simdjson.h             | 355 +++++++++++++++++-----------
 5 files changed, 297 insertions(+), 167 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ffd047a5..cc8930fa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,8 +11,8 @@ project(simdjson)
 set(SIMDJSON_LIB_NAME simdjson)
 set(PROJECT_VERSION_MAJOR 0)
 set(PROJECT_VERSION_MINOR 1)
-set(PROJECT_VERSION_PATCH 0)
-set(SIMDJSON_LIB_VERSION "0.1.0" CACHE STRING "simdjson library version")
+set(PROJECT_VERSION_PATCH 1)
+set(SIMDJSON_LIB_VERSION "0.1.1" CACHE STRING "simdjson library version")
 set(SIMDJSON_LIB_SOVERSION "0" CACHE STRING "simdjson library soversion")
 
 if(NOT MSVC)
diff --git a/include/simdjson/simdjson_version.h b/include/simdjson/simdjson_version.h
index 639ba72b..effee1ff 100644
--- a/include/simdjson/simdjson_version.h
+++ b/include/simdjson/simdjson_version.h
@@ -1,10 +1,10 @@
 // /include/simdjson/simdjson_version.h automatically generated by release.py, do not change by hand 
 #ifndef SIMDJSON_INCLUDE_SIMDJSON_VERSION 
 #define SIMDJSON_INCLUDE_SIMDJSON_VERSION 
-#define SIMDJSON_VERSION 0.1.0 
+#define SIMDJSON_VERSION 0.1.1 
 enum { 
     SIMDJSON_VERSION_MAJOR = 0,  
     SIMDJSON_VERSION_MINOR = 1,  
-    SIMDJSON_VERSION_REVISION = 0  
+    SIMDJSON_VERSION_REVISION = 1  
 }; 
 #endif // SIMDJSON_INCLUDE_SIMDJSON_VERSION 
diff --git a/singleheader/amalgamation_demo.cpp b/singleheader/amalgamation_demo.cpp
index 64a8421a..24d37b45 100644
--- a/singleheader/amalgamation_demo.cpp
+++ b/singleheader/amalgamation_demo.cpp
@@ -1,4 +1,4 @@
-/* auto-generated on Wed 13 Mar 2019 20:02:04 EDT. Do not edit! */
+/* auto-generated on Wed 13 Mar 2019 21:02:37 EDT. Do not edit! */
 
 #include <iostream>
 #include "simdjson.h"
diff --git a/singleheader/simdjson.cpp b/singleheader/simdjson.cpp
index dca49ed2..2a85f24d 100644
--- a/singleheader/simdjson.cpp
+++ b/singleheader/simdjson.cpp
@@ -1,4 +1,4 @@
-/* auto-generated on Wed 13 Mar 2019 20:02:04 EDT. Do not edit! */
+/* auto-generated on Wed 13 Mar 2019 21:02:37 EDT. Do not edit! */
 #include "simdjson.h"
 
 /* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */
@@ -391,6 +391,16 @@ really_inline uint64_t cmp_mask_against_input(__m256i input_lo,
   return res_0 | (res_1 << 32);
 }
 
+// Bitmask of the input bytes that are <= maxval as unsigned values (max(maxval, x) == maxval iff x <= maxval)
+really_inline uint64_t unsigned_lteq_against_input(__m256i input_lo,
+                                              __m256i input_hi, __m256i maxval) {
+  const __m256i lo_within = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, input_lo), maxval);
+  const __m256i hi_within = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, input_hi), maxval);
+  const uint64_t lo_bits = static_cast<uint32_t>(_mm256_movemask_epi8(lo_within));
+  const uint64_t hi_bits = static_cast<uint32_t>(_mm256_movemask_epi8(hi_within));
+  return (hi_bits << 32) | lo_bits; // input_lo fills bits 0-31, input_hi bits 32-63
+}
+
 // return a bitvector indicating where we have characters that end an odd-length
 // sequence of backslashes (and thus change the behavior of the next character
 // to follow). A even-length sequence of backslashes, and, for that matter, the
@@ -449,13 +459,21 @@ find_odd_backslash_sequences(__m256i input_lo, __m256i input_hi,
 // backslash sequences (of any length) will be detected elsewhere.
 really_inline uint64_t find_quote_mask_and_bits(
     __m256i input_lo, __m256i input_hi, uint64_t odd_ends,
-    uint64_t &prev_iter_inside_quote, uint64_t &quote_bits) {
+    uint64_t &prev_iter_inside_quote, uint64_t &quote_bits, uint64_t &error_mask) {
   quote_bits =
       cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('"'));
   quote_bits = quote_bits & ~odd_ends;
+  // remove from the valid quoted region the unescaped characters.
   uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
       _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
   quote_mask ^= prev_iter_inside_quote;
+  // All Unicode characters may be placed within the
+  // quotation marks, except for the characters that MUST be escaped:
+  // quotation mark, reverse solidus, and the control characters (U+0000
+  // through U+001F).
+  // https://tools.ietf.org/html/rfc8259
+  uint64_t unescaped = unsigned_lteq_against_input(input_lo, input_hi, _mm256_set1_epi8(0x1F));
+  error_mask |= quote_mask & unescaped;
   // right shift of a signed value expected to be well-defined and standard
   // compliant as of C++20,
   // John Regher from Utah U. says this is fine code
@@ -558,11 +576,9 @@ really_inline uint64_t finalize_structurals(
     uint64_t quote_bits, uint64_t &prev_iter_ends_pseudo_pred) {
   // mask off anything inside quotes
   structurals &= ~quote_mask;
-
   // add the real quote bits back into our bitmask as well, so we can
   // quickly traverse the strings we've spent all this trouble gathering
   structurals |= quote_bits;
-
   // Now, establish "pseudo-structural characters". These are non-whitespace
   // characters that are (a) outside quotes and (b) have a predecessor that's
   // either whitespace or a structural character. This means that subsequent
@@ -574,6 +590,7 @@ really_inline uint64_t finalize_structurals(
   // a qualified predecessor is something that can happen 1 position before an
   // psuedo-structural character
   uint64_t pseudo_pred = structurals | whitespace;
+
   uint64_t shifted_pseudo_pred =
       (pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
   prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
@@ -631,6 +648,7 @@ WARN_UNUSED
 
   size_t lenminus64 = len < 64 ? 0 : len - 64;
   size_t idx = 0;
+  uint64_t error_mask = 0; // for unescaped characters within strings (ASCII code points < 0x20)
 
   for (; idx < lenminus64; idx += 64) {
 #ifndef _MSC_VER
@@ -653,7 +671,7 @@ WARN_UNUSED
     // themselves
     uint64_t quote_bits;
     uint64_t quote_mask = find_quote_mask_and_bits(
-        input_lo, input_hi, odd_ends, prev_iter_inside_quote, quote_bits);
+        input_lo, input_hi, odd_ends, prev_iter_inside_quote, quote_bits, error_mask);
 
     // take the previous iterations structural bits, not our current iteration,
     // and flatten
@@ -694,7 +712,7 @@ WARN_UNUSED
     // themselves
     uint64_t quote_bits;
     uint64_t quote_mask = find_quote_mask_and_bits(
-        input_lo, input_hi, odd_ends, prev_iter_inside_quote, quote_bits);
+        input_lo, input_hi, odd_ends, prev_iter_inside_quote, quote_bits, error_mask);
 
     // take the previous iterations structural bits, not our current iteration,
     // and flatten
@@ -729,7 +747,9 @@ WARN_UNUSED
   }
   // make it safe to dereference one beyond this array
   base_ptr[pj.n_structural_indexes] = 0;  
-
+  if (error_mask) {
+    return false;
+  }
 #ifdef SIMDJSON_UTF8VALIDATE
   return _mm256_testz_si256(has_error, has_error) != 0;
 #else
@@ -1297,8 +1317,12 @@ bool ParsedJson::allocateCapacity(size_t len, size_t maxdepth) {
       std::cerr << "capacities must be non-zero " << std::endl;
       return false;
     }
-    if ((len <= bytecapacity) && (depthcapacity < maxdepth))
+    if(len > SIMDJSON_MAXSIZE_BYTES) {
+      return false;
+    }
+    if ((len <= bytecapacity) && (depthcapacity < maxdepth)) {
       return true;
+    }
     deallocate();
     isvalid = false;
     bytecapacity = 0; // will only set it to len after allocations are a success
@@ -1306,7 +1330,9 @@ bool ParsedJson::allocateCapacity(size_t len, size_t maxdepth) {
     uint32_t max_structures = ROUNDUP_N(len, 64) + 2 + 7;
     structural_indexes = new (std::nothrow) uint32_t[max_structures];
     size_t localtapecapacity = ROUNDUP_N(len, 64);
-    size_t localstringcapacity = ROUNDUP_N(len + 32, 64);
+    // a document with only zero-length strings... could have len/3 strings,
+    // and we would need len/3 * 5 bytes in the string buffer (4-byte length + null terminator each)
+    size_t localstringcapacity = ROUNDUP_N(5 * len / 3 + 32, 64); 
     string_buf = new (std::nothrow) uint8_t[localstringcapacity];
     tape = new (std::nothrow) uint64_t[localtapecapacity];
     containing_scope_offset = new (std::nothrow) uint32_t[maxdepth];
@@ -1362,6 +1388,7 @@ bool ParsedJson::printjson(std::ostream &os) {
     if(!isvalid) { 
       return false;
     }
+    uint32_t string_length;
     size_t tapeidx = 0;
     uint64_t tape_val = tape[tapeidx];
     uint8_t type = (tape_val >> 56);
@@ -1405,7 +1432,8 @@ bool ParsedJson::printjson(std::ostream &os) {
       switch (type) {
       case '"': // we have a string
         os << '"';
-        print_with_escapes((const unsigned char *)(string_buf + payload));
+        memcpy(&string_length,string_buf + payload, sizeof(uint32_t));
+        print_with_escapes((const unsigned char *)(string_buf + payload + sizeof(uint32_t)), string_length); 
         os << '"';
         break;
       case 'l': // we have a long int
@@ -1474,8 +1502,10 @@ bool ParsedJson::printjson(std::ostream &os) {
 
 WARN_UNUSED
 bool ParsedJson::dump_raw_tape(std::ostream &os) {
-    if(!isvalid) { return false;
-}
+    if(!isvalid) { 
+      return false;
+    }
+    uint32_t string_length;
     size_t tapeidx = 0;
     uint64_t tape_val = tape[tapeidx];
     uint8_t type = (tape_val >> 56);
@@ -1498,7 +1528,8 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) {
       switch (type) {
       case '"': // we have a string
         os << "string \"";
-        print_with_escapes((const unsigned char *)(string_buf + payload));
+        memcpy(&string_length,string_buf + payload, sizeof(uint32_t));
+        print_with_escapes((const unsigned char *)(string_buf + payload + sizeof(uint32_t)), string_length);
         os << '"';
         os << '\n';
         break;
@@ -1553,6 +1584,7 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) {
 }
 /* end file src/parsedjson.cpp */
 /* begin file src/parsedjsoniterator.cpp */
+#include <iterator>
 
 ParsedJson::iterator::iterator(ParsedJson &pj_) : pj(pj_), depth(0), location(0), tape_length(0), depthindex(nullptr) {
         if(pj.isValid()) {
@@ -1659,24 +1691,32 @@ uint8_t ParsedJson::iterator::get_type()  const {
 
 
 int64_t ParsedJson::iterator::get_integer()  const {
-    if(location + 1 >= tape_length) { return 0;// default value in case of error
-}
+    if(location + 1 >= tape_length) { // bounds check for the tape[location + 1] read below
+      return 0;// default value in case of error
+    }
     return static_cast<int64_t>(pj.tape[location + 1]);
 }
 
 double ParsedJson::iterator::get_double()  const {
-    if(location + 1 >= tape_length) { return NAN;// default value in case of error
-}
+    if(location + 1 >= tape_length) { // bounds check for the tape[location + 1] read below
+      return NAN;// default value in case of error
+    }
     double answer;
     memcpy(&answer, & pj.tape[location + 1], sizeof(answer));
     return answer;
 }
 
 const char * ParsedJson::iterator::get_string() const {
-    return  reinterpret_cast<const char *>(pj.string_buf + (current_val & JSONVALUEMASK)) ;
+   return  reinterpret_cast<const char *>(pj.string_buf + (current_val & JSONVALUEMASK) + sizeof(uint32_t)) ; // skip the uint32_t stored before the characters
 }
 
 
+uint32_t ParsedJson::iterator::get_string_length() const {
+    uint32_t length; // the 32-bit length is stored in string_buf right before the characters
+    memcpy(&length, pj.string_buf + (current_val & JSONVALUEMASK), sizeof(length));
+    return length;
+}
+
 bool ParsedJson::iterator::is_object_or_array() const {
     return is_object_or_array(get_type());
 }
@@ -1707,14 +1747,15 @@ bool ParsedJson::iterator::is_object_or_array(uint8_t type) {
 
 bool ParsedJson::iterator::move_to_key(const char * key) {
     if(down()) {
-    do {
+      do { // each round inspects one key, then steps onto its value
         assert(is_string());
-        bool rightkey = (strcmp(get_string(),key)==0);
+        const bool rightkey = (strcmp(get_string(),key)==0);// null chars would fool this
         next();
-        if(rightkey) { return true;
-}
-    } while(next());
-    assert(up());// not found
+        if(rightkey) { return true; } // found: we are left on the value
+      } while(next());
+      const bool back_at_object = up(); // not found; must run even when NDEBUG strips assert()
+      assert(back_at_object);
+      (void)back_at_object; // silences the unused-variable warning in NDEBUG builds
     }
     return false;
 }
@@ -1813,15 +1854,17 @@ void ParsedJson::iterator::to_start_scope()  {
 }
 
 bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const {
-    if(!isOk()) { return false;
-}
+    if(!isOk()) { 
+      return false;
+    }
     switch (current_type) {
     case '"': // we have a string
     os << '"';
     if(escape_strings) {
-        print_with_escapes(get_string(), os);
+        print_with_escapes(get_string(), os, get_string_length());
     } else {
-        os << get_string();
+        // was: os << get_string();, but given that we can include null chars, we have to do something crazier:
+        std::copy(get_string(), get_string() + get_string_length(), std::ostream_iterator<char>(os));
     }
     os << '"';
     break;
diff --git a/singleheader/simdjson.h b/singleheader/simdjson.h
index 96d4f222..28ff0f36 100644
--- a/singleheader/simdjson.h
+++ b/singleheader/simdjson.h
@@ -1,13 +1,13 @@
-/* auto-generated on Wed 13 Mar 2019 20:02:04 EDT. Do not edit! */
+/* auto-generated on Wed 13 Mar 2019 21:02:37 EDT. Do not edit! */
 /* begin file include/simdjson/simdjson_version.h */
 // /include/simdjson/simdjson_version.h automatically generated by release.py, do not change by hand 
 #ifndef SIMDJSON_INCLUDE_SIMDJSON_VERSION 
 #define SIMDJSON_INCLUDE_SIMDJSON_VERSION 
-#define SIMDJSON_VERSION 0.1.0 
+#define SIMDJSON_VERSION 0.1.1 
 enum { 
     SIMDJSON_VERSION_MAJOR = 0,  
     SIMDJSON_VERSION_MINOR = 1,  
-    SIMDJSON_VERSION_REVISION = 0  
+    SIMDJSON_VERSION_REVISION = 1  
 }; 
 #endif // SIMDJSON_INCLUDE_SIMDJSON_VERSION 
 /* end file include/simdjson/simdjson_version.h */
@@ -165,6 +165,9 @@ static inline void aligned_free(void *memblock) {
 
 #include <cassert>
 
+// we support documents up to 4GB
+#define SIMDJSON_MAXSIZE_BYTES 0xFFFFFFFF
+
 // the input buf should be readable up to buf + SIMDJSON_PADDING
 #define SIMDJSON_PADDING  sizeof(__m256i)
 
@@ -349,87 +352,183 @@ inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
 #include <iomanip>
 #include <iostream>
 
+// prints src to stdout with JSON escaping; src must end with a zero char
 static inline void print_with_escapes(const unsigned char *src) {
-  while (*src != 0u) {
+  while (*src) {
     switch (*src) {
-    case '\b':
-      putchar('\\');
-      putchar('b');
-      break;
-    case '\f':
-      putchar('\\');
-      putchar('f');
-      break;
-    case '\n':
-      putchar('\\');
-      putchar('n');
-      break;
-    case '\r':
-      putchar('\\');
-      putchar('r');
-      break;
-    case '\"':
-      putchar('\\');
-      putchar('"');
-      break;
-    case '\t':
-      putchar('\\');
-      putchar('t');
-      break;
-    case '\\':
-      putchar('\\');
-      putchar('\\');
-      break;
-    default:
-      if (*src <= 0x1F) {
-        printf("\\u%04x", *src);
-      } else {
-        putchar(*src);
-}
+      case '\b':
+        putchar('\\');
+        putchar('b');
+        break;
+      case '\f':
+        putchar('\\');
+        putchar('f');
+        break;
+      case '\n':
+        putchar('\\');
+        putchar('n');
+        break;
+      case '\r':
+        putchar('\\');
+        putchar('r');
+        break;
+      case '\"':
+        putchar('\\');
+        putchar('"');
+        break;
+      case '\t':
+        putchar('\\');
+        putchar('t');
+        break;
+      case '\\':
+        putchar('\\');
+        putchar('\\');
+        break;
+      default:
+        if (*src <= 0x1F) {
+          printf("\\u%04x", *src);
+        } else {
+          putchar(*src);
+        }
     }
     src++;
   }
 }
 
-static inline void print_with_escapes(const unsigned char *src, std::ostream &os) {
-  while (*src != 0u) {
+// Writes the zero-terminated string src to os with JSON escaping; control
+// characters without a short escape become a backslash-u escape (four hex digits).
+static inline void print_with_escapes(const unsigned char *src, std::ostream &os) {
+  while (*src) {
     switch (*src) {
-    case '\b':
-      os << '\\';
-      os << 'b';
-      break;
-    case '\f':
-      os << '\\';
-      os << 'f';
-      break;
-    case '\n':
-      os << '\\';
-      os << 'n';
-      break;
-    case '\r':
-      os << '\\';
-      os << 'r';
-      break;
-    case '\"':
-      os << '\\';
-      os << '"';
-      break;
-    case '\t':
-      os << '\\';
-      os << 't';
-      break;
-    case '\\':
-      os << '\\';
-      os << '\\';
-      break;
-    default:
-      if (*src <= 0x1F) {
-        std::ios::fmtflags f(os.flags());
-        os << std::hex << std::setw(4) << std::setfill('0') << static_cast<int>(*src);
-        os.flags(f);
-      } else {
-        os << *src;
+      // characters with a two-character escape: a backslash followed by a
+      // letter (or by the character itself for the quote and the backslash)
+      case '\b':
+        os << '\\' << 'b';
+        break;
+      case '\f':
+        os << '\\' << 'f';
+        break;
+      case '\n':
+        os << '\\' << 'n';
+        break;
+      case '\r':
+        os << '\\' << 'r';
+        break;
+      case '\"':
+        os << '\\' << '"';
+        break;
+      case '\t':
+        os << '\\' << 't';
+        break;
+      case '\\':
+        os << '\\' << '\\';
+        break;
+      default:
+        if (*src <= 0x1F) {
+          // any other control character is written as a backslash-u escape;
+          // the fill character is sticky and is not part of fmtflags, so it
+          // is saved and restored separately from the flags
+          const std::ios::fmtflags saved_flags(os.flags());
+          const char saved_fill = os.fill('0');
+          os << "\\u" << std::hex << std::setw(4) << static_cast<int>(*src);
+          os.flags(saved_flags);
+          os.fill(saved_fill);
+        } else {
+          // everything else is copied through unchanged
+          os << *src;
+        }
+    }
+    src++;
+  }
 }
+
+// Writes exactly len bytes starting at src to stdout with JSON escaping;
+// embedded zero bytes do not end the output.
+static inline void print_with_escapes(const unsigned char *src, size_t len) {
+  for (size_t i = 0; i < len; i++) {
+    const unsigned char c = src[i];
+    // letter that follows the backslash in a two-character escape, 0 if none
+    char short_escape = 0;
+    switch (c) {
+      case '\b':
+        short_escape = 'b';
+        break;
+      case '\f':
+        short_escape = 'f';
+        break;
+      case '\n':
+        short_escape = 'n';
+        break;
+      case '\r':
+        short_escape = 'r';
+        break;
+      case '\"':
+        short_escape = '"';
+        break;
+      case '\t':
+        short_escape = 't';
+        break;
+      case '\\':
+        short_escape = '\\';
+        break;
+      default:
+        break;
+    }
+    if (short_escape != 0) {
+      putchar('\\');
+      putchar(short_escape);
+    } else if (c <= 0x1F) {
+      // remaining control characters get the backslash-u form, four hex digits
+      printf("\\u%04x", c);
+    } else {
+      putchar(c);
+    }
+  }
+}
+
+// Writes exactly len bytes starting at src to os with JSON escaping; embedded
+// zero bytes do not end the output. Control characters without a short escape
+// become a backslash-u escape (four hex digits).
+static inline void print_with_escapes(const unsigned char *src,
+                                      std::ostream &os, size_t len) {
+  const unsigned char *finalsrc = src + len;
+  while (src < finalsrc) {
+    switch (*src) {
+      // characters with a two-character escape
+      case '\b':
+        os << '\\' << 'b';
+        break;
+      case '\f':
+        os << '\\' << 'f';
+        break;
+      case '\n':
+        os << '\\' << 'n';
+        break;
+      case '\r':
+        os << '\\' << 'r';
+        break;
+      case '\"':
+        os << '\\' << '"';
+        break;
+      case '\t':
+        os << '\\' << 't';
+        break;
+      case '\\':
+        os << '\\' << '\\';
+        break;
+      default:
+        if (*src <= 0x1F) {
+          // the fill character is sticky and is not part of fmtflags, so it
+          // is saved and restored separately from the flags
+          const std::ios::fmtflags saved_flags(os.flags());
+          const char saved_fill = os.fill('0');
+          os << "\\u" << std::hex << std::setw(4) << static_cast<int>(*src);
+          os.flags(saved_flags);
+          os.fill(saved_fill);
+        } else {
+          // everything else is copied through unchanged
+          os << *src;
+        }
     }
     src++;
   }
@@ -439,6 +538,12 @@ static inline void print_with_escapes(const char *src, std::ostream &os) {
   print_with_escapes(reinterpret_cast<const unsigned char *>(src), os);
 }
 
+static inline void print_with_escapes(const char *src, std::ostream &os, size_t len) {
+  const unsigned char *bytes = reinterpret_cast<const unsigned char *>(src); // same bytes, unsigned view
+  print_with_escapes(bytes, os, len);
+}
+
+#
 #endif
 /* end file include/simdjson/jsonformatutils.h */
 /* begin file include/simdjson/jsonioutil.h */
@@ -35907,8 +36012,12 @@ public:
     // get the string value at this node (NULL ended); valid only if we're at "
     // note that tabs, and line endings are escaped in the returned value (see print_with_escapes)
     // return value is valid UTF-8
+    // It may contain NULL chars within the string: get_string_length determines the true 
+    // string length.
     const char * get_string() const;
 
+    uint32_t get_string_length() const;
+
     // get the double value at this node; valid only if
     // we're at "d"
     double get_double()  const;
@@ -35931,6 +36040,9 @@ public:
     // if successful, we are left pointing at the value,
     // if not, we are still pointing at the object ({)
     // (in case of repeated keys, this only finds the first one)
+    // We seek the key using C's strcmp so if your JSON strings contain
+    // NULL chars, this would trigger a false positive: if you expect that
+    // to be the case, take extra precautions.
     bool move_to_key(const char * key);
 
     // throughout return true if we can do the navigation, false
@@ -36129,67 +36241,51 @@ really_inline  bool parse_string(const uint8_t *buf, UNUSED size_t len,
   pj.write_tape(0, '"');// don't bother with the string parsing at all
   return true; // always succeeds
 #else
+  pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"');
   const uint8_t *src = &buf[offset + 1]; // we know that buf at offset is a "
-  uint8_t *dst = pj.current_string_buf_loc;
-#ifdef JSON_TEST_STRINGS // for unit testing
-  uint8_t *const start_of_string = dst;
-#endif
+  uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t);
+  const uint8_t *const start_of_string = dst;
   while (1) {
     __m256i v = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
-    auto bs_bits =
-        static_cast<uint32_t>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\'))));
-    auto quote_bits =
-        static_cast<uint32_t>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('"'))));
-#define CHECKUNESCAPED
-    // All Unicode characters may be placed within the
-    // quotation marks, except for the characters that MUST be escaped:
-    // quotation mark, reverse solidus, and the control characters (U+0000
-    //through U+001F).
-    // https://tools.ietf.org/html/rfc8259
-#ifdef CHECKUNESCAPED
-    __m256i unitsep = _mm256_set1_epi8(0x1F);
-    __m256i unescaped_vec = _mm256_cmpeq_epi8(_mm256_max_epu8(unitsep,v),unitsep);// could do it with saturated subtraction
-#endif // CHECKUNESCAPED
-
-    uint32_t quote_dist = trailingzeroes(quote_bits);
-    uint32_t bs_dist = trailingzeroes(bs_bits);
     // store to dest unconditionally - we can overwrite the bits we don't like
     // later
     _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), v);
-    if (quote_dist < bs_dist) {
+    auto bs_bits =
+        static_cast<uint32_t>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\'))));
+    auto quote_mask = _mm256_cmpeq_epi8(v, _mm256_set1_epi8('"'));
+    auto quote_bits =
+        static_cast<uint32_t>(_mm256_movemask_epi8(quote_mask));
+    if(((bs_bits - 1) & quote_bits) != 0 ) {
       // we encountered quotes first. Move dst to point to quotes and exit
-      dst[quote_dist] = 0; // null terminate and get out
 
-      pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"');
+      // find out where the quote is...
+      uint32_t quote_dist = trailingzeroes(quote_bits);
+
+      // NULL termination is still handy if you expect all your strings to be NULL terminated?
+      // It comes at a small cost
+      dst[quote_dist] = 0; 
+
+      uint32_t str_length = (dst - start_of_string) + quote_dist; 
+      memcpy(pj.current_string_buf_loc,&str_length, sizeof(uint32_t));
+      ///////////////////////
+      // Above, check for overflow in case someone has a crazy string (>=4GB?)
+      // But only add the overflow check when the document itself exceeds 4GB
+      // Currently unneeded because we refuse to parse docs larger or equal to 4GB.
+      ////////////////////////
+
+      
+      // we advance the pointer, accounting for the fact that we have a NULL termination
+      pj.current_string_buf_loc = dst + quote_dist + 1;
 
-      pj.current_string_buf_loc = dst + quote_dist + 1; // the +1 is due to the 0 value
-#ifdef CHECKUNESCAPED
-      // check that there is no unescaped char before the quote
-      auto unescaped_bits = static_cast<uint32_t>(_mm256_movemask_epi8(unescaped_vec));
-      bool is_ok = ((quote_bits - 1) & (~ quote_bits) & unescaped_bits) == 0;
 #ifdef JSON_TEST_STRINGS // for unit testing
-       if(is_ok) foundString(buf + offset,start_of_string,pj.current_string_buf_loc - 1);
-       else  foundBadString(buf + offset);
-#endif // JSON_TEST_STRINGS
-      return is_ok;
-#else  //CHECKUNESCAPED
-#ifdef JSON_TEST_STRINGS // for unit testing
-       foundString(buf + offset,start_of_string,pj.current_string_buf_loc - 1);
+      foundString(buf + offset,start_of_string,pj.current_string_buf_loc - 1);
 #endif // JSON_TEST_STRINGS
       return true;
-#endif //CHECKUNESCAPED
-    } if (quote_dist > bs_dist) {
+    } 
+    if(((quote_bits - 1) & bs_bits ) != 0 ) {
+      // find out where the backslash is
+      uint32_t bs_dist = trailingzeroes(bs_bits);
       uint8_t escape_char = src[bs_dist + 1];
-#ifdef CHECKUNESCAPED
-      // we are going to need the unescaped_bits to check for unescaped chars
-      auto unescaped_bits = static_cast<uint32_t>(_mm256_movemask_epi8(unescaped_vec));
-      if(((bs_bits - 1) & (~ bs_bits) & unescaped_bits) != 0) {
-#ifdef JSON_TEST_STRINGS // for unit testing
-        foundBadString(buf + offset);
-#endif // JSON_TEST_STRINGS
-        return false;
-      }
-#endif //CHECKUNESCAPED
       // we encountered backslash first. Handle backslash
       if (escape_char == 'u') {
         // move src/dst up to the start; they will be further adjusted
@@ -36223,15 +36319,6 @@ really_inline  bool parse_string(const uint8_t *buf, UNUSED size_t len,
       // neither.
       src += 32;
       dst += 32;
-#ifdef CHECKUNESCAPED
-      // check for unescaped chars
-      if(_mm256_testz_si256(unescaped_vec,unescaped_vec) != 1) {
-#ifdef JSON_TEST_STRINGS // for unit testing
-          foundBadString(buf + offset);
-#endif // JSON_TEST_STRINGS
-        return false;
-      }
-#endif // CHECKUNESCAPED
     }
   }
   // can't be reached
@@ -36789,7 +36876,7 @@ WARN_UNUSED
 int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded = true);
 
 // Parse a document found in buf, need to preallocate ParsedJson.
-// Return false in case of a failure. You can also check validity
+// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
 // by calling pj.isValid(). The same ParsedJson can be reused for other documents.
 //
 // If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
@@ -36802,7 +36889,7 @@ inline int json_parse(const char * buf, size_t len, ParsedJson &pj, bool realloc
 }
 
 // Parse a document found in buf, need to preallocate ParsedJson.
-// Return false in case of a failure. You can also check validity
+// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
 // by calling pj.isValid(). The same ParsedJson can be reused for other documents.
 //
 // If reallocifneeded is true (default) then a temporary buffer is created when needed during processing