From 9230588ce89880e06295ab607d4da6b8eebdae7c Mon Sep 17 00:00:00 2001
From: ioioioio <iodadi@gmail.com>
Date: Tue, 2 Jul 2019 15:21:00 -0400
Subject: [PATCH 1/4] Move the library into namespace simdjson (conflicts are solved)

---
 CONTRIBUTORS                            |  1 +
 benchmark/distinctuseridcompetition.cpp | 30 +++++------
 benchmark/minifiercompetition.cpp       |  4 +-
 benchmark/parse.cpp                     | 14 ++---
 benchmark/parseandstatcompetition.cpp   |  8 +--
 benchmark/parsingcompetition.cpp        |  6 +--
 benchmark/statisticalmodel.cpp          | 12 ++---
 include/simdjson/jsoncharutils.h        |  2 +
 include/simdjson/jsonformatutils.h      |  2 +
 include/simdjson/jsonioutil.h           |  4 +-
 include/simdjson/jsonminifier.h         |  3 +-
 include/simdjson/jsonparser.h           | 11 ++--
 include/simdjson/numberparsing.h        |  3 +-
 include/simdjson/padded_string.h        |  3 ++
 include/simdjson/parsedjson.h           |  3 +-
 include/simdjson/portability.h          |  9 ++--
 include/simdjson/simdjson.h             | 69 ++++++++++++-------------
 include/simdjson/simdjson_version.h     |  2 +
 include/simdjson/simdprune_tables.h     |  8 +--
 include/simdjson/simdutf8check.h        |  4 +-
 include/simdjson/stage1_find_marks.h    |  3 +-
 include/simdjson/stage2_build_tape.h    |  2 +
 include/simdjson/stringparsing.h        |  4 +-
 src/jsonioutil.cpp                      |  2 +
 src/jsonminifier.cpp                    |  9 ++--
 src/jsonparser.cpp                      | 23 +++++----
 src/parsedjson.cpp                      |  4 +-
 src/parsedjsoniterator.cpp              |  2 +
 src/simdjson.cpp                        | 34 ++++++------
 src/stage2_build_tape.cpp               | 20 +++----
 tests/allparserscheckfile.cpp           |  6 +--
 tests/basictests.cpp                    |  2 +-
 tests/jsoncheck.cpp                     |  6 +--
 tests/numberparsingcheck.cpp            |  6 +--
 tests/stringparsingcheck.cpp            |  6 +--
 tools/json2json.cpp                     | 12 ++---
 tools/jsonstats.cpp                     |  8 +--
 tools/minify.cpp                        |  6 +--
 38 files changed, 192 insertions(+), 161 deletions(-)

diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 06d542c0..35fb3b30 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -19,5 +19,6 @@ Reini Urban
 Tom Dyson
 Ihor Dotsenko
 Alexey Milovidov
+Chang Liu
 # if you have contributed to the project and your name does not 
 # appear in this list, please let us know!
diff --git a/benchmark/distinctuseridcompetition.cpp b/benchmark/distinctuseridcompetition.cpp
index d62217a7..d1e1bd0a 100644
--- a/benchmark/distinctuseridcompetition.cpp
+++ b/benchmark/distinctuseridcompetition.cpp
@@ -30,7 +30,7 @@ void print_vec(const std::vector<int64_t> &v) {
   std::cout << std::endl;
 }
 
-void simdjson_scan(std::vector<int64_t> &answer, ParsedJson::iterator &i) {
+void simdjson_scan(std::vector<int64_t> &answer, simdjson::ParsedJson::iterator &i) {
    while(i.move_forward()) {
      if(i.get_scope_type() == '{') {
        bool founduser = (i.get_string_length() == 4) && (memcmp(i.get_string(), "user", 4) == 0);
@@ -48,30 +48,30 @@ void simdjson_scan(std::vector<int64_t> &answer, ParsedJson::iterator &i) {
 }
 
 __attribute__ ((noinline))
-std::vector<int64_t> simdjson_justdom(ParsedJson &pj) {
+std::vector<int64_t> simdjson_justdom(simdjson::ParsedJson &pj) {
   std::vector<int64_t> answer;
-  ParsedJson::iterator i(pj);
+  simdjson::ParsedJson::iterator i(pj);
   simdjson_scan(answer,i);
   remove_duplicates(answer);
   return answer;
 }
 
 __attribute__ ((noinline))
-std::vector<int64_t> simdjson_computestats(const padded_string &p) {
+std::vector<int64_t> simdjson_computestats(const simdjson::padded_string &p) {
   std::vector<int64_t> answer;
-  ParsedJson pj = build_parsed_json(p);
+  simdjson::ParsedJson pj = simdjson::build_parsed_json(p);
   if (!pj.isValid()) {
     return answer;
   }
-  ParsedJson::iterator i(pj);
+  simdjson::ParsedJson::iterator i(pj);
   simdjson_scan(answer,i);
   remove_duplicates(answer);
   return answer;
 }
 
 __attribute__ ((noinline))
-bool simdjson_justparse(const padded_string &p) {
-  ParsedJson pj = build_parsed_json(p);
+bool simdjson_justparse(const simdjson::padded_string &p) {
+  simdjson::ParsedJson pj = simdjson::build_parsed_json(p);
   bool answer = !pj.isValid();
   return answer;
 }
@@ -135,7 +135,7 @@ std::vector<int64_t> sasjon_justdom(sajson::document & d) {
 }
 
 __attribute__ ((noinline))
-std::vector<int64_t> sasjon_computestats(const padded_string &p) {
+std::vector<int64_t> sasjon_computestats(const simdjson::padded_string &p) {
   std::vector<int64_t> answer;
   char *buffer = (char *)malloc(p.size());
   memcpy(buffer, p.data(), p.size());
@@ -152,7 +152,7 @@ std::vector<int64_t> sasjon_computestats(const padded_string &p) {
 }
 
 __attribute__ ((noinline))
-bool sasjon_justparse(const padded_string &p) {
+bool sasjon_justparse(const simdjson::padded_string &p) {
   char *buffer = (char *)malloc(p.size());
   memcpy(buffer, p.data(), p.size());
   auto d = sajson::parse(sajson::dynamic_allocation(),
@@ -210,7 +210,7 @@ std::vector<int64_t> rapid_justdom(rapidjson::Document &d) {
 }
 
 __attribute__ ((noinline))
-std::vector<int64_t> rapid_computestats(const padded_string &p) {
+std::vector<int64_t> rapid_computestats(const simdjson::padded_string &p) {
   std::vector<int64_t> answer;
   char *buffer = (char *)malloc(p.size() + 1);
   memcpy(buffer, p.data(), p.size());
@@ -228,7 +228,7 @@ std::vector<int64_t> rapid_computestats(const padded_string &p) {
 }
 
 __attribute__ ((noinline))
-bool rapid_justparse(const padded_string &p) {
+bool rapid_justparse(const simdjson::padded_string &p) {
   char *buffer = (char *)malloc(p.size() + 1);
   memcpy(buffer, p.data(), p.size());
   buffer[p.size()] = '\0';
@@ -267,9 +267,9 @@ int main(int argc, char *argv[]) {
   if (optind + 1 < argc) {
     std::cerr << "warning: ignoring everything after " << argv[optind + 1] << std::endl;
   }
-  padded_string p;
+  simdjson::padded_string p;
   try {
-    get_corpus(filename).swap(p);
+    simdjson::get_corpus(filename).swap(p);
   } catch (const std::exception &e) { // caught by reference to base
     std::cout << "Could not load the file " << filename << std::endl;
     return EXIT_FAILURE;
@@ -321,7 +321,7 @@ int main(int argc, char *argv[]) {
             !justdata);
   BEST_TIME("sasjon (just parse) ", sasjon_justparse(p), false, , repeat, volume,
             !justdata);
-  ParsedJson dsimdjson = build_parsed_json(p);
+  simdjson::ParsedJson dsimdjson = simdjson::build_parsed_json(p);
   BEST_TIME("simdjson (just dom)  ", simdjson_justdom(dsimdjson).size(), size, , repeat,
             volume, !justdata);
   char *buffer = (char *)malloc(p.size());
diff --git a/benchmark/minifiercompetition.cpp b/benchmark/minifiercompetition.cpp
index 14d73a88..78d681c0 100644
--- a/benchmark/minifiercompetition.cpp
+++ b/benchmark/minifiercompetition.cpp
@@ -65,9 +65,9 @@ int main(int argc, char *argv[]) {
     exit(1);
   }
   const char * filename = argv[optind];
-  padded_string p;
+  simdjson::padded_string p;
   try {
-    get_corpus(filename).swap(p);
+    simdjson::get_corpus(filename).swap(p);
   } catch (const std::exception& e) { // caught by reference to base
     std::cout << "Could not load the file " << filename << std::endl;
     return EXIT_FAILURE;
diff --git a/benchmark/parse.cpp b/benchmark/parse.cpp
index 132de549..deaa8088 100644
--- a/benchmark/parse.cpp
+++ b/benchmark/parse.cpp
@@ -78,9 +78,9 @@ int main(int argc, char *argv[]) {
   if (verbose) {
     std::cout << "[verbose] loading " << filename << std::endl;
   }
-  padded_string p;
+  simdjson::padded_string p;
   try {
-    get_corpus(filename).swap(p);
+    simdjson::get_corpus(filename).swap(p);
   } catch (const std::exception &e) { // caught by reference to base
     std::cout << "Could not load the file " << filename << std::endl;
     return EXIT_FAILURE;
@@ -128,7 +128,7 @@ int main(int argc, char *argv[]) {
       std::cout << "[verbose] iteration # " << i << std::endl;
     }
     unified.start();
-    ParsedJson pj;
+    simdjson::ParsedJson pj;
     bool allocok = pj.allocateCapacity(p.size());
     if (!allocok) {
       std::cerr << "failed to allocate memory" << std::endl;
@@ -145,7 +145,7 @@ int main(int argc, char *argv[]) {
     }
     unified.start();
     // The default template is simdjson::instruction_set::native.
-    isok = (find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
+    isok = (simdjson::find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
     unified.end(results);
     cy1 += results[0];
     cl1 += results[1];
@@ -175,7 +175,7 @@ int main(int argc, char *argv[]) {
     if (verbose) {
       std::cout << "[verbose] iteration # " << i << std::endl;
     }
-    ParsedJson pj;
+    simdjson::ParsedJson pj;
     bool allocok = pj.allocateCapacity(p.size());
     if (!allocok) {
       std::cerr << "failed to allocate memory" << std::endl;
@@ -187,7 +187,7 @@ int main(int argc, char *argv[]) {
 
     auto start = std::chrono::steady_clock::now();
     // The default template is simdjson::instruction_set::native.
-    isok = (find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
+    isok = (simdjson::find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
     isok = isok && (simdjson::SUCCESS == unified_machine(p.data(), p.size(), pj));
     auto end = std::chrono::steady_clock::now();
     std::chrono::duration<double> secs = end - start;
@@ -198,7 +198,7 @@ int main(int argc, char *argv[]) {
       return EXIT_FAILURE;
     }
   }  
-  ParsedJson pj = build_parsed_json(p); // do the parsing again to get the stats
+  simdjson::ParsedJson pj = simdjson::build_parsed_json(p); // parse once more to get the stats; qualified explicitly instead of relying on ADL
   if (!pj.isValid()) {
     std::cerr << pj.getErrorMsg() << std::endl;
     std::cerr << "Could not parse. " << std::endl;
diff --git a/benchmark/parseandstatcompetition.cpp b/benchmark/parseandstatcompetition.cpp
index 04ba142b..138d3d9d 100644
--- a/benchmark/parseandstatcompetition.cpp
+++ b/benchmark/parseandstatcompetition.cpp
@@ -146,7 +146,7 @@ void sajson_traverse(stat_t &stats, const sajson::value &node) {
 }
 
 __attribute__ ((noinline))
-stat_t sasjon_computestats(const padded_string &p) {
+stat_t sasjon_computestats(const simdjson::padded_string &p) {
   stat_t answer;
   char *buffer = (char *)malloc(p.size());
   memcpy(buffer, p.data(), p.size());
@@ -204,7 +204,7 @@ void rapid_traverse(stat_t &stats, const rapidjson::Value &v) {
 }
 
 __attribute__ ((noinline))
-stat_t rapid_computestats(const padded_string &p) {
+stat_t rapid_computestats(const simdjson::padded_string &p) {
   stat_t answer;
   char *buffer = (char *)malloc(p.size() + 1);
   memcpy(buffer, p.data(), p.size());
@@ -253,9 +253,9 @@ int main(int argc, char *argv[]) {
   if (optind + 1 < argc) {
     std::cerr << "warning: ignoring everything after " << argv[optind + 1]  << std::endl;
   }
-  padded_string p;
+  simdjson::padded_string p;
   try {
-    get_corpus(filename).swap(p);
+    simdjson::get_corpus(filename).swap(p);
   } catch (const std::exception &e) { // caught by reference to base
     std::cout << "Could not load the file " << filename << std::endl;
     return EXIT_FAILURE;
diff --git a/benchmark/parsingcompetition.cpp b/benchmark/parsingcompetition.cpp
index da9a8b64..17297232 100644
--- a/benchmark/parsingcompetition.cpp
+++ b/benchmark/parsingcompetition.cpp
@@ -83,9 +83,9 @@ int main(int argc, char *argv[]) {
     std::cerr << "warning: ignoring everything after " << argv[optind + 1]
               << std::endl;
   }
-  padded_string p;
+  simdjson::padded_string p;
   try {
-    get_corpus(filename).swap(p);
+    simdjson::get_corpus(filename).swap(p);
   } catch (const std::exception &e) { // caught by reference to base
     std::cout << "Could not load the file " << filename << std::endl;
     return EXIT_FAILURE;
@@ -101,7 +101,7 @@ int main(int argc, char *argv[]) {
       std::cout << p.size() << " B ";
     std::cout << std::endl;
   }
-  ParsedJson pj;
+  simdjson::ParsedJson pj;
   bool allocok = pj.allocateCapacity(p.size(), 1024);
 
   if (!allocok) {
diff --git a/benchmark/statisticalmodel.cpp b/benchmark/statisticalmodel.cpp
index c2c06b83..3d6a1b1c 100644
--- a/benchmark/statisticalmodel.cpp
+++ b/benchmark/statisticalmodel.cpp
@@ -42,9 +42,9 @@ struct stat_s {
 
 using stat_t = struct stat_s;
 
-stat_t simdjson_computestats(const padded_string &p) {
+stat_t simdjson_computestats(const simdjson::padded_string &p) {
   stat_t answer;
-  ParsedJson pj = build_parsed_json(p);
+  simdjson::ParsedJson pj = simdjson::build_parsed_json(p);
   answer.valid = pj.isValid();
   if (!answer.valid) {
     return answer;
@@ -134,9 +134,9 @@ int main(int argc, char *argv[]) {
     std::cerr << "warning: ignoring everything after " << argv[optind + 1]
               << std::endl;
   }
-  padded_string p;
+  simdjson::padded_string p;
   try {
-    get_corpus(filename).swap(p);
+    simdjson::get_corpus(filename).swap(p);
   } catch (const std::exception &e) { // caught by reference to base
     std::cerr << "Could not load the file " << filename << std::endl;
     return EXIT_FAILURE;
@@ -163,7 +163,7 @@ int main(int argc, char *argv[]) {
          s.object_count, s.array_count, s.null_count, s.true_count,
          s.false_count, s.byte_count, s.structural_indexes_count);
 #ifdef __linux__
-  ParsedJson pj;
+  simdjson::ParsedJson pj;
   bool allocok = pj.allocateCapacity(p.size());
   if (!allocok) {
     std::cerr << "failed to allocate memory" << std::endl;
@@ -181,7 +181,7 @@ int main(int argc, char *argv[]) {
   for (uint32_t i = 0; i < iterations; i++) {
     unified.start();
     // The default template is simdjson::instruction_set::native.
-    bool isok = (find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
+    bool isok = (simdjson::find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
     unified.end(results);
     
     cy1 += results[0];
diff --git a/include/simdjson/jsoncharutils.h b/include/simdjson/jsoncharutils.h
index 5948c7d7..261f0e9e 100644
--- a/include/simdjson/jsoncharutils.h
+++ b/include/simdjson/jsoncharutils.h
@@ -4,6 +4,7 @@
 #include "simdjson/common_defs.h"
 #include "simdjson/parsedjson.h"
 
+namespace simdjson {
 // structural chars here are
 // they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL)
 // we are also interested in the four whitespace characters
@@ -293,5 +294,6 @@ inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
   // will return 0 when the code point was too large.
   return 0; // bad r
 }
+}
 
 #endif
diff --git a/include/simdjson/jsonformatutils.h b/include/simdjson/jsonformatutils.h
index 4373f108..8b86ec1c 100644
--- a/include/simdjson/jsonformatutils.h
+++ b/include/simdjson/jsonformatutils.h
@@ -5,6 +5,7 @@
 #include <iomanip>
 #include <iostream>
 
+namespace simdjson {
 // ends with zero char
 static inline void print_with_escapes(const unsigned char *src) {
   while (*src) {
@@ -195,6 +196,7 @@ static inline void print_with_escapes(const char *src, std::ostream &os,
                                       size_t len) {
   print_with_escapes(reinterpret_cast<const unsigned char *>(src), os, len);
 }
+}
 
 #
 #endif
diff --git a/include/simdjson/jsonioutil.h b/include/simdjson/jsonioutil.h
index e6c93fb0..407718d0 100644
--- a/include/simdjson/jsonioutil.h
+++ b/include/simdjson/jsonioutil.h
@@ -12,7 +12,7 @@
 #include "simdjson/padded_string.h"
 
 
-
+namespace simdjson {
 
 // load a file in memory...
 // get a corpus; pad out to cache line so we can always use SIMD
@@ -29,6 +29,6 @@
 //        std::cout << "Could not load the file " << filename << std::endl;
 //      }
 padded_string get_corpus(const std::string& filename);
-
+}
 
 #endif
diff --git a/include/simdjson/jsonminifier.h b/include/simdjson/jsonminifier.h
index 0a485247..a588338c 100644
--- a/include/simdjson/jsonminifier.h
+++ b/include/simdjson/jsonminifier.h
@@ -4,6 +4,7 @@
 #include <cstddef>
 #include <cstdint>
 
+namespace simdjson {
 // Take input from buf and remove useless whitespace, write it to out; buf and
 // out can be the same pointer. Result is null terminated,
 // return the string length (minus the null termination).
@@ -22,5 +23,5 @@ static inline size_t jsonminify(const std::string_view & p, char *out) {
 static inline size_t jsonminify(const padded_string & p, char *out) {
     return jsonminify(p.data(), p.size(), out);
 }
-
+}
 #endif
diff --git a/include/simdjson/jsonparser.h b/include/simdjson/jsonparser.h
index 03844660..a113077d 100644
--- a/include/simdjson/jsonparser.h
+++ b/include/simdjson/jsonparser.h
@@ -15,6 +15,7 @@
 #include <unistd.h>
 #endif
 
+namespace simdjson {
 // The function that users are expected to call is json_parse.
 // We have more than one such function because we want to support several 
 // instruction sets.
@@ -27,9 +28,9 @@ extern json_parse_functype *json_parse_ptr;
 
 
 // json_parse_implementation is the generic function, it is specialized for various 
-// SIMD instruction sets, e.g., as json_parse_implementation<simdjson::instruction_set::avx2>
-// or json_parse_implementation<simdjson::instruction_set::neon> 
-template<simdjson::instruction_set T>
+// SIMD instruction sets, e.g., as json_parse_implementation<instruction_set::avx2>
+// or json_parse_implementation<instruction_set::neon> 
+template<instruction_set T>
 int json_parse_implementation(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
   if (pj.bytecapacity < len) {
     return simdjson::CAPACITY;
@@ -200,7 +201,5 @@ WARN_UNUSED
 inline ParsedJson build_parsed_json(const padded_string &s) {
   return build_parsed_json(s.data(), s.length(), false);
 }
-
-
-
+}
 #endif
diff --git a/include/simdjson/numberparsing.h b/include/simdjson/numberparsing.h
index 76248e16..c5fca4ed 100644
--- a/include/simdjson/numberparsing.h
+++ b/include/simdjson/numberparsing.h
@@ -6,6 +6,7 @@
 #include "simdjson/parsedjson.h"
 #include "simdjson/portability.h"
 
+namespace simdjson {
 // Allowable floating-point values range from  std::numeric_limits<double>::lowest() 
 // to std::numeric_limits<double>::max(), so from 
 // -1.7976e308 all the way to 1.7975e308 in binary64. The lowest non-zero
@@ -553,5 +554,5 @@ static really_inline bool parse_number(const uint8_t *const buf,
   return  is_structural_or_whitespace(*p);
 #endif // SIMDJSON_SKIPNUMBERPARSING
 }
-
+}
 #endif
diff --git a/include/simdjson/padded_string.h b/include/simdjson/padded_string.h
index 3421e574..6c7e7e63 100644
--- a/include/simdjson/padded_string.h
+++ b/include/simdjson/padded_string.h
@@ -3,6 +3,8 @@
 #include "simdjson/portability.h"
 #include <memory>
 #include <cstring>
+
+namespace simdjson {
 // low-level function to allocate memory with padding so we can read passed the
 // "length" bytes safely. if you must provide a pointer to some data, create it
 // with this function: length is the max. size in bytes of the string caller is
@@ -63,5 +65,6 @@ private:
   size_t viable_size;
   char *data_ptr;
 };
+}
 
 #endif
diff --git a/include/simdjson/parsedjson.h b/include/simdjson/parsedjson.h
index 9c34c7c7..4662d653 100644
--- a/include/simdjson/parsedjson.h
+++ b/include/simdjson/parsedjson.h
@@ -15,7 +15,7 @@
 
 #define DEFAULTMAXDEPTH 1024// a JSON document with a depth exceeding 1024 is probably de facto invalid
 
-
+namespace simdjson {
 /************
  * The JSON is parsed to a tape, see the accompanying tape.md file
  * for documentation.
@@ -499,4 +499,5 @@ bool ParsedJson::iterator::next() {
     current_type = nexttype;
     return true;
 }
+}
 #endif
diff --git a/include/simdjson/portability.h b/include/simdjson/portability.h
index b4eab86c..2069cf72 100644
--- a/include/simdjson/portability.h
+++ b/include/simdjson/portability.h
@@ -7,6 +7,7 @@
 #include <iso646.h>
 #include <cstdint>
 
+namespace simdjson {
 static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
 	return _addcarry_u64(0, value1, value2, reinterpret_cast<unsigned __int64 *>(result));
 }
@@ -34,7 +35,7 @@ static inline int hamming(uint64_t input_num) {
 		__popcnt((uint32_t)(input_num >> 32)));
 #endif
 }
-
+}
 #else
 #include <cstdint>
 #include <cstdlib>
@@ -42,7 +43,7 @@ static inline int hamming(uint64_t input_num) {
 #if defined(__BMI2__) || defined(__POPCOUNT__) || defined(__AVX2__)
 #include <x86intrin.h>
 #endif
-
+namespace simdjson {
 static inline bool add_overflow(uint64_t  value1, uint64_t  value2, uint64_t *result) {
 	return __builtin_uaddll_overflow(value1, value2, (unsigned long long*)result);
 }
@@ -76,10 +77,11 @@ static inline int hamming(uint64_t input_num) {
 	return __builtin_popcountll(input_num);
 #endif
 }
-
+}
 #endif // _MSC_VER
 
 
+namespace simdjson {
 // portable version of  posix_memalign
 static inline void *aligned_malloc(size_t alignment, size_t size) {
 	void *p;
@@ -139,5 +141,6 @@ static inline void aligned_free(void *memblock) {
 static inline void aligned_free_char(char *memblock) {
 	aligned_free((void*)memblock);
 }
+}
 
 #endif // SIMDJSON_PORTABILITY_H
diff --git a/include/simdjson/simdjson.h b/include/simdjson/simdjson.h
index 7f88d635..9a16692d 100644
--- a/include/simdjson/simdjson.h
+++ b/include/simdjson/simdjson.h
@@ -3,46 +3,45 @@
 
 #include <string>
 
-struct simdjson {
-  enum class instruction_set {
-    avx2,
-    sse4_2,
-    neon,
-    none,
+namespace simdjson {
+enum class instruction_set {
+  avx2,
+  sse4_2,
+  neon,
+  none,
 // the 'native' enum class value should point at a good default on the current machine
 #ifdef __AVX2__
-    native = avx2
+  native = avx2
 #elif defined(__ARM_NEON)
-    native = neon
+  native = neon
 #else
-    // Let us assume that we have an old x64 processor, but one that has SSE (i.e., something
-    // that came out in the second decade of the XXIst century.
-    // It would be nicer to check explicitly, but there many not be a good way to do so
-    // that is cross-platform.
-    // Under Visual Studio, there is no way to check for SSE4.2 support at compile-time.
-    native = sse4_2
+  // Let us assume that we have an old x64 processor, but one that has SSE (i.e., something
+  // that came out in the second decade of the XXIst century).
+  // It would be nicer to check explicitly, but there may not be a good way to do so
+  // that is cross-platform.
+  // Under Visual Studio, there is no way to check for SSE4.2 support at compile-time.
+  native = sse4_2
 #endif
-  };
-
-  enum errorValues {
-    SUCCESS = 0,
-    CAPACITY, // This ParsedJson can't support a document that big
-    MEMALLOC, // Error allocating memory, most likely out of memory
-    TAPE_ERROR, // Something went wrong while writing to the tape (stage 2), this is a generic error
-    DEPTH_ERROR, // Your document exceeds the user-specified depth limitation
-    STRING_ERROR, // Problem while parsing a string
-    T_ATOM_ERROR, // Problem while parsing an atom starting with the letter 't'
-    F_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'f'
-    N_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'n'
-    NUMBER_ERROR, // Problem while parsing a number
-    UTF8_ERROR, // the input is not valid UTF-8
-    UNITIALIZED, // unknown error, or uninitialized document
-    EMPTY, // no structural document found
-    UNESCAPED_CHARS, // found unescaped characters in a string.
-    UNCLOSED_STRING, // missing quote at the end
-    UNEXPECTED_ERROR // indicative of a bug in simdjson
-  };
-  static const std::string& errorMsg(const int);
 };
 
+enum errorValues {
+  SUCCESS = 0,
+  CAPACITY, // This ParsedJson can't support a document that big
+  MEMALLOC, // Error allocating memory, most likely out of memory
+  TAPE_ERROR, // Something went wrong while writing to the tape (stage 2), this is a generic error
+  DEPTH_ERROR, // Your document exceeds the user-specified depth limitation
+  STRING_ERROR, // Problem while parsing a string
+  T_ATOM_ERROR, // Problem while parsing an atom starting with the letter 't'
+  F_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'f'
+  N_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'n'
+  NUMBER_ERROR, // Problem while parsing a number
+  UTF8_ERROR, // the input is not valid UTF-8
+  UNITIALIZED, // unknown error, or uninitialized document
+  EMPTY, // no structural document found
+  UNESCAPED_CHARS, // found unescaped characters in a string.
+  UNCLOSED_STRING, // missing quote at the end
+  UNEXPECTED_ERROR // indicative of a bug in simdjson
+};
+const std::string& errorMsg(const int);
+}
 #endif
diff --git a/include/simdjson/simdjson_version.h b/include/simdjson/simdjson_version.h
index 0a71b895..fd072b30 100644
--- a/include/simdjson/simdjson_version.h
+++ b/include/simdjson/simdjson_version.h
@@ -2,9 +2,11 @@
 #ifndef SIMDJSON_INCLUDE_SIMDJSON_VERSION 
 #define SIMDJSON_INCLUDE_SIMDJSON_VERSION 
 #define SIMDJSON_VERSION 0.1.2 
+namespace simdjson {
 enum { 
     SIMDJSON_VERSION_MAJOR = 0,  
     SIMDJSON_VERSION_MINOR = 1,  
     SIMDJSON_VERSION_REVISION = 2  
 }; 
+}
 #endif // SIMDJSON_INCLUDE_SIMDJSON_VERSION 
diff --git a/include/simdjson/simdprune_tables.h b/include/simdjson/simdprune_tables.h
index 64b56c22..43566fe7 100644
--- a/include/simdjson/simdprune_tables.h
+++ b/include/simdjson/simdprune_tables.h
@@ -3,8 +3,9 @@
 
 #include "simdjson/portability.h"
 
-#ifdef __AVX__
 
+#ifdef __AVX__
+namespace simdjson { // must open inside the __AVX__ guard: its closing brace is inside that guard too
 static const unsigned char mask128_epi8[] = {
     0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe,
     0xf, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe,
@@ -34984,13 +34985,13 @@ static const unsigned char mask128_epi32[] = {
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff,
 };
-
+}
 #endif //__SSE3__
 
 #ifdef __AVX2__
 
 #include <cstdint>
-
+namespace simdjson {
 static const uint32_t mask256_epi32[] = {
     0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 7, 0, 2, 3, 4, 5, 6, 7, 7, 2,
     3, 4, 5, 6, 7, 7, 7, 0, 1, 3, 4, 5, 6, 7, 7, 1, 3, 4, 5, 6, 7, 7, 7, 0, 3,
@@ -35074,6 +35075,7 @@ static const uint32_t mask256_epi32[] = {
     3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2,
     0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+}
 #endif //__AVX2__
 
 #endif
diff --git a/include/simdjson/simdutf8check.h b/include/simdjson/simdutf8check.h
index 31088829..fe198991 100644
--- a/include/simdjson/simdutf8check.h
+++ b/include/simdjson/simdutf8check.h
@@ -26,9 +26,8 @@
 
 // all byte values must be no larger than 0xF4
 
-
+namespace simdjson {
 #ifdef __AVX2__
-
 /*****************************/
 static inline __m256i push_last_byte_of_a_to_b(__m256i a, __m256i b) {
   return _mm256_alignr_epi8(b, _mm256_permute2x128_si256(a, b, 0x21), 15);
@@ -194,4 +193,5 @@ avxcheckUTF8Bytes(__m256i current_bytes,
 #else // __AVX2__
 #warning "We require AVX2 support!"
 #endif // __AVX2__
+}
 #endif
diff --git a/include/simdjson/stage1_find_marks.h b/include/simdjson/stage1_find_marks.h
index 2cf0e7d9..ca4598d0 100644
--- a/include/simdjson/stage1_find_marks.h
+++ b/include/simdjson/stage1_find_marks.h
@@ -32,6 +32,7 @@
 
 #define TRANSPOSE
 
+namespace simdjson {
 template<simdjson::instruction_set>
 struct simd_input;
 #ifdef __AVX2__
@@ -853,5 +854,5 @@ WARN_UNUSED
 int find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
   return find_structural_bits<T>(reinterpret_cast<const uint8_t *>(buf), len, pj);
 }
-
+}
 #endif
diff --git a/include/simdjson/stage2_build_tape.h b/include/simdjson/stage2_build_tape.h
index 44890dcf..214234d1 100644
--- a/include/simdjson/stage2_build_tape.h
+++ b/include/simdjson/stage2_build_tape.h
@@ -3,6 +3,7 @@
 
 #include "simdjson/common_defs.h"
 
+namespace simdjson {
 struct ParsedJson;
 
 void init_state_machine();
@@ -12,5 +13,6 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj);
 
 WARN_UNUSED
 int unified_machine(const char *buf, size_t len, ParsedJson &pj);
+}
 
 #endif
diff --git a/include/simdjson/stringparsing.h b/include/simdjson/stringparsing.h
index 1a68d0da..8e985dcf 100644
--- a/include/simdjson/stringparsing.h
+++ b/include/simdjson/stringparsing.h
@@ -5,7 +5,7 @@
 #include "simdjson/jsoncharutils.h"
 #include "simdjson/parsedjson.h"
 
-
+namespace simdjson {
 // begin copypasta
 // These chars yield themselves: " \ /
 // b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab
@@ -201,6 +201,6 @@ really_inline  bool parse_string(UNUSED const uint8_t *buf, UNUSED size_t len,
   return true;
 #endif // SIMDJSON_SKIPSTRINGPARSING
 }
-
+}
 
 #endif
diff --git a/src/jsonioutil.cpp b/src/jsonioutil.cpp
index 5ac2eb47..2e99e6e1 100644
--- a/src/jsonioutil.cpp
+++ b/src/jsonioutil.cpp
@@ -2,6 +2,7 @@
 #include <cstring>
 #include <cstdlib>
 
+namespace simdjson {
 char * allocate_padded_buffer(size_t length) {
     // we could do a simple malloc
     //return (char *) malloc(length + SIMDJSON_PADDING);
@@ -31,3 +32,4 @@ padded_string get_corpus(const std::string& filename) {
   }
   throw  std::runtime_error("could not load corpus");
 }
+}
diff --git a/src/jsonminifier.cpp b/src/jsonminifier.cpp
index 20872921..d4c14646 100644
--- a/src/jsonminifier.cpp
+++ b/src/jsonminifier.cpp
@@ -1,8 +1,9 @@
 #include "simdjson/portability.h"
 #include <cstdint>
+
 #ifndef __AVX2__
 
-
+namespace simdjson {
 static uint8_t jump_table[256 * 3] = {
     0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
     1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1,
@@ -56,12 +57,12 @@ size_t jsonminify(const unsigned char *bytes, size_t howmany,
   }
   return pos;
 }
-
+}
 #else
-
 #include "simdjson/simdprune_tables.h"
 #include <cstring>
 
+namespace simdjson {
 // a straightforward comparison of a mask against input.
 static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,
                                             __m256i mask) {
@@ -247,5 +248,5 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
   *out = '\0';// NULL termination
   return out - initout;
 }
-
+}
 #endif
diff --git a/src/jsonparser.cpp b/src/jsonparser.cpp
index c2cf26d9..538ca813 100644
--- a/src/jsonparser.cpp
+++ b/src/jsonparser.cpp
@@ -7,44 +7,44 @@
 #endif
 #include "simdjson/simdjson.h"
 
-
+namespace simdjson {
 // Responsible to select the best json_parse implementation
 int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded) {
   // Versions for each implementation
 #ifdef __AVX2__
-  json_parse_functype* avx_implementation = &json_parse_implementation<simdjson::instruction_set::avx2>;
+  json_parse_functype* avx_implementation = &json_parse_implementation<instruction_set::avx2>;
 #endif
 #ifdef __SSE4_2__
-  // json_parse_functype* sse4_2_implementation = &json_parse_implementation<simdjson::instruction_set::sse4_2>; // not implemented yet
+  // json_parse_functype* sse4_2_implementation = &json_parse_implementation<instruction_set::sse4_2>; // not implemented yet
 #endif
 #ifdef __ARM_NEON
-  json_parse_functype* neon_implementation = &json_parse_implementation<simdjson::instruction_set::neon>;
+  json_parse_functype* neon_implementation = &json_parse_implementation<instruction_set::neon>;
 #endif
 
   // Determining which implementation is the more suitable
   // Should be done at runtime. Does not make any sense on preprocessor.
 #ifdef __AVX2__
-  simdjson::instruction_set best_implementation = simdjson::instruction_set::avx2;
+  instruction_set best_implementation = instruction_set::avx2;
 #elif defined (__SSE4_2__)
-  simdjson::instruction_set best_implementation = simdjson::instruction_set::sse4_2;
+  instruction_set best_implementation = instruction_set::sse4_2;
 #elif defined (__ARM_NEON)
-  simdjson::instruction_set best_implementation = simdjson::instruction_set::neon;
+  instruction_set best_implementation = instruction_set::neon;
 #else
-  simdjson::instruction_set best_implementation = simdjson::instruction_set::none;
+  instruction_set best_implementation = instruction_set::none;
 #endif
   
   // Selecting the best implementation
   switch (best_implementation) {
 #ifdef __AVX2__
-  case simdjson::instruction_set::avx2 :
+  case instruction_set::avx2 :
     json_parse_ptr = avx_implementation;
     break;
 #elif defined (__SSE4_2__)
-  /*case simdjson::instruction_set::sse4_2 :
+  /*case instruction_set::sse4_2 :
     json_parse_ptr = sse4_2_implementation;
     break;*/
 #elif defined (__ARM_NEON)
-  case simdjson::instruction_set::neon :
+  case instruction_set::neon :
     json_parse_ptr = neon_implementation;
     break;
 #endif
@@ -69,3 +69,4 @@ ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneede
   }
   return pj;
 }
+}
\ No newline at end of file
diff --git a/src/parsedjson.cpp b/src/parsedjson.cpp
index b0719c78..d6e661c4 100644
--- a/src/parsedjson.cpp
+++ b/src/parsedjson.cpp
@@ -1,5 +1,6 @@
 #include "simdjson/parsedjson.h"
 
+namespace simdjson {
 ParsedJson::ParsedJson() : 
         structural_indexes(nullptr), tape(nullptr), containing_scope_offset(nullptr),
         ret_address(nullptr), string_buf(nullptr), current_string_buf_loc(nullptr) {}
@@ -97,7 +98,7 @@ int ParsedJson::getErrorCode() const {
 }
 
 std::string ParsedJson::getErrorMsg() const {
-  return simdjson::errorMsg(errorcode);
+  return errorMsg(errorcode);
 }
 
 void ParsedJson::deallocate() {
@@ -318,3 +319,4 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) {
     os << tapeidx << " : "<< type <<"\t// pointing to " << payload <<" (start root)\n";
     return true;
 }
+}
diff --git a/src/parsedjsoniterator.cpp b/src/parsedjsoniterator.cpp
index 72cafcc5..c92a1ce4 100644
--- a/src/parsedjsoniterator.cpp
+++ b/src/parsedjsoniterator.cpp
@@ -2,6 +2,7 @@
 #include "simdjson/common_defs.h"
 #include <iterator>
 
+namespace simdjson {
 ParsedJson::iterator::iterator(ParsedJson &pj_) : pj(pj_), depth(0), location(0), tape_length(0), depthindex(nullptr) {
         if(!pj.isValid()) {
             throw InvalidJSON();
@@ -92,3 +93,4 @@ bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const {
     }
     return true;
 }
+}
diff --git a/src/simdjson.cpp b/src/simdjson.cpp
index 7ba7950e..d77c3a5b 100644
--- a/src/simdjson.cpp
+++ b/src/simdjson.cpp
@@ -1,23 +1,25 @@
 #include <map>
 #include "simdjson/simdjson.h"
 
+namespace simdjson {
 const std::map<int, const std::string> errorStrings = {
-    {simdjson::SUCCESS, "No errors"},
-    {simdjson::CAPACITY, "This ParsedJson can't support a document that big"},
-    {simdjson::MEMALLOC, "Error allocating memory, we're most likely out of memory"},
-    {simdjson::TAPE_ERROR, "Something went wrong while writing to the tape"},
-    {simdjson::STRING_ERROR, "Problem while parsing a string"},
-    {simdjson::T_ATOM_ERROR, "Problem while parsing an atom starting with the letter 't'"},
-    {simdjson::F_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'f'"},
-    {simdjson::N_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'n'"},
-    {simdjson::NUMBER_ERROR, "Problem while parsing a number"},
-    {simdjson::UTF8_ERROR, "The input is not valid UTF-8"},
-    {simdjson::UNITIALIZED, "Unitialized"},
-    {simdjson::EMPTY, "Empty"},
-    {simdjson::UNESCAPED_CHARS, "Within strings, some characters must be escapted, we found unescapted characters"},
-    {simdjson::UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson"},
+    {SUCCESS, "No errors"},
+    {CAPACITY, "This ParsedJson can't support a document that big"},
+    {MEMALLOC, "Error allocating memory, we're most likely out of memory"},
+    {TAPE_ERROR, "Something went wrong while writing to the tape"},
+    {STRING_ERROR, "Problem while parsing a string"},
+    {T_ATOM_ERROR, "Problem while parsing an atom starting with the letter 't'"},
+    {F_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'f'"},
+    {N_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'n'"},
+    {NUMBER_ERROR, "Problem while parsing a number"},
+    {UTF8_ERROR, "The input is not valid UTF-8"},
+    {UNITIALIZED, "Unitialized"},
+    {EMPTY, "Empty"},
+    {UNESCAPED_CHARS, "Within strings, some characters must be escapted, we found unescapted characters"},
+    {UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson"},
 };
 
-const std::string& simdjson::errorMsg(const int errorCode) {
+const std::string& errorMsg(const int errorCode) {
     return errorStrings.at(errorCode);
-}
\ No newline at end of file
+}
+}
diff --git a/src/stage2_build_tape.cpp b/src/stage2_build_tape.cpp
index 550449a2..9b844cdd 100644
--- a/src/stage2_build_tape.cpp
+++ b/src/stage2_build_tape.cpp
@@ -11,6 +11,7 @@
 #include <iostream>
 #define PATH_SEP '/'
 
+namespace simdjson {
 
 WARN_UNUSED
 really_inline bool is_valid_true_atom(const uint8_t *loc) {
@@ -82,7 +83,7 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
   uint32_t depth = 0; // could have an arbitrary starting depth
   pj.init(); // sets isvalid to false
   if(pj.bytecapacity < len) {
-      pj.errorcode = simdjson::CAPACITY;
+      pj.errorcode = CAPACITY;
       return pj.errorcode;
   }
 // this macro reads the next structural character, updating idx, i and c.
@@ -526,7 +527,7 @@ succeed:
   pj.write_tape(pj.containing_scope_offset[depth], 'r'); // r is root
 
   pj.isvalid  = true;
-  pj.errorcode = simdjson::SUCCESS;
+  pj.errorcode = SUCCESS;
   return pj.errorcode;
 fail:
   // we do not need the next line because this is done by pj.init(), pessimistically.
@@ -537,12 +538,12 @@ fail:
   // We could even trigger special code paths to assess what happened carefully,
   // all without any added cost.
   if (depth >= pj.depthcapacity) {
-    pj.errorcode = simdjson::DEPTH_ERROR;
+    pj.errorcode = DEPTH_ERROR;
     return pj.errorcode;
   }
   switch(c) {
     case '"':
-      pj.errorcode = simdjson::STRING_ERROR; 
+      pj.errorcode = STRING_ERROR; 
       return pj.errorcode;
     case '0':
     case '1':
@@ -555,24 +556,25 @@ fail:
     case '8':
     case '9': 
     case '-': 
-      pj.errorcode = simdjson::NUMBER_ERROR;
+      pj.errorcode = NUMBER_ERROR;
       return pj.errorcode;
     case 't':
-      pj.errorcode = simdjson::T_ATOM_ERROR;
+      pj.errorcode = T_ATOM_ERROR;
       return pj.errorcode;
     case 'n':
-      pj.errorcode = simdjson::N_ATOM_ERROR;
+      pj.errorcode = N_ATOM_ERROR;
       return pj.errorcode;
     case 'f':
-      pj.errorcode = simdjson::F_ATOM_ERROR;
+      pj.errorcode = F_ATOM_ERROR;
       return pj.errorcode;
     default: 
       break;
   }
-  pj.errorcode = simdjson::TAPE_ERROR;
+  pj.errorcode = TAPE_ERROR;
   return pj.errorcode; 
 }
 
 int unified_machine(const char *buf, size_t len, ParsedJson &pj) {
   return unified_machine(reinterpret_cast<const uint8_t*>(buf), len, pj);
 }
+}
diff --git a/tests/allparserscheckfile.cpp b/tests/allparserscheckfile.cpp
index 1e7d83ca..d67cb96f 100644
--- a/tests/allparserscheckfile.cpp
+++ b/tests/allparserscheckfile.cpp
@@ -59,9 +59,9 @@ int main(int argc, char *argv[]) {
     exit(1);
   }
   const char *filename = argv[optind];
-  padded_string p;
+  simdjson::padded_string p;
   try {
-    get_corpus(filename).swap(p);
+    simdjson::get_corpus(filename).swap(p);
   } catch (const std::exception &e) { // caught by reference to base
     std::cout << "Could not load the file " << filename << std::endl;
     return EXIT_FAILURE;
@@ -76,7 +76,7 @@ int main(int argc, char *argv[]) {
       std::cout << p.size() << " B ";
     std::cout << std::endl;
   }
-  ParsedJson pj;
+  simdjson::ParsedJson pj;
   size_t maxdepth = 1024 * 4;
   bool allocok = pj.allocateCapacity(p.size(), maxdepth);
   if (!allocok) {
diff --git a/tests/basictests.cpp b/tests/basictests.cpp
index fd2eff5e..10bd0732 100644
--- a/tests/basictests.cpp
+++ b/tests/basictests.cpp
@@ -39,7 +39,7 @@ bool skyprophet_test() {
     if (maxsize < s.size())
       maxsize = s.size();
   }
-  ParsedJson pj;
+  simdjson::ParsedJson pj;
   if (!pj.allocateCapacity(maxsize)) {
     printf("allocation failure in skyprophet_test\n");
     return false;
diff --git a/tests/jsoncheck.cpp b/tests/jsoncheck.cpp
index d3d1dfaa..8646a5fc 100644
--- a/tests/jsoncheck.cpp
+++ b/tests/jsoncheck.cpp
@@ -65,14 +65,14 @@ bool validate(const char *dirname) {
       } else {
         strcpy(fullpath + dirlen, name);
       }
-      padded_string p;
+      simdjson::padded_string p;
       try {
-        get_corpus(fullpath).swap(p);
+        simdjson::get_corpus(fullpath).swap(p);
       } catch (const std::exception &e) {
         std::cerr << "Could not load the file " << fullpath << std::endl;
         return EXIT_FAILURE;
       }
-      ParsedJson pj;
+      simdjson::ParsedJson pj;
       bool allocok = pj.allocateCapacity(p.size(), 1024);
       if (!allocok) {
         std::cerr << "can't allocate memory" << std::endl;
diff --git a/tests/numberparsingcheck.cpp b/tests/numberparsingcheck.cpp
index 3fcc22f0..8cbfee88 100644
--- a/tests/numberparsingcheck.cpp
+++ b/tests/numberparsingcheck.cpp
@@ -132,15 +132,15 @@ bool validate(const char *dirname) {
       } else {
         strcpy(fullpath + dirlen, name);
       }
-      padded_string p;
+      simdjson::padded_string p;
       try {
-        get_corpus(fullpath).swap(p);
+        simdjson::get_corpus(fullpath).swap(p);
       } catch (const std::exception &e) {
         std::cout << "Could not load the file " << fullpath << std::endl;
         return EXIT_FAILURE;
       }
       // terrible hack but just to get it working
-      ParsedJson pj;
+      simdjson::ParsedJson pj;
       bool allocok = pj.allocateCapacity(p.size(), 1024);
       if (!allocok) {
         std::cerr << "can't allocate memory" << std::endl;
diff --git a/tests/stringparsingcheck.cpp b/tests/stringparsingcheck.cpp
index 88e3521d..5beec83b 100644
--- a/tests/stringparsingcheck.cpp
+++ b/tests/stringparsingcheck.cpp
@@ -325,14 +325,14 @@ bool validate(const char *dirname) {
       } else {
         strcpy(fullpath + dirlen, name);
       }
-      padded_string p;
+      simdjson::padded_string p;
       try {
-        get_corpus(fullpath).swap(p);
+        simdjson::get_corpus(fullpath).swap(p);
       } catch (const std::exception &e) {
         std::cout << "Could not load the file " << fullpath << std::endl;
         return EXIT_FAILURE;
       }
-      ParsedJson pj;
+      simdjson::ParsedJson pj;
       bool allocok = pj.allocateCapacity(p.size(), 1024);
       if (!allocok) {
         std::cerr << "can't allocate memory" << std::endl;
diff --git a/tools/json2json.cpp b/tools/json2json.cpp
index a01a6d8e..ebe4183b 100644
--- a/tools/json2json.cpp
+++ b/tools/json2json.cpp
@@ -5,7 +5,7 @@
 #include "simdjson/jsonioutil.h"
 #include "simdjson/jsonparser.h"
 
-void compute_dump(ParsedJson::iterator &pjh) {
+void compute_dump(simdjson::ParsedJson::iterator &pjh) {
   if (pjh.is_object()) {
     std::cout << "{";
     if (pjh.down()) {
@@ -72,26 +72,26 @@ int main(int argc, char *argv[]) {
   if (optind + 1 < argc) {
     std::cerr << "warning: ignoring everything after " << argv[optind + 1] << std::endl;
   }
-  padded_string p;
+  simdjson::padded_string p;
   try {
-    get_corpus(filename).swap(p);
+    simdjson::get_corpus(filename).swap(p);
   } catch (const std::exception &e) { // caught by reference to base
     std::cout << "Could not load the file " << filename << std::endl;
     return EXIT_FAILURE;
   }
-  ParsedJson pj;
+  simdjson::ParsedJson pj;
   bool allocok = pj.allocateCapacity(p.size(), 1024);
   if (!allocok) {
     std::cerr << "failed to allocate memory" << std::endl;
     return EXIT_FAILURE;
   }
-  int res = json_parse(p, pj); // do the parsing, return false on error
+  int res = simdjson::json_parse(p, pj); // do the parsing, return false on error
   if (res) {
     std::cerr << " Parsing failed. " << std::endl;
     return EXIT_FAILURE;
   }
   if (apidump) {
-    ParsedJson::iterator pjh(pj);
+    simdjson::ParsedJson::iterator pjh(pj);
     if (!pjh.isOk()) {
       std::cerr << " Could not iterate parsed result. " << std::endl;
       return EXIT_FAILURE;
diff --git a/tools/jsonstats.cpp b/tools/jsonstats.cpp
index 8e746209..ed826891 100644
--- a/tools/jsonstats.cpp
+++ b/tools/jsonstats.cpp
@@ -41,9 +41,9 @@ using stat_t = struct stat_s;
 
 
 
-stat_t simdjson_computestats(const padded_string &p) {
+stat_t simdjson_computestats(const simdjson::padded_string &p) {
   stat_t answer;
-  ParsedJson pj = build_parsed_json(p);
+  simdjson::ParsedJson pj = simdjson::build_parsed_json(p);
   answer.valid = pj.isValid();
   if (!answer.valid) {
     return answer;
@@ -125,9 +125,9 @@ int main(int argc, char *argv[]) {
   if (myoptind + 1 < argc) {
     std::cerr << "warning: ignoring everything after " << argv[myoptind + 1] << std::endl;
   }
-  padded_string p;
+  simdjson::padded_string p;
   try {
-    get_corpus(filename).swap(p);
+    simdjson::get_corpus(filename).swap(p);
   } catch (const std::exception &e) { // caught by reference to base
     std::cerr << "Could not load the file " << filename << std::endl;
     return EXIT_FAILURE;
diff --git a/tools/minify.cpp b/tools/minify.cpp
index 56263b78..b5940482 100644
--- a/tools/minify.cpp
+++ b/tools/minify.cpp
@@ -8,14 +8,14 @@ int main(int argc, char *argv[]) {
     std::cerr << "Usage: " << argv[0] << " <jsonfile>\n";
     exit(1);
   }
-  padded_string p;
+  simdjson::padded_string p;
   std::string filename = argv[argc - 1];
   try{
-    get_corpus(filename).swap(p);
+    simdjson::get_corpus(filename).swap(p);
   } catch (const std::exception& e) { 
         std::cout << "Could not load the file " << filename << std::endl;
         return EXIT_FAILURE;
   }
-  jsonminify(p, p.data());
+  simdjson::jsonminify(p, p.data());
   printf("%s",p.data());
 }

From 9a6a1461837baddb2726ec1b1f112fc11ba98a8f Mon Sep 17 00:00:00 2001
From: ioioioio <iodadi@gmail.com>
Date: Tue, 2 Jul 2019 15:32:48 -0400
Subject: [PATCH 2/4] Fixing amalgamation.sh

---
 amalgamation.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/amalgamation.sh b/amalgamation.sh
index 86293c55..de1d3f79 100755
--- a/amalgamation.sh
+++ b/amalgamation.sh
@@ -100,6 +100,7 @@ cat <<< '
 #include <iostream>
 #include "simdjson.h"
 #include "simdjson.cpp"
+using namespace simdjson;
 int main(int argc, char *argv[]) {
   const char * filename = argv[1];
   padded_string p = get_corpus(filename);

From b335af8507d3054cf89665a44593c6172f391abb Mon Sep 17 00:00:00 2001
From: ioioioio <iodadi@gmail.com>
Date: Tue, 2 Jul 2019 16:24:56 -0400
Subject: [PATCH 3/4] Attempt to solve some singleheader's problems

---
 tests/singleheadertest.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/singleheadertest.cpp b/tests/singleheadertest.cpp
index 87e93369..b75ec8ae 100644
--- a/tests/singleheadertest.cpp
+++ b/tests/singleheadertest.cpp
@@ -1,6 +1,8 @@
 #include "../singleheader/simdjson.h"
 #include <iostream>
 
+using namespace simdjson;
+
 int main() {
   const char *filename = JSON_TEST_PATH;
   padded_string p = get_corpus(filename);
@@ -13,7 +15,7 @@ int main() {
   }
   const int res = json_parse(p, pj);
   if (res) {
-    std::cerr << simdjson::errorMsg(res) << std::endl;
+    std::cerr << errorMsg(res) << std::endl;
     return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;

From 40c098f78a0cf275aeb74e8ee927bf685f1172df Mon Sep 17 00:00:00 2001
From: ioioioio <iodadi@gmail.com>
Date: Tue, 2 Jul 2019 16:37:05 -0400
Subject: [PATCH 4/4] updating single header

---
 singleheader/amalgamation_demo.cpp |    3 +-
 singleheader/simdjson.cpp          | 1154 ++++-----------------
 singleheader/simdjson.h            | 1551 +++++++++++++++++++++++++---
 3 files changed, 1594 insertions(+), 1114 deletions(-)

diff --git a/singleheader/amalgamation_demo.cpp b/singleheader/amalgamation_demo.cpp
index 862b2520..9f7ae23c 100644
--- a/singleheader/amalgamation_demo.cpp
+++ b/singleheader/amalgamation_demo.cpp
@@ -1,8 +1,9 @@
-/* auto-generated on Thu May  9 20:55:13 EDT 2019. Do not edit! */
+/* auto-generated on Tue 02 Jul 2019 04:34:44 PM EDT. Do not edit! */
 
 #include <iostream>
 #include "simdjson.h"
 #include "simdjson.cpp"
+using namespace simdjson;
 int main(int argc, char *argv[]) {
   const char * filename = argv[1];
   padded_string p = get_corpus(filename);
diff --git a/singleheader/simdjson.cpp b/singleheader/simdjson.cpp
index cd3ca432..f3857823 100644
--- a/singleheader/simdjson.cpp
+++ b/singleheader/simdjson.cpp
@@ -1,4 +1,4 @@
-/* auto-generated on Thu May  9 20:55:13 EDT 2019. Do not edit! */
+/* auto-generated on Tue 02 Jul 2019 04:34:44 PM EDT. Do not edit! */
 #include "simdjson.h"
 
 /* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */
@@ -6,10 +6,37 @@
 #include "dmalloc.h"
 #endif
 
+/* begin file src/simdjson.cpp */
+#include <map>
+
+namespace simdjson {
+const std::map<int, const std::string> errorStrings = {
+    {SUCCESS, "No errors"},
+    {CAPACITY, "This ParsedJson can't support a document that big"},
+    {MEMALLOC, "Error allocating memory, we're most likely out of memory"},
+    {TAPE_ERROR, "Something went wrong while writing to the tape"},
+    {STRING_ERROR, "Problem while parsing a string"},
+    {T_ATOM_ERROR, "Problem while parsing an atom starting with the letter 't'"},
+    {F_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'f'"},
+    {N_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'n'"},
+    {NUMBER_ERROR, "Problem while parsing a number"},
+    {UTF8_ERROR, "The input is not valid UTF-8"},
+    {UNITIALIZED, "Unitialized"},
+    {EMPTY, "Empty"},
+    {UNESCAPED_CHARS, "Within strings, some characters must be escapted, we found unescapted characters"},
+    {UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson"},
+};
+
+const std::string& errorMsg(const int errorCode) {
+    return errorStrings.at(errorCode);
+}
+}
+/* end file src/simdjson.cpp */
 /* begin file src/jsonioutil.cpp */
 #include <cstring>
 #include <cstdlib>
 
+namespace simdjson {
 char * allocate_padded_buffer(size_t length) {
     // we could do a simple malloc
     //return (char *) malloc(length + SIMDJSON_PADDING);
@@ -39,12 +66,14 @@ padded_string get_corpus(const std::string& filename) {
   }
   throw  std::runtime_error("could not load corpus");
 }
+}
 /* end file src/jsonioutil.cpp */
 /* begin file src/jsonminifier.cpp */
 #include <cstdint>
+
 #ifndef __AVX2__
 
-
+namespace simdjson {
 static uint8_t jump_table[256 * 3] = {
     0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
     1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1,
@@ -98,11 +127,11 @@ size_t jsonminify(const unsigned char *bytes, size_t howmany,
   }
   return pos;
 }
-
+}
 #else
-
 #include <cstring>
 
+namespace simdjson {
 // a straightforward comparison of a mask against input.
 static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,
                                             __m256i mask) {
@@ -288,7 +317,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
   *out = '\0';// NULL termination
   return out - initout;
 }
-
+}
 #endif
 /* end file src/jsonminifier.cpp */
 /* begin file src/jsonparser.cpp */
@@ -299,712 +328,71 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
 #include <unistd.h>
 #endif
 
-// parse a document found in buf, need to preallocate ParsedJson.
-WARN_UNUSED
-int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded) {
-  if (pj.bytecapacity < len) {
-    return simdjson::CAPACITY;
-  }
-  bool reallocated = false;
-  if(reallocifneeded) {
-#ifdef ALLOW_SAME_PAGE_BUFFER_OVERRUN
-	  // realloc is needed if the end of the memory crosses a page
-#ifdef _MSC_VER
-	  SYSTEM_INFO sysInfo; 
-	  GetSystemInfo(&sysInfo); 
-	  long pagesize = sysInfo.dwPageSize;
+namespace simdjson {
+// Responsible to select the best json_parse implementation
+int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded) {
+  // Versions for each implementation
+#ifdef __AVX2__
+  json_parse_functype* avx_implementation = &json_parse_implementation<instruction_set::avx2>;
+#endif
+#ifdef __SSE4_2__
+  // json_parse_functype* sse4_2_implementation = &json_parse_implementation<instruction_set::sse4_2>; // not implemented yet
+#endif
+#ifdef __ARM_NEON
+  json_parse_functype* neon_implementation = &json_parse_implementation<instruction_set::neon>;
+#endif
+
+  // Determining which implementation is the more suitable
+  // Should be done at runtime. Does not make any sense on preprocessor.
+#ifdef __AVX2__
+  instruction_set best_implementation = instruction_set::avx2;
+#elif defined (__SSE4_2__)
+  instruction_set best_implementation = instruction_set::sse4_2;
+#elif defined (__ARM_NEON)
+  instruction_set best_implementation = instruction_set::neon;
 #else
-    long pagesize = sysconf (_SC_PAGESIZE); 
+  instruction_set best_implementation = instruction_set::none;
 #endif
-  //////////////
-  // We want to check that buf + len - 1 and buf + len - 1 + SIMDJSON_PADDING
-  // are in the same page.
-  // That is, we want to check that  
-  // (buf + len - 1) / pagesize == (buf + len - 1 + SIMDJSON_PADDING) / pagesize
-  // That's true if (buf + len - 1) % pagesize + SIMDJSON_PADDING < pagesize.
-  ///////////
-	 if ( (reinterpret_cast<uintptr_t>(buf + len - 1) % pagesize ) + SIMDJSON_PADDING < static_cast<uintptr_t>(pagesize) ) {
-#else // SIMDJSON_SAFE_SAME_PAGE_READ_OVERRUN
-     if(true) { // if not SIMDJSON_SAFE_SAME_PAGE_READ_OVERRUN, we always reallocate
+  
+  // Selecting the best implementation
+  switch (best_implementation) {
+#ifdef __AVX2__
+  case instruction_set::avx2 :
+    json_parse_ptr = avx_implementation;
+    break;
+#elif defined (__SSE4_2__)
+  /*case instruction_set::sse4_2 :
+    json_parse_ptr = sse4_2_implementation;
+    break;*/
+#elif defined (__ARM_NEON)
+  case instruction_set::neon :
+    json_parse_ptr = neon_implementation;
+    break;
 #endif
-	   const uint8_t *tmpbuf  = buf;
-       buf = (uint8_t *) allocate_padded_buffer(len);
-       if(buf == NULL) return simdjson::MEMALLOC;
-       memcpy((void*)buf,tmpbuf,len);
-       reallocated = true;
-     }
+  default :
+    std::cerr << "No implemented simd instruction set supported" << std::endl;
+    return simdjson::UNEXPECTED_ERROR;
   }
-  // find_structural_bits returns a boolean, not an int, we invert its result to keep consistent with res == 0 meaning success
-  int res = !find_structural_bits(buf, len, pj);
-  if (!res) {
-    res = unified_machine(buf, len, pj);
-  }
-  if(reallocated) { aligned_free((void*)buf);}
-  return res;
+
+  return json_parse_ptr(buf, len, pj, reallocifneeded);
 }
 
+json_parse_functype *json_parse_ptr = &json_parse_dispatch;
+
 WARN_UNUSED
 ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneeded) {
   ParsedJson pj;
   bool ok = pj.allocateCapacity(len);
   if(ok) {
-    int res = json_parse(buf, len, pj, reallocifneeded);
-    ok = res == simdjson::SUCCESS;
-    assert(ok == pj.isValid());
+    json_parse(buf, len, pj, reallocifneeded);
   } else {
     std::cerr << "failure during memory allocation " << std::endl;
   }
   return pj;
 }
-/* end file src/jsonparser.cpp */
+}/* end file src/jsonparser.cpp */
 /* begin file src/stage1_find_marks.cpp */
-#include <cassert>
-
-
-#ifdef __AVX2__
-
-#ifndef SIMDJSON_SKIPUTF8VALIDATION
-#define SIMDJSON_UTF8VALIDATE
-
-#endif
-#else
-// currently we don't UTF8 validate for ARM
-// also we assume that if you're not __AVX2__ 
-// you're ARM, which is a bit dumb. TODO: Fix...
-#include <arm_neon.h>
-#endif
-
-// It seems that many parsers do UTF-8 validation.
-// RapidJSON does not do it by default, but a flag
-// allows it.
-#ifdef SIMDJSON_UTF8VALIDATE
-#endif
-
-#define TRANSPOSE
-
-struct simd_input {
-#ifdef __AVX2__
-  __m256i lo;
-  __m256i hi;
-#elif defined(__ARM_NEON)
-#ifndef TRANSPOSE
-  uint8x16_t i0;
-  uint8x16_t i1;
-  uint8x16_t i2;
-  uint8x16_t i3;
-#else
-  uint8x16x4_t i;
-#endif
-#else
-#error "It's called SIMDjson for a reason, bro"
-#endif
-};
-
-really_inline simd_input fill_input(const uint8_t * ptr) {
-  struct simd_input in;
-#ifdef __AVX2__
-  in.lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 0));
-  in.hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 32));
-#elif defined(__ARM_NEON)
-#ifndef TRANSPOSE
-  in.i0 = vld1q_u8(ptr + 0);
-  in.i1 = vld1q_u8(ptr + 16);
-  in.i2 = vld1q_u8(ptr + 32);
-  in.i3 = vld1q_u8(ptr + 48);
-#else
-  in.i = vld4q_u8(ptr);
-#endif
-#endif
-  return in;
-}
-
-#ifdef SIMDJSON_UTF8VALIDATE
-really_inline void check_utf8(simd_input in,
-                              __m256i &has_error,
-                              struct avx_processed_utf_bytes &previous) {
-  __m256i highbit = _mm256_set1_epi8(0x80);
-  if ((_mm256_testz_si256(_mm256_or_si256(in.lo, in.hi), highbit)) == 1) {
-    // it is ascii, we just check continuation
-    has_error = _mm256_or_si256(
-        _mm256_cmpgt_epi8(
-            previous.carried_continuations,
-            _mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
-                             9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1)),
-        has_error);
-  } else {
-    // it is not ascii so we have to do heavy work
-    previous = avxcheckUTF8Bytes(in.lo, &previous, &has_error);
-    previous = avxcheckUTF8Bytes(in.hi, &previous, &has_error);
-  }
-}
-#endif
-
-#ifdef __ARM_NEON
-uint16_t neonmovemask(uint8x16_t input) {
-  const uint8x16_t bitmask = { 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
-                               0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
-  uint8x16_t minput = vandq_u8(input, bitmask);
-  uint8x16_t tmp = vpaddq_u8(minput, minput);
-  tmp = vpaddq_u8(tmp, tmp);
-  tmp = vpaddq_u8(tmp, tmp);
-  return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0);
-}
-
-really_inline
-uint64_t neonmovemask_bulk(uint8x16_t p0, uint8x16_t p1, uint8x16_t p2, uint8x16_t p3) {
-#ifndef TRANSPOSE
-  const uint8x16_t bitmask = { 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
-                               0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
-  uint8x16_t t0 = vandq_u8(p0, bitmask);
-  uint8x16_t t1 = vandq_u8(p1, bitmask);
-  uint8x16_t t2 = vandq_u8(p2, bitmask);
-  uint8x16_t t3 = vandq_u8(p3, bitmask);
-  uint8x16_t sum0 = vpaddq_u8(t0, t1);
-  uint8x16_t sum1 = vpaddq_u8(t2, t3);
-  sum0 = vpaddq_u8(sum0, sum1);
-  sum0 = vpaddq_u8(sum0, sum0);
-  return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0);
-#else
-  const uint8x16_t bitmask1 = { 0x01, 0x10, 0x01, 0x10, 0x01, 0x10, 0x01, 0x10,
-                                0x01, 0x10, 0x01, 0x10, 0x01, 0x10, 0x01, 0x10};
-  const uint8x16_t bitmask2 = { 0x02, 0x20, 0x02, 0x20, 0x02, 0x20, 0x02, 0x20,
-                                0x02, 0x20, 0x02, 0x20, 0x02, 0x20, 0x02, 0x20};
-  const uint8x16_t bitmask3 = { 0x04, 0x40, 0x04, 0x40, 0x04, 0x40, 0x04, 0x40,
-                                0x04, 0x40, 0x04, 0x40, 0x04, 0x40, 0x04, 0x40};
-  const uint8x16_t bitmask4 = { 0x08, 0x80, 0x08, 0x80, 0x08, 0x80, 0x08, 0x80,
-                                0x08, 0x80, 0x08, 0x80, 0x08, 0x80, 0x08, 0x80};
-#if 0
-  uint8x16_t t0 = vandq_u8(p0, bitmask1);
-  uint8x16_t t1 = vandq_u8(p1, bitmask2);
-  uint8x16_t t2 = vandq_u8(p2, bitmask3);
-  uint8x16_t t3 = vandq_u8(p3, bitmask4);
-  uint8x16_t tmp = vorrq_u8(vorrq_u8(t0, t1), vorrq_u8(t2, t3));
-#else
-  uint8x16_t t0 = vandq_u8(p0, bitmask1);
-  uint8x16_t t1 = vbslq_u8(bitmask2, p1, t0);
-  uint8x16_t t2 = vbslq_u8(bitmask3, p2, t1);
-  uint8x16_t tmp = vbslq_u8(bitmask4, p3, t2);
-#endif
-  uint8x16_t sum = vpaddq_u8(tmp, tmp);
-  return vgetq_lane_u64(vreinterpretq_u64_u8(sum), 0);
-#endif
-}
-#endif
-
-// a straightforward comparison of a mask against input. 5 uops; would be
-// cheaper in AVX512.
-really_inline uint64_t cmp_mask_against_input(simd_input in, uint8_t m) {
-#ifdef __AVX2__
-  const __m256i mask = _mm256_set1_epi8(m);
-  __m256i cmp_res_0 = _mm256_cmpeq_epi8(in.lo, mask);
-  uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
-  __m256i cmp_res_1 = _mm256_cmpeq_epi8(in.hi, mask);
-  uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
-  return res_0 | (res_1 << 32);
-#elif defined(__ARM_NEON)
-  const uint8x16_t mask = vmovq_n_u8(m); 
-  uint8x16_t cmp_res_0 = vceqq_u8(in.i.val[0], mask); 
-  uint8x16_t cmp_res_1 = vceqq_u8(in.i.val[1], mask); 
-  uint8x16_t cmp_res_2 = vceqq_u8(in.i.val[2], mask); 
-  uint8x16_t cmp_res_3 = vceqq_u8(in.i.val[3], mask); 
-  return neonmovemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3);
-#endif
-}
-
-// find all values less than or equal than the content of maxval (using unsigned arithmetic) 
-really_inline uint64_t unsigned_lteq_against_input(simd_input in, uint8_t m) {
-#ifdef __AVX2__
-  const __m256i maxval = _mm256_set1_epi8(m);
-  __m256i cmp_res_0 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval,in.lo),maxval);
-  uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
-  __m256i cmp_res_1 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval,in.hi),maxval);
-  uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
-  return res_0 | (res_1 << 32);
-#elif defined(__ARM_NEON)
-  const uint8x16_t mask = vmovq_n_u8(m); 
-  uint8x16_t cmp_res_0 = vcleq_u8(in.i.val[0], mask); 
-  uint8x16_t cmp_res_1 = vcleq_u8(in.i.val[1], mask); 
-  uint8x16_t cmp_res_2 = vcleq_u8(in.i.val[2], mask); 
-  uint8x16_t cmp_res_3 = vcleq_u8(in.i.val[3], mask); 
-  return neonmovemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3);
-#endif
-}
-
-// return a bitvector indicating where we have characters that end an odd-length
-// sequence of backslashes (and thus change the behavior of the next character
-// to follow). A even-length sequence of backslashes, and, for that matter, the
-// largest even-length prefix of our odd-length sequence of backslashes, simply
-// modify the behavior of the backslashes themselves.
-// We also update the prev_iter_ends_odd_backslash reference parameter to
-// indicate whether we end an iteration on an odd-length sequence of
-// backslashes, which modifies our subsequent search for odd-length
-// sequences of backslashes in an obvious way.
-really_inline uint64_t
-find_odd_backslash_sequences(simd_input in,
-                             uint64_t &prev_iter_ends_odd_backslash) {
-  const uint64_t even_bits = 0x5555555555555555ULL;
-  const uint64_t odd_bits = ~even_bits;
-  uint64_t bs_bits = cmp_mask_against_input(in, '\\');
-  uint64_t start_edges = bs_bits & ~(bs_bits << 1);
-  // flip lowest if we have an odd-length run at the end of the prior
-  // iteration
-  uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
-  uint64_t even_starts = start_edges & even_start_mask;
-  uint64_t odd_starts = start_edges & ~even_start_mask;
-  uint64_t even_carries = bs_bits + even_starts;
-
-  uint64_t odd_carries;
-  // must record the carry-out of our odd-carries out of bit 63; this
-  // indicates whether the sense of any edge going to the next iteration
-  // should be flipped
-  bool iter_ends_odd_backslash =
-      add_overflow(bs_bits, odd_starts, &odd_carries);
-
-  odd_carries |=
-      prev_iter_ends_odd_backslash;  // push in bit zero as a potential end
-                                     // if we had an odd-numbered run at the
-                                     // end of the previous iteration
-  prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
-  uint64_t even_carry_ends = even_carries & ~bs_bits;
-  uint64_t odd_carry_ends = odd_carries & ~bs_bits;
-  uint64_t even_start_odd_end = even_carry_ends & odd_bits;
-  uint64_t odd_start_even_end = odd_carry_ends & even_bits;
-  uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
-  return odd_ends;
-}
-
-// return both the quote mask (which is a half-open mask that covers the first
-// quote
-// in an unescaped quote pair and everything in the quote pair) and the quote
-// bits, which are the simple
-// unescaped quoted bits. We also update the prev_iter_inside_quote value to
-// tell the next iteration
-// whether we finished the final iteration inside a quote pair; if so, this
-// inverts our behavior of
-// whether we're inside quotes for the next iteration.
-// Note that we don't do any error checking to see if we have backslash
-// sequences outside quotes; these
-// backslash sequences (of any length) will be detected elsewhere.
-really_inline uint64_t find_quote_mask_and_bits(simd_input in, uint64_t odd_ends,
-    uint64_t &prev_iter_inside_quote, uint64_t &quote_bits, uint64_t &error_mask) {
-  quote_bits = cmp_mask_against_input(in, '"');
-  quote_bits = quote_bits & ~odd_ends;
-  // remove from the valid quoted region the unescapted characters.
-#ifdef __AVX2__
-  uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
-      _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
-#elif defined(__ARM_NEON)
-  uint64_t quote_mask = vmull_p64( -1ULL, quote_bits);
-#endif
-  quote_mask ^= prev_iter_inside_quote;
-  // All Unicode characters may be placed within the
-  // quotation marks, except for the characters that MUST be escaped:
-  // quotation mark, reverse solidus, and the control characters (U+0000
-  //through U+001F).
-  // https://tools.ietf.org/html/rfc8259
-  uint64_t unescaped = unsigned_lteq_against_input(in, 0x1F);
-  error_mask |= quote_mask & unescaped;
-  // right shift of a signed value expected to be well-defined and standard
-  // compliant as of C++20,
-  // John Regher from Utah U. says this is fine code
-  prev_iter_inside_quote =
-      static_cast<uint64_t>(static_cast<int64_t>(quote_mask) >> 63);
-  return quote_mask;
-}
-
-really_inline void find_whitespace_and_structurals(simd_input in,
-                                                   uint64_t &whitespace,
-                                                   uint64_t &structurals) {
-  // do a 'shufti' to detect structural JSON characters
-  // they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
-  // these go into the first 3 buckets of the comparison (1/2/4)
-
-  // we are also interested in the four whitespace characters
-  // space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
-  // these go into the next 2 buckets of the comparison (8/16)
-#ifdef __AVX2__
-  const __m256i low_nibble_mask = _mm256_setr_epi8(
-      16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 
-      16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
-  const __m256i high_nibble_mask = _mm256_setr_epi8(
-      8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 
-      8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0);
-
-  __m256i structural_shufti_mask = _mm256_set1_epi8(0x7);
-  __m256i whitespace_shufti_mask = _mm256_set1_epi8(0x18);
-
-  __m256i v_lo = _mm256_and_si256(
-      _mm256_shuffle_epi8(low_nibble_mask, in.lo),
-      _mm256_shuffle_epi8(high_nibble_mask,
-                          _mm256_and_si256(_mm256_srli_epi32(in.lo, 4),
-                                           _mm256_set1_epi8(0x7f))));
-
-  __m256i v_hi = _mm256_and_si256(
-      _mm256_shuffle_epi8(low_nibble_mask, in.hi),
-      _mm256_shuffle_epi8(high_nibble_mask,
-                          _mm256_and_si256(_mm256_srli_epi32(in.hi, 4),
-                                           _mm256_set1_epi8(0x7f))));
-  __m256i tmp_lo = _mm256_cmpeq_epi8(
-      _mm256_and_si256(v_lo, structural_shufti_mask), _mm256_set1_epi8(0));
-  __m256i tmp_hi = _mm256_cmpeq_epi8(
-      _mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0));
-
-  uint64_t structural_res_0 =
-      static_cast<uint32_t>(_mm256_movemask_epi8(tmp_lo));
-  uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
-  structurals = ~(structural_res_0 | (structural_res_1 << 32));
-
-  __m256i tmp_ws_lo = _mm256_cmpeq_epi8(
-      _mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0));
-  __m256i tmp_ws_hi = _mm256_cmpeq_epi8(
-      _mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
-
-  uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
-  uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
-  whitespace = ~(ws_res_0 | (ws_res_1 << 32));
-#elif defined(__ARM_NEON)
-#ifndef FUNKY_BAD_TABLE
-  const uint8x16_t low_nibble_mask = (uint8x16_t){ 
-      16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0};
-  const uint8x16_t high_nibble_mask = (uint8x16_t){ 
-      8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0};
-  const uint8x16_t structural_shufti_mask = vmovq_n_u8(0x7); 
-  const uint8x16_t whitespace_shufti_mask = vmovq_n_u8(0x18); 
-  const uint8x16_t low_nib_and_mask = vmovq_n_u8(0xf); 
-
-  uint8x16_t nib_0_lo = vandq_u8(in.i.val[0], low_nib_and_mask);
-  uint8x16_t nib_0_hi = vshrq_n_u8(in.i.val[0], 4);
-  uint8x16_t shuf_0_lo = vqtbl1q_u8(low_nibble_mask, nib_0_lo);
-  uint8x16_t shuf_0_hi = vqtbl1q_u8(high_nibble_mask, nib_0_hi);
-  uint8x16_t v_0 = vandq_u8(shuf_0_lo, shuf_0_hi);
-
-  uint8x16_t nib_1_lo = vandq_u8(in.i.val[1], low_nib_and_mask);
-  uint8x16_t nib_1_hi = vshrq_n_u8(in.i.val[1], 4);
-  uint8x16_t shuf_1_lo = vqtbl1q_u8(low_nibble_mask, nib_1_lo);
-  uint8x16_t shuf_1_hi = vqtbl1q_u8(high_nibble_mask, nib_1_hi);
-  uint8x16_t v_1 = vandq_u8(shuf_1_lo, shuf_1_hi);
-
-  uint8x16_t nib_2_lo = vandq_u8(in.i.val[2], low_nib_and_mask);
-  uint8x16_t nib_2_hi = vshrq_n_u8(in.i.val[2], 4);
-  uint8x16_t shuf_2_lo = vqtbl1q_u8(low_nibble_mask, nib_2_lo);
-  uint8x16_t shuf_2_hi = vqtbl1q_u8(high_nibble_mask, nib_2_hi);
-  uint8x16_t v_2 = vandq_u8(shuf_2_lo, shuf_2_hi);
-
-  uint8x16_t nib_3_lo = vandq_u8(in.i.val[3], low_nib_and_mask);
-  uint8x16_t nib_3_hi = vshrq_n_u8(in.i.val[3], 4);
-  uint8x16_t shuf_3_lo = vqtbl1q_u8(low_nibble_mask, nib_3_lo);
-  uint8x16_t shuf_3_hi = vqtbl1q_u8(high_nibble_mask, nib_3_hi);
-  uint8x16_t v_3 = vandq_u8(shuf_3_lo, shuf_3_hi);
-
-  uint8x16_t tmp_0 = vtstq_u8(v_0, structural_shufti_mask);
-  uint8x16_t tmp_1 = vtstq_u8(v_1, structural_shufti_mask);
-  uint8x16_t tmp_2 = vtstq_u8(v_2, structural_shufti_mask);
-  uint8x16_t tmp_3 = vtstq_u8(v_3, structural_shufti_mask);
-  structurals = neonmovemask_bulk(tmp_0, tmp_1, tmp_2, tmp_3);
-
-  uint8x16_t tmp_ws_0 = vtstq_u8(v_0, whitespace_shufti_mask);
-  uint8x16_t tmp_ws_1 = vtstq_u8(v_1, whitespace_shufti_mask);
-  uint8x16_t tmp_ws_2 = vtstq_u8(v_2, whitespace_shufti_mask);
-  uint8x16_t tmp_ws_3 = vtstq_u8(v_3, whitespace_shufti_mask);
-  whitespace = neonmovemask_bulk(tmp_ws_0, tmp_ws_1, tmp_ws_2, tmp_ws_3);
-#else
-  // I think this one is garbage. In order to save the expense
-  // of another shuffle, I use an equally expensive shift, and 
-  // this gets glued to the end of the dependency chain. Seems a bit
-  // slower for no good reason.
-  //
-  // need to use a weird arrangement. Bytes in this bitvector
-  // are in conventional order, but bits are reversed as we are
-  // using a signed left shift (that is a +ve value from 0..7) to
-  // shift upwards to 0x80 in the bit. So we need to reverse bits.
-  
-  // note no structural/whitespace has the high bit on
-  // so it's OK to put the high 5 bits into our TBL shuffle
-  //
-
-  // structurals are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
-  // or in 5 bit, 3 bit form thats
-  // (15,3) (15, 5) (7,2) (11,3) (11,5) (5,4) 
-  // bit-reversing (subtract low 3 bits from 7) yields:
-  // (15,4) (15, 2) (7,5) (11,4) (11,2) (5,3) 
-  
-  const uint8x16_t structural_bitvec = (uint8x16_t){ 
-      0, 0, 0, 0, 
-      0, 8, 0, 32, 
-      0, 0, 0, 20, 
-      0, 0, 0, 20};
-  // we are also interested in the four whitespace characters
-  // space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
-  // (4,0) (1, 2) (1, 1) (1, 5)
-  // bit-reversing (subtract low 3 bits from 7) yields:
-  // (4,7) (1, 5) (1, 6) (1, 2)
-  
-  const uint8x16_t whitespace_bitvec = (uint8x16_t){ 
-      0, 100, 0, 0, 
-      128, 0, 0, 0, 
-      0, 0, 0, 0, 
-      0, 0, 0, 0};
-  const uint8x16_t low_3bits_and_mask = vmovq_n_u8(0x7); 
-  const uint8x16_t high_1bit_tst_mask = vmovq_n_u8(0x80); 
-
-  int8x16_t low_3bits_0 = vreinterpretq_s8_u8(vandq_u8(in.i.val[0], low_3bits_and_mask));
-  uint8x16_t high_5bits_0 = vshrq_n_u8(in.i.val[0], 3);
-  uint8x16_t shuffle_structural_0 = vshlq_u8(vqtbl1q_u8(structural_bitvec, high_5bits_0), low_3bits_0);
-  uint8x16_t shuffle_ws_0 = vshlq_u8(vqtbl1q_u8(whitespace_bitvec, high_5bits_0), low_3bits_0);
-  uint8x16_t tmp_0 = vtstq_u8(shuffle_structural_0, high_1bit_tst_mask);
-  uint8x16_t tmp_ws_0 = vtstq_u8(shuffle_ws_0, high_1bit_tst_mask);
-
-  int8x16_t low_3bits_1 = vreinterpretq_s8_u8(vandq_u8(in.i.val[1], low_3bits_and_mask));
-  uint8x16_t high_5bits_1 = vshrq_n_u8(in.i.val[1], 3);
-  uint8x16_t shuffle_structural_1 = vshlq_u8(vqtbl1q_u8(structural_bitvec, high_5bits_1), low_3bits_1);
-  uint8x16_t shuffle_ws_1 = vshlq_u8(vqtbl1q_u8(whitespace_bitvec, high_5bits_1), low_3bits_1);
-  uint8x16_t tmp_1 = vtstq_u8(shuffle_structural_1, high_1bit_tst_mask);
-  uint8x16_t tmp_ws_1 = vtstq_u8(shuffle_ws_1, high_1bit_tst_mask);
-
-  int8x16_t low_3bits_2 = vreinterpretq_s8_u8(vandq_u8(in.i.val[2], low_3bits_and_mask));
-  uint8x16_t high_5bits_2 = vshrq_n_u8(in.i.val[2], 3);
-  uint8x16_t shuffle_structural_2 = vshlq_u8(vqtbl1q_u8(structural_bitvec, high_5bits_2), low_3bits_2);
-  uint8x16_t shuffle_ws_2 = vshlq_u8(vqtbl1q_u8(whitespace_bitvec, high_5bits_2), low_3bits_2);
-  uint8x16_t tmp_2 = vtstq_u8(shuffle_structural_2, high_1bit_tst_mask);
-  uint8x16_t tmp_ws_2 = vtstq_u8(shuffle_ws_2, high_1bit_tst_mask);
-
-  int8x16_t low_3bits_3 = vreinterpretq_s8_u8(vandq_u8(in.i.val[3], low_3bits_and_mask));
-  uint8x16_t high_5bits_3 = vshrq_n_u8(in.i.val[3], 3);
-  uint8x16_t shuffle_structural_3 = vshlq_u8(vqtbl1q_u8(structural_bitvec, high_5bits_3), low_3bits_3);
-  uint8x16_t shuffle_ws_3 = vshlq_u8(vqtbl1q_u8(whitespace_bitvec, high_5bits_3), low_3bits_3);
-  uint8x16_t tmp_3 = vtstq_u8(shuffle_structural_3, high_1bit_tst_mask);
-  uint8x16_t tmp_ws_3 = vtstq_u8(shuffle_ws_3, high_1bit_tst_mask);
-
-  structurals = neonmovemask_bulk(tmp_0, tmp_1, tmp_2, tmp_3);
-  whitespace = neonmovemask_bulk(tmp_ws_0, tmp_ws_1, tmp_ws_2, tmp_ws_3);
-#endif
-
-#endif
-}
-
-// flatten out values in 'bits' assuming that they are are to have values of idx
-// plus their position in the bitvector, and store these indexes at
-// base_ptr[base] incrementing base as we go
-// will potentially store extra values beyond end of valid bits, so base_ptr
-// needs to be large enough to handle this
-really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
-                                uint32_t idx, uint64_t bits) {
-  uint32_t cnt = hamming(bits);
-  uint32_t next_base = base + cnt;
-  while (bits != 0u) {
-    base_ptr[base + 0] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(bits);
-    bits = bits & (bits - 1);
-    base_ptr[base + 1] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(bits);
-    bits = bits & (bits - 1);
-    base_ptr[base + 2] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(bits);
-    bits = bits & (bits - 1);
-    base_ptr[base + 3] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(bits);
-    bits = bits & (bits - 1);
-    base_ptr[base + 4] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(bits);
-    bits = bits & (bits - 1);
-    base_ptr[base + 5] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(bits);
-    bits = bits & (bits - 1);
-    base_ptr[base + 6] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(bits);
-    bits = bits & (bits - 1);
-    base_ptr[base + 7] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(bits);
-    bits = bits & (bits - 1);
-    base += 8;
-  }
-  base = next_base;
-}
-
-// return a updated structural bit vector with quoted contents cleared out and
-// pseudo-structural characters added to the mask
-// updates prev_iter_ends_pseudo_pred which tells us whether the previous
-// iteration ended on a whitespace or a structural character (which means that
-// the next iteration
-// will have a pseudo-structural character at its start)
-really_inline uint64_t finalize_structurals(
-    uint64_t structurals, uint64_t whitespace, uint64_t quote_mask,
-    uint64_t quote_bits, uint64_t &prev_iter_ends_pseudo_pred) {
-  // mask off anything inside quotes
-  structurals &= ~quote_mask;
-  // add the real quote bits back into our bitmask as well, so we can
-  // quickly traverse the strings we've spent all this trouble gathering
-  structurals |= quote_bits;
-  // Now, establish "pseudo-structural characters". These are non-whitespace
-  // characters that are (a) outside quotes and (b) have a predecessor that's
-  // either whitespace or a structural character. This means that subsequent
-  // passes will get a chance to encounter the first character of every string
-  // of non-whitespace and, if we're parsing an atom like true/false/null or a
-  // number we can stop at the first whitespace or structural character
-  // following it.
-
-  // a qualified predecessor is something that can happen 1 position before an
-  // psuedo-structural character
-  uint64_t pseudo_pred = structurals | whitespace;
-
-  uint64_t shifted_pseudo_pred =
-      (pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
-  prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
-  uint64_t pseudo_structurals =
-      shifted_pseudo_pred & (~whitespace) & (~quote_mask);
-  structurals |= pseudo_structurals;
-
-  // now, we've used our close quotes all we need to. So let's switch them off
-  // they will be off in the quote mask and on in quote bits.
-  structurals &= ~(quote_bits & ~quote_mask);
-  return structurals;
-}
-
-WARN_UNUSED
-/*never_inline*/ bool find_structural_bits(const uint8_t *buf, size_t len,
-                                           ParsedJson &pj) {
-  if (len > pj.bytecapacity) {
-    std::cerr << "Your ParsedJson object only supports documents up to "
-         << pj.bytecapacity << " bytes but you are trying to process " << len
-         << " bytes" << std::endl;
-    return false;
-  }
-  uint32_t *base_ptr = pj.structural_indexes;
-  uint32_t base = 0;
-#ifdef SIMDJSON_UTF8VALIDATE
-  __m256i has_error = _mm256_setzero_si256();
-  struct avx_processed_utf_bytes previous {};
-  previous.rawbytes = _mm256_setzero_si256();
-  previous.high_nibbles = _mm256_setzero_si256();
-  previous.carried_continuations = _mm256_setzero_si256();
-#endif
-
-  // we have padded the input out to 64 byte multiple with the remainder being
-  // zeros
-
-  // persistent state across loop
-  // does the last iteration end with an odd-length sequence of backslashes? 
-  // either 0 or 1, but a 64-bit value
-  uint64_t prev_iter_ends_odd_backslash = 0ULL;
-  // does the previous iteration end inside a double-quote pair?
-  uint64_t prev_iter_inside_quote = 0ULL;  // either all zeros or all ones
-  // does the previous iteration end on something that is a predecessor of a
-  // pseudo-structural character - i.e. whitespace or a structural character
-  // effectively the very first char is considered to follow "whitespace" for
-  // the
-  // purposes of pseudo-structural character detection so we initialize to 1
-  uint64_t prev_iter_ends_pseudo_pred = 1ULL;
-
-  // structurals are persistent state across loop as we flatten them on the
-  // subsequent iteration into our array pointed to be base_ptr.
-  // This is harmless on the first iteration as structurals==0
-  // and is done for performance reasons; we can hide some of the latency of the
-  // expensive carryless multiply in the previous step with this work
-  uint64_t structurals = 0;
-
-  size_t lenminus64 = len < 64 ? 0 : len - 64;
-  size_t idx = 0;
-  uint64_t error_mask = 0; // for unescaped characters within strings (ASCII code points < 0x20)
-
-  for (; idx < lenminus64; idx += 64) {
-#ifndef _MSC_VER
-    __builtin_prefetch(buf + idx + 128);
-#endif
-    simd_input in = fill_input(buf+idx);
-#ifdef SIMDJSON_UTF8VALIDATE
-    check_utf8(in, has_error, previous);
-#endif
-    // detect odd sequences of backslashes
-    uint64_t odd_ends = find_odd_backslash_sequences(
-        in, prev_iter_ends_odd_backslash);
-
-    // detect insides of quote pairs ("quote_mask") and also our quote_bits
-    // themselves
-    uint64_t quote_bits;
-    uint64_t quote_mask = find_quote_mask_and_bits(
-        in, odd_ends, prev_iter_inside_quote, quote_bits, error_mask);
-
-    // take the previous iterations structural bits, not our current iteration,
-    // and flatten
-    flatten_bits(base_ptr, base, idx, structurals);
-
-    uint64_t whitespace;
-    find_whitespace_and_structurals(in, whitespace, structurals);
-
-    // fixup structurals to reflect quotes and add pseudo-structural characters
-    structurals = finalize_structurals(structurals, whitespace, quote_mask,
-                                       quote_bits, prev_iter_ends_pseudo_pred);
-  }
-
-  ////////////////
-  /// we use a giant copy-paste which is ugly.
-  /// but otherwise the string needs to be properly padded or else we
-  /// risk invalidating the UTF-8 checks.
-  ////////////
-  if (idx < len) {
-    uint8_t tmpbuf[64];
-    memset(tmpbuf, 0x20, 64);
-    memcpy(tmpbuf, buf + idx, len - idx);
-    simd_input in = fill_input(tmpbuf);
-#ifdef SIMDJSON_UTF8VALIDATE
-    check_utf8(in, has_error, previous);
-#endif
-
-    // detect odd sequences of backslashes
-    uint64_t odd_ends = find_odd_backslash_sequences(
-        in, prev_iter_ends_odd_backslash);
-
-    // detect insides of quote pairs ("quote_mask") and also our quote_bits
-    // themselves
-    uint64_t quote_bits;
-    uint64_t quote_mask = find_quote_mask_and_bits(
-        in, odd_ends, prev_iter_inside_quote, quote_bits, error_mask);
-
-    // take the previous iterations structural bits, not our current iteration,
-    // and flatten
-    flatten_bits(base_ptr, base, idx, structurals);
-
-    uint64_t whitespace;
-    find_whitespace_and_structurals(in, whitespace, structurals);
-
-    // fixup structurals to reflect quotes and add pseudo-structural characters
-    structurals = finalize_structurals(structurals, whitespace, quote_mask,
-                                       quote_bits, prev_iter_ends_pseudo_pred);
-    idx += 64;
-  }
-
-  // is last string quote closed?
-  if (prev_iter_inside_quote) {
-      return false;
-  }
-
-  // finally, flatten out the remaining structurals from the last iteration
-  flatten_bits(base_ptr, base, idx, structurals);
-
-  pj.n_structural_indexes = base;
-  // a valid JSON file cannot have zero structural indexes - we should have
-  // found something
-  if (pj.n_structural_indexes == 0u) {
-printf("wacky exit\n");
-    return false;
-  }
-  if (base_ptr[pj.n_structural_indexes - 1] > len) {
-    fprintf(stderr, "Internal bug\n");
-    return false;
-  }
-  if (len != base_ptr[pj.n_structural_indexes - 1]) {
-    // the string might not be NULL terminated, but we add a virtual NULL ending
-    // character.
-    base_ptr[pj.n_structural_indexes++] = len;
-  }
-  // make it safe to dereference one beyond this array
-  base_ptr[pj.n_structural_indexes] = 0;  
-  if (error_mask) {
-printf("had error mask\n");
-    return false;
-  }
-#ifdef SIMDJSON_UTF8VALIDATE
-  return _mm256_testz_si256(has_error, has_error) != 0;
-#else
-  return true;
-#endif
-}
-
-bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
-  return find_structural_bits(reinterpret_cast<const uint8_t *>(buf), len, pj);
-}
+// File kept in case we want to reuse it soon (removing it would require editing many configuration files).
 /* end file src/stage1_find_marks.cpp */
 /* begin file src/stage2_build_tape.cpp */
 #include <cassert>
@@ -1014,6 +402,7 @@ bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
 #include <iostream>
 #define PATH_SEP '/'
 
+namespace simdjson {
 
 WARN_UNUSED
 really_inline bool is_valid_true_atom(const uint8_t *loc) {
@@ -1073,16 +462,20 @@ really_inline bool is_valid_null_atom(const uint8_t *loc) {
  * The JSON is parsed to a tape, see the accompanying tape.md file
  * for documentation.
  ***********/
-WARN_UNUSED  ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER
+WARN_UNUSED  ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER
 int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
+#ifndef ALLOW_SAME_PAGE_BUFFER_OVERRUN
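+  memset((uint8_t*)buf + len, 0, SIMDJSON_PADDING); // to please valgrind; note: this writes through the const buf pointer, so the SIMDJSON_PADDING bytes past buf + len must be writable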
+#endif
   uint32_t i = 0; // index of the structural character (0,1,2,3...)
   uint32_t idx;   // location of the structural character in the input (buf)
   uint8_t c; // used to track the (structural) character we are looking at, updated
         // by UPDATE_CHAR macro
   uint32_t depth = 0; // could have an arbitrary starting depth
-  pj.init();
+  pj.init(); // sets isvalid to false
   if(pj.bytecapacity < len) {
-      return simdjson::CAPACITY;
+      pj.errorcode = CAPACITY;
+      return pj.errorcode;
   }
 // this macro reads the next structural character, updating idx, i and c.
 #define UPDATE_CHAR()                                                          \
@@ -1103,7 +496,7 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
   // the root is used, if nothing else, to capture the size of the tape
   depth++; // everything starts at depth = 1, depth = 0 is just for the root, the root may contain an object, an array or something else.
   if (depth >= pj.depthcapacity) {
-    return simdjson::DEPTH_ERROR;
+    goto fail;
   }
 
   UPDATE_CHAR();
@@ -1117,7 +510,7 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
 #endif
     depth++;
     if (depth >= pj.depthcapacity) {
-      return simdjson::DEPTH_ERROR;
+      goto fail;
     }
     pj.write_tape(0, c); // strangely, moving this to object_begin slows things down
     goto object_begin;
@@ -1130,7 +523,7 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
 #endif    
     depth++;
     if (depth >= pj.depthcapacity) {
-      return simdjson::DEPTH_ERROR;
+      goto fail;
     }
     pj.write_tape(0, c);
     goto array_begin;
@@ -1149,14 +542,15 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
     break;
   }
   case 't': {
-    // we need to make a copy to make sure that the string is NULL terminated.
+    // we need to make a copy to make sure that the string is space terminated.
     // this only applies to the JSON document made solely of the true value.
     // this will almost never be called in practice
     char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
-    if(copy == nullptr) { goto fail;
-}
+    if(copy == nullptr) { 
+      goto fail;
+    }
     memcpy(copy, buf, len);
-    copy[len] = '\0';
+    copy[len] = ' ';
     if (!is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
       free(copy);
       goto fail;
@@ -1166,14 +560,15 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
     break;
   }
   case 'f': {
-    // we need to make a copy to make sure that the string is NULL terminated.
+    // we need to make a copy to make sure that the string is space terminated.
     // this only applies to the JSON document made solely of the false value.
     // this will almost never be called in practice
     char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
-    if(copy == nullptr) { goto fail;
-}
+    if(copy == nullptr) { 
+      goto fail;
+    }
     memcpy(copy, buf, len);
-    copy[len] = '\0';
+    copy[len] = ' ';
     if (!is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
       free(copy);
       goto fail;
@@ -1183,14 +578,15 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
     break;
   }
   case 'n': {
-    // we need to make a copy to make sure that the string is NULL terminated.
+    // we need to make a copy to make sure that the string is space terminated.
     // this only applies to the JSON document made solely of the null value.
     // this will almost never be called in practice
     char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
-    if(copy == nullptr) { goto fail;
-}
+    if(copy == nullptr) { 
+      goto fail;
+    }
     memcpy(copy, buf, len);
-    copy[len] = '\0';
+    copy[len] = ' ';
     if (!is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
       free(copy);
       goto fail;
@@ -1209,14 +605,17 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
   case '7':
   case '8':
   case '9': {
-    // we need to make a copy to make sure that the string is NULL terminated.
+    // we need to make a copy to make sure that the string is space terminated.
     // this is done only for JSON documents made of a sole number
-    // this will almost never be called in practice
+    // this will almost never be called in practice. We terminate with a space
+    // because we do not want to allow NUL bytes in the middle of a number (whereas
+    // a space in the middle of a number would be identified in stage 1).
     char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
-    if(copy == nullptr) { goto fail;
-}
+    if(copy == nullptr) { 
+      goto fail;
+    }
     memcpy(copy, buf, len);
-    copy[len] = '\0';
+    copy[len] = ' ';
     if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, false)) {
       free(copy);
       goto fail;
@@ -1229,8 +628,9 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
     // this is done only for JSON documents made of a sole number
     // this will almost never be called in practice
     char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
-    if(copy == nullptr) { goto fail;
-}
+    if(copy == nullptr) { 
+      goto fail;
+    }
     memcpy(copy, buf, len);
     copy[len] = '\0';
     if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, true)) {
@@ -1277,7 +677,7 @@ object_key_state:
   switch (c) {
   case '"': {
     if (!parse_string(buf, len, pj, depth, idx)) {
-      goto fail;
+      goto fail; 
     }
     break;
   }
@@ -1332,7 +732,7 @@ object_key_state:
     // we found an object inside an object, so we need to increment the depth
     depth++;
     if (depth >= pj.depthcapacity) {
-      return simdjson::DEPTH_ERROR;
+      goto fail;
     }
 
     goto object_begin;
@@ -1349,7 +749,7 @@ object_key_state:
     // we found an array inside an object, so we need to increment the depth
     depth++;
     if (depth >= pj.depthcapacity) {
-      return simdjson::DEPTH_ERROR;
+      goto fail;
     }
     goto array_begin;
   }
@@ -1366,7 +766,7 @@ object_continue:
       goto fail;
     } else {
       if (!parse_string(buf, len, pj, depth, idx)) {
-        goto fail;
+        goto fail; 
       }
       goto object_key_state;
     }
@@ -1464,7 +864,7 @@ main_array_switch:
     // we found an object inside an array, so we need to increment the depth
     depth++;
     if (depth >= pj.depthcapacity) {
-      return simdjson::DEPTH_ERROR;
+      goto fail;
     }
 
     goto object_begin;
@@ -1481,7 +881,7 @@ main_array_switch:
     // we found an array inside an array, so we need to increment the depth
     depth++;
     if (depth >= pj.depthcapacity) {
-      return simdjson::DEPTH_ERROR;
+      goto fail;
     }
     goto array_begin;
   }
@@ -1517,21 +917,62 @@ succeed:
                           pj.get_current_loc());
   pj.write_tape(pj.containing_scope_offset[depth], 'r'); // r is root
 
-
-
   pj.isvalid  = true;
-  return simdjson::SUCCESS;
-
+  pj.errorcode = SUCCESS;
+  return pj.errorcode;
 fail:
-  return simdjson::TAPE_ERROR;
+  // we do not need the next line because this is done by pj.init(), pessimistically.
+  // pj.isvalid  = false;
+  // At this point in the code, we have all the time in the world.
+  // Note that we know exactly where we are in the document so we could,
+  // without any overhead on the processing code, report a specific location.
+  // We could even trigger special code paths to assess what happened carefully,
+  // all without any added cost.
+  if (depth >= pj.depthcapacity) {
+    pj.errorcode = DEPTH_ERROR;
+    return pj.errorcode;
+  }
+  switch(c) {
+    case '"':
+      pj.errorcode = STRING_ERROR; 
+      return pj.errorcode;
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9': 
+    case '-': 
+      pj.errorcode = NUMBER_ERROR;
+      return pj.errorcode;
+    case 't':
+      pj.errorcode = T_ATOM_ERROR;
+      return pj.errorcode;
+    case 'n':
+      pj.errorcode = N_ATOM_ERROR;
+      return pj.errorcode;
+    case 'f':
+      pj.errorcode = F_ATOM_ERROR;
+      return pj.errorcode;
+    default: 
+      break;
+  }
+  pj.errorcode = TAPE_ERROR;
+  return pj.errorcode; 
 }
 
 int unified_machine(const char *buf, size_t len, ParsedJson &pj) {
   return unified_machine(reinterpret_cast<const uint8_t*>(buf), len, pj);
 }
+}
 /* end file src/stage2_build_tape.cpp */
 /* begin file src/parsedjson.cpp */
 
+namespace simdjson {
 ParsedJson::ParsedJson() : 
         structural_indexes(nullptr), tape(nullptr), containing_scope_offset(nullptr),
         ret_address(nullptr), string_buf(nullptr), current_string_buf_loc(nullptr) {}
@@ -1606,7 +1047,13 @@ bool ParsedJson::allocateCapacity(size_t len, size_t maxdepth) {
 
       return false;
     }
-
+    /*
+    // We do not need to initialize this content for parsing, though we could
+    // need to initialize it for safety.
+    memset(string_buf, 0 , localstringcapacity); 
+    memset(structural_indexes, 0, max_structures * sizeof(uint32_t)); 
+    memset(tape, 0, localtapecapacity * sizeof(uint64_t)); 
+    */
     bytecapacity = len;
     depthcapacity = maxdepth;
     tapecapacity = localtapecapacity;
@@ -1618,6 +1065,14 @@ bool ParsedJson::isValid() const {
     return isvalid;
 }
 
+int ParsedJson::getErrorCode() const {
+    return errorcode; // the stored code, one of simdjson::errorValues (member starts out as UNITIALIZED)
+}
+
+std::string ParsedJson::getErrorMsg() const {
+  return errorMsg(errorcode); // errorMsg() hands back a const reference; returning by value copies the text
+}
+
 void ParsedJson::deallocate() {
     bytecapacity = 0;
     depthcapacity = 0;
@@ -1836,10 +1291,12 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) {
     os << tapeidx << " : "<< type <<"\t// pointing to " << payload <<" (start root)\n";
     return true;
 }
+}
 /* end file src/parsedjson.cpp */
 /* begin file src/parsedjsoniterator.cpp */
 #include <iterator>
 
+namespace simdjson {
 ParsedJson::iterator::iterator(ParsedJson &pj_) : pj(pj_), depth(0), location(0), tape_length(0), depthindex(nullptr) {
         if(!pj.isValid()) {
             throw InvalidJSON();
@@ -1889,240 +1346,6 @@ ParsedJson::iterator::iterator(iterator &&o):
         o.depthindex = nullptr;// we take ownership
 }
 
-WARN_UNUSED
-bool ParsedJson::iterator::isOk() const {
-      return location < tape_length;
-}
-
-// useful for debuging purposes
-size_t ParsedJson::iterator::get_tape_location() const {
-    return location;
-}
-
-// useful for debuging purposes
-size_t ParsedJson::iterator::get_tape_length() const {
-    return tape_length;
-}
-
-// returns the current depth (start at 1 with 0 reserved for the fictitious root node)
-size_t ParsedJson::iterator::get_depth() const {
-    return depth;
-}
-
-// A scope is a series of nodes at the same depth, typically it is either an object ({) or an array ([).
-// The root node has type 'r'.
-uint8_t ParsedJson::iterator::get_scope_type() const {
-    return depthindex[depth].scope_type;
-}
-
-bool ParsedJson::iterator::move_forward() {
-    if(location + 1 >= tape_length) {
-        return false; // we are at the end!
-    }
-
-    if ((current_type == '[') || (current_type == '{')){
-        // We are entering a new scope
-        depth++;
-        depthindex[depth].start_of_scope = location;
-        depthindex[depth].scope_type = current_type;
-    } else if ((current_type == ']') || (current_type == '}')) {
-        // Leaving a scope.
-        depth--;
-        if(depth == 0) {
-            // Should not be necessary
-            return false;
-        }
-    } else if ((current_type == 'd') || (current_type == 'l')) {
-        // d and l types use 2 locations on the tape, not just one.
-        location += 1;
-    }
-
-    location += 1;
-    current_val = pj.tape[location];
-    current_type = (current_val >> 56);
-    return true;
-}
-
-uint8_t ParsedJson::iterator::get_type()  const {
-    return current_type;
-}
-
-
-int64_t ParsedJson::iterator::get_integer()  const {
-    if(location + 1 >= tape_length) { 
-      return 0;// default value in case of error
-    }
-    return static_cast<int64_t>(pj.tape[location + 1]);
-}
-
-double ParsedJson::iterator::get_double()  const {
-    if(location + 1 >= tape_length) { 
-      return NAN;// default value in case of error
-    }
-    double answer;
-    memcpy(&answer, & pj.tape[location + 1], sizeof(answer));
-    return answer;
-}
-
-const char * ParsedJson::iterator::get_string() const {
-   return  reinterpret_cast<const char *>(pj.string_buf + (current_val & JSONVALUEMASK) + sizeof(uint32_t)) ;
-}
-
-
-uint32_t ParsedJson::iterator::get_string_length() const {
-    uint32_t answer;
-    memcpy(&answer, reinterpret_cast<const char *>(pj.string_buf + (current_val & JSONVALUEMASK)), sizeof(uint32_t));
-    return answer;
-}
-
-bool ParsedJson::iterator::is_object_or_array() const {
-    return is_object_or_array(get_type());
-}
-
-bool ParsedJson::iterator::is_object() const {
-    return get_type() == '{';
-}
-
-bool ParsedJson::iterator::is_array() const {
-    return get_type() == '[';
-}
-
-bool ParsedJson::iterator::is_string() const {
-    return get_type() == '"';
-}
-
-bool ParsedJson::iterator::is_integer() const {
-    return get_type() == 'l';
-}
-
-bool ParsedJson::iterator::is_double() const {
-    return get_type() == 'd';
-}
-
-bool ParsedJson::iterator::is_true() const {
-    return get_type() == 't';
-}
-
-bool ParsedJson::iterator::is_false() const {
-    return get_type() == 'f';
-}
-
-bool ParsedJson::iterator::is_null() const {
-    return get_type() == 'n';
-}
-
-bool ParsedJson::iterator::is_object_or_array(uint8_t type) {
-    return (type == '[' || (type == '{'));
-}
-
-bool ParsedJson::iterator::move_to_key(const char * key) {
-    if(down()) {
-      do {
-        assert(is_string());
-        bool rightkey = (strcmp(get_string(),key)==0);// null chars would fool this
-        next();
-        if(rightkey) { 
-          return true;
-        }
-      } while(next());
-      assert(up());// not found
-    }
-    return false;
-}
-
-
- bool ParsedJson::iterator::next() {
-    if ((current_type == '[') || (current_type == '{')){
-    // we need to jump
-    size_t npos = ( current_val & JSONVALUEMASK);
-    if(npos >= tape_length) {
-        return false; // shoud never happen unless at the root
-    }
-    uint64_t nextval = pj.tape[npos];
-    uint8_t nexttype = (nextval >> 56);
-    if((nexttype == ']') || (nexttype == '}')) {
-        return false; // we reached the end of the scope
-    }
-    location = npos;
-    current_val = nextval;
-    current_type = nexttype;
-    return true;
-    } 
-    size_t increment = (current_type == 'd' || current_type == 'l') ? 2 : 1;
-    if(location + increment >= tape_length) { return false;
-}
-    uint64_t nextval = pj.tape[location + increment];
-    uint8_t nexttype = (nextval >> 56);
-    if((nexttype == ']') || (nexttype == '}')) {
-        return false; // we reached the end of the scope
-    }
-    location = location + increment;
-    current_val = nextval;
-    current_type = nexttype;
-    return true;
-    
-}
-
-
- bool ParsedJson::iterator::prev() {
-    if(location - 1 < depthindex[depth].start_of_scope) { return false;
-}
-    location -= 1;
-    current_val = pj.tape[location];
-    current_type = (current_val >> 56);
-    if ((current_type == ']') || (current_type == '}')){
-    // we need to jump
-    size_t new_location = ( current_val & JSONVALUEMASK);
-    if(new_location < depthindex[depth].start_of_scope) {
-        return false; // shoud never happen
-    }
-    location = new_location;
-    current_val = pj.tape[location];
-    current_type = (current_val >> 56);
-    }
-    return true;
-}
-
-
- bool ParsedJson::iterator::up() {
-    if(depth == 1) {
-    return false; // don't allow moving back to root
-    }
-    to_start_scope();
-    // next we just move to the previous value
-    depth--;
-    location -= 1;
-    current_val = pj.tape[location];
-    current_type = (current_val >> 56);
-    return true;
-}
-
-
- bool ParsedJson::iterator::down() {
-    if(location + 1 >= tape_length) { return false;
-}
-    if ((current_type == '[') || (current_type == '{')) {
-    size_t npos = (current_val & JSONVALUEMASK);
-    if(npos == location + 2) {
-        return false; // we have an empty scope
-    }
-    depth++;
-    location = location + 1;
-    depthindex[depth].start_of_scope = location;
-    depthindex[depth].scope_type = current_type;
-    current_val = pj.tape[location];
-    current_type = (current_val >> 56);
-    return true;
-    }
-    return false;
-}
-
-void ParsedJson::iterator::to_start_scope()  {
-    location = depthindex[depth].start_of_scope;
-    current_val = pj.tape[location];
-    current_type = (current_val >> 56);
-}
-
 bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const {
     if(!isOk()) { 
       return false;
@@ -2164,4 +1387,5 @@ bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const {
     }
     return true;
 }
+}
 /* end file src/parsedjsoniterator.cpp */
diff --git a/singleheader/simdjson.h b/singleheader/simdjson.h
index 9b385f93..f646e12b 100644
--- a/singleheader/simdjson.h
+++ b/singleheader/simdjson.h
@@ -1,14 +1,16 @@
-/* auto-generated on Thu May  9 20:55:13 EDT 2019. Do not edit! */
+/* auto-generated on Tue 02 Jul 2019 04:34:44 PM EDT. Do not edit! */
 /* begin file include/simdjson/simdjson_version.h */
 // /include/simdjson/simdjson_version.h automatically generated by release.py, do not change by hand 
 #ifndef SIMDJSON_INCLUDE_SIMDJSON_VERSION 
 #define SIMDJSON_INCLUDE_SIMDJSON_VERSION 
 #define SIMDJSON_VERSION 0.1.2 
+namespace simdjson {
 enum { 
     SIMDJSON_VERSION_MAJOR = 0,  
     SIMDJSON_VERSION_MINOR = 1,  
     SIMDJSON_VERSION_REVISION = 2  
 }; 
+}
 #endif // SIMDJSON_INCLUDE_SIMDJSON_VERSION 
 /* end file include/simdjson/simdjson_version.h */
 /* begin file include/simdjson/simdjson.h */
@@ -17,17 +19,47 @@ enum {
 
 #include <string>
 
-struct simdjson {
-  enum errorValues {
-    SUCCESS = 0,
-    CAPACITY, // This ParsedJson can't support a document that big
-    MEMALLOC, // Error allocating memory, most likely out of memory
-    TAPE_ERROR, // Something went wrong while writing to the tape
-    DEPTH_ERROR, // Your document exceeds the user-specified depth limitation
-  };
-  static const std::string& errorMsg(const int);
+namespace simdjson {
+enum class instruction_set {
+  avx2,
+  sse4_2,
+  neon,
+  none,
+// 'native' aliases a good default for the machine we compile for; the preprocessor picks it below
+#ifdef __AVX2__
+  native = avx2
+#elif defined(__ARM_NEON)
+  native = neon
+#else
+  // Let us assume that we have an older x64 processor, but one that has SSE4.2 (i.e., something
+  // that came out in the second decade of the 21st century).
+  // It would be nicer to check explicitly, but there may not be a good way to do so
+  // that is cross-platform.
+  // Under Visual Studio, there is no way to check for SSE4.2 support at compile-time.
+  native = sse4_2
+#endif
 };
 
+enum errorValues { // codes stored in ParsedJson (see getErrorCode) and turned into text by errorMsg()
+  SUCCESS = 0,
+  CAPACITY, // This ParsedJson can't support a document that big
+  MEMALLOC, // Error allocating memory, most likely out of memory
+  TAPE_ERROR, // Generic error: something went wrong while writing to the tape (stage 2)
+  DEPTH_ERROR, // Your document exceeds the user-specified depth limitation
+  STRING_ERROR, // Problem while parsing a string
+  T_ATOM_ERROR, // Problem while parsing an atom starting with the letter 't'
+  F_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'f'
+  N_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'n'
+  NUMBER_ERROR, // Problem while parsing a number
+  UTF8_ERROR, // The input is not valid UTF-8
+  UNITIALIZED, // Unknown error, or uninitialized document (initial value of ParsedJson's errorcode)
+  EMPTY, // No structural document found
+  UNESCAPED_CHARS, // Found unescaped characters in a string
+  UNCLOSED_STRING, // Missing quote at the end of a string
+  UNEXPECTED_ERROR // Indicative of a bug in simdjson
+};
+const std::string& errorMsg(const int);
+}
 #endif
 /* end file include/simdjson/simdjson.h */
 /* begin file include/simdjson/portability.h */
@@ -40,6 +72,7 @@ struct simdjson {
 #include <iso646.h>
 #include <cstdint>
 
+namespace simdjson {
 static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
 	return _addcarry_u64(0, value1, value2, reinterpret_cast<unsigned __int64 *>(result));
 }
@@ -52,11 +85,11 @@ static inline bool mul_overflow(uint64_t value1, uint64_t value2, uint64_t *resu
 }
 
 static inline int trailingzeroes(uint64_t input_num) {
-    return _tzcnt_u64(input_num);
+    return static_cast<int>(_tzcnt_u64(input_num));
 }
 
 static inline int leadingzeroes(uint64_t  input_num) {
-    return _lzcnt_u64(input_num);
+    return static_cast<int>(_lzcnt_u64(input_num));
 }
 
 static inline int hamming(uint64_t input_num) {
@@ -67,7 +100,7 @@ static inline int hamming(uint64_t input_num) {
 		__popcnt((uint32_t)(input_num >> 32)));
 #endif
 }
-
+}
 #else
 #include <cstdint>
 #include <cstdlib>
@@ -75,7 +108,7 @@ static inline int hamming(uint64_t input_num) {
 #if defined(__BMI2__) || defined(__POPCOUNT__) || defined(__AVX2__)
 #include <x86intrin.h>
 #endif
-
+namespace simdjson {
 static inline bool add_overflow(uint64_t  value1, uint64_t  value2, uint64_t *result) {
 	return __builtin_uaddll_overflow(value1, value2, (unsigned long long*)result);
 }
@@ -109,10 +142,11 @@ static inline int hamming(uint64_t input_num) {
 	return __builtin_popcountll(input_num);
 #endif
 }
-
+}
 #endif // _MSC_VER
 
 
+namespace simdjson {
 // portable version of  posix_memalign
 static inline void *aligned_malloc(size_t alignment, size_t size) {
 	void *p;
@@ -172,6 +206,7 @@ static inline void aligned_free(void *memblock) {
 static inline void aligned_free_char(char *memblock) {
 	aligned_free((void*)memblock);
 }
+}
 
 #endif // SIMDJSON_PORTABILITY_H
 /* end file include/simdjson/portability.h */
@@ -208,8 +243,6 @@ static inline void aligned_free_char(char *memblock) {
 #define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0)
 
 #ifdef _MSC_VER
-// Visual Studio won't allow it:
-//#define ALLOW_SAME_PAGE_BUFFER_OVERRUN
 #define really_inline inline
 #define never_inline __declspec(noinline)
 
@@ -225,10 +258,11 @@ static inline void aligned_free_char(char *memblock) {
 
 #else
 
-// for non-Visual Studio compilers, we assume that same-page buffer overrun is fine:
-#ifndef ALLOW_SAME_PAGE_BUFFER_OVERRUN
-#define ALLOW_SAME_PAGE_BUFFER_OVERRUN
-#endif 
+// For non-Visual Studio compilers, we may assume that same-page buffer overrun is fine.
+// However, it will make it difficult to be "valgrind clean".
+//#ifndef ALLOW_SAME_PAGE_BUFFER_OVERRUN
+//#define ALLOW_SAME_PAGE_BUFFER_OVERRUN
+//#endif 
 
 // The following is likely unnecessarily complex.
 #ifdef __SANITIZE_ADDRESS__
@@ -241,6 +275,12 @@ static inline void aligned_free_char(char *memblock) {
 #  endif 
 #endif 
 
+#if defined(__has_feature)
+#  if (__has_feature(memory_sanitizer))
+#define LENIENT_MEM_SANITIZER __attribute__((no_sanitize("memory")))
+#  endif
+#endif
+
 #define really_inline inline __attribute__((always_inline, unused))
 #define never_inline inline __attribute__((noinline, unused))
 
@@ -260,6 +300,10 @@ static inline void aligned_free_char(char *memblock) {
 #ifndef ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER
 #define ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER
 #endif
+#ifndef LENIENT_MEM_SANITIZER
+#define LENIENT_MEM_SANITIZER
+#endif
+
 #endif // SIMDJSON_COMMON_DEFS_H
 /* end file include/simdjson/common_defs.h */
 /* begin file include/simdjson/padded_string.h */
@@ -267,6 +311,8 @@ static inline void aligned_free_char(char *memblock) {
 #define SIMDJSON_PADDING_STRING_H
 #include <memory>
 #include <cstring>
+
+namespace simdjson {
 // low-level function to allocate memory with padding so we can read passed the
 // "length" bytes safely. if you must provide a pointer to some data, create it
 // with this function: length is the max. size in bytes of the string caller is
@@ -327,6 +373,7 @@ private:
   size_t viable_size;
   char *data_ptr;
 };
+}
 
 #endif
 /* end file include/simdjson/padded_string.h */
@@ -335,6 +382,7 @@ private:
 #define SIMDJSON_JSONCHARUTILS_H
 
 
+namespace simdjson {
 // structural chars here are
 // they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL)
 // we are also interested in the four whitespace characters
@@ -342,7 +390,7 @@ private:
 
 // these are the chars that can follow a true/false/null or number atom
 // and nothing else
-const uint32_t structural_or_whitespace_negated[256] = {
+const uint32_t structural_or_whitespace_or_null_negated[256] = {
     0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
@@ -359,13 +407,37 @@ const uint32_t structural_or_whitespace_negated[256] = {
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
 
+// return non-zero if c is neither a structural char, a whitespace char, nor the null byte (0x00);
+// zero otherwise (plain lookup in structural_or_whitespace_or_null_negated)
+really_inline uint32_t is_not_structural_or_whitespace_or_null(uint8_t c) {
+  return structural_or_whitespace_or_null_negated[c];
+}
+
+
+const uint32_t structural_or_whitespace_negated[256] = { // 0 for \t \n \r space , : [ ] { }; 1 for every other byte, including 0x00
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
+
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
+
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+
 // return non-zero if not a structural or whitespace char
 // zero otherwise
 really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) {
   return structural_or_whitespace_negated[c];
 }
 
-const uint32_t structural_or_whitespace[256] = {
+const uint32_t structural_or_whitespace_or_null[256] = {
     1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -378,6 +450,24 @@ const uint32_t structural_or_whitespace[256] = {
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 
+really_inline uint32_t is_structural_or_whitespace_or_null(uint8_t c) { // non-zero for a structural char, whitespace or the null byte
+  return structural_or_whitespace_or_null[c];
+}
+
+
+const uint32_t structural_or_whitespace[256] = { // 1 for \t \n \r space , : [ ] { }; 0 for every other byte, including 0x00
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
 really_inline uint32_t is_structural_or_whitespace(uint8_t c) {
   return structural_or_whitespace[c];
 }
@@ -582,6 +672,7 @@ inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
   // will return 0 when the code point was too large.
   return 0; // bad r
 }
+}
 
 #endif
 /* end file include/simdjson/jsoncharutils.h */
@@ -593,6 +684,7 @@ inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
 #include <iomanip>
 #include <iostream>
 
+namespace simdjson {
 // ends with zero char
 static inline void print_with_escapes(const unsigned char *src) {
   while (*src) {
@@ -783,6 +875,7 @@ static inline void print_with_escapes(const char *src, std::ostream &os,
                                       size_t len) {
   print_with_escapes(reinterpret_cast<const unsigned char *>(src), os, len);
 }
+}
 
 #
 #endif
@@ -800,7 +893,7 @@ static inline void print_with_escapes(const char *src, std::ostream &os,
 
 
 
-
+namespace simdjson {
 
 // load a file in memory...
 // get a corpus; pad out to cache line so we can always use SIMD
@@ -817,7 +910,7 @@ static inline void print_with_escapes(const char *src, std::ostream &os,
 //        std::cout << "Could not load the file " << filename << std::endl;
 //      }
 padded_string get_corpus(const std::string& filename);
-
+}
 
 #endif
 /* end file include/simdjson/jsonioutil.h */
@@ -826,8 +919,9 @@ padded_string get_corpus(const std::string& filename);
 #define SIMDJSON_SIMDPRUNE_TABLES_H
 
 
-#ifdef __AVX__
 
+namespace simdjson {
+#ifdef __AVX__
 static const unsigned char mask128_epi8[] = {
     0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe,
     0xf, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe,
@@ -35807,13 +35901,13 @@ static const unsigned char mask128_epi32[] = {
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff,
 };
-
+}
 #endif //__SSE3__
 
 #ifdef __AVX2__
 
 #include <cstdint>
-
+namespace simdjson {
 static const uint32_t mask256_epi32[] = {
     0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 7, 0, 2, 3, 4, 5, 6, 7, 7, 2,
     3, 4, 5, 6, 7, 7, 7, 0, 1, 3, 4, 5, 6, 7, 7, 1, 3, 4, 5, 6, 7, 7, 7, 0, 3,
@@ -35897,6 +35991,7 @@ static const uint32_t mask256_epi32[] = {
     3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2,
     0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+}
 #endif //__AVX2__
 
 #endif
@@ -35929,9 +36024,8 @@ static const uint32_t mask256_epi32[] = {
 
 // all byte values must be no larger than 0xF4
 
-
+namespace simdjson {
 #ifdef __AVX2__
-
 /*****************************/
 static inline __m256i push_last_byte_of_a_to_b(__m256i a, __m256i b) {
   return _mm256_alignr_epi8(b, _mm256_permute2x128_si256(a, b, 0x21), 15);
@@ -36097,6 +36191,7 @@ avxcheckUTF8Bytes(__m256i current_bytes,
 #else // __AVX2__
 #warning "We require AVX2 support!"
 #endif // __AVX2__
+}
 #endif
 /* end file include/simdjson/simdutf8check.h */
 /* begin file include/simdjson/jsonminifier.h */
@@ -36106,6 +36201,7 @@ avxcheckUTF8Bytes(__m256i current_bytes,
 #include <cstddef>
 #include <cstdint>
 
+namespace simdjson {
 // Take input from buf and remove useless whitespace, write it to out; buf and
 // out can be the same pointer. Result is null terminated,
 // return the string length (minus the null termination).
@@ -36124,7 +36220,7 @@ static inline size_t jsonminify(const std::string_view & p, char *out) {
 static inline size_t jsonminify(const padded_string & p, char *out) {
     return jsonminify(p.data(), p.size(), out);
 }
-
+}
 #endif
 /* end file include/simdjson/jsonminifier.h */
 /* begin file include/simdjson/parsedjson.h */
@@ -36137,12 +36233,11 @@ static inline size_t jsonminify(const padded_string & p, char *out) {
 #include <iomanip>
 #include <iostream>
 
-
 #define JSONVALUEMASK 0xFFFFFFFFFFFFFF
 
 #define DEFAULTMAXDEPTH 1024// a JSON document with a depth exceeding 1024 is probably de facto invalid
 
-
+namespace simdjson {
 /************
  * The JSON is parsed to a tape, see the accompanying tape.md file
  * for documentation.
@@ -36161,8 +36256,16 @@ public:
   WARN_UNUSED
   bool allocateCapacity(size_t len, size_t maxdepth = DEFAULTMAXDEPTH);
 
+  // returns true if the document parsed was valid
   bool isValid() const;
 
+  // return an error code corresponding to the last parsing attempt, see simdjson.h
+  // will return simdjson::UNITIALIZED if no parsing was attempted
+  int getErrorCode() const;
+
+  // return the string equivalent of "getErrorCode"
+  std::string getErrorMsg() const;
+
   // deallocate memory and set capacity to zero, called automatically by the
   // destructor
   void deallocate();
@@ -36231,72 +36334,124 @@ public:
 
     iterator(iterator &&o);
 
-    bool isOk() const;
+    inline bool isOk() const;
 
     // useful for debuging purposes
-    size_t get_tape_location() const;
+    inline size_t get_tape_location() const;
 
     // useful for debuging purposes
-    size_t get_tape_length() const;
+    inline size_t get_tape_length() const;
 
     // returns the current depth (start at 1 with 0 reserved for the fictitious root node)
-    size_t get_depth() const;
+    inline size_t get_depth() const;
 
     // A scope is a series of nodes at the same depth, typically it is either an object ({) or an array ([).
     // The root node has type 'r'.
-    uint8_t get_scope_type() const;
+    inline uint8_t get_scope_type() const;
 
     // move forward in document order
-    bool move_forward();
+    inline bool move_forward();
 
     // retrieve the character code of what we're looking at:
     // [{"sltfn are the possibilities
-    uint8_t get_type()  const;
+    inline uint8_t get_type()  const {
+       return current_type; // cached type tag of the current node; kept in the header so it can be inlined
+    }
 
     // get the int64_t value at this node; valid only if we're at "l"
-    int64_t get_integer()  const;
+    inline int64_t get_integer()  const {
+       if(location + 1 >= tape_length) {
+         return 0;// no tape slot after this one: default value in case of error
+       }
+       return static_cast<int64_t>(pj.tape[location + 1]); // the value is read from the slot after the current one
+    }
 
     // get the string value at this node (NULL ended); valid only if we're at "
     // note that tabs, and line endings are escaped in the returned value (see print_with_escapes)
     // return value is valid UTF-8
     // It may contain NULL chars within the string: get_string_length determines the true 
     // string length.
-    const char * get_string() const;
+    inline const char * get_string() const { // skips the uint32_t length prefix that get_string_length() reads
+      return  reinterpret_cast<const char *>(pj.string_buf + (current_val & JSONVALUEMASK) + sizeof(uint32_t)) ;
+    }
 
-    uint32_t get_string_length() const;
+    // return the length of the string in bytes, read from the uint32_t stored just before the characters
+    inline uint32_t get_string_length() const {
+      uint32_t answer;
+      memcpy(&answer, reinterpret_cast<const char *>(pj.string_buf + (current_val & JSONVALUEMASK)), sizeof(uint32_t));
+      return answer;
+    }
 
     // get the double value at this node; valid only if
     // we're at "d"
-    double get_double()  const;
+    inline double get_double()  const {
+      if(location + 1 >= tape_length) {
+        return NAN;// no tape slot after this one: default value in case of error
+      }
+      double answer;
+      memcpy(&answer, & pj.tape[location + 1], sizeof(answer)); // copy the raw bytes of the next slot into a double
+      return answer;
+    }
 
-    bool is_object_or_array() const;
 
-    bool is_object() const;
+    inline bool is_object_or_array() const { // true when the current node is '{' or '['
+      return is_object() || is_array();
+    }
 
-    bool is_array() const;
+    inline bool is_object() const { // true when the current node's type tag is '{'
+      return get_type() == '{';
+    }
 
-    bool is_string() const;
+    inline bool is_array() const { // true when the current node's type tag is '['
+      return get_type() == '[';
+    }
 
-    bool is_integer() const;
+    inline bool is_string() const { // true when the current node's type tag is the double-quote character
+      return get_type() == '"';
+    }
 
-    bool is_double() const;
+    inline bool is_integer() const { // true when the type tag is 'l', the tag get_integer() is valid for
+      return get_type() == 'l';
+    }
 
-    bool is_true() const;
+    inline bool is_double() const { // true when the type tag is 'd', the tag get_double() is valid for
+      return get_type() == 'd';
+    }
 
-    bool is_false() const;
+    inline bool is_true() const { // true when the current node's type tag is 't'
+      return get_type() == 't';
+    }
 
-    bool is_null() const;
+    inline bool is_false() const { // true when the current node's type tag is 'f'
+      return get_type() == 'f';
+    }
 
-    static bool is_object_or_array(uint8_t type);
+    inline bool is_null() const { // true when the current node's type tag is 'n'
+      return get_type() == 'n';
+    }
+
+    static bool is_object_or_array(uint8_t type) { // same test, applied to an explicit type tag
+      return ((type == '[') || (type == '{'));
+    }
 
     // when at {, go one level deep, looking for a given key
     // if successful, we are left pointing at the value,
     // if not, we are still pointing at the object ({)
-    // (in case of repeated keys, this only finds the first one)
+    // (in case of repeated keys, this only finds the first one).
     // We seek the key using C's strcmp so if your JSON strings contain
     // NULL chars, this would trigger a false positive: if you expect that
     // to be the case, take extra precautions.
-    bool move_to_key(const char * key);
+    inline bool move_to_key(const char * key);
+    // when at {, go one level deep, looking for a given key
+    // if successful, we are left pointing at the value,
+    // if not, we are still pointing at the object ({)
+    // (in case of repeated keys, this only finds the first one).
+    // The string we search for can contain NULL values.
+    inline bool move_to_key(const char * key, uint32_t length);
+    
+    // when at a key location within an object, this moves to the accompanying value (located next to it).
+    // this is equivalent but much faster than calling "next()".
+    inline void move_to_value();
 
     // throughout return true if we can do the navigation, false
     // otherwise
@@ -36306,30 +36461,30 @@ public:
     // Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, { and [.
     // At the object ({) or at the array ([), you can issue a "down" to visit their content.
     // valid if we're not at the end of a scope (returns true).
-    bool next();
+    inline bool next();
 
     // Withing a given scope (series of nodes at the same depth within either an
     // array or an object), we move backward.
     // Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true when starting at the end
     // of the scope.
     // At the object ({) or at the array ([), you can issue a "down" to visit their content.
-    bool prev();
+    inline bool prev();
 
     // Moves back to either the containing array or object (type { or [) from
     // within a contained scope.
     // Valid unless we are at the first level of the document
-    bool up();
+    inline bool up();
 
 
     // Valid if we're at a [ or { and it starts a non-empty scope; moves us to start of
     // that deeper scope if it not empty.
     // Thus, given [true, null, {"a":1}, [1,2]], if we are at the { node, we would move to the
     // "a" node.
-    bool down();
+    inline bool down();
 
     // move us to the start of our current scope,
     // a scope is a series of nodes at the same level
-    void to_start_scope();
+    inline void to_start_scope();
 
     // void to_end_scope();              // move us to
     // the start of our current scope; always succeeds
@@ -36372,6 +36527,7 @@ private:
   uint8_t *string_buf; // should be at least bytecapacity
   uint8_t *current_string_buf_loc;
   bool isvalid{false};
+  int errorcode{simdjson::UNITIALIZED};
 
 private :
 
@@ -36397,22 +36553,1031 @@ inline void dumpbits32_always(uint32_t v, const std::string &msg) {
   std::cout << " " << msg.c_str() << "\n";
 }
 
+WARN_UNUSED
+bool ParsedJson::iterator::isOk() const {
+      return location < tape_length; // true while the cursor still indexes inside the tape
+}
 
+// Returns the current index into the tape; useful for debugging purposes.
+size_t ParsedJson::iterator::get_tape_location() const {
+    return location;
+}
+
+// Returns the tape length recorded by this iterator; useful for debugging purposes.
+size_t ParsedJson::iterator::get_tape_length() const {
+    return tape_length;
+}
+
+// Returns the current depth (starts at 1, with 0 reserved for the fictitious root node).
+size_t ParsedJson::iterator::get_depth() const {
+    return depth;
+}
+
+// A scope is a series of nodes at the same depth; typically it is either an object ({) or an array ([).
+// Returns the type recorded for the scope at the current depth. The root node has type 'r'.
+uint8_t ParsedJson::iterator::get_scope_type() const {
+    return depthindex[depth].scope_type;
+}
+
+bool ParsedJson::iterator::move_forward() { // steps to the next tape entry in tape order; false if already on the last entry
+    if(location + 1 >= tape_length) {
+        return false; // we are at the end!
+    }
+
+    if ((current_type == '[') || (current_type == '{')){
+        // We are entering a new scope
+        depth++;
+        depthindex[depth].start_of_scope = location; // NOTE(review): down() records location + 1 instead; confirm the difference is intended
+        depthindex[depth].scope_type = current_type;
+    } else if ((current_type == ']') || (current_type == '}')) {
+        // Leaving a scope.
+        depth--;
+    } else if ((current_type == 'd') || (current_type == 'l')) {
+        // d and l types use 2 locations on the tape, not just one.
+        location += 1; // skip the extra tape word before the common increment below
+    }
+
+    location += 1;
+    current_val = pj.tape[location];
+    current_type = (current_val >> 56); // the type tag is taken from the bits above bit 55 of the tape word
+    return true;
+}
+
+void ParsedJson::iterator::move_to_value() { // unconditional one-entry step: no bounds or type check is performed here
+    // Assume that we are on a key, so the value is the very next tape entry: move by 1.
+    location += 1;
+    current_val = pj.tape[location];
+    current_type = (current_val >> 56); // refresh the cached type tag for the new entry
+}
+
+
+// Looks for `key` (a null-terminated string) among the keys of the object the iterator is on.
+// Returns true with the iterator on the matching value; otherwise returns false with the iterator back on the object.
+bool ParsedJson::iterator::move_to_key(const char * key) {
+    if(down()) { // false (and no movement) when not on a non-empty '{' or '['
+      do {
+        assert(is_string()); // side-effect-free sanity check: every visited entry is expected to be a key
+        const bool rightkey = (strcmp(get_string(),key)==0);// null chars would fool this
+        move_to_value(); // always step onto the value so that next() lands on the following key
+        if(rightkey) { return true; }
+      } while(next());
+      up(); // not found: restore the position. Must not be wrapped in assert(), which is compiled out under NDEBUG.
+    }
+    return false;
+}
+
+// Looks for the key made of the `length` bytes at `key` among the keys of the object the iterator is on (null bytes allowed).
+// Returns true with the iterator on the matching value; otherwise returns false with the iterator back on the object.
+bool ParsedJson::iterator::move_to_key(const char * key, uint32_t length) {
+    if(down()) { // false (and no movement) when not on a non-empty '{' or '['
+      do {
+        assert(is_string()); // side-effect-free sanity check: every visited entry is expected to be a key
+        const bool rightkey = ((get_string_length() == length) && (memcmp(get_string(),key,length)==0));
+        move_to_value(); // always step onto the value so that next() lands on the following key
+        if(rightkey) { return true; }
+      } while(next());
+      up(); // not found: restore the position. Must not be wrapped in assert(), which is compiled out under NDEBUG.
+    }
+    return false;
+}
+
+
+ bool ParsedJson::iterator::prev() { // moves one node backward within the current scope; false if already at its start
+    if(location - 1 < depthindex[depth].start_of_scope) {
+      return false; // nothing before us in this scope; the iterator is left untouched
+    }
+    location -= 1;
+    current_val = pj.tape[location];
+    current_type = (current_val >> 56);
+    if ((current_type == ']') || (current_type == '}')){
+      // we landed on a closing bracket: jump to the tape index stored in its payload
+      size_t new_location = ( current_val & JSONVALUEMASK);
+      if(new_location < depthindex[depth].start_of_scope) {
+        return false; // should never happen (note: location has already been decremented at this point)
+      }
+      location = new_location;
+      current_val = pj.tape[location];
+      current_type = (current_val >> 56);
+    } // NOTE(review): 'd'/'l' entries use two tape words (see move_forward); this one-word step back does not account for that
+    return true;
+}
+
+
+ bool ParsedJson::iterator::up() { // leaves the current scope; false (no movement) when depth is 1
+    if(depth == 1) {
+      return false; // don't allow moving back to root
+    }
+    to_start_scope(); // rewind to the recorded start of the current scope
+    // next we just move to the previous value
+    depth--;
+    location -= 1; // presumably the opening '{' or '[' when the scope was entered via down() -- confirm for move_forward()
+    current_val = pj.tape[location];
+    current_type = (current_val >> 56);
+    return true;
+}
+
+
+ bool ParsedJson::iterator::down() { // enters the scope opened by the current '[' or '{'; false (no movement) otherwise
+    if(location + 1 >= tape_length) {
+      return false; // no tape entry after the current one
+    }
+    if ((current_type == '[') || (current_type == '{')) {
+      size_t npos = (current_val & JSONVALUEMASK); // payload of the opening bracket, used as a tape index (next() jumps to it)
+      if(npos == location + 2) {
+        return false; // we have an empty scope
+      }
+      depth++;
+      location = location + 1; // first entry inside the scope
+      depthindex[depth].start_of_scope = location;
+      depthindex[depth].scope_type = current_type; // still the opening bracket's type: current_type is refreshed below
+      current_val = pj.tape[location];
+      current_type = (current_val >> 56);
+      return true;
+    }
+    return false; // not on an array or object
+}
+
+void ParsedJson::iterator::to_start_scope()  { // rewinds to the start recorded for the current depth; depth is unchanged
+    location = depthindex[depth].start_of_scope;
+    current_val = pj.tape[location];
+    current_type = (current_val >> 56); // refresh the cached type tag
+}
+
+bool ParsedJson::iterator::next() { // moves to the next node of the current scope; false (no movement) at the end of the scope
+    size_t npos; // candidate tape index of the next node
+    if ((current_type == '[') || (current_type == '{')){
+      // we need to jump over the nested scope: the payload holds the index to continue from
+      npos = ( current_val & JSONVALUEMASK);
+    } else {
+      npos = location + ((current_type == 'd' || current_type == 'l') ? 2 : 1); // 'd' and 'l' use two tape words
+    }
+    uint64_t nextval = pj.tape[npos]; // peek first; the iterator state is only committed below
+    uint8_t nexttype = (nextval >> 56);
+    if((nexttype == ']') || (nexttype == '}')) {
+        return false; // we reached the end of the scope
+    }
+    location = npos;
+    current_val = nextval;
+    current_type = nexttype;
+    return true;
+}
+}
 #endif
 /* end file include/simdjson/parsedjson.h */
 /* begin file include/simdjson/stage1_find_marks.h */
 #ifndef SIMDJSON_STAGE1_FIND_MARKS_H
 #define SIMDJSON_STAGE1_FIND_MARKS_H
 
+#include <cassert>
 
-struct ParsedJson;
+#ifdef __AVX2__
 
+#ifndef SIMDJSON_SKIPUTF8VALIDATION
+#define SIMDJSON_UTF8VALIDATE
+
+#endif
+#else
+// Currently we do not UTF-8 validate on ARM.
+// We also assume that a target without __AVX2__ is ARM, which is
+// a crude assumption. TODO: detect the target architecture properly.
+#ifdef __ARM_NEON
+#include <arm_neon.h>
+#else
+#warning It appears that neither ARM NEON nor AVX2 are detected.
+#endif // __ARM_NEON
+#endif // __AVX2__
+
+// It seems that many parsers do UTF-8 validation.
+// RapidJSON does not do it by default, but a flag
+// allows it.
+#ifdef SIMDJSON_UTF8VALIDATE
+#endif
+
+#define TRANSPOSE
+
+namespace simdjson {
+template<simdjson::instruction_set>
+struct simd_input;
+#ifdef __AVX2__
+template<> // AVX2 flavour: one 64-byte input block held in two 256-bit registers
+struct simd_input<simdjson::instruction_set::avx2>
+{
+  __m256i lo; // bytes 0..31 of the block (fill_input loads it from ptr + 0)
+  __m256i hi; // bytes 32..63 of the block (fill_input loads it from ptr + 32)
+};
+#endif
+
+#ifdef __ARM_NEON
+template<> struct simd_input<simdjson::instruction_set::neon> // NEON flavour: one 64-byte input block
+{
+#ifndef TRANSPOSE
+  uint8x16_t i0; // bytes 0..15 (fill_input loads it from ptr + 0)
+  uint8x16_t i1; // bytes 16..31
+  uint8x16_t i2; // bytes 32..47
+  uint8x16_t i3; // bytes 48..63
+#else
+  uint8x16x4_t i; // filled by vld4q_u8 in fill_input, a 4-way de-interleaving load: val[k] holds bytes k, k+4, k+8, ...
+#endif
+};
+#endif
+
+#ifdef __ARM_NEON
+really_inline // packs one bit per byte lane of `input` into a 16-bit mask; presumably lanes are 0x00 or 0xFF compare results
+uint16_t neonmovemask(uint8x16_t input) {
+  const uint8x16_t bitmask = { 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
+                               0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
+  uint8x16_t minput = vandq_u8(input, bitmask); // lane j keeps only bit (j mod 8)
+  uint8x16_t tmp = vpaddq_u8(minput, minput); // pairwise add; distinct bits, so adding acts like OR
+  tmp = vpaddq_u8(tmp, tmp);
+  tmp = vpaddq_u8(tmp, tmp); // after three folds, bytes 0 and 1 each summarise 8 input lanes
+  return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0);
+}
+
+really_inline // packs four 16-lane vectors (presumably 0x00/0xFF compare results) into one 64-bit mask
+uint64_t neonmovemask_bulk(uint8x16_t p0, uint8x16_t p1, uint8x16_t p2, uint8x16_t p3) {
+#ifndef TRANSPOSE
+  const uint8x16_t bitmask = { 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
+                               0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
+  uint8x16_t t0 = vandq_u8(p0, bitmask); // lane j keeps only bit (j mod 8)
+  uint8x16_t t1 = vandq_u8(p1, bitmask);
+  uint8x16_t t2 = vandq_u8(p2, bitmask);
+  uint8x16_t t3 = vandq_u8(p3, bitmask);
+  uint8x16_t sum0 = vpaddq_u8(t0, t1); // pairwise adds fold adjacent lanes; distinct bits, so adding acts like OR
+  uint8x16_t sum1 = vpaddq_u8(t2, t3);
+  sum0 = vpaddq_u8(sum0, sum1);
+  sum0 = vpaddq_u8(sum0, sum0);
+  return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); // low 64 bits hold the packed mask
+#else
+  const uint8x16_t bitmask1 = { 0x01, 0x10, 0x01, 0x10, 0x01, 0x10, 0x01, 0x10,
+                                0x01, 0x10, 0x01, 0x10, 0x01, 0x10, 0x01, 0x10};
+  const uint8x16_t bitmask2 = { 0x02, 0x20, 0x02, 0x20, 0x02, 0x20, 0x02, 0x20,
+                                0x02, 0x20, 0x02, 0x20, 0x02, 0x20, 0x02, 0x20};
+  const uint8x16_t bitmask3 = { 0x04, 0x40, 0x04, 0x40, 0x04, 0x40, 0x04, 0x40,
+                                0x04, 0x40, 0x04, 0x40, 0x04, 0x40, 0x04, 0x40};
+  const uint8x16_t bitmask4 = { 0x08, 0x80, 0x08, 0x80, 0x08, 0x80, 0x08, 0x80,
+                                0x08, 0x80, 0x08, 0x80, 0x08, 0x80, 0x08, 0x80};
+#if 0
+  uint8x16_t t0 = vandq_u8(p0, bitmask1);
+  uint8x16_t t1 = vandq_u8(p1, bitmask2);
+  uint8x16_t t2 = vandq_u8(p2, bitmask3);
+  uint8x16_t t3 = vandq_u8(p3, bitmask4);
+  uint8x16_t tmp = vorrq_u8(vorrq_u8(t0, t1), vorrq_u8(t2, t3));
+#else
+  uint8x16_t t0 = vandq_u8(p0, bitmask1); // even lanes keep bit 0, odd lanes keep bit 4
+  uint8x16_t t1 = vbslq_u8(bitmask2, p1, t0); // bit-select: take the bitmask2 bits from p1, all other bits from t0
+  uint8x16_t t2 = vbslq_u8(bitmask3, p2, t1);
+  uint8x16_t tmp = vbslq_u8(bitmask4, p3, t2); // each lane now carries one bit from each of p0..p3
+#endif
+  uint8x16_t sum = vpaddq_u8(tmp, tmp); // adjacent lanes hold disjoint nibbles; adding them yields 8 result bits per byte
+  return vgetq_lane_u64(vreinterpretq_u64_u8(sum), 0);
+#endif
+}
+#endif
+
+template<simdjson::instruction_set T>
+uint64_t compute_quote_mask(uint64_t quote_bits);
+
+// In practice, if you have NEON or __PCLMUL__, you would
+// always want to use them, but it might be useful, for research
+// purposes, to disable it willingly, that's what SIMDJSON_AVOID_CLMUL
+// does.
+// Also: we don't know of an instance where AVX2 is supported but 
+// where clmul is not supported, so check for both, to be sure.
+#ifdef SIMDJSON_AVOID_CLMUL
+template<simdjson::instruction_set T> really_inline // portable fallback, compiled only when SIMDJSON_AVOID_CLMUL is defined
+uint64_t compute_quote_mask(uint64_t quote_bits)
+{ // prefix XOR in six doubling steps: on return, bit i is the XOR of bits 0..i of quote_bits
+  uint64_t quote_mask = quote_bits ^ (quote_bits << 1);
+  quote_mask = quote_mask ^ (quote_mask << 2);
+  quote_mask = quote_mask ^ (quote_mask << 4);
+  quote_mask = quote_mask ^ (quote_mask << 8);
+  quote_mask = quote_mask ^ (quote_mask << 16);
+  quote_mask = quote_mask ^ (quote_mask << 32);
+  return quote_mask;
+}
+#else
+template<simdjson::instruction_set>
+uint64_t compute_quote_mask(uint64_t quote_bits);
+
+#ifdef __AVX2__ 
+template<> really_inline // AVX2 build: derives the quote mask from quote_bits with one carry-less multiply
+uint64_t compute_quote_mask<simdjson::instruction_set::avx2>(uint64_t quote_bits) {
+  uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128( // low 64 bits of quote_bits (x) all-ones = prefix XOR of quote_bits
+      _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
+  return quote_mask;
+}
+#endif
+
+#ifdef __ARM_NEON
+template<> really_inline // NEON build: derives the quote mask from quote_bits with one carry-less (polynomial) multiply
+uint64_t compute_quote_mask<simdjson::instruction_set::neon>(uint64_t quote_bits) {
+#ifdef __PCLMUL__ // Might cause problems on runtime dispatch
+  uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128( // NOTE(review): x86 intrinsics inside the __ARM_NEON section -- confirm this branch is reachable
+                                          _mm_set_epi64x(0ULL, quote_bits),
+                                          _mm_set1_epi8(0xFF), 0));
+#else
+  uint64_t quote_mask = vmull_p64( -1ULL, quote_bits); // polynomial multiply by all-ones; only the low 64 bits are kept
+#endif
+  return quote_mask;
+}
+#endif
+#endif
+
+#ifdef SIMDJSON_UTF8VALIDATE
+template<simdjson::instruction_set T>really_inline // NOTE(review): generic over T, but the body uses the AVX2-only members lo/hi
+void check_utf8(simd_input<T> in, // the 64-byte block to validate
+                __m256i &has_error, // accumulator: error indications are ORed in, never cleared here
+                struct avx_processed_utf_bytes &previous) { // state carried from the previous block; updated on the non-ASCII path
+  __m256i highbit = _mm256_set1_epi8(0x80);
+  if ((_mm256_testz_si256(_mm256_or_si256(in.lo, in.hi), highbit)) == 1) { // no byte of the block has its high bit set
+    // it is ascii, we just check continuation
+    has_error = _mm256_or_si256(
+        _mm256_cmpgt_epi8(
+            previous.carried_continuations,
+            _mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+                             9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1)), // only the last lane has a low threshold (1)
+        has_error);
+  } else {
+    // it is not ascii so we have to do heavy work
+    previous = avxcheckUTF8Bytes(in.lo, &previous, &has_error); // lower 32 bytes first ...
+    previous = avxcheckUTF8Bytes(in.hi, &previous, &has_error); // ... then the upper 32, chaining the carried state
+  }
+}
+#endif
+
+template<simdjson::instruction_set T>
+simd_input<T> fill_input(const uint8_t * ptr);
+
+#ifdef __AVX2__
+template<> really_inline // loads the 64 bytes starting at ptr into two 256-bit registers
+simd_input<simdjson::instruction_set::avx2> fill_input<simdjson::instruction_set::avx2>(const uint8_t * ptr) {
+  struct simd_input<simdjson::instruction_set::avx2> in;
+  in.lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 0)); // unaligned load of bytes 0..31
+  in.hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 32)); // unaligned load of bytes 32..63
+  return in;
+}
+#endif
+
+#ifdef __ARM_NEON
+template<> really_inline // loads the 64 bytes starting at ptr into NEON registers
+simd_input<simdjson::instruction_set::neon> fill_input<simdjson::instruction_set::neon>(const uint8_t * ptr) {
+  struct simd_input<simdjson::instruction_set::neon> in;
+#ifndef TRANSPOSE
+  in.i0 = vld1q_u8(ptr + 0); // four consecutive 16-byte loads, in input order
+  in.i1 = vld1q_u8(ptr + 16);
+  in.i2 = vld1q_u8(ptr + 32);
+  in.i3 = vld1q_u8(ptr + 48);
+#else
+  in.i = vld4q_u8(ptr); // de-interleaving load: val[k] receives bytes k, k+4, k+8, ... (neonmovemask_bulk undoes this ordering)
+#endif
+  return in;
+}
+#endif
+
+// a straightforward comparison of a mask against input. 5 uops; would be
+// cheaper in AVX512.
+template<simdjson::instruction_set T>
+uint64_t cmp_mask_against_input(simd_input<T> in, uint8_t m);
+
+#ifdef __AVX2__
+template<> really_inline // returns a 64-bit mask whose bit i is set when byte i of the block equals m
+uint64_t cmp_mask_against_input<simdjson::instruction_set::avx2>(simd_input<simdjson::instruction_set::avx2> in, uint8_t m) {
+
+  const __m256i mask = _mm256_set1_epi8(m); // m broadcast to all 32 lanes
+  __m256i cmp_res_0 = _mm256_cmpeq_epi8(in.lo, mask);
+  uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0)); // cast through uint32_t so the int result is not sign-extended
+  __m256i cmp_res_1 = _mm256_cmpeq_epi8(in.hi, mask);
+  uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); // any sign-extension bits are shifted out by the << 32 below
+  return res_0 | (res_1 << 32);
+}
+#endif
+
+#ifdef __ARM_NEON
+template<> really_inline // returns a 64-bit mask of the bytes equal to m (uses in.i, so it requires the TRANSPOSE layout)
+uint64_t cmp_mask_against_input<simdjson::instruction_set::neon>(simd_input<simdjson::instruction_set::neon> in, uint8_t m) {
+  const uint8x16_t mask = vmovq_n_u8(m); // m broadcast to all 16 lanes
+  uint8x16_t cmp_res_0 = vceqq_u8(in.i.val[0], mask); // lane-wise equality: 0xFF where equal, 0x00 otherwise
+  uint8x16_t cmp_res_1 = vceqq_u8(in.i.val[1], mask); 
+  uint8x16_t cmp_res_2 = vceqq_u8(in.i.val[2], mask); 
+  uint8x16_t cmp_res_3 = vceqq_u8(in.i.val[3], mask); 
+  return neonmovemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3); // pack the four compare results into 64 bits
+}
+#endif
+
+// Find all input bytes that are less than or equal to m (using unsigned arithmetic); one result bit per byte.
+template<simdjson::instruction_set T>
+uint64_t unsigned_lteq_against_input(simd_input<T> in, uint8_t m);
+
+#ifdef __AVX2__
+template<> really_inline // returns a 64-bit mask whose bit i is set when byte i of the block is <= m (unsigned)
+uint64_t unsigned_lteq_against_input<simdjson::instruction_set::avx2>(simd_input<simdjson::instruction_set::avx2> in, uint8_t m) {
+  const __m256i maxval = _mm256_set1_epi8(m);
+  __m256i cmp_res_0 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval,in.lo),maxval); // max(m, x) == m  <=>  x <= m (unsigned)
+  uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0)); // cast through uint32_t so the int result is not sign-extended
+  __m256i cmp_res_1 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval,in.hi),maxval);
+  uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); // any sign-extension bits are shifted out by the << 32 below
+  return res_0 | (res_1 << 32);
+}
+#endif
+
+#ifdef __ARM_NEON
+template<> really_inline // returns a 64-bit mask of the bytes that are <= m (uses in.i, so it requires the TRANSPOSE layout)
+uint64_t unsigned_lteq_against_input<simdjson::instruction_set::neon>(simd_input<simdjson::instruction_set::neon> in, uint8_t m) {
+  const uint8x16_t mask = vmovq_n_u8(m); // m broadcast to all 16 lanes
+  uint8x16_t cmp_res_0 = vcleq_u8(in.i.val[0], mask); // lane-wise unsigned "less than or equal"
+  uint8x16_t cmp_res_1 = vcleq_u8(in.i.val[1], mask); 
+  uint8x16_t cmp_res_2 = vcleq_u8(in.i.val[2], mask); 
+  uint8x16_t cmp_res_3 = vcleq_u8(in.i.val[3], mask); 
+  return neonmovemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3); // pack the four compare results into 64 bits
+}
+#endif
+
+// Return a bitvector indicating where we have characters that end an odd-length
+// sequence of backslashes (and thus change the behavior of the next character
+// to follow). An even-length sequence of backslashes, and, for that matter, the
+// largest even-length prefix of our odd-length sequence of backslashes, simply
+// modify the behavior of the backslashes themselves.
+// We also update the prev_iter_ends_odd_backslash reference parameter to
+// indicate whether we end an iteration on an odd-length sequence of
+// backslashes, which modifies our subsequent search for odd-length
+// sequences of backslashes in an obvious way.
+template<simdjson::instruction_set T> really_inline
+uint64_t find_odd_backslash_sequences(simd_input<T> in, uint64_t &prev_iter_ends_odd_backslash) {
+  const uint64_t even_bits = 0x5555555555555555ULL; // bits 0, 2, 4, ...
+  const uint64_t odd_bits = ~even_bits; // bits 1, 3, 5, ...
+  uint64_t bs_bits = cmp_mask_against_input(in, '\\'); // one bit per backslash in the block
+  uint64_t start_edges = bs_bits & ~(bs_bits << 1); // backslashes not preceded by another backslash
+  // flip lowest if we have an odd-length run at the end of the prior
+  // iteration
+  uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
+  uint64_t even_starts = start_edges & even_start_mask;
+  uint64_t odd_starts = start_edges & ~even_start_mask;
+  uint64_t even_carries = bs_bits + even_starts; // the add carries through each run that starts on an even position
+
+  uint64_t odd_carries;
+  // must record the carry-out of our odd-carries out of bit 63; this
+  // indicates whether the sense of any edge going to the next iteration
+  // should be flipped
+  bool iter_ends_odd_backslash =
+      add_overflow(bs_bits, odd_starts, &odd_carries);
+
+  odd_carries |=
+      prev_iter_ends_odd_backslash;  // push in bit zero as a potential end
+                                     // if we had an odd-numbered run at the
+                                     // end of the previous iteration
+  prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
+  uint64_t even_carry_ends = even_carries & ~bs_bits; // keep only positions just past the end of a run
+  uint64_t odd_carry_ends = odd_carries & ~bs_bits;
+  uint64_t even_start_odd_end = even_carry_ends & odd_bits; // started even, ended odd: odd length
+  uint64_t odd_start_even_end = odd_carry_ends & even_bits; // started odd, ended even: odd length
+  uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
+  return odd_ends;
+}
+
+// Computes the quote mask and the unescaped quote bits for one input block.
+// The return value (quote mask) is a half-open mask covering the first quote of
+// each unescaped quote pair and everything inside the pair. The quote_bits
+// output receives the positions of the quotes that are not cleared by odd_ends.
+// prev_iter_inside_quote is both read and updated: it is XORed into the mask,
+// then set to all ones if bit 63 of the mask is set (we ended the block inside
+// a quote pair) and to zero otherwise, which inverts the sense of "inside
+// quotes" for the next iteration.
+// error_mask accumulates positions inside the quote mask whose byte is <= 0x1F.
+// Note that we don't do any error checking to see if we have backslash
+// sequences outside quotes; these backslash sequences (of any length) will be
+// detected elsewhere.
+template<simdjson::instruction_set T> really_inline
+uint64_t find_quote_mask_and_bits(simd_input<T> in, uint64_t odd_ends,
+    uint64_t &prev_iter_inside_quote, uint64_t &quote_bits, uint64_t &error_mask) {
+  quote_bits = cmp_mask_against_input<T>(in, '"');
+  quote_bits = quote_bits & ~odd_ends; // drop quotes that directly follow an odd-length backslash run
+  uint64_t quote_mask = compute_quote_mask<T>(quote_bits);
+  quote_mask ^= prev_iter_inside_quote;
+  // All Unicode characters may be placed within the
+  // quotation marks, except for the characters that MUST be escaped:
+  // quotation mark, reverse solidus, and the control characters (U+0000
+  // through U+001F).
+  // https://tools.ietf.org/html/rfc8259
+  uint64_t unescaped = unsigned_lteq_against_input<T>(in, 0x1F);
+  error_mask |= quote_mask & unescaped;
+  // right shift of a signed value expected to be well-defined and standard
+  // compliant as of C++20,
+  // John Regehr from Utah U. says this is fine code
+  prev_iter_inside_quote =
+      static_cast<uint64_t>(static_cast<int64_t>(quote_mask) >> 63);
+  return quote_mask;
+}
+
+// do a 'shufti' to detect structural JSON characters
+// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
+// these go into the first 3 buckets of the comparison (1/2/4)
+
+// we are also interested in the four whitespace characters
+// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
+// these go into the next 2 buckets of the comparison (8/16)
+template<simdjson::instruction_set T>
+void find_whitespace_and_structurals(simd_input<T> in,
+                                     uint64_t &whitespace,
+                                     uint64_t &structurals);
+
+#ifdef __AVX2__
+template<> really_inline // AVX2: writes one bit per input byte into `whitespace` and `structurals`
+void find_whitespace_and_structurals<simdjson::instruction_set::avx2>(simd_input<simdjson::instruction_set::avx2> in,
+                                                     uint64_t &whitespace,
+                                                     uint64_t &structurals) {
+#ifdef SIMDJSON_NAIVE_STRUCTURAL
+  // You should never need this naive approach, but it can be useful
+  // for research purposes
+  const __m256i mask_open_brace = _mm256_set1_epi8(0x7b);
+  __m256i struct_lo = _mm256_cmpeq_epi8(in.lo, mask_open_brace);
+  __m256i struct_hi = _mm256_cmpeq_epi8(in.hi, mask_open_brace);
+  const __m256i mask_close_brace = _mm256_set1_epi8(0x7d);
+  struct_lo = _mm256_or_si256(struct_lo,_mm256_cmpeq_epi8(in.lo, mask_close_brace));
+  struct_hi = _mm256_or_si256(struct_hi,_mm256_cmpeq_epi8(in.hi, mask_close_brace));
+  const __m256i mask_open_bracket = _mm256_set1_epi8(0x5b);
+  struct_lo = _mm256_or_si256(struct_lo,_mm256_cmpeq_epi8(in.lo, mask_open_bracket));
+  struct_hi = _mm256_or_si256(struct_hi,_mm256_cmpeq_epi8(in.hi, mask_open_bracket));
+  const __m256i mask_close_bracket = _mm256_set1_epi8(0x5d);
+  struct_lo = _mm256_or_si256(struct_lo,_mm256_cmpeq_epi8(in.lo, mask_close_bracket));
+  struct_hi = _mm256_or_si256(struct_hi,_mm256_cmpeq_epi8(in.hi, mask_close_bracket));
+  const __m256i mask_column = _mm256_set1_epi8(0x3a); // 0x3a is ':' (the colon)
+  struct_lo = _mm256_or_si256(struct_lo,_mm256_cmpeq_epi8(in.lo, mask_column));
+  struct_hi = _mm256_or_si256(struct_hi,_mm256_cmpeq_epi8(in.hi, mask_column));
+  const __m256i mask_comma = _mm256_set1_epi8(0x2c);
+  struct_lo = _mm256_or_si256(struct_lo,_mm256_cmpeq_epi8(in.lo, mask_comma));
+  struct_hi = _mm256_or_si256(struct_hi,_mm256_cmpeq_epi8(in.hi, mask_comma));
+  uint64_t structural_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(struct_lo));
+  uint64_t structural_res_1 = _mm256_movemask_epi8(struct_hi);
+  structurals = (structural_res_0 | (structural_res_1 << 32));
+
+  const __m256i mask_space = _mm256_set1_epi8(0x20);
+  __m256i space_lo = _mm256_cmpeq_epi8(in.lo, mask_space);
+  __m256i space_hi = _mm256_cmpeq_epi8(in.hi, mask_space);
+  const __m256i mask_linefeed = _mm256_set1_epi8(0x0a);
+  space_lo = _mm256_or_si256(space_lo,_mm256_cmpeq_epi8(in.lo, mask_linefeed));
+  space_hi = _mm256_or_si256(space_hi,_mm256_cmpeq_epi8(in.hi, mask_linefeed));
+  const __m256i mask_tab = _mm256_set1_epi8(0x09);
+  space_lo = _mm256_or_si256(space_lo,_mm256_cmpeq_epi8(in.lo, mask_tab));
+  space_hi = _mm256_or_si256(space_hi,_mm256_cmpeq_epi8(in.hi, mask_tab));
+  const __m256i mask_carriage = _mm256_set1_epi8(0x0d);
+  space_lo = _mm256_or_si256(space_lo,_mm256_cmpeq_epi8(in.lo, mask_carriage));
+  space_hi = _mm256_or_si256(space_hi,_mm256_cmpeq_epi8(in.hi, mask_carriage));
+
+  uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(space_lo));
+  uint64_t ws_res_1 = _mm256_movemask_epi8(space_hi);
+  whitespace = (ws_res_0 | (ws_res_1 << 32));
+  // end of naive approach
+
+#else // SIMDJSON_NAIVE_STRUCTURAL
+  const __m256i low_nibble_mask = _mm256_setr_epi8(
+      16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 
+      16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
+  const __m256i high_nibble_mask = _mm256_setr_epi8(
+      8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 
+      8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0);
+
+  __m256i structural_shufti_mask = _mm256_set1_epi8(0x7); // selects buckets 1/2/4
+  __m256i whitespace_shufti_mask = _mm256_set1_epi8(0x18); // selects buckets 8/16
+
+  __m256i v_lo = _mm256_and_si256( // a bucket bit survives only if both the low-nibble and the high-nibble lookups set it
+      _mm256_shuffle_epi8(low_nibble_mask, in.lo),
+      _mm256_shuffle_epi8(high_nibble_mask,
+                          _mm256_and_si256(_mm256_srli_epi32(in.lo, 4),
+                                           _mm256_set1_epi8(0x7f))));
+
+  __m256i v_hi = _mm256_and_si256(
+      _mm256_shuffle_epi8(low_nibble_mask, in.hi),
+      _mm256_shuffle_epi8(high_nibble_mask,
+                          _mm256_and_si256(_mm256_srli_epi32(in.hi, 4),
+                                           _mm256_set1_epi8(0x7f))));
+  __m256i tmp_lo = _mm256_cmpeq_epi8( // lanes with NO structural bucket bit compare equal to zero
+      _mm256_and_si256(v_lo, structural_shufti_mask), _mm256_set1_epi8(0));
+  __m256i tmp_hi = _mm256_cmpeq_epi8(
+      _mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0));
+
+  uint64_t structural_res_0 =
+      static_cast<uint32_t>(_mm256_movemask_epi8(tmp_lo));
+  uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
+  structurals = ~(structural_res_0 | (structural_res_1 << 32)); // invert: the compare above flagged the non-matching bytes
+
+  __m256i tmp_ws_lo = _mm256_cmpeq_epi8(
+      _mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0));
+  __m256i tmp_ws_hi = _mm256_cmpeq_epi8(
+      _mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
+
+  uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
+  uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
+  whitespace = ~(ws_res_0 | (ws_res_1 << 32)); // same inversion as for structurals
+#endif // SIMDJSON_NAIVE_STRUCTURAL
+}
+#endif
+
+#ifdef __ARM_NEON
+template<> really_inline // NEON: writes one bit per input byte into `whitespace` and `structurals`
+void find_whitespace_and_structurals<simdjson::instruction_set::neon>(
+                                                  simd_input<simdjson::instruction_set::neon> in,
+                                                  uint64_t &whitespace,
+                                                  uint64_t &structurals) {
+#ifndef FUNKY_BAD_TABLE
+  const uint8x16_t low_nibble_mask = (uint8x16_t){ 
+      16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0};
+  const uint8x16_t high_nibble_mask = (uint8x16_t){ 
+      8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0};
+  const uint8x16_t structural_shufti_mask = vmovq_n_u8(0x7); // selects buckets 1/2/4
+  const uint8x16_t whitespace_shufti_mask = vmovq_n_u8(0x18); // selects buckets 8/16
+  const uint8x16_t low_nib_and_mask = vmovq_n_u8(0xf); 
+
+  uint8x16_t nib_0_lo = vandq_u8(in.i.val[0], low_nib_and_mask);
+  uint8x16_t nib_0_hi = vshrq_n_u8(in.i.val[0], 4);
+  uint8x16_t shuf_0_lo = vqtbl1q_u8(low_nibble_mask, nib_0_lo);
+  uint8x16_t shuf_0_hi = vqtbl1q_u8(high_nibble_mask, nib_0_hi);
+  uint8x16_t v_0 = vandq_u8(shuf_0_lo, shuf_0_hi); // a bucket bit survives only if both nibble lookups set it
+
+  uint8x16_t nib_1_lo = vandq_u8(in.i.val[1], low_nib_and_mask);
+  uint8x16_t nib_1_hi = vshrq_n_u8(in.i.val[1], 4);
+  uint8x16_t shuf_1_lo = vqtbl1q_u8(low_nibble_mask, nib_1_lo);
+  uint8x16_t shuf_1_hi = vqtbl1q_u8(high_nibble_mask, nib_1_hi);
+  uint8x16_t v_1 = vandq_u8(shuf_1_lo, shuf_1_hi);
+
+  uint8x16_t nib_2_lo = vandq_u8(in.i.val[2], low_nib_and_mask);
+  uint8x16_t nib_2_hi = vshrq_n_u8(in.i.val[2], 4);
+  uint8x16_t shuf_2_lo = vqtbl1q_u8(low_nibble_mask, nib_2_lo);
+  uint8x16_t shuf_2_hi = vqtbl1q_u8(high_nibble_mask, nib_2_hi);
+  uint8x16_t v_2 = vandq_u8(shuf_2_lo, shuf_2_hi);
+
+  uint8x16_t nib_3_lo = vandq_u8(in.i.val[3], low_nib_and_mask);
+  uint8x16_t nib_3_hi = vshrq_n_u8(in.i.val[3], 4);
+  uint8x16_t shuf_3_lo = vqtbl1q_u8(low_nibble_mask, nib_3_lo);
+  uint8x16_t shuf_3_hi = vqtbl1q_u8(high_nibble_mask, nib_3_hi);
+  uint8x16_t v_3 = vandq_u8(shuf_3_lo, shuf_3_hi);
+
+  uint8x16_t tmp_0 = vtstq_u8(v_0, structural_shufti_mask); // 0xFF where any structural bucket bit is set
+  uint8x16_t tmp_1 = vtstq_u8(v_1, structural_shufti_mask);
+  uint8x16_t tmp_2 = vtstq_u8(v_2, structural_shufti_mask);
+  uint8x16_t tmp_3 = vtstq_u8(v_3, structural_shufti_mask);
+  structurals = neonmovemask_bulk(tmp_0, tmp_1, tmp_2, tmp_3);
+
+  uint8x16_t tmp_ws_0 = vtstq_u8(v_0, whitespace_shufti_mask); // 0xFF where any whitespace bucket bit is set
+  uint8x16_t tmp_ws_1 = vtstq_u8(v_1, whitespace_shufti_mask);
+  uint8x16_t tmp_ws_2 = vtstq_u8(v_2, whitespace_shufti_mask);
+  uint8x16_t tmp_ws_3 = vtstq_u8(v_3, whitespace_shufti_mask);
+  whitespace = neonmovemask_bulk(tmp_ws_0, tmp_ws_1, tmp_ws_2, tmp_ws_3);
+#else
+  // I think this one is garbage. In order to save the expense
+  // of another shuffle, I use an equally expensive shift, and 
+  // this gets glued to the end of the dependency chain. Seems a bit
+  // slower for no good reason.
+  //
+  // need to use a weird arrangement. Bytes in this bitvector
+  // are in conventional order, but bits are reversed as we are
+  // using a signed left shift (that is a +ve value from 0..7) to
+  // shift upwards to 0x80 in the bit. So we need to reverse bits.
+  
+  // note no structural/whitespace has the high bit on
+  // so it's OK to put the high 5 bits into our TBL shuffle
+  //
+
+  // structurals are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
+  // or in 5 bit, 3 bit form that's
+  // (15,3) (15, 5) (7,2) (11,3) (11,5) (5,4) 
+  // bit-reversing (subtract low 3 bits from 7) yields:
+  // (15,4) (15, 2) (7,5) (11,4) (11,2) (5,3) 
+  
+  const uint8x16_t structural_bitvec = (uint8x16_t){ 
+      0, 0, 0, 0, 
+      0, 8, 0, 32, 
+      0, 0, 0, 20, 
+      0, 0, 0, 20};
+  // we are also interested in the four whitespace characters
+  // space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
+  // (4,0) (1, 2) (1, 1) (1, 5)
+  // bit-reversing (subtract low 3 bits from 7) yields:
+  // (4,7) (1, 5) (1, 6) (1, 2)
+  
+  const uint8x16_t whitespace_bitvec = (uint8x16_t){ 
+      0, 100, 0, 0, 
+      128, 0, 0, 0, 
+      0, 0, 0, 0, 
+      0, 0, 0, 0};
+  const uint8x16_t low_3bits_and_mask = vmovq_n_u8(0x7); 
+  const uint8x16_t high_1bit_tst_mask = vmovq_n_u8(0x80); 
+
+  int8x16_t low_3bits_0 = vreinterpretq_s8_u8(vandq_u8(in.i.val[0], low_3bits_and_mask));
+  uint8x16_t high_5bits_0 = vshrq_n_u8(in.i.val[0], 3);
+  uint8x16_t shuffle_structural_0 = vshlq_u8(vqtbl1q_u8(structural_bitvec, high_5bits_0), low_3bits_0);
+  uint8x16_t shuffle_ws_0 = vshlq_u8(vqtbl1q_u8(whitespace_bitvec, high_5bits_0), low_3bits_0);
+  uint8x16_t tmp_0 = vtstq_u8(shuffle_structural_0, high_1bit_tst_mask); // a match is signalled by bit 0x80 after the shift
+  uint8x16_t tmp_ws_0 = vtstq_u8(shuffle_ws_0, high_1bit_tst_mask);
+
+  int8x16_t low_3bits_1 = vreinterpretq_s8_u8(vandq_u8(in.i.val[1], low_3bits_and_mask));
+  uint8x16_t high_5bits_1 = vshrq_n_u8(in.i.val[1], 3);
+  uint8x16_t shuffle_structural_1 = vshlq_u8(vqtbl1q_u8(structural_bitvec, high_5bits_1), low_3bits_1);
+  uint8x16_t shuffle_ws_1 = vshlq_u8(vqtbl1q_u8(whitespace_bitvec, high_5bits_1), low_3bits_1);
+  uint8x16_t tmp_1 = vtstq_u8(shuffle_structural_1, high_1bit_tst_mask);
+  uint8x16_t tmp_ws_1 = vtstq_u8(shuffle_ws_1, high_1bit_tst_mask);
+
+  int8x16_t low_3bits_2 = vreinterpretq_s8_u8(vandq_u8(in.i.val[2], low_3bits_and_mask));
+  uint8x16_t high_5bits_2 = vshrq_n_u8(in.i.val[2], 3);
+  uint8x16_t shuffle_structural_2 = vshlq_u8(vqtbl1q_u8(structural_bitvec, high_5bits_2), low_3bits_2);
+  uint8x16_t shuffle_ws_2 = vshlq_u8(vqtbl1q_u8(whitespace_bitvec, high_5bits_2), low_3bits_2);
+  uint8x16_t tmp_2 = vtstq_u8(shuffle_structural_2, high_1bit_tst_mask);
+  uint8x16_t tmp_ws_2 = vtstq_u8(shuffle_ws_2, high_1bit_tst_mask);
+
+  int8x16_t low_3bits_3 = vreinterpretq_s8_u8(vandq_u8(in.i.val[3], low_3bits_and_mask));
+  uint8x16_t high_5bits_3 = vshrq_n_u8(in.i.val[3], 3);
+  uint8x16_t shuffle_structural_3 = vshlq_u8(vqtbl1q_u8(structural_bitvec, high_5bits_3), low_3bits_3);
+  uint8x16_t shuffle_ws_3 = vshlq_u8(vqtbl1q_u8(whitespace_bitvec, high_5bits_3), low_3bits_3);
+  uint8x16_t tmp_3 = vtstq_u8(shuffle_structural_3, high_1bit_tst_mask);
+  uint8x16_t tmp_ws_3 = vtstq_u8(shuffle_ws_3, high_1bit_tst_mask);
+
+  structurals = neonmovemask_bulk(tmp_0, tmp_1, tmp_2, tmp_3);
+  whitespace = neonmovemask_bulk(tmp_ws_0, tmp_ws_1, tmp_ws_2, tmp_ws_3);
+#endif
+}
+#endif
+
+
+#ifdef SIMDJSON_NAIVE_FLATTEN // useful for benchmarking
+//
+// This is just a naive implementation. It should normally be
+// disabled, but can be used for research purposes to compare
+// against our optimized version.
+really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
+                                uint32_t idx, uint64_t bits) {
+  uint32_t * out_ptr = base_ptr + base; // append after the `base` entries already stored
+  idx -= 64; // presumably idx is the offset just past the current 64-byte block -- confirm against the caller
+  while(bits != 0) {
+      out_ptr[0] = idx + trailingzeroes(bits); // position of the lowest set bit
+      bits = bits & (bits - 1); // clear the lowest set bit
+      out_ptr++;
+  }
+  base = (out_ptr - base_ptr); // new number of stored entries
+}
+
+#else 
+// flatten out values in 'bits' assuming that they are to have values of idx
+// plus their position in the bitvector, and store these indexes at
+// base_ptr[base] incrementing base as we go
+// will potentially store extra values beyond end of valid bits, so base_ptr
+// needs to be large enough to handle this
+really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
+                                uint32_t idx, uint64_t bits) {
+  // In some instances, the next branch is expensive because it is mispredicted.
+  // Unfortunately, in other cases,
+  // it helps tremendously.
+  if(bits == 0) return; 
+  uint32_t cnt = hamming(bits); // number of set bits, i.e. the number of valid entries to append
+  uint32_t next_base = base + cnt;
+  idx -= 64; // presumably idx is the offset just past the current 64-byte block -- confirm against the caller
+  base_ptr += base;
+  { // first 8 slots are written unconditionally; slots beyond cnt are filler, since base only advances by cnt
+    base_ptr[0] = idx + trailingzeroes(bits);
+    bits = bits & (bits - 1);
+    base_ptr[1] = idx + trailingzeroes(bits);
+    bits = bits & (bits - 1);
+    base_ptr[2] = idx + trailingzeroes(bits);
+    bits = bits & (bits - 1);
+    base_ptr[3] = idx + trailingzeroes(bits);
+    bits = bits & (bits - 1);
+    base_ptr[4] = idx + trailingzeroes(bits);
+    bits = bits & (bits - 1);
+    base_ptr[5] = idx + trailingzeroes(bits);
+    bits = bits & (bits - 1);
+    base_ptr[6] = idx + trailingzeroes(bits);
+    bits = bits & (bits - 1);
+    base_ptr[7] = idx + trailingzeroes(bits);
+    bits = bits & (bits - 1);
+    base_ptr += 8;
+  }
+  // We hope that the next branch is easily predicted.
+  if (cnt > 8) {
+    base_ptr[0] = idx + trailingzeroes(bits);
+    bits = bits & (bits - 1);
+    base_ptr[1] = idx + trailingzeroes(bits);
+    bits = bits & (bits - 1);
+    base_ptr[2] = idx + trailingzeroes(bits);
+    bits = bits & (bits - 1);
+    base_ptr[3] = idx + trailingzeroes(bits);
+    bits = bits & (bits - 1);
+    base_ptr[4] = idx + trailingzeroes(bits);
+    bits = bits & (bits - 1);
+    base_ptr[5] = idx + trailingzeroes(bits);
+    bits = bits & (bits - 1);
+    base_ptr[6] = idx + trailingzeroes(bits);
+    bits = bits & (bits - 1);
+    base_ptr[7] = idx + trailingzeroes(bits);
+    bits = bits & (bits - 1);
+    base_ptr += 8;
+  }
+  if (cnt > 16) { // unlikely: we rarely get here
+    // since it means having one structural or pseudo-structural element
+    // every 4 characters (possible with inputs like "","","",...).
+    do {
+      base_ptr[0] = idx + trailingzeroes(bits);
+      bits = bits & (bits - 1);
+      base_ptr++;
+    } while(bits != 0);
+  }
+  base = next_base; // advance by the number of valid entries only
+}
+#endif
+
+// Returns the final structural bitmask for one block: candidates inside
+// quotes are cleared, quote characters are added (closing quotes are dropped
+// again at the end) and pseudo-structural characters are added.
+// prev_iter_ends_pseudo_pred carries, from one call to the next, whether the
+// previous block ended on whitespace or a structural character (in which case
+// the first character of this block may be pseudo-structural).
+really_inline uint64_t finalize_structurals(
+    uint64_t structurals, uint64_t whitespace, uint64_t quote_mask,
+    uint64_t quote_bits, uint64_t &prev_iter_ends_pseudo_pred) {
+  const uint64_t outside_quotes = ~quote_mask;
+
+  // Discard candidates located inside quotes, then put the quote characters
+  // themselves back so that later passes can walk from string to string.
+  structurals = (structurals & outside_quotes) | quote_bits;
+
+  // A "pseudo-structural character" is a non-whitespace character outside
+  // quotes whose predecessor is whitespace or a structural character. Marking
+  // it gives later passes the first character of every run of non-whitespace
+  // (atoms such as true/false/null, numbers), which they can then scan up to
+  // the next whitespace or structural character.
+
+  // Characters that may sit one position before a pseudo-structural character.
+  const uint64_t pseudo_pred = structurals | whitespace;
+
+  // Move every predecessor bit onto its successor; bit 0 comes from the
+  // previous block, and bit 63 is saved for the next one.
+  const uint64_t carry_in = prev_iter_ends_pseudo_pred;
+  prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
+  const uint64_t follows_pred = (pseudo_pred << 1) | carry_in;
+
+  const uint64_t pseudo_structurals = follows_pred & ~whitespace & outside_quotes;
+  structurals |= pseudo_structurals;
+
+  // Closing quotes have served their purpose; they are the quote bits that
+  // are not covered by the quote mask, so strip them from the result.
+  const uint64_t closing_quotes = quote_bits & outside_quotes;
+  return structurals & ~closing_quotes;
+}
+
+template<simdjson::instruction_set T = simdjson::instruction_set::native>
 WARN_UNUSED
-bool find_structural_bits(const uint8_t *buf, size_t len, ParsedJson &pj);
+/*never_inline*/ int find_structural_bits(const uint8_t *buf, size_t len,
+                                           ParsedJson &pj) {
+  if (len > pj.bytecapacity) {
+    std::cerr << "Your ParsedJson object only supports documents up to "
+         << pj.bytecapacity << " bytes but you are trying to process " << len
+         << " bytes" << std::endl;
+    return simdjson::CAPACITY;
+  }
+  uint32_t *base_ptr = pj.structural_indexes; // output array filled by flatten_bits
+  uint32_t base = 0;
+#ifdef SIMDJSON_UTF8VALIDATE
+  __m256i has_error = _mm256_setzero_si256();
+  struct avx_processed_utf_bytes previous {};
+  previous.rawbytes = _mm256_setzero_si256();
+  previous.high_nibbles = _mm256_setzero_si256();
+  previous.carried_continuations = _mm256_setzero_si256();
+#endif
 
+  // The input is processed in 64-byte blocks; the final block is copied into a
+  // temporary buffer padded with spaces (0x20), see the code after the main loop.
+
+  // persistent state across loop
+  // does the last iteration end with an odd-length sequence of backslashes?
+  // either 0 or 1, but a 64-bit value
+  uint64_t prev_iter_ends_odd_backslash = 0ULL;
+  // does the previous iteration end inside a double-quote pair?
+  uint64_t prev_iter_inside_quote = 0ULL;  // either all zeros or all ones
+  // does the previous iteration end on something that is a predecessor of a
+  // pseudo-structural character - i.e. whitespace or a structural character
+  // effectively the very first char is considered to follow "whitespace" for
+  // the
+  // purposes of pseudo-structural character detection so we initialize to 1
+  uint64_t prev_iter_ends_pseudo_pred = 1ULL;
+
+  // structurals are persistent state across loop as we flatten them on the
+  // subsequent iteration into our array pointed to by base_ptr.
+  // This is harmless on the first iteration as structurals==0
+  // and is done for performance reasons; we can hide some of the latency of the
+  // expensive carryless multiply in the previous step with this work
+  uint64_t structurals = 0;
+
+  size_t lenminus64 = len < 64 ? 0 : len - 64; // the last block, full or partial, is left to the tail code below
+  size_t idx = 0;
+  uint64_t error_mask = 0; // for unescaped characters within strings (ASCII code points < 0x20)
+
+  for (; idx < lenminus64; idx += 64) {
+#ifndef _MSC_VER
+    __builtin_prefetch(buf + idx + 128);
+#endif
+    simd_input<T> in = fill_input<T>(buf+idx);
+#ifdef SIMDJSON_UTF8VALIDATE
+    check_utf8(in, has_error, previous);
+#endif
+    // detect odd sequences of backslashes
+    uint64_t odd_ends = find_odd_backslash_sequences<T>(
+        in, prev_iter_ends_odd_backslash);
+
+    // detect insides of quote pairs ("quote_mask") and also our quote_bits
+    // themselves
+    uint64_t quote_bits;
+    uint64_t quote_mask = find_quote_mask_and_bits<T>(
+        in, odd_ends, prev_iter_inside_quote, quote_bits, error_mask);
+
+    // take the previous iterations structural bits, not our current iteration,
+    // and flatten
+    flatten_bits(base_ptr, base, idx, structurals);
+
+    uint64_t whitespace;
+    find_whitespace_and_structurals<T>(in, whitespace, structurals);
+
+    // fixup structurals to reflect quotes and add pseudo-structural characters
+    structurals = finalize_structurals(structurals, whitespace, quote_mask,
+                                       quote_bits, prev_iter_ends_pseudo_pred);
+  }
+
+  ////////////////
+  /// we use a giant copy-paste which is ugly.
+  /// but otherwise the string needs to be properly padded or else we
+  /// risk invalidating the UTF-8 checks.
+  ////////////
+  if (idx < len) {
+    uint8_t tmpbuf[64];
+    memset(tmpbuf, 0x20, 64);
+    memcpy(tmpbuf, buf + idx, len - idx);
+    simd_input<T> in = fill_input<T>(tmpbuf);
+#ifdef SIMDJSON_UTF8VALIDATE
+    check_utf8(in, has_error, previous);
+#endif
+
+    // detect odd sequences of backslashes
+    uint64_t odd_ends = find_odd_backslash_sequences<T>(
+        in, prev_iter_ends_odd_backslash);
+
+    // detect insides of quote pairs ("quote_mask") and also our quote_bits
+    // themselves
+    uint64_t quote_bits;
+    uint64_t quote_mask = find_quote_mask_and_bits<T>(
+        in, odd_ends, prev_iter_inside_quote, quote_bits, error_mask);
+
+    // take the previous iterations structural bits, not our current iteration,
+    // and flatten
+    flatten_bits(base_ptr, base, idx, structurals);
+
+    uint64_t whitespace;
+    find_whitespace_and_structurals<T>(in, whitespace, structurals);
+
+    // fixup structurals to reflect quotes and add pseudo-structural characters
+    structurals = finalize_structurals(structurals, whitespace, quote_mask,
+                                       quote_bits, prev_iter_ends_pseudo_pred);
+    idx += 64;
+  }
+
+  // is last string quote closed?
+  if (prev_iter_inside_quote) {
+      return simdjson::UNCLOSED_STRING;
+  }
+
+  // finally, flatten out the remaining structurals from the last iteration
+  flatten_bits(base_ptr, base, idx, structurals);
+
+  pj.n_structural_indexes = base;
+  // a valid JSON file cannot have zero structural indexes - we should have
+  // found something
+  if (pj.n_structural_indexes == 0u) {
+    fprintf(stderr, "Empty document?\n");
+    return simdjson::EMPTY;
+  }
+  if (base_ptr[pj.n_structural_indexes - 1] > len) {
+    fprintf(stderr, "Internal bug\n");
+    return simdjson::UNEXPECTED_ERROR;
+  }
+  if (len != base_ptr[pj.n_structural_indexes - 1]) {
+    // the string might not be NULL terminated, but we add a virtual NULL ending
+    // character.
+    base_ptr[pj.n_structural_indexes++] = len;
+  }
+  // make it safe to dereference one beyond this array
+  base_ptr[pj.n_structural_indexes] = 0;  
+  if (error_mask) {
+    fprintf(stderr, "Unescaped characters\n");
+    return simdjson::UNESCAPED_CHARS;
+  }
+#ifdef SIMDJSON_UTF8VALIDATE
+    return _mm256_testz_si256(has_error, has_error) == 0 ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
+#else
+  return simdjson::SUCCESS;
+#endif
+}
+
+template<simdjson::instruction_set T = simdjson::instruction_set::native>
 WARN_UNUSED
-bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj);
-
+int find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
+  return find_structural_bits<T>(reinterpret_cast<const uint8_t *>(buf), len, pj);
+} // char* convenience overload: reinterprets buf as bytes and forwards to the uint8_t version
+}
 #endif
 /* end file include/simdjson/stage1_find_marks.h */
 /* begin file include/simdjson/stringparsing.h */
@@ -36420,7 +37585,7 @@ bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj);
 #define SIMDJSON_STRINGPARSING_H
 
 
-
+namespace simdjson {
 // begin copypasta
 // These chars yield themselves: " \ /
 // b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab
@@ -36491,7 +37656,7 @@ really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, uint8_t **d
 #include <arm_neon.h>
 #endif
 
-WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER
+WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER
 really_inline  bool parse_string(UNUSED const uint8_t *buf, UNUSED size_t len,
                                 ParsedJson &pj, UNUSED const uint32_t depth, UNUSED uint32_t offset) {
 #ifdef SIMDJSON_SKIPSTRINGPARSING // for performance analysis, it is sometimes useful to skip parsing
@@ -36565,7 +37730,7 @@ really_inline  bool parse_string(UNUSED const uint8_t *buf, UNUSED size_t len,
       ////////////////////////
 
       
-      // we advance the point, accounting for the fact that we have a NULl termination
+      // we advance the point, accounting for the fact that we have a NULL termination
       pj.current_string_buf_loc = dst + quote_dist + 1;
 
 #ifdef JSON_TEST_STRINGS // for unit testing
@@ -36616,7 +37781,7 @@ really_inline  bool parse_string(UNUSED const uint8_t *buf, UNUSED size_t len,
   return true;
 #endif // SIMDJSON_SKIPSTRINGPARSING
 }
-
+}
 
 #endif
 /* end file include/simdjson/stringparsing.h */
@@ -36625,6 +37790,11 @@ really_inline  bool parse_string(UNUSED const uint8_t *buf, UNUSED size_t len,
 #define SIMDJSON_NUMBERPARSING_H
 
 
+namespace simdjson {
+// Allowable floating-point values range from std::numeric_limits<double>::lowest()
+// to std::numeric_limits<double>::max(), so from
+// -1.7976e308 all the way to 1.7976e308 in binary64. The smallest positive
+// normal value is std::numeric_limits<double>::min() or about 2.225074e-308.
 static const double power_of_ten[] = {
     1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300,
     1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291,
@@ -36705,7 +37875,7 @@ static inline bool is_integer(char c) {
 // probably frequent and it is hard than it looks. We are building all of this
 // just to differentiate between 0x1 (invalid), 0,1 (valid) 0e1 (valid)...
 const bool structural_or_whitespace_or_exponent_or_decimal_negated[256] = {
-    0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
@@ -36718,7 +37888,7 @@ const bool structural_or_whitespace_or_exponent_or_decimal_negated[256] = {
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
 
 really_inline bool
-is_not_structural_or_whitespace_or_exponent_or_decimal_or_null(unsigned char c) {
+is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) {
   return structural_or_whitespace_or_exponent_or_decimal_negated[c];
 }
 
@@ -36728,7 +37898,6 @@ is_not_structural_or_whitespace_or_exponent_or_decimal_or_null(unsigned char c)
 
 #ifdef SWAR_NUMBER_PARSING
 
-#ifdef _MSC_VER
 // check quickly whether the next 8 chars are made of digits
 // at a glance, it looks better than Mula's
 // http://0x80.pl/articles/swar-digits-validate.html
@@ -36746,19 +37915,6 @@ static inline bool is_made_of_eight_digits_fast(const char *chars) {
            (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) ==
           0x3333333333333333);
 }
-#else
-// this is more efficient apparently than the scalar code above (fewer instructions)
-static inline bool is_made_of_eight_digits_fast(const char *chars) {
-  __m64 val;
-  // this can read up to 7 bytes beyond the buffer size, but we require 
-  // SIMDJSON_PADDING of padding
-  static_assert(7 <= SIMDJSON_PADDING);
-  memcpy(&val, chars, 8);
-  __m64 base = _mm_sub_pi8(val,_mm_set1_pi8('0'));
-  __m64 basecmp = _mm_subs_pu8(base,_mm_set1_pi8(9));
-  return _mm_cvtm64_si64(basecmp) == 0;
-}
-#endif
 
 // clang-format off
 /***
@@ -36796,6 +37952,15 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
 
 #endif
 
+//
+// Computes base * 10^negative_exponent as (base * 1e-308) * 10^(negative_exponent + 308).
+// It is only ever going to be used when negative_exponent is very small (below -308).
+static double subnormal_power10(double base, int negative_exponent) {
+  const double residual_scale = pow(10, negative_exponent + 308); // probably slow, but rare
+  return base * 1e-308 * residual_scale;
+}
+
+
 // called by parse_number when we know that the output is a float,
 // but where there might be some integer overflow. The trick here is to
 // parse using floats from the start.
@@ -36891,15 +38056,27 @@ parse_float(const uint8_t *const buf,
 #endif
       return false;
     }
-    if (expnumber > 308) {
+    if (unlikely(expnumber > 308)) {
+      // this path is unlikely
+      if(negexp) { 
+        // We either have zero or a subnormal. 
+        // We expect this to be uncommon so we go through a slow path.
+        i = subnormal_power10(i, - expnumber);
+      } else {
+// We know for sure that we have a number that is too large,
 // we refuse to parse this
 #ifdef JSON_TEST_NUMBERS // for unit testing
-      foundInvalidNumber(buf + offset);
+        foundInvalidNumber(buf + offset);
 #endif
-      return false;
-    }
-    int exponent = (negexp ? -expnumber : expnumber);
-    i *= power_of_ten[308 + exponent];
+        return false;
+      }
+    } else {
+      int exponent = (negexp ? -expnumber : expnumber);
+      // we have that expnumber is [0,308] so that 
+      // exponent is [-308,308] so that 
+      // 308 + exponent is in [0, 2 * 308]
+      i *= power_of_ten[308 + exponent];
+   }
   }
   if(is_not_structural_or_whitespace(*p)) {
     return false;
@@ -36988,6 +38165,12 @@ static never_inline bool parse_large_integer(const uint8_t *const buf,
 
 // parse the number at buf + offset
 // define JSON_TEST_NUMBERS for unit testing
+//
+// It is assumed that the number is followed by a structural ({,},],[) character
+// or a white space character. If that is not the case (e.g., when the JSON document
+// is made of a single number), then it is necessary to copy the content and append
+// a space before calling this function.
+//
 static really_inline bool parse_number(const uint8_t *const buf,
                                        ParsedJson &pj,
                                        const uint32_t offset,
@@ -37013,7 +38196,7 @@ static really_inline bool parse_number(const uint8_t *const buf,
   uint64_t i; // an unsigned int avoids signed overflows (which are bad)
   if (*p == '0') { // 0 cannot be followed by an integer
     ++p;
-    if (is_not_structural_or_whitespace_or_exponent_or_decimal_or_null(*p)) {
+    if (is_not_structural_or_whitespace_or_exponent_or_decimal(*p)) {
 #ifdef JSON_TEST_NUMBERS // for unit testing
       foundInvalidNumber(buf + offset);
 #endif
@@ -37038,10 +38221,10 @@ static really_inline bool parse_number(const uint8_t *const buf,
       ++p;
     }
   }
-
   int64_t exponent = 0;
-
+  bool is_float = false;
   if ('.' == *p) {
+    is_float = true;
     ++p;
     const char *const firstafterperiod = p;
     if(is_integer(*p)) {
@@ -37070,9 +38253,9 @@ static really_inline bool parse_number(const uint8_t *const buf,
     exponent = firstafterperiod - p;
   }
   int digitcount = p - startdigits - 1;
-
   int64_t expnumber = 0; // exponential part
   if (('e' == *p) || ('E' == *p)) {
+    is_float = true;
     ++p;
     bool negexp = false;
     if ('-' == *p) {
@@ -37107,16 +38290,9 @@ static really_inline bool parse_number(const uint8_t *const buf,
 #endif
       return false;
     }
-    if(expnumber > 308) {
-// we refuse to parse this
-#ifdef JSON_TEST_NUMBERS // for unit testing
-        foundInvalidNumber(buf + offset);
-#endif
-        return false;       
-    }
     exponent += (negexp ? -expnumber : expnumber);
   }
-  if ((exponent != 0) || (expnumber != 0)) {
+  if (is_float) {
     if (unlikely(digitcount >= 19)) { // this is uncommon!!!
       // this is almost never going to get called!!!
       // we start anew, going slowly!!!
@@ -37134,7 +38310,15 @@ static really_inline bool parse_number(const uint8_t *const buf,
     } else {
       double d = i;
       d = negative ? -d : d;
-      d *= power_of_ten[308 + exponent];
+      uint64_t powerindex = 308 + exponent;
+      if(likely(powerindex <= 2 * 308)) {
+        // common case
+        d *= power_of_ten[powerindex];
+      } else {
+        // this is uncommon so let us move this special case out
+        // of the main loop
+        return parse_float(buf, pj, offset,found_minus);
+      }
       pj.write_tape_double(d);
 #ifdef JSON_TEST_NUMBERS // for unit testing
       foundFloat(d, buf + offset);
@@ -37154,7 +38338,7 @@ static really_inline bool parse_number(const uint8_t *const buf,
   return  is_structural_or_whitespace(*p);
 #endif // SIMDJSON_SKIPNUMBERPARSING
 }
-
+}
 #endif
 /* end file include/simdjson/numberparsing.h */
 /* begin file include/simdjson/stage2_build_tape.h */
@@ -37162,6 +38346,7 @@ static really_inline bool parse_number(const uint8_t *const buf,
 #define SIMDJSON_STAGE2_BUILD_TAPE_H
 
 
+namespace simdjson {
 struct ParsedJson;
 
 void init_state_machine();
@@ -37171,6 +38356,7 @@ int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj);
 
 WARN_UNUSED
 int unified_machine(const char *buf, size_t len, ParsedJson &pj);
+}
 
 #endif
 /* end file include/simdjson/stage2_build_tape.h */
@@ -37178,10 +38364,80 @@ int unified_machine(const char *buf, size_t len, ParsedJson &pj);
 #ifndef SIMDJSON_JSONPARSER_H
 #define SIMDJSON_JSONPARSER_H
 #include <string>
+#ifdef _MSC_VER
+#include <windows.h>
+#include <sysinfoapi.h>
+#else
+#include <unistd.h>
+#endif
+
+namespace simdjson {
+// The function that users are expected to call is json_parse.
+// We have more than one such function because we want to support several 
+// instruction sets.
+
+// function pointer type for json_parse
+using json_parse_functype = int (const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded);
+
+// Pointer that holds the json_parse implementation corresponding to the available SIMD instruction set
+extern json_parse_functype *json_parse_ptr;
+
+
+// json_parse_implementation is the generic function, it is specialized for various 
+// SIMD instruction sets, e.g., as json_parse_implementation<instruction_set::avx2>
+// or json_parse_implementation<instruction_set::neon> 
+// Runs stage 1 (find_structural_bits<T>) and then stage 2 (unified_machine) on buf.
+// Returns simdjson::CAPACITY if pj.bytecapacity < len, simdjson::MEMALLOC if the padded
+// copy cannot be allocated, the stage-1 error code (also stored in pj.errorcode) if
+// stage 1 fails, and otherwise whatever unified_machine returns.
+template<instruction_set T>
+int json_parse_implementation(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
+  if (pj.bytecapacity < len) {
+    return simdjson::CAPACITY;
+  }
+  bool reallocated = false; // true once buf points to our own padded copy
+  if(reallocifneeded) {
+#ifdef ALLOW_SAME_PAGE_BUFFER_OVERRUN
+#ifdef _MSC_VER
+    SYSTEM_INFO sysInfo; 
+    GetSystemInfo(&sysInfo); 
+    long pagesize = sysInfo.dwPageSize;
+#else
+    long pagesize = sysconf (_SC_PAGESIZE); 
+#endif
+    // The test below is true when buf + len - 1 and buf + len - 1 + SIMDJSON_PADDING
+    // lie in the same page.
+    // NOTE(review): the stated intent is that a copy is needed when the end of the memory
+    // crosses a page, yet this condition copies in the same-page case -- please confirm.
+    if ( (reinterpret_cast<uintptr_t>(buf + len - 1) % pagesize ) + SIMDJSON_PADDING < static_cast<uintptr_t>(pagesize) ) {
+#else // !ALLOW_SAME_PAGE_BUFFER_OVERRUN
+    if(true) { // without ALLOW_SAME_PAGE_BUFFER_OVERRUN, we always reallocate
+#endif
+      const uint8_t *tmpbuf  = buf;
+      buf = (uint8_t *) allocate_padded_buffer(len);
+      if(buf == NULL) return simdjson::MEMALLOC;
+      memcpy((void*)buf,tmpbuf,len);
+      reallocated = true;
+    } // end of the copy-into-padded-buffer branch
+  } // if(reallocifneeded) {
+  int res = find_structural_bits<T>(buf, len, pj);
+  if(res == simdjson::SUCCESS) {
+    res = unified_machine(buf, len, pj);
+  } else {
+    pj.errorcode = res; // stage 1 failed: record the error and skip stage 2
+  }
+  if(reallocated) { aligned_free((void*)buf);} // runs on every path, so a stage-1 failure no longer leaks the copy
+  return res;
+}
 
 // Parse a document found in buf. 
 // You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
-// Return 0 on success, an error code from simdjson/simdjson.h otherwise
+//
+// The function returns simdjson::SUCCESS (an integer = 0) in case of success, or an error code from
+// simdjson/simdjson.h in case of failure, such as simdjson::CAPACITY, simdjson::MEMALLOC,
+// simdjson::DEPTH_ERROR and so forth (the simdjson::errorMsg function converts these error codes
+// into a string).
+//
 // You can also check validity by calling pj.isValid(). The same ParsedJson can be reused for other documents.
 //
 // If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
@@ -37189,12 +38445,20 @@ int unified_machine(const char *buf, size_t len, ParsedJson &pj);
 // The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
 // all bytes at and after buf + len  are ignored (can be garbage).
 // The ParsedJson object can be reused.
-WARN_UNUSED
-int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded = true);
+
+inline int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
+  return json_parse_ptr(buf, len, pj, reallocifneeded); // calls whichever implementation json_parse_ptr currently points to
+}
 
 // Parse a document found in buf.
 // You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
-// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
+//
+// The function returns simdjson::SUCCESS (an integer = 0) in case of success, or an error code from
+// simdjson/simdjson.h in case of failure, such as simdjson::CAPACITY, simdjson::MEMALLOC,
+// simdjson::DEPTH_ERROR and so forth (the simdjson::errorMsg function converts these error codes
+// into a string).
+//
+// You can also check validity
 // by calling pj.isValid(). The same ParsedJson can be reused for other documents.
 //
 // If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
@@ -37202,45 +38466,37 @@ int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifnee
 // The input buf should be readable up to buf + len + SIMDJSON_PADDING  if reallocifneeded is false,
 // all bytes at and after buf + len  are ignored (can be garbage).
 // The ParsedJson object can be reused.
-WARN_UNUSED
 inline int json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
-  return json_parse(reinterpret_cast<const uint8_t *>(buf), len, pj, reallocifneeded);
+  return json_parse_ptr(reinterpret_cast<const uint8_t *>(buf), len, pj, reallocifneeded);
 }
 
-// Parse a document found in buf.
-// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
-// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
-// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
-//
-// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
-// (a copy of the input string is made).
-// The input s should be readable up to s.data() + s.size() + SIMDJSON_PADDING  if reallocifneeded is false,
-// all bytes at and after s.data()+s.size() are ignored (can be garbage).
-// The ParsedJson object can be reused.
-//WARN_UNUSED
-//inline int json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded = true) {
-//  return json_parse(s.data(), s.size(), pj, reallocifneeded);
-//}
-
-
+// We do not want to allow implicit conversion from C string to std::string.
+int json_parse(const char * buf, ParsedJson &pj) = delete;
 
 // Parse a document found in in string s.
 // You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
-// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
-// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
+//
+// The function returns simdjson::SUCCESS (an integer = 0) in case of success, or an error code from
+// simdjson/simdjson.h in case of failure, such as simdjson::CAPACITY, simdjson::MEMALLOC,
+// simdjson::DEPTH_ERROR and so forth (the simdjson::errorMsg function converts these error codes
+// into a string).
 //
 // A temporary buffer is created when needed during processing
 // (a copy of the input string is made).
-WARN_UNUSED
 inline int json_parse(const std::string &s, ParsedJson &pj) {
   return json_parse(s.data(), s.length(), pj, true);
 }
 
 // Parse a document found in in string s.
 // You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
-// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
+//
+// The function returns simdjson::SUCCESS (an integer = 0) in case of success, or an error code from
+// simdjson/simdjson.h in case of failure, such as simdjson::CAPACITY, simdjson::MEMALLOC,
+// simdjson::DEPTH_ERROR and so forth (the simdjson::errorMsg function converts these error codes
+// into a string).
+//
+// You can also check validity
 // by calling pj.isValid(). The same ParsedJson can be reused for other documents.
-WARN_UNUSED
 inline int json_parse(const padded_string &s, ParsedJson &pj) {
   return json_parse(s.data(), s.length(), pj, false);
 }
@@ -37253,6 +38509,8 @@ inline int json_parse(const padded_string &s, ParsedJson &pj) {
 //
 // the input buf should be readable up to buf + len + SIMDJSON_PADDING  if reallocifneeded is false,
 // all bytes at and after buf + len  are ignored (can be garbage).
+//
+// This is a convenience function which calls json_parse.
 WARN_UNUSED
 ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneeded = true);
 
@@ -37263,29 +38521,26 @@ WARN_UNUSED
 // (a copy of the input string is made).
 // The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
 // all bytes at and after buf + len  are ignored (can be garbage).
+//
+// This is a convenience function which calls json_parse.
 inline ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded = true) {
   return build_parsed_json(reinterpret_cast<const uint8_t *>(buf), len, reallocifneeded);
 }
 
-// convenience function
-WARN_UNUSED
-// Build a ParsedJson object. You can check validity
-// by calling pj.isValid(). This does the memory allocation needed for ParsedJson.
-// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
-// (a copy of the input string is made).
-// The input s should be readable up to s.data() + s.size() + SIMDJSON_PADDING  if reallocifneeded is false,
-// all bytes at and after s.data()+s.size() are ignored (can be garbage).
-//inline ParsedJson build_parsed_json(const std::string_view &s, bool reallocifneeded = true) {
- // return build_parsed_json(s.data(), s.size(), reallocifneeded);
-//}
+
+// We do not want to allow implicit conversion from C string to std::string.
+ParsedJson build_parsed_json(const char *buf) = delete;
+
 
 // Parse a document found in in string s.
 // You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
-// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
+// Returns the resulting ParsedJson object (not an error code). You can check its validity
 // by calling pj.isValid(). The same ParsedJson can be reused for other documents.
 //
 // A temporary buffer is created when needed during processing
 // (a copy of the input string is made).
+//
+// This is a convenience function which calls json_parse.
 WARN_UNUSED
 inline ParsedJson build_parsed_json(const std::string &s) {
   return build_parsed_json(s.data(), s.length(), true);
@@ -37294,14 +38549,14 @@ inline ParsedJson build_parsed_json(const std::string &s) {
 
 // Parse a document found in in string s.
 // You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
-// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
+// Returns the resulting ParsedJson object (not an error code). You can check its validity
 // by calling pj.isValid(). The same ParsedJson can be reused for other documents.
+//
+// This is a convenience function which calls json_parse.
 WARN_UNUSED
 inline ParsedJson build_parsed_json(const padded_string &s) {
   return build_parsed_json(s.data(), s.length(), false);
 }
-
-
-
+}
 #endif
 /* end file include/simdjson/jsonparser.h */