Remove unnamed namespace from ondemand

2020-09-07 00:24:42 -07:00 · 2020-09-07 00:24:42 -07:00 · b234d74f43
parent cd49ff330d
commit b234d74f43
90 changed files with 1601 additions and 187 deletions
--- a/benchmark/kostya/iter.h
+++ b/benchmark/kostya/iter.h
@ -6,7 +6,6 @@
 #include "kostya.h"

 namespace kostya {
-namespace {

 using namespace simdjson;
 using namespace SIMDJSON_IMPLEMENTATION;
@ -54,10 +53,8 @@ simdjson_really_inline bool Iter::Run(const padded_string &json) {

 BENCHMARK_TEMPLATE(Kostya, Iter);

-} // unnamed namespace

 namespace sum {
-namespace {

 class Iter {
 public:
@ -95,7 +92,6 @@ simdjson_really_inline bool Iter::Run(const padded_string &json) {

 BENCHMARK_TEMPLATE(KostyaSum, Iter);

-} // unnamed namespace
 } // namespace sum
 } // namespace kostya

--- a/benchmark/kostya/ondemand.h
+++ b/benchmark/kostya/ondemand.h
@ -6,7 +6,6 @@
 #include "kostya.h"

 namespace kostya {
-namespace {

 using namespace simdjson;
 using namespace SIMDJSON_IMPLEMENTATION;
@ -40,10 +39,8 @@ simdjson_really_inline bool OnDemand::Run(const padded_string &json) {

 BENCHMARK_TEMPLATE(Kostya, OnDemand);

-} // unnamed namespace

 namespace sum {
-namespace {

 class OnDemand {
 public:
@ -75,7 +72,6 @@ simdjson_really_inline bool OnDemand::Run(const padded_string &json) {

 BENCHMARK_TEMPLATE(KostyaSum, OnDemand);

-} // unnamed namespace
 } // namespace sum
 } // namespace kostya

--- a/benchmark/largerandom/iter.h
+++ b/benchmark/largerandom/iter.h
@ -6,7 +6,6 @@
 #include "largerandom.h"

 namespace largerandom {
-namespace {

 using namespace simdjson;
 using namespace SIMDJSON_IMPLEMENTATION;
@ -51,10 +50,8 @@ simdjson_really_inline bool Iter::Run(const padded_string &json) {

 BENCHMARK_TEMPLATE(LargeRandom, Iter);

-} // unnamed namespace

 namespace sum {
-namespace {

 class Iter {
 public:
@ -91,7 +88,6 @@ simdjson_really_inline bool Iter::Run(const padded_string &json) {

 BENCHMARK_TEMPLATE(LargeRandomSum, Iter);

-} // unnamed namespace
 } // namespace sum
 } // namespace largerandom

--- a/benchmark/largerandom/ondemand.h
+++ b/benchmark/largerandom/ondemand.h
@ -6,7 +6,6 @@
 #include "largerandom.h"

 namespace largerandom {
-namespace {

 using namespace simdjson;
 using namespace SIMDJSON_IMPLEMENTATION;
@ -37,10 +36,8 @@ simdjson_really_inline bool OnDemand::Run(const padded_string &json) {

 BENCHMARK_TEMPLATE(LargeRandom, OnDemand);

-} // unnamed namespace

 namespace sum {
-namespace {

 class OnDemand {
 public:
@ -72,7 +69,6 @@ simdjson_really_inline bool OnDemand::Run(const padded_string &json) {

 BENCHMARK_TEMPLATE(LargeRandomSum, OnDemand);

-} // unnamed namespace
 } // namespace sum
 } // namespace largerandom

--- a/benchmark/largerandom/sax.h
+++ b/benchmark/largerandom/sax.h
@ -6,7 +6,6 @@
 #include "largerandom.h"

 namespace largerandom {
-namespace {

 using namespace simdjson;
 using namespace SIMDJSON_IMPLEMENTATION;
@ -121,7 +120,6 @@ error_code Sax::Allocate(size_t new_capacity) {

 BENCHMARK_TEMPLATE(LargeRandom, Sax);

-}
 } // namespace largerandom

 #endif // SIMDJSON_EXCEPTIONS
--- a/benchmark/partial_tweets/iter.h
+++ b/benchmark/partial_tweets/iter.h
@ -6,7 +6,6 @@
 #include "partial_tweets.h"

 namespace partial_tweets {
-namespace {

 using namespace simdjson;
 using namespace SIMDJSON_IMPLEMENTATION;
@ -91,7 +90,6 @@ simdjson_really_inline bool Iter::Run(const padded_string &json) {

 BENCHMARK_TEMPLATE(PartialTweets, Iter);

-}
 } // namespace partial_tweets

 #endif // SIMDJSON_EXCEPTIONS
--- a/benchmark/partial_tweets/ondemand.h
+++ b/benchmark/partial_tweets/ondemand.h
@ -6,7 +6,6 @@
 #include "partial_tweets.h"

 namespace partial_tweets {
-namespace {

 using namespace simdjson;
 using namespace SIMDJSON_IMPLEMENTATION;
@ -56,7 +55,6 @@ simdjson_really_inline bool OnDemand::Run(const padded_string &json) {

 BENCHMARK_TEMPLATE(PartialTweets, OnDemand);

-}
 } // namespace partial_tweets

 #endif // SIMDJSON_EXCEPTIONS
--- a/benchmark/partial_tweets/sax.h
+++ b/benchmark/partial_tweets/sax.h
@ -6,7 +6,6 @@
 #include "sax_tweet_reader_visitor.h"

 namespace partial_tweets {
-namespace {

 using namespace simdjson;
 using namespace SIMDJSON_IMPLEMENTATION;
@ -67,7 +66,6 @@ error_code Sax::Allocate(size_t new_capacity) {

 BENCHMARK_TEMPLATE(PartialTweets, Sax);

-}
 } // namespace partial_tweets

 #endif // SIMDJSON_IMPLEMENTATION
--- a/src/arm64/bitmanipulation.h
+++ b/src/arm64/bitmanipulation.h
@ -1,8 +1,8 @@
 #ifndef SIMDJSON_ARM64_BITMANIPULATION_H
 #define SIMDJSON_ARM64_BITMANIPULATION_H

+namespace SIMDJSON_IMPLEMENTATION {
 namespace {
-namespace arm64 {

 // We sometimes call trailing_zero on inputs that are zero,
 // but the algorithms do not end up using the returned value.
@ -55,7 +55,7 @@ simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, uint6
 #endif
 }

-} // namespace arm64
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #endif // SIMDJSON_ARM64_BITMANIPULATION_H
--- a/src/arm64/bitmask.h
+++ b/src/arm64/bitmask.h
@ -1,8 +1,8 @@
 #ifndef SIMDJSON_ARM64_BITMASK_H
 #define SIMDJSON_ARM64_BITMASK_H

+namespace SIMDJSON_IMPLEMENTATION {
 namespace {
-namespace arm64 {

 //
 // Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered.
--- a/src/arm64/dom_parser_implementation.cpp
+++ b/src/arm64/dom_parser_implementation.cpp
@ -5,8 +5,8 @@
 //
 // Stage 1
 //
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {

 using namespace simd;

@ -98,8 +98,8 @@ simdjson_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t>
    return is_third_byte ^ is_fourth_byte;
 }

-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #include "generic/stage1/utf8_lookup4_algorithm.h"
 #include "generic/stage1/json_structural_indexer.h"
@ -116,8 +116,8 @@ simdjson_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t>
 //
 // Implementation-specific overrides
 //
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace stage1 {

 simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) {
@ -128,6 +128,7 @@ simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backs
 }

 } // namespace stage1
+} // unnamed namespace

 SIMDJSON_WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
  return arm64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len);
@ -158,6 +159,5 @@ SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *
 }

 } // namespace SIMDJSON_IMPLEMENTATION
-} // unnamed namespace

 #include "arm64/end_implementation.h"
--- a/src/arm64/implementation.cpp
+++ b/src/arm64/implementation.cpp
@ -1,7 +1,6 @@
 #include "arm64/begin_implementation.h"
 #include "arm64/dom_parser_implementation.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {

 SIMDJSON_WARN_UNUSED error_code implementation::create_dom_parser_implementation(
@ -17,6 +16,5 @@ SIMDJSON_WARN_UNUSED error_code implementation::create_dom_parser_implementation
 }

 } // namespace SIMDJSON_IMPLEMENTATION
-} // unnamed namespace

 #include "arm64/end_implementation.h"
--- a/src/arm64/implementation.h
+++ b/src/arm64/implementation.h
@ -4,7 +4,6 @@
 #include "simdjson.h"
 #include "isadetection.h"

-namespace {
 namespace arm64 {

 using namespace simdjson;
@ -23,6 +22,5 @@ public:
 };

 } // namespace arm64
-} // unnamed namespace

 #endif // SIMDJSON_ARM64_IMPLEMENTATION_H
--- a/src/arm64/numberparsing.h
+++ b/src/arm64/numberparsing.h
@ -1,8 +1,8 @@
 #ifndef SIMDJSON_ARM64_NUMBERPARSING_H
 #define SIMDJSON_ARM64_NUMBERPARSING_H

+namespace SIMDJSON_IMPLEMENTATION {
 namespace {
-namespace arm64 {

 // we don't have SSE, so let us use a scalar function
 // credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
@ -14,8 +14,8 @@ static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t
  return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
 }

-} // namespace arm64
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #define SWAR_NUMBER_PARSING

--- a/src/arm64/simd.h
+++ b/src/arm64/simd.h
@ -7,8 +7,8 @@
 #include <type_traits>


+namespace SIMDJSON_IMPLEMENTATION {
 namespace {
-namespace arm64 {
 namespace simd {

 #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
@ -484,7 +484,7 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1,  int8_t x2,  int8_t x
  }; // struct simd8x64<T>

 } // namespace simd
-} // namespace arm64
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #endif // SIMDJSON_ARM64_SIMD_H
--- a/src/arm64/stringparsing.h
+++ b/src/arm64/stringparsing.h
@ -5,8 +5,8 @@
 #include "arm64/simd.h"
 #include "arm64/bitmanipulation.h"

+namespace SIMDJSON_IMPLEMENTATION {
 namespace {
-namespace arm64 {

 using namespace simd;

@ -43,8 +43,8 @@ simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(co
  };
 }

-} // namespace arm64
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #include "generic/stage2/stringparsing.h"

--- a/src/fallback/bitmanipulation.h
+++ b/src/fallback/bitmanipulation.h
@ -4,8 +4,8 @@
 #include "simdjson.h"
 #include <limits>

+namespace SIMDJSON_IMPLEMENTATION {
 namespace {
-namespace fallback {

 #if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64)
 static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) {
@ -39,7 +39,7 @@ simdjson_really_inline int leading_zeroes(uint64_t input_num) {
 #endif// _MSC_VER
 }

-} // namespace fallback
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #endif // SIMDJSON_FALLBACK_BITMANIPULATION_H
--- a/src/fallback/dom_parser_implementation.cpp
+++ b/src/fallback/dom_parser_implementation.cpp
@ -7,8 +7,8 @@
 //
 #include "generic/stage1/find_next_document_index.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace stage1 {

 class structural_scanner {
@ -179,6 +179,7 @@ private:
 }; // structural_scanner

 } // namespace stage1
+} // unnamed namespace

 SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool partial) noexcept {
  this->buf = _buf;
@ -308,7 +309,6 @@ SIMDJSON_WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t
 }

 } // namespace SIMDJSON_IMPLEMENTATION
-} // unnamed namespace

 //
 // Stage 2
@ -317,7 +317,6 @@ SIMDJSON_WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t
 #include "fallback/numberparsing.h"
 #include "generic/stage2/tape_builder.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {

 SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept {
@ -335,6 +334,5 @@ SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *
 }

 } // namespace SIMDJSON_IMPLEMENTATION
-} // unnamed namespace

 #include "fallback/end_implementation.h"
--- a/src/fallback/implementation.cpp
+++ b/src/fallback/implementation.cpp
@ -1,7 +1,6 @@
 #include "fallback/begin_implementation.h"
 #include "fallback/dom_parser_implementation.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {

 SIMDJSON_WARN_UNUSED error_code implementation::create_dom_parser_implementation(
@ -17,6 +16,5 @@ SIMDJSON_WARN_UNUSED error_code implementation::create_dom_parser_implementation
 }

 } // namespace SIMDJSON_IMPLEMENTATION
-} // unnamed namespace

 #include "fallback/end_implementation.h"
--- a/src/fallback/implementation.h
+++ b/src/fallback/implementation.h
@ -4,7 +4,6 @@
 #include "simdjson.h"
 #include "isadetection.h"

-namespace {
 namespace fallback {

 using namespace simdjson;
@ -27,6 +26,5 @@ public:
 };

 } // namespace fallback
-} // unnamed namespace

 #endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H
--- a/src/fallback/numberparsing.h
+++ b/src/fallback/numberparsing.h
@ -8,8 +8,8 @@ void found_unsigned_integer(uint64_t result, const uint8_t *buf);
 void found_float(double result, const uint8_t *buf);
 #endif

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) {
  uint32_t result = 0;
  for (int i=0;i<8;i++) {
@ -21,8 +21,8 @@ static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t
  return parse_eight_digits_unrolled((const char *)chars);
 }

-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #define SWAR_NUMBER_PARSING
 #include "generic/stage2/numberparsing.h"
--- a/src/fallback/stringparsing.h
+++ b/src/fallback/stringparsing.h
@ -3,8 +3,8 @@

 #include "simdjson.h"

+namespace SIMDJSON_IMPLEMENTATION {
 namespace {
-namespace fallback {

 // Holds backslashes and quotes locations.
 struct backslash_and_quote {
@ -26,8 +26,8 @@ simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(co
  return { src[0] };
 }

-} // namespace fallback
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #include "generic/stage2/stringparsing.h"

--- a/src/generic/dom_parser_implementation.h
+++ b/src/generic/dom_parser_implementation.h
@ -1,7 +1,6 @@
 #include "simdjson.h"
 #include "isadetection.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {

 // expectation: sizeof(open_container) = 64/8.
@ -41,12 +40,10 @@ public:
 };

 } // namespace SIMDJSON_IMPLEMENTATION
-} // unnamed namespace

 #include "generic/stage1/allocate.h"
 #include "generic/stage2/allocate.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {

 inline dom_parser_implementation::dom_parser_implementation() noexcept = default;
@ -69,4 +66,3 @@ SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t
 }

 } // namespace SIMDJSON_IMPLEMENTATION
-} // unnamed namespace
--- a/src/generic/implementation_simdjson_result_base-inl.h
+++ b/src/generic/implementation_simdjson_result_base-inl.h
@ -1,4 +1,3 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {

 //
@ -74,5 +73,4 @@ template<typename T>
 simdjson_really_inline implementation_simdjson_result_base<T>::implementation_simdjson_result_base() noexcept
    : implementation_simdjson_result_base(T{}, UNINITIALIZED) {}

-} // namespace SIMDJSON_IMPLEMENTATION
-} // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/implementation_simdjson_result_base.h
+++ b/src/generic/implementation_simdjson_result_base.h
@ -1,4 +1,3 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {

 // This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair
@ -118,5 +117,4 @@ struct implementation_simdjson_result_base {
  error_code second;
 }; // struct implementation_simdjson_result_base

-} // namespace SIMDJSON_IMPLEMENTATION
-} // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/ondemand/array-inl.h
+++ b/src/generic/ondemand/array-inl.h
@ -1,4 +1,3 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -73,7 +72,6 @@ simdjson_really_inline array_iterator array::end() & noexcept {

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // unnamed namespace

 namespace simdjson {

--- a/src/generic/ondemand/array.h
+++ b/src/generic/ondemand/array.h
@ -1,6 +1,5 @@
 #include "simdjson/error.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -61,7 +60,6 @@ protected:

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // namespace {

 namespace simdjson {

--- a/src/generic/ondemand/array_iterator-inl.h
+++ b/src/generic/ondemand/array_iterator-inl.h
@ -1,4 +1,3 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -23,7 +22,6 @@ simdjson_really_inline array_iterator &array_iterator::operator++() noexcept {

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // unnamed namespace

 namespace simdjson {

--- a/src/generic/ondemand/array_iterator.h
+++ b/src/generic/ondemand/array_iterator.h
@ -1,6 +1,5 @@
 #include "simdjson/error.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -49,7 +48,6 @@ private:

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // unnamed namespace

 namespace simdjson {

--- a/src/generic/ondemand/document-inl.h
+++ b/src/generic/ondemand/document-inl.h
@ -1,4 +1,3 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -94,7 +93,6 @@ simdjson_really_inline simdjson_result<value> document::operator[](const char *k

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // namespace {

 namespace simdjson {

--- a/src/generic/ondemand/document.h
+++ b/src/generic/ondemand/document.h
@ -1,6 +1,5 @@
 #include "simdjson/error.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -83,7 +82,6 @@ protected:

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // namespace {

 namespace simdjson {

--- a/src/generic/ondemand/field-inl.h
+++ b/src/generic/ondemand/field-inl.h
@ -1,4 +1,3 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -31,7 +30,6 @@ simdjson_really_inline value &field::value() noexcept {

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // namespace {

 namespace simdjson {

--- a/src/generic/ondemand/field.h
+++ b/src/generic/ondemand/field.h
@ -1,6 +1,5 @@
 #include "simdjson/error.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -27,7 +26,6 @@ protected:

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // namespace {

 namespace simdjson {

--- a/src/generic/ondemand/json_iterator-inl.h
+++ b/src/generic/ondemand/json_iterator-inl.h
@ -1,4 +1,3 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -324,7 +323,6 @@ simdjson_really_inline bool json_iterator_ref::is_active() const noexcept {

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // namespace {

 namespace simdjson {

--- a/src/generic/ondemand/json_iterator.h
+++ b/src/generic/ondemand/json_iterator.h
@ -1,4 +1,3 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -184,7 +183,6 @@ private:

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // namespace {

 namespace simdjson {

--- a/src/generic/ondemand/logger-inl.h
+++ b/src/generic/ondemand/logger-inl.h
@ -1,4 +1,3 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {
 namespace logger {
@ -72,4 +71,3 @@ simdjson_really_inline void log_line(const json_iterator &iter, const char *titl
 } // namespace logger
 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // unnamed namespace
--- a/src/generic/ondemand/logger.h
+++ b/src/generic/ondemand/logger.h
@ -1,4 +1,3 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -19,4 +18,3 @@ static simdjson_really_inline void log_error(const json_iterator &iter, const ch
 } // namespace logger
 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // unnamed namespace
--- a/src/generic/ondemand/object-inl.h
+++ b/src/generic/ondemand/object-inl.h
@ -1,4 +1,3 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -122,7 +121,6 @@ simdjson_really_inline object_iterator object::end() noexcept {

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // namespace {

 namespace simdjson {

--- a/src/generic/ondemand/object.h
+++ b/src/generic/ondemand/object.h
@ -1,6 +1,5 @@
 #include "simdjson/error.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -64,7 +63,6 @@ protected:

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // namespace {

 namespace simdjson {

--- a/src/generic/ondemand/object_iterator-inl.h
+++ b/src/generic/ondemand/object_iterator-inl.h
@ -1,4 +1,3 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -28,7 +27,6 @@ simdjson_really_inline object_iterator &object_iterator::operator++() noexcept {

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // unnamed namespace

 namespace simdjson {

--- a/src/generic/ondemand/object_iterator.h
+++ b/src/generic/ondemand/object_iterator.h
@ -1,6 +1,5 @@
 #include "simdjson/error.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -43,7 +42,6 @@ private:

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // unnamed namespace

 namespace simdjson {

--- a/src/generic/ondemand/parser-inl.h
+++ b/src/generic/ondemand/parser-inl.h
@ -1,4 +1,3 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -42,7 +41,6 @@ SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<json_iterator> parse

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // namespace {

 namespace simdjson {

--- a/src/generic/ondemand/parser.h
+++ b/src/generic/ondemand/parser.h
@ -1,6 +1,5 @@
 #include "simdjson/error.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -36,7 +35,6 @@ private:

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // namespace {

 namespace simdjson {

--- a/src/generic/ondemand/raw_json_string-inl.h
+++ b/src/generic/ondemand/raw_json_string-inl.h
@ -1,4 +1,3 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -35,7 +34,6 @@ SIMDJSON_UNUSED simdjson_really_inline bool operator!=(std::string_view a, const

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // namespace {

 namespace simdjson {

--- a/src/generic/ondemand/raw_json_string.h
+++ b/src/generic/ondemand/raw_json_string.h
@ -1,6 +1,5 @@
 #include "simdjson/error.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -33,7 +32,6 @@ SIMDJSON_UNUSED simdjson_really_inline bool operator!=(std::string_view a, const

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // namespace {

 namespace simdjson {

--- a/src/generic/ondemand/token_iterator-inl.h
+++ b/src/generic/ondemand/token_iterator-inl.h
@ -1,4 +1,3 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -41,7 +40,6 @@ simdjson_really_inline bool token_iterator::operator<=(const token_iterator &oth

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // namespace {

 namespace simdjson {

--- a/src/generic/ondemand/token_iterator.h
+++ b/src/generic/ondemand/token_iterator.h
@ -1,4 +1,3 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -74,7 +73,6 @@ protected:

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // namespace {

 namespace simdjson {

--- a/src/generic/ondemand/value-inl.h
+++ b/src/generic/ondemand/value-inl.h
@ -1,4 +1,3 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -167,7 +166,6 @@ simdjson_really_inline void value::log_error(const char *message) const noexcept

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // namespace {

 namespace simdjson {

--- a/src/generic/ondemand/value.h
+++ b/src/generic/ondemand/value.h
@ -1,6 +1,5 @@
 #include "simdjson/error.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
 namespace ondemand {

@ -89,7 +88,6 @@ protected:

 } // namespace ondemand
 } // namespace SIMDJSON_IMPLEMENTATION
-} // namespace {

 namespace simdjson {

--- a/src/generic/stage1/allocate.h
+++ b/src/generic/stage1/allocate.h
@ -1,5 +1,5 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace stage1 {
 namespace allocate {

@ -17,5 +17,5 @@ simdjson_really_inline error_code set_capacity(internal::dom_parser_implementati

 } // namespace allocate
 } // namespace stage1
-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/stage1/buf_block_reader.h
+++ b/src/generic/stage1/buf_block_reader.h
@ -1,5 +1,5 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {

 // Walks through a buffer in block-sized increments, loading the last part with spaces
 template<size_t STEP_SIZE>
@ -86,5 +86,5 @@ simdjson_really_inline void buf_block_reader<STEP_SIZE>::advance() {
  idx += STEP_SIZE;
 }

-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/stage1/find_next_document_index.h
+++ b/src/generic/stage1/find_next_document_index.h
@ -1,5 +1,5 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {

 /**
  * This algorithm is used to quickly identify the last structural position that
@ -67,5 +67,5 @@ simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementati
  return 0;
 }

-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/stage1/json_minifier.h
+++ b/src/generic/stage1/json_minifier.h
@ -3,8 +3,8 @@
 // We assume the file in which it is included already includes
 // "simdjson/stage1.h" (this simplifies amalgation)

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace stage1 {

 class json_minifier {
@ -88,5 +88,5 @@ error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, s
 }

 } // namespace stage1
-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/stage1/json_scanner.h
+++ b/src/generic/stage1/json_scanner.h
@ -1,5 +1,5 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace stage1 {

 /**
@ -142,5 +142,5 @@ simdjson_really_inline error_code json_scanner::finish(bool streaming) {
 }

 } // namespace stage1
-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/stage1/json_string_scanner.h
+++ b/src/generic/stage1/json_string_scanner.h
@ -1,5 +1,5 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace stage1 {

 struct json_string_block {
@ -139,5 +139,5 @@ simdjson_really_inline error_code json_string_scanner::finish(bool streaming) {
 }

 } // namespace stage1
-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/stage1/json_structural_indexer.h
+++ b/src/generic/stage1/json_structural_indexer.h
@ -9,8 +9,8 @@
 #include "generic/stage1/json_minifier.h"
 #include "generic/stage1/find_next_document_index.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace stage1 {

 class bit_indexer {
@ -226,5 +226,5 @@ simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_imp
 }

 } // namespace stage1
-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/stage1/utf8_fastvalidate_algorithm.h
+++ b/src/generic/stage1/utf8_fastvalidate_algorithm.h
@ -0,0 +1,184 @@
+namespace simdjson {
+namespace SIMDJSON_IMPLEMENTATION {
+
+/*
+ * legal utf-8 byte sequence
+ * http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
+ *
+ *  Code Points        1st       2s       3s       4s
+ * U+0000..U+007F     00..7F
+ * U+0080..U+07FF     C2..DF   80..BF
+ * U+0800..U+0FFF     E0       A0..BF   80..BF
+ * U+1000..U+CFFF     E1..EC   80..BF   80..BF
+ * U+D000..U+D7FF     ED       80..9F   80..BF
+ * U+E000..U+FFFF     EE..EF   80..BF   80..BF
+ * U+10000..U+3FFFF   F0       90..BF   80..BF   80..BF
+ * U+40000..U+FFFFF   F1..F3   80..BF   80..BF   80..BF
+ * U+100000..U+10FFFF F4       80..8F   80..BF   80..BF
+ *
+ */
+
+// all byte values must be no larger than 0xF4
+
+using namespace simd;
+
+struct processed_utf_bytes {
+  simd8<uint8_t> raw_bytes;             // the input bytes, as stored by utf8_checker::count_nibbles()
+  simd8<int8_t> high_nibbles;           // each input byte shifted right by 4 (see count_nibbles())
+  simd8<int8_t> carried_continuations;  // result of carry_continuations(), stored by check_utf8_bytes()
+};
+
+struct utf8_checker {
+  simd8<uint8_t> has_error;
+  processed_utf_bytes previous;
+
+  // all byte values must be no larger than 0xF4
+  simdjson_really_inline void check_smaller_than_0xF4(const simd8<uint8_t> current_bytes) {
+    // unsigned, saturates to 0 below max
+    this->has_error |= current_bytes.saturating_sub(0xF4u);
+  }
+
+  simdjson_really_inline simd8<int8_t> continuation_lengths(const simd8<int8_t> high_nibbles) {
+    return high_nibbles.lookup_16<int8_t>(
+      1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII)
+      0, 0, 0, 0,             // 10xx (continuation)
+      2, 2,                   // 110x
+      3,                      // 1110
+      4);                     // 1111, next should be 0 (not checked here)
+  }
+
+  simdjson_really_inline simd8<int8_t> carry_continuations(const simd8<int8_t>& initial_lengths) {
+    simd8<int8_t> prev_carried_continuations = initial_lengths.prev(this->previous.carried_continuations);
+    simd8<int8_t> right1 = simd8<int8_t>(simd8<uint8_t>(prev_carried_continuations).saturating_sub(1));
+    simd8<int8_t> sum = initial_lengths + right1;
+
+    simd8<int8_t> prev2_carried_continuations = sum.prev<2>(this->previous.carried_continuations);
+    simd8<int8_t> right2 = simd8<int8_t>(simd8<uint8_t>(prev2_carried_continuations).saturating_sub(2));
+    return sum + right2;
+  }
+
+  simdjson_really_inline void check_continuations(const simd8<int8_t>& initial_lengths, const simd8<int8_t>& carries) {
+    // overlap || underlap
+    // carry > length && length > 0 || !(carry > length) && !(length > 0)
+    // (carries > length) == (lengths > 0)
+    // (carries > current) == (current > 0)
+    this->has_error |= simd8<uint8_t>(
+      (carries > initial_lengths) == (initial_lengths > simd8<int8_t>::zero()));
+  }
+
+  simdjson_really_inline void check_carried_continuations() {
+    static const int8_t last_1[32] = {
+      9, 9, 9, 9, 9, 9, 9, 9,
+      9, 9, 9, 9, 9, 9, 9, 9,
+      9, 9, 9, 9, 9, 9, 9, 9,
+      9, 9, 9, 9, 9, 9, 9, 1
+    };
+    this->has_error |= simd8<uint8_t>(this->previous.carried_continuations > simd8<int8_t>(last_1 + 32 - sizeof(simd8<int8_t>)));
+  }
+
+  // when 0xED is found, next byte must be no larger than 0x9F
+  // when 0xF4 is found, next byte must be no larger than 0x8F
+  // next byte must be continuation, ie sign bit is set, so signed < is ok
+  simdjson_really_inline void check_first_continuation_max(const simd8<uint8_t> current_bytes,
+                                                  const simd8<uint8_t> off1_current_bytes) {
+    simd8<bool> prev_ED = off1_current_bytes == 0xEDu;
+    simd8<bool> prev_F4 = off1_current_bytes == 0xF4u;
+    // Check if ED is followed by A0 or greater
+    simd8<bool> ED_too_large = (simd8<int8_t>(current_bytes) > simd8<int8_t>::splat(0x9Fu)) & prev_ED;
+    // Check if F4 is followed by 90 or greater
+    simd8<bool> F4_too_large = (simd8<int8_t>(current_bytes) > simd8<int8_t>::splat(0x8Fu)) & prev_F4;
+    // These will also error if ED or F4 is followed by ASCII, but that's an error anyway
+    this->has_error |= simd8<uint8_t>(ED_too_large | F4_too_large);
+  }
+
+  // map off1_hibits => error condition
+  // hibits     off1    cur
+  // C       => < C2 && true
+  // E       => < E1 && < A0
+  // F       => < F1 && < 90
+  // else      false && false
+  simdjson_really_inline void check_overlong(const simd8<uint8_t> current_bytes,
+                                    const simd8<uint8_t> off1_current_bytes,
+                                    const simd8<int8_t>& high_nibbles) {
+    simd8<int8_t> off1_high_nibbles = high_nibbles.prev(this->previous.high_nibbles);
+
+    // Two-byte characters must start with at least C2
+    // Three-byte characters must start with at least E1
+    // Four-byte characters must start with at least F1
+    simd8<int8_t> initial_mins = off1_high_nibbles.lookup_16<int8_t>(
+      -128, -128, -128, -128, -128, -128, -128, -128, // 0xxx -> false
+      -128, -128, -128, -128,                         // 10xx -> false
+      0xC2, -128,                                     // 1100 -> C2, 1101 -> false
+      0xE1,                                           // 1110
+      0xF1                                            // 1111
+    );
+    simd8<bool> initial_under = initial_mins > simd8<int8_t>(off1_current_bytes);
+
+    // Two-byte characters starting with at least C2 are always OK
+    // Three-byte characters starting below E1 must be followed by at least A0
+    // Four-byte characters starting below F1 must be followed by at least 90
+    simd8<int8_t> second_mins = off1_high_nibbles.lookup_16<int8_t>(
+      -128, -128, -128, -128, -128, -128, -128, -128, -128, // 0xxx and first 10xx entry => false
+      -128, -128, -128,                                     // remaining 10xx entries => false
+      127, 127,                                             // 110x => true
+      0xA0,                                                 // 1110
+      0x90                                                  // 1111
+    );
+    simd8<bool> second_under = second_mins > simd8<int8_t>(current_bytes);
+    this->has_error |= simd8<uint8_t>(initial_under & second_under); // error only when both the lead byte and the byte after it are under their minimums
+  }
+
+  simdjson_really_inline void count_nibbles(simd8<uint8_t> bytes, struct processed_utf_bytes *answer) {
+    answer->raw_bytes = bytes;
+    answer->high_nibbles = simd8<int8_t>(bytes.shr<4>());
+  }
+
+  // check whether the current bytes are valid UTF-8
+  // at the end of the function, previous gets updated
+  simdjson_really_inline void check_utf8_bytes(const simd8<uint8_t> current_bytes) {
+    struct processed_utf_bytes pb {};
+    this->count_nibbles(current_bytes, &pb);
+
+    this->check_smaller_than_0xF4(current_bytes);
+
+    simd8<int8_t> initial_lengths = this->continuation_lengths(pb.high_nibbles);
+
+    pb.carried_continuations = this->carry_continuations(initial_lengths);
+
+    this->check_continuations(initial_lengths, pb.carried_continuations);
+
+    simd8<uint8_t> off1_current_bytes = pb.raw_bytes.prev(this->previous.raw_bytes);
+    this->check_first_continuation_max(current_bytes, off1_current_bytes);
+
+    this->check_overlong(current_bytes, off1_current_bytes, pb.high_nibbles);
+    this->previous = pb;
+  }
+
+  simdjson_really_inline void check_next_input(const simd8<uint8_t> in) { // validates one simd8<uint8_t> block of input
+    if (simdjson_likely(!in.any_bits_set_anywhere(0x80u))) { // no byte has bit 0x80 set: skip the full UTF-8 checks
+      this->check_carried_continuations(); // still examines previous.carried_continuations from the prior block
+    } else {
+      this->check_utf8_bytes(in); // runs every check and updates this->previous
+    }
+  }
+
+  simdjson_really_inline void check_next_input(const simd8x64<uint8_t>& in) {
+    simd8<uint8_t> bits = in.reduce_or();
+    if (simdjson_likely(!bits.any_bits_set_anywhere(0x80u))) {
+      // it is ascii, we just check carried continuations.
+      this->check_carried_continuations();
+    } else {
+      // it is not ascii so we have to do heavy work
+      for (int i=0; i<simd8x64<uint8_t>::NUM_CHUNKS; i++) {
+        this->check_utf8_bytes(in.chunks[i]);
+      }
+    }
+  }
+
+  simdjson_really_inline error_code errors() {
+    return this->has_error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
+  }
+}; // struct utf8_checker
+
+} // namespace SIMDJSON_IMPLEMENTATION
+} // namespace simdjson
--- a/src/generic/stage1/utf8_lookup2_algorithm.h
+++ b/src/generic/stage1/utf8_lookup2_algorithm.h
@ -0,0 +1,225 @@
+namespace SIMDJSON_IMPLEMENTATION {
+namespace {
+namespace utf8_validation {
+
+//
+// Detect Unicode errors.
+//
+// UTF-8 is designed to allow multiple bytes and be compatible with ASCII. It's a fairly basic
+// encoding that uses the first few bits on each byte to denote a "byte type", and all other bits
+// are straight up concatenated into the final value. The first byte of a multibyte character is a
+// "leading byte" and starts with N 1's, where N is the total number of bytes (110_____ = 2 byte
+// lead). The remaining bytes of a multibyte character all start with 10. 1-byte characters just
+// start with 0, because that's what ASCII looks like. Here's what each size looks like:
+//
+// - ASCII (7 bits):              0_______
+// - 2 byte character (11 bits):  110_____ 10______
+// - 3 byte character (17 bits):  1110____ 10______ 10______
+// - 4 byte character (23 bits):  11110___ 10______ 10______ 10______
+// - 5+ byte character (illegal): 11111___ <illegal>
+//
+// There are 6 classes of error that can happen in Unicode:
+//
+// - TOO_SHORT: when you have a multibyte character with too few bytes (i.e. missing continuation).
+//   We detect this by looking for new characters (lead bytes) inside the range of a multibyte
+//   character.
+//
+//   e.g. 11000000 01100001 (2-byte character where second byte is ASCII)
+//
+// - TOO_LONG: when there are more bytes in your character than you need (i.e. extra continuation).
+//   We detect this by requiring that the next byte after your multibyte character be a new
+//   character--so a continuation after your character is wrong.
+//
+//   e.g. 11011111 10111111 10111111 (2-byte character followed by *another* continuation byte)
+//
+// - TOO_LARGE: Unicode only goes up to U+10FFFF. These characters are too large.
+//
+//   e.g. 11110111 10111111 10111111 10111111 (bigger than 10FFFF).
+//
+// - OVERLONG: multibyte characters with a bunch of leading zeroes, where you could have
+//   used fewer bytes to make the same character. Like encoding an ASCII character in 4 bytes is
+//   technically possible, but UTF-8 disallows it so that there is only one way to write an "a".
+//
+//   e.g. 11000001 10100001 (2-byte encoding of "a", which only requires 1 byte: 01100001)
+//
+// - SURROGATE: Unicode U+D800-U+DFFF is a *surrogate* character, reserved for use in UCS-2 and
+//   WTF-8 encodings for characters with > 2 bytes. These are illegal in pure UTF-8.
+//
+//   e.g. 11101101 10100000 10000000 (U+D800)
+//
+// - INVALID_5_BYTE: 5-byte, 6-byte, 7-byte and 8-byte characters are unsupported; Unicode does not
+//   support values with more than 23 bits (which a 4-byte character supports).
+//
+//   e.g. 11111000 10100000 10000000 10000000 10000000 (U+800000)
+//
+// Legal utf-8 byte sequences per  http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94:
+//
+//   Code Points        1st       2s       3s       4s
+//  U+0000..U+007F     00..7F
+//  U+0080..U+07FF     C2..DF   80..BF
+//  U+0800..U+0FFF     E0       A0..BF   80..BF
+//  U+1000..U+CFFF     E1..EC   80..BF   80..BF
+//  U+D000..U+D7FF     ED       80..9F   80..BF
+//  U+E000..U+FFFF     EE..EF   80..BF   80..BF
+//  U+10000..U+3FFFF   F0       90..BF   80..BF   80..BF
+//  U+40000..U+FFFFF   F1..F3   80..BF   80..BF   80..BF
+//  U+100000..U+10FFFF F4       80..8F   80..BF   80..BF
+//
+using namespace simd;
+
+  // For a detailed description of the lookup2 algorithm, see the file HACKING.md under "UTF-8 validation (lookup2)".
+
+  //
+  // Find special case UTF-8 errors where the character is technically readable (has the right length)
+  // but the *value* is disallowed.
+  //
+  // This includes overlong encodings, surrogates and values too large for Unicode.
+  //
+  // It turns out the bad character ranges can all be detected by looking at the first 12 bits of the
+  // UTF-8 encoded character (i.e. all of byte 1, and the high 4 bits of byte 2). This algorithm does
+  // three 4-bit table lookups, identifying which errors each nibble could match, and then &'s them together.
+  // If all 3 lookups detect the same error, it's an error.
+  //
+  simdjson_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
+    // input supplies byte 2 of each pair; prev1 supplies byte 1 (callers in this file pass input.prev<1>(prev_input)).
+    // These are the errors we're going to match for bytes 1-2, by looking at the first three
+    // nibbles of the character: <high bits of byte 1> & <low bits of byte 1> & <high bits of byte 2>
+    //
+    static const int OVERLONG_2  = 0x01; // 1100000_ 10______ (technically we match 10______ but we could match ________, they both yield errors either way)
+    static const int OVERLONG_3  = 0x02; // 11100000 100_____ ________
+    static const int OVERLONG_4  = 0x04; // 11110000 1000____ ________ ________
+    static const int SURROGATE   = 0x08; // 11101101 [101_]____
+    static const int TOO_LARGE   = 0x10; // 11110100 (1001|101_)____
+    static const int TOO_LARGE_2 = 0x20; // 1111(1___|011_|0101) 10______
+
+    // After processing the rest of byte 1 (the low bits), we're still not done--we have to check
+    // byte 2 to be sure which things are errors and which aren't.
+    // The byte-2 lookup below is indexed by the high nibble of input.
+    static const int CARRY = OVERLONG_2 | TOO_LARGE_2;
+    const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
+        // ASCII: ________ [0___]____
+        CARRY, CARRY, CARRY, CARRY,
+        // ASCII: ________ [0___]____
+        CARRY, CARRY, CARRY, CARRY,
+        // Continuations: ________ [10__]____
+        CARRY | OVERLONG_3 | OVERLONG_4, // ________ [1000]____
+        CARRY | OVERLONG_3 | TOO_LARGE,  // ________ [1001]____
+        CARRY | TOO_LARGE  | SURROGATE,  // ________ [1010]____
+        CARRY | TOO_LARGE  | SURROGATE,  // ________ [1011]____
+        // Multibyte Leads: ________ [11__]____
+        CARRY, CARRY, CARRY, CARRY
+    );
+
+    const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
+      // [0___]____ (ASCII)
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      // [10__]____ (continuation)
+      0, 0, 0, 0,
+      // [11__]____ (2+-byte leads)
+      OVERLONG_2, 0,                       // [110_]____ (2-byte lead)
+      OVERLONG_3 | SURROGATE,              // [1110]____ (3-byte lead)
+      OVERLONG_4 | TOO_LARGE | TOO_LARGE_2 // [1111]____ (4+-byte lead)
+    );
+
+    const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
+      // ____[00__] ________
+      OVERLONG_2 | OVERLONG_3 | OVERLONG_4, // ____[0000] ________
+      OVERLONG_2,                           // ____[0001] ________
+      0, 0,
+      // ____[01__] ________
+      TOO_LARGE,                            // ____[0100] ________
+      TOO_LARGE_2,
+      TOO_LARGE_2,
+      TOO_LARGE_2,
+      // ____[10__] ________
+      TOO_LARGE_2, TOO_LARGE_2, TOO_LARGE_2, TOO_LARGE_2,
+      // ____[11__] ________
+      TOO_LARGE_2,
+      TOO_LARGE_2 | SURROGATE,                            // ____[1101] ________
+      TOO_LARGE_2, TOO_LARGE_2
+    );
+
+    return byte_1_high & byte_1_low & byte_2_high; // an error bit survives only if all three lookups flag it
+  }
+
+  simdjson_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input, const simd8<uint8_t> prev_input, const simd8<uint8_t> prev1) {
+    simd8<uint8_t> prev2 = input.prev<2>(prev_input);
+    simd8<uint8_t> prev3 = input.prev<3>(prev_input);
+
+    // Cont is 10000000-10111111 (-128...-65 as int8_t), i.e. every signed value below -64
+    simd8<bool> is_continuation = simd8<int8_t>(input) < int8_t(-64);
+    // must_be_continuation is architecture-specific because Intel doesn't have unsigned comparisons
+    return simd8<uint8_t>(must_be_continuation(prev1, prev2, prev3) ^ is_continuation); // XOR: set wherever "must be" and "is" disagree
+  }
+
+  //
+  // Return nonzero if there are incomplete multibyte characters at the end of the block:
+  // e.g. if there is a 4-byte character, but it's 3 bytes from the end.
+  //
+  simdjson_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
+    // Thresholds: 255 (no uint8_t exceeds it) everywhere except the last three entries, which are 0xF0-1, 0xE0-1
+    // and 0xC0-1, so only a block whose last 3 bytes match this exceeds them: ... 1111____ 111_____ 11______
+    static const uint8_t max_array[32] = {
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1
+    };
+    const simd8<uint8_t> max_value(&max_array[sizeof(max_array)-sizeof(simd8<uint8_t>)]); // points at the last sizeof(simd8<uint8_t>) entries, so the three thresholds are the final ones
+    return input.gt_bits(max_value); // presumably nonzero where input > max_value -- confirm in simd8::gt_bits
+  }
+
+  struct utf8_checker {
+    // If this is nonzero, there has been a UTF-8 error.
+    simd8<uint8_t> error;
+    // The last input we received
+    simd8<uint8_t> prev_input_block;
+    // Whether the last input we received was incomplete (used for ASCII fast path)
+    simd8<uint8_t> prev_incomplete;
+
+    //
+    // Check whether the current bytes are valid UTF-8.
+    //
+    simdjson_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
+      // prev1 supplies "byte 1" for check_special_cases, with input as "byte 2"; prev_input is the
+      // chunk that came before input. Findings from both checks are OR'ed into this->error.
+      simd8<uint8_t> prev1 = input.prev<1>(prev_input);
+      this->error |= check_special_cases(input, prev1);
+      this->error |= check_multibyte_lengths(input, prev_input, prev1);
+    }
+
+    // The only problem that can happen at EOF is that a multibyte character is too short.
+    simdjson_really_inline void check_eof() {
+      // If the previous block had incomplete UTF-8 characters at the end, there is no more input
+      // left to finish them.
+      this->error |= this->prev_incomplete;
+    }
+
+    simdjson_really_inline void check_next_input(const simd8x64<uint8_t>& input) {
+      if (simdjson_likely(is_ascii(input))) {
+        // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
+        // possibly finish them.
+        this->error |= this->prev_incomplete;
+      } else {
+        this->check_utf8_bytes(input.chunks[0], this->prev_input_block); // first chunk is preceded by the last chunk we saved
+        for (int i=1; i<simd8x64<uint8_t>::NUM_CHUNKS; i++) {
+          this->check_utf8_bytes(input.chunks[i], input.chunks[i-1]);
+        }
+        this->prev_incomplete = is_incomplete(input.chunks[simd8x64<uint8_t>::NUM_CHUNKS-1]); // remember whether this block ends mid-character
+        this->prev_input_block = input.chunks[simd8x64<uint8_t>::NUM_CHUNKS-1]; // becomes prev_input for the next block's first chunk
+      }
+    }
+
+    simdjson_really_inline error_code errors() { // UTF8_ERROR when error.any_bits_set_anywhere() is true, otherwise SUCCESS
+      return this->error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
+    }
+
+  }; // struct utf8_checker
+} // namespace utf8_validation
+} // unnamed namespace
+
+using utf8_validation::utf8_checker;
+
+} // namespace SIMDJSON_IMPLEMENTATION
+} // namespace simdjson
--- a/src/generic/stage1/utf8_lookup3_algorithm.h
+++ b/src/generic/stage1/utf8_lookup3_algorithm.h
@ -0,0 +1,245 @@
+namespace SIMDJSON_IMPLEMENTATION {
+namespace {
+namespace utf8_validation {
+
+//
+// Detect Unicode errors.
+//
+// UTF-8 is designed to allow multiple bytes and be compatible with ASCII. It's a fairly basic
+// encoding that uses the first few bits on each byte to denote a "byte type", and all other bits
+// are straight up concatenated into the final value. The first byte of a multibyte character is a
+// "leading byte" and starts with N 1's, where N is the total number of bytes (110_____ = 2 byte
+// lead). The remaining bytes of a multibyte character all start with 10. 1-byte characters just
+// start with 0, because that's what ASCII looks like. Here's what each size looks like:
+//
+// - ASCII (7 bits):              0_______
+// - 2 byte character (11 bits):  110_____ 10______
+// - 3 byte character (17 bits):  1110____ 10______ 10______
+// - 4 byte character (23 bits):  11110___ 10______ 10______ 10______
+// - 5+ byte character (illegal): 11111___ <illegal>
+//
+// There are 6 classes of error that can happen in Unicode:
+//
+// - TOO_SHORT: when you have a multibyte character with too few bytes (i.e. missing continuation).
+//   We detect this by looking for new characters (lead bytes) inside the range of a multibyte
+//   character.
+//
+//   e.g. 11000000 01100001 (2-byte character where second byte is ASCII)
+//
+// - TOO_LONG: when there are more bytes in your character than you need (i.e. extra continuation).
+//   We detect this by requiring that the next byte after your multibyte character be a new
+//   character--so a continuation after your character is wrong.
+//
+//   e.g. 11011111 10111111 10111111 (2-byte character followed by *another* continuation byte)
+//
+// - TOO_LARGE: Unicode only goes up to U+10FFFF. These characters are too large.
+//
+//   e.g. 11110111 10111111 10111111 10111111 (bigger than 10FFFF).
+//
+// - OVERLONG: multibyte characters with a bunch of leading zeroes, where you could have
+//   used fewer bytes to make the same character. Like encoding an ASCII character in 4 bytes is
+//   technically possible, but UTF-8 disallows it so that there is only one way to write an "a".
+//
+//   e.g. 11000001 10100001 (2-byte encoding of "a", which only requires 1 byte: 01100001)
+//
+// - SURROGATE: Unicode U+D800-U+DFFF is a *surrogate* character, reserved for use in UCS-2 and
+//   WTF-8 encodings for characters with > 2 bytes. These are illegal in pure UTF-8.
+//
+//   e.g. 11101101 10100000 10000000 (U+D800)
+//
+// - INVALID_5_BYTE: 5-byte, 6-byte, 7-byte and 8-byte characters are unsupported; Unicode does not
+//   support values with more than 23 bits (which a 4-byte character supports).
+//
+//   e.g. 11111000 10100000 10000000 10000000 10000000 (U+800000)
+//
+// Legal utf-8 byte sequences per  http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94:
+//
+//   Code Points        1st       2s       3s       4s
+//  U+0000..U+007F     00..7F
+//  U+0080..U+07FF     C2..DF   80..BF
+//  U+0800..U+0FFF     E0       A0..BF   80..BF
+//  U+1000..U+CFFF     E1..EC   80..BF   80..BF
+//  U+D000..U+D7FF     ED       80..9F   80..BF
+//  U+E000..U+FFFF     EE..EF   80..BF   80..BF
+//  U+10000..U+3FFFF   F0       90..BF   80..BF   80..BF
+//  U+40000..U+FFFFF   F1..F3   80..BF   80..BF   80..BF
+//  U+100000..U+10FFFF F4       80..8F   80..BF   80..BF
+//
+using namespace simd;
+
+  // For a detailed description of the lookup2 algorithm, see the file HACKING.md under "UTF-8 validation (lookup2)".
+
+  //
+  // Find special case UTF-8 errors where the character is technically readable (has the right length)
+  // but the *value* is disallowed.
+  //
+  // This includes overlong encodings, surrogates and values too large for Unicode.
+  //
+  // It turns out the bad character ranges can all be detected by looking at the first 12 bits of the
+  // UTF-8 encoded character (i.e. all of byte 1, and the high 4 bits of byte 2). This algorithm does
+  // three 4-bit table lookups, identifying which errors each nibble could match, and then &'s them together.
+  // If all 3 lookups detect the same error, it's an error.
+  //
+  simdjson_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
+    // input supplies byte 2 of each pair; prev1 supplies byte 1 (callers in this file pass input.prev<1>(prev_input)).
+    // These are the errors we're going to match for bytes 1-2, by looking at the first three
+    // nibbles of the character: <high bits of byte 1> & <low bits of byte 1> & <high bits of byte 2>
+    //
+    static const int OVERLONG_2  = 0x01; // 1100000_ 10______ (technically we match 10______ but we could match ________, they both yield errors either way)
+    static const int OVERLONG_3  = 0x02; // 11100000 100_____ ________
+    static const int OVERLONG_4  = 0x04; // 11110000 1000____ ________ ________
+    static const int SURROGATE   = 0x08; // 11101101 [101_]____
+    static const int TOO_LARGE   = 0x10; // 11110100 (1001|101_)____
+    static const int TOO_LARGE_2 = 0x20; // 1111(1___|011_|0101) 10______
+
+    // New with lookup3. We want to catch the case where a non-continuation
+    // follows a leading byte
+    static const int TOO_SHORT_2_3_4 = 0x40; //  (110_|1110|1111) ____    (0___|11__) ____
+    // We also want to catch a continuation that is preceded by an ASCII byte
+    static const int LONELY_CONTINUATION = 0x80; //  0___ ____    10__ ____
+
+    // After processing the rest of byte 1 (the low bits), we're still not done--we have to check
+    // byte 2 to be sure which things are errors and which aren't.
+    // The byte-2 lookup below is indexed by the high nibble of input.
+    static const int CARRY = OVERLONG_2 | TOO_LARGE_2;
+    const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
+        // ASCII: ________ [0___]____
+        CARRY | TOO_SHORT_2_3_4, CARRY | TOO_SHORT_2_3_4,
+        CARRY | TOO_SHORT_2_3_4, CARRY | TOO_SHORT_2_3_4,
+        // ASCII: ________ [0___]____
+        CARRY | TOO_SHORT_2_3_4, CARRY | TOO_SHORT_2_3_4,
+        CARRY | TOO_SHORT_2_3_4, CARRY | TOO_SHORT_2_3_4,
+        // Continuations: ________ [10__]____
+        CARRY | OVERLONG_3 | OVERLONG_4 | LONELY_CONTINUATION, // ________ [1000]____
+        CARRY | OVERLONG_3 | TOO_LARGE | LONELY_CONTINUATION,  // ________ [1001]____
+        CARRY | TOO_LARGE  | SURROGATE | LONELY_CONTINUATION,  // ________ [1010]____
+        CARRY | TOO_LARGE  | SURROGATE | LONELY_CONTINUATION,  // ________ [1011]____
+        // Multibyte Leads: ________ [11__]____
+        CARRY | TOO_SHORT_2_3_4, CARRY | TOO_SHORT_2_3_4,  // 110_
+        CARRY | TOO_SHORT_2_3_4, CARRY | TOO_SHORT_2_3_4   // 1110, 1111
+    );
+    const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
+      // [0___]____ (ASCII)
+      LONELY_CONTINUATION, LONELY_CONTINUATION, LONELY_CONTINUATION, LONELY_CONTINUATION,
+      LONELY_CONTINUATION, LONELY_CONTINUATION, LONELY_CONTINUATION, LONELY_CONTINUATION,
+      // [10__]____ (continuation)
+      0, 0, 0, 0,
+      // [11__]____ (2+-byte leads)
+      OVERLONG_2 | TOO_SHORT_2_3_4, TOO_SHORT_2_3_4,         // [110_]____ (2-byte lead)
+      OVERLONG_3 | SURROGATE | TOO_SHORT_2_3_4,              // [1110]____ (3-byte lead)
+      OVERLONG_4 | TOO_LARGE | TOO_LARGE_2 | TOO_SHORT_2_3_4 // [1111]____ (4+-byte lead)
+    );
+    const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
+      // ____[00__] ________
+      OVERLONG_2 | OVERLONG_3 | OVERLONG_4 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION, // ____[0000] ________
+      OVERLONG_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION,                           // ____[0001] ________
+      TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
+      TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
+      // ____[01__] ________
+      TOO_LARGE | TOO_SHORT_2_3_4 | LONELY_CONTINUATION,                            // ____[0100] ________
+      TOO_LARGE_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
+      TOO_LARGE_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
+      TOO_LARGE_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
+      // ____[10__] ________
+      TOO_LARGE_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
+      TOO_LARGE_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
+      TOO_LARGE_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
+      TOO_LARGE_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
+      // ____[11__] ________
+      TOO_LARGE_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION,
+      TOO_LARGE_2 | SURROGATE | TOO_SHORT_2_3_4 | LONELY_CONTINUATION,              // ____[1101] ________
+      TOO_LARGE_2 | TOO_SHORT_2_3_4| LONELY_CONTINUATION,
+      TOO_LARGE_2 | TOO_SHORT_2_3_4 | LONELY_CONTINUATION
+    );
+    return byte_1_high & byte_1_low & byte_2_high; // an error bit survives only if all three lookups flag it
+  }
+
+  simdjson_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input, const simd8<uint8_t> prev_input,
+      simd8<uint8_t> prev1) {
+    simd8<uint8_t> prev2 = input.prev<2>(prev_input);
+    simd8<uint8_t> prev3 = input.prev<3>(prev_input);
+    // is_2_3_continuation: max(input, prev1) < -64 as int8_t, i.e. both are continuation bytes (one more instruction than lookup2)
+    simd8<bool> is_2_3_continuation = (simd8<int8_t>(input).max(simd8<int8_t>(prev1))) < int8_t(-64);
+    // must_be_2_3_continuation has two fewer instructions than lookup 2; the XOR is set wherever "must be" and "is" disagree
+    return simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3) ^ is_2_3_continuation);
+  }
+
+
+  //
+  // Return nonzero if there are incomplete multibyte characters at the end of the block:
+  // e.g. if there is a 4-byte character, but it's 3 bytes from the end.
+  //
+  simdjson_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
+    // Thresholds: 255 (no uint8_t exceeds it) everywhere except the last three entries, which are 0xF0-1, 0xE0-1
+    // and 0xC0-1, so only a block whose last 3 bytes match this exceeds them: ... 1111____ 111_____ 11______
+    static const uint8_t max_array[32] = {
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1
+    };
+    const simd8<uint8_t> max_value(&max_array[sizeof(max_array)-sizeof(simd8<uint8_t>)]); // points at the last sizeof(simd8<uint8_t>) entries, so the three thresholds are the final ones
+    return input.gt_bits(max_value); // presumably nonzero where input > max_value -- confirm in simd8::gt_bits
+  }
+
+  struct utf8_checker {
+    // If this is nonzero, there has been a UTF-8 error.
+    simd8<uint8_t> error;
+    // The last input we received
+    simd8<uint8_t> prev_input_block;
+    // Whether the last input we received was incomplete (used for ASCII fast path)
+    simd8<uint8_t> prev_incomplete;
+
+    //
+    // Check whether the current bytes are valid UTF-8.
+    //
+    simdjson_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
+      // prev1 supplies "byte 1" for check_special_cases, with input as "byte 2"; prev_input is the
+      // chunk that came before input. Findings from both checks are OR'ed into this->error.
+      simd8<uint8_t> prev1 = input.prev<1>(prev_input);
+      this->error |= check_special_cases(input, prev1);
+      this->error |= check_multibyte_lengths(input, prev_input, prev1);
+    }
+
+    // The only problem that can happen at EOF is that a multibyte character is too short.
+    simdjson_really_inline void check_eof() {
+      // If the previous block had incomplete UTF-8 characters at the end, there is no more input
+      // left to finish them.
+      this->error |= this->prev_incomplete;
+    }
+
+    simdjson_really_inline void check_next_input(const simd8x64<uint8_t>& input) {
+      if(simdjson_likely(is_ascii(input))) {
+        // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
+        // possibly finish them.
+        this->error |= this->prev_incomplete;
+      } else {
+        // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
+        static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
+            "We support either two or four chunks per 64-byte block.");
+        if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
+          this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
+          this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+        } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
+          this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
+          this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+          this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
+          this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
+        } 
+        this->prev_incomplete = is_incomplete(input.chunks[simd8x64<uint8_t>::NUM_CHUNKS-1]); // remember whether this block ends mid-character
+        this->prev_input_block = input.chunks[simd8x64<uint8_t>::NUM_CHUNKS-1]; // becomes prev_input for the next block's first chunk
+      }
+    }
+
+    simdjson_really_inline error_code errors() { // UTF8_ERROR when error.any_bits_set_anywhere() is true, otherwise SUCCESS
+      return this->error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
+    }
+
+  }; // struct utf8_checker
+} // namespace utf8_validation
+} // unnamed namespace
+
+using utf8_validation::utf8_checker;
+
+} // namespace SIMDJSON_IMPLEMENTATION
+} // namespace simdjson
--- a/src/generic/stage1/utf8_lookup4_algorithm.h
+++ b/src/generic/stage1/utf8_lookup4_algorithm.h
@ -1,5 +1,5 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace utf8_validation {

 using namespace simd;
@ -179,5 +179,5 @@ using namespace simd;

 using utf8_validation::utf8_checker;

-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/stage1/utf8_lookup_algorithm.h
+++ b/src/generic/stage1/utf8_lookup_algorithm.h
@ -0,0 +1,303 @@
+namespace simdjson {
+namespace SIMDJSON_IMPLEMENTATION {
+namespace utf8_validation {
+
+//
+// Detect Unicode errors.
+//
+// UTF-8 is designed to allow multiple bytes and be compatible with ASCII. It's a fairly basic
+// encoding that uses the first few bits on each byte to denote a "byte type", and all other bits
+// are straight up concatenated into the final value. The first byte of a multibyte character is a
+// "leading byte" and starts with N 1's, where N is the total number of bytes (110_____ = 2 byte
+// lead). The remaining bytes of a multibyte character all start with 10. 1-byte characters just
+// start with 0, because that's what ASCII looks like. Here's what each size looks like:
+//
+// - ASCII (7 bits):              0_______
+// - 2 byte character (11 bits):  110_____ 10______
+// - 3 byte character (16 bits):  1110____ 10______ 10______
+// - 4 byte character (21 bits):  11110___ 10______ 10______ 10______
+// - 5+ byte character (illegal): 11111___ <illegal>
+//
+// There are 6 classes of error that can happen in Unicode:
+//
+// - TOO_SHORT: when you have a multibyte character with too few bytes (i.e. missing continuation).
+//   We detect this by looking for new characters (lead bytes) inside the range of a multibyte
+//   character.
+//
+//   e.g. 11000000 01100001 (2-byte character where second byte is ASCII)
+//
+// - TOO_LONG: when there are more bytes in your character than you need (i.e. extra continuation).
+//   We detect this by requiring that the next byte after your multibyte character be a new
+//   character--so a continuation after your character is wrong.
+//
+//   e.g. 11011111 10111111 10111111 (2-byte character followed by *another* continuation byte)
+//
+// - TOO_LARGE: Unicode only goes up to U+10FFFF. These characters are too large.
+//
+//   e.g. 11110111 10111111 10111111 10111111 (bigger than 10FFFF).
+//
+// - OVERLONG: multibyte characters with a bunch of leading zeroes, where you could have
+//   used fewer bytes to make the same character. Like encoding an ASCII character in 4 bytes is
+//   technically possible, but UTF-8 disallows it so that there is only one way to write an "a".
+//
+//   e.g. 11000001 10100001 (2-byte encoding of "a", which only requires 1 byte: 01100001)
+//
+// - SURROGATE: Unicode U+D800-U+DFFF is a *surrogate* character, reserved for use in UCS-2 and
+//   WTF-8 encodings for characters with > 2 bytes. These are illegal in pure UTF-8.
+//
+//   e.g. 11101101 10100000 10000000 (U+D800)
+//
+// - INVALID_5_BYTE: 5-byte, 6-byte, 7-byte and 8-byte characters are unsupported; Unicode does not
+//   support values with more than 21 bits (which a 4-byte character supports).
+//
+//   e.g. 11111000 10100000 10000000 10000000 10000000 (U+800000)
+//   
+// Legal utf-8 byte sequences per  http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94:
+// 
+//   Code Points        1st       2s       3s       4s
+//  U+0000..U+007F     00..7F
+//  U+0080..U+07FF     C2..DF   80..BF
+//  U+0800..U+0FFF     E0       A0..BF   80..BF
+//  U+1000..U+CFFF     E1..EC   80..BF   80..BF
+//  U+D000..U+D7FF     ED       80..9F   80..BF
+//  U+E000..U+FFFF     EE..EF   80..BF   80..BF
+//  U+10000..U+3FFFF   F0       90..BF   80..BF   80..BF
+//  U+40000..U+FFFFF   F1..F3   80..BF   80..BF   80..BF
+//  U+100000..U+10FFFF F4       80..8F   80..BF   80..BF
+//
+using namespace simd;
+
+struct utf8_checker {
+  // If this is nonzero, there has been a UTF-8 error. Errors are accumulated here with |= and
+  // reported by errors().
+  simd8<uint8_t> error;
+  // The last chunk of the most recent non-ASCII block; check_next_input() passes it as the
+  // "previous" bytes when checking the first chunk of the following block.
+  simd8<uint8_t> prev_input_block;
+  // Nonzero if there were leads at the end of the previous block, to be continued in the next.
+  // Written by set_fast_path_error().
+  simd8<uint8_t> prev_incomplete;
+
+  //
+  // These are the bits in lead_flags. Its main purpose is to tell you what kind of lead character
+  // it is (1,2,3 or 4--or none if it's continuation), but it also maps 4 other bits that will be
+  // used to detect other kinds of errors.
+  //
+  // LEAD_4 is first (the lowest bit) because get_byte_3_4_5_errors uses a >> trick (shr<1> and
+  // shr<2> on its table) to turn LEAD_2 -> LEAD_3, LEAD_3 -> LEAD_4, and we want LEAD_4 to turn
+  // into nothing since there is no LEAD_5. This trick lets us use one constant table instead of 3,
+  // possibly saving registers on systems with fewer registers.
+  //
+  static const uint8_t LEAD_4      = 0x01; // [1111]____ 10______ 10______ 10______ (0_|11)__
+  static const uint8_t LEAD_3      = 0x02; // [1110]____ 10______ 10______ (0|11)__
+  static const uint8_t LEAD_2      = 0x04; // [110_]____ 10______ (0|11)__
+  static const uint8_t LEAD_1      = 0x08; // [0___]____ (0|11)__
+  static const uint8_t LEAD_2_PLUS = 0x10; // [11__]____ ...
+  static const uint8_t LEAD_1100   = 0x20; // [1100]____ ...
+  static const uint8_t LEAD_1110   = 0x40; // [1110]____ ...
+  static const uint8_t LEAD_1111   = 0x80; // [1111]____ ...
+
+  // Remember whether the chunk held in prev_input_block stops partway through a multibyte
+  // character. The answer is stored in prev_incomplete, which check_next_input() (when the next
+  // block is pure ASCII) and check_eof() fold into the error state.
+  simdjson_really_inline void set_fast_path_error() {
+    // Largest value each lane may hold without requiring a continuation byte from the next
+    // block. Only the final three lanes are constrained:
+    //   third from last:  must stay below 1111____ (a 4-byte lead would still need one byte)
+    //   second from last: must stay below 111_____ (a 3- or 4-byte lead would still need bytes)
+    //   last:             must stay below 11______ (any lead would still need bytes)
+    // Every other lane is 0xFF and therefore can never be exceeded. The register is loaded from
+    // the tail of the table so these three entries always land in its last three lanes.
+    static const uint8_t lane_limits[32] = {
+      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1
+    };
+    const uint8_t * const tail = lane_limits + sizeof(lane_limits) - sizeof(simd8<uint8_t>);
+    const simd8<uint8_t> limits(tail);
+    // Saturating subtraction leaves a nonzero lane exactly where a byte exceeds its limit.
+    this->prev_incomplete = this->prev_input_block.saturating_sub(limits);
+  }
+
+  // Classify, for every lane, the byte that sits 4 positions earlier in the stream.
+  //
+  // high_bits / prev_high_bits are the upper nibbles of the current and previous chunk. The
+  // result holds the LEAD_* flags of that earlier byte, or 0 when it is a continuation byte.
+  simdjson_really_inline simd8<uint8_t> get_lead_flags(const simd8<uint8_t> high_bits, const simd8<uint8_t> prev_high_bits) {
+    // Cost: one byte shift (shuffle) and one table lookup (shuffle) against one table constant.
+
+    // The current lane is treated as byte 5 of the window, so the lead is 4 lanes back.
+    const simd8<uint8_t> lead_nibble = high_bits.prev<4>(prev_high_bits);
+    return lead_nibble.lookup_16<uint8_t>(
+      // 0000..0111: [0___]____ (ASCII)
+      LEAD_1, LEAD_1, LEAD_1, LEAD_1, LEAD_1, LEAD_1, LEAD_1, LEAD_1,
+      // 1000..1011: [10__]____ (continuation)
+      0, 0, 0, 0,
+      // 1100: [1100]____
+      LEAD_2 | LEAD_2_PLUS | LEAD_1100,
+      // 1101: [1101]____ (the other 2-byte lead nibble)
+      LEAD_2 | LEAD_2_PLUS,
+      // 1110: [1110]____
+      LEAD_3 | LEAD_2_PLUS | LEAD_1110,
+      // 1111: [1111]____
+      LEAD_4 | LEAD_2_PLUS | LEAD_1111
+    );
+  }
+
+  // Find errors in bytes 1 and 2 together (one single multi-nibble &).
+  //
+  // input / prev_input are the raw bytes of the current and previous chunk; high_bits /
+  // prev_high_bits are those same bytes shifted right by 4. Each lane of the result describes the
+  // character whose lead byte sits 4 lanes back. The result still has to be masked with that
+  // lead's lead_flags (done in check_utf8_bytes) before a set bit means "error".
+  simdjson_really_inline simd8<uint8_t> get_byte_1_2_errors(const simd8<uint8_t> input, const simd8<uint8_t> prev_input, const simd8<uint8_t> high_bits, const simd8<uint8_t> prev_high_bits) {
+    //
+    // These are the errors we're going to match for bytes 1-2, by looking at the first three
+    // nibbles of the character: lead_flags & <low bits of byte 1> & <high bits of byte 2>
+    //
+    // The important thing here is that these constants all take up *different* bits, since they
+    // match different patterns. This is why there are 2 LEAD_4 and 2 LEAD_3s in lead_flags, among
+    // other things.
+    //
+    static const int TOO_SHORT_2 = LEAD_2_PLUS; // 11______ (0___|11__)____
+    static const int TOO_LONG_1  = LEAD_1;      // 0_______ 10______
+    static const int OVERLONG_2  = LEAD_1100;   // 1100000_ ________ (technically we match 10______ but we could match ________, they both yield errors either way)
+    static const int OVERLONG_3  = LEAD_3;      // 11100000 100_____ ________
+    static const int OVERLONG_4  = LEAD_4;      // 11110000 1000____ ________ ________
+    static const int TOO_LARGE   = LEAD_1111;   // 11110100 (1001|101_)____
+    static const int SURROGATE   = LEAD_1110;   // 11101101 [101_]____
+
+    // Operations performed here:
+    // - 2 table lookups (shuffles)
+    // - 2 byte shifts (shuffles): prev<4> and prev<3>
+    // - 2 "and": the low-nibble mask and the final combine
+    // - 2 table constants
+
+    // After processing the rest of byte 1 (the low bits), we're still not done--we have to check
+    // byte 2 to be sure which things are errors and which aren't.
+    // Since input is byte 5, byte 1 is input.prev<4>
+    const simd8<uint8_t> byte_1_flags = (input.prev<4>(prev_input) & 0x0F).lookup_16<uint8_t>(
+      // ____[00__] ________
+      TOO_SHORT_2 | TOO_LONG_1 | OVERLONG_2 | OVERLONG_3 | OVERLONG_4, // ____[0000] ________
+      TOO_SHORT_2 | TOO_LONG_1 | OVERLONG_2,                           // ____[0001] ________
+      TOO_SHORT_2 | TOO_LONG_1, TOO_SHORT_2 | TOO_LONG_1,
+      // ____[01__] ________
+      TOO_SHORT_2 | TOO_LONG_1 | TOO_LARGE,                            // ____[0100] ________
+      TOO_SHORT_2 | TOO_LONG_1, TOO_SHORT_2 | TOO_LONG_1, TOO_SHORT_2 | TOO_LONG_1,
+      // ____[10__] ________
+      TOO_SHORT_2 | TOO_LONG_1, TOO_SHORT_2 | TOO_LONG_1, TOO_SHORT_2 | TOO_LONG_1, TOO_SHORT_2 | TOO_LONG_1,
+      // ____[11__] ________
+      TOO_SHORT_2 | TOO_LONG_1,
+      TOO_SHORT_2 | TOO_LONG_1 | SURROGATE,                            // ____[1101] ________
+      TOO_SHORT_2 | TOO_LONG_1, TOO_SHORT_2 | TOO_LONG_1
+    );
+    // Since high_bits is byte 5, byte 2 is high_bits.prev<3>
+    const simd8<uint8_t> byte_2_flags = high_bits.prev<3>(prev_high_bits).lookup_16<uint8_t>(
+        // ASCII: ________ [0___]____
+        OVERLONG_2 | TOO_SHORT_2, OVERLONG_2 | TOO_SHORT_2, OVERLONG_2 | TOO_SHORT_2, OVERLONG_2 | TOO_SHORT_2,
+        // ASCII: ________ [0___]____
+        OVERLONG_2 | TOO_SHORT_2, OVERLONG_2 | TOO_SHORT_2, OVERLONG_2 | TOO_SHORT_2, OVERLONG_2 | TOO_SHORT_2,
+        // Continuations: ________ [10__]____
+        // Second byte 9x: ED 90..9F is legal (U+D400..U+D7FF), so no SURROGATE here, whereas
+        // F4 90..9F is above U+10FFFF, so TOO_LARGE must be set.
+        OVERLONG_2 | TOO_LONG_1 | OVERLONG_3 | OVERLONG_4, // ________ [1000]____
+        OVERLONG_2 | TOO_LONG_1 | OVERLONG_3 | TOO_LARGE,  // ________ [1001]____
+        OVERLONG_2 | TOO_LONG_1 | TOO_LARGE  | SURROGATE,  // ________ [1010]____
+        OVERLONG_2 | TOO_LONG_1 | TOO_LARGE  | SURROGATE,  // ________ [1011]____
+        // Multibyte Leads: ________ [11__]____
+        OVERLONG_2 | TOO_SHORT_2, OVERLONG_2 | TOO_SHORT_2, OVERLONG_2 | TOO_SHORT_2, OVERLONG_2 | TOO_SHORT_2
+    );
+    return byte_1_flags & byte_2_flags;
+  }
+
+  // Find TOO_SHORT / TOO_LONG problems visible in bytes 3, 4 and 5 after a lead.
+  //
+  // high_bits / prev_high_bits are the upper nibbles of the current and previous chunk. As with
+  // the other helpers, the current lane is byte 5 and the result must still be masked with the
+  // lead's lead_flags by the caller.
+  simdjson_really_inline simd8<uint8_t> get_byte_3_4_5_errors(const simd8<uint8_t> high_bits, const simd8<uint8_t> prev_high_bits) {
+    // Work done: 3 table lookups (shuffles), 2 byte shifts (shuffles), 2 "or", and a single
+    // written-out table from which the other two are derived by bit shifts.
+
+    // Byte 3, indexed by its high nibble:
+    const simd8<uint8_t> table_for_byte_3 = simd8<uint8_t>::repeat_16(
+        // [0___]____ ASCII: too short for a LEAD_3 character
+        LEAD_3, LEAD_3, LEAD_3, LEAD_3,
+        LEAD_3, LEAD_3, LEAD_3, LEAD_3,
+        // [10__]____ continuation: too long for a LEAD_2 character
+        LEAD_2, LEAD_2, LEAD_2, LEAD_2,
+        // [11__]____ multibyte lead: too short for a LEAD_3 character
+        LEAD_3, LEAD_3, LEAD_3, LEAD_3
+    );
+    // Shifting right by one bit maps LEAD_3 -> LEAD_4 (too short) and LEAD_2 -> LEAD_3 (too long).
+    const simd8<uint8_t> table_for_byte_4 = table_for_byte_3.shr<1>();
+    // Shifting right by two bits maps LEAD_3 -> nothing and LEAD_2 -> LEAD_4 (too long).
+    const simd8<uint8_t> table_for_byte_5 = table_for_byte_3.shr<2>();
+
+    // With the current lane as byte 5, byte 3 is two lanes back and byte 4 is one lane back.
+    const simd8<uint8_t> byte_3_errors = high_bits.prev<2>(prev_high_bits).lookup_16(table_for_byte_3);
+    const simd8<uint8_t> byte_4_errors = high_bits.prev<1>(prev_high_bits).lookup_16(table_for_byte_4);
+    const simd8<uint8_t> byte_5_errors = high_bits.lookup_16(table_for_byte_5);
+    return byte_3_errors | byte_4_errors | byte_5_errors;
+  }
+
+  // Check whether the current bytes are valid UTF-8.
+  //
+  // input is the chunk to validate and prev_input is the chunk immediately before it. Returns a
+  // vector that is nonzero in every lane where an error was found. This function does not touch
+  // the checker's state: the caller ORs the result into this->error and tracks the previous
+  // chunk itself.
+  simdjson_really_inline simd8<uint8_t> check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
+    // When we process bytes M through N, we look for lead characters in M-4 through N-4. This allows
+    // us to look for all errors related to any lead character at one time (since UTF-8 characters
+    // can only be up to 4 bytes, and the next byte after a character finishes must be another lead,
+    // we never need to look more than 4 bytes past the current one to fully validate).
+    // This way, we have all relevant bytes around and can save ourselves a little overflow and
+    // several instructions on each loop.
+
+    // Work done locally:
+    // - 2 bit shifts
+    // - 2 "or", 1 "and"
+    // - 1 saturating_sub against 1 constant (0b11110101-1)
+    // plus the three helpers:
+    // - get_lead_flags:        1 byte shift, 1 table lookup
+    // - get_byte_1_2_errors:   2 byte shifts, 2 table lookups, 2 "and"
+    // - get_byte_3_4_5_errors: 2 byte shifts, 3 table lookups, 2 "or"
+
+    const simd8<uint8_t> high_bits = input.shr<4>();
+    const simd8<uint8_t> prev_high_bits = prev_input.shr<4>();
+    const simd8<uint8_t> lead_flags = get_lead_flags(high_bits, prev_high_bits);
+    const simd8<uint8_t> byte_1_2_errors = get_byte_1_2_errors(input, prev_input, high_bits, prev_high_bits);
+    const simd8<uint8_t> byte_3_4_5_errors = get_byte_3_4_5_errors(high_bits, prev_high_bits);
+    // Detect bytes that can never appear in well-formed UTF-8: the leads F5..F7 (which would
+    // encode values above U+10FFFF) and F8..FF (5-byte+ leads). We can't do this as part of
+    // byte_1_2_errors because it would need another lead_flag bit, and we've already used up all
+    // 8 between byte_1_2_errors and byte_3_4_5_errors. The previous threshold (0b11111000-1) let
+    // F5..F7 through, so e.g. F7 BF BF BF was accepted.
+    const simd8<uint8_t> too_large = input.saturating_sub(0b11110101-1); // forbidden bytes will be nonzero
+    return too_large | (lead_flags & (byte_1_2_errors | byte_3_4_5_errors));
+  }
+
+  // TODO special case start of file, too, so that small documents are efficient! No shifting needed ...
+
+  // The only problem that can happen at EOF is that a multibyte character is too short.
+  simdjson_really_inline void check_eof() {
+    // prev_incomplete is nonzero when the last checked block ended in the middle of a multibyte
+    // character; with no more input coming, nothing can complete it, so it becomes an error.
+    error |= prev_incomplete;
+  }
+
+  // Validate the next block of input, accumulating any problem into this->error.
+  //
+  // A block without any high bit set takes the fast path: the only possible error is that the
+  // previous block left a multibyte character unfinished. Otherwise every chunk is checked
+  // against the chunk before it, and the state for the next call is refreshed.
+  simdjson_really_inline void check_next_input(const simd8x64<uint8_t>& input) {
+    const simd8<uint8_t> combined = input.reduce_or();
+    const bool has_non_ascii = combined.any_bits_set_anywhere(0b10000000u);
+    if (simdjson_likely(!has_non_ascii)) {
+      // Pure ASCII cannot complete a character left unfinished by the previous block.
+      this->error |= this->prev_incomplete;
+      return;
+    }
+
+    // Walk the chunks in order, always pairing a chunk with its predecessor; the first chunk's
+    // predecessor is the last chunk remembered from the previous non-ASCII block.
+    simd8<uint8_t> previous_chunk = this->prev_input_block;
+    for (int chunk = 0; chunk < simd8x64<uint8_t>::NUM_CHUNKS; chunk++) {
+      this->error |= this->check_utf8_bytes(input.chunks[chunk], previous_chunk);
+      previous_chunk = input.chunks[chunk];
+    }
+    this->prev_input_block = previous_chunk;
+    this->set_fast_path_error();
+  }
+
+  // Report the validation result accumulated so far: UTF8_ERROR as soon as any bit of the error
+  // register is set, SUCCESS otherwise.
+  simdjson_really_inline error_code errors() {
+    if (this->error.any_bits_set_anywhere()) {
+      return simdjson::UTF8_ERROR;
+    }
+    return simdjson::SUCCESS;
+  }
+
+}; // struct utf8_checker
+
+} // namespace utf8_validation
+} // namespace SIMDJSON_IMPLEMENTATION
+} // namespace simdjson
--- a/src/generic/stage1/utf8_range_algorithm.h
+++ b/src/generic/stage1/utf8_range_algorithm.h
@ -0,0 +1,186 @@
+namespace simdjson {
+namespace SIMDJSON_IMPLEMENTATION {
+
+/*
+ * legal utf-8 byte sequence
+ * http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
+ *
+ *  Code Points        1st       2s       3s       4s
+ * U+0000..U+007F     00..7F
+ * U+0080..U+07FF     C2..DF   80..BF
+ * U+0800..U+0FFF     E0       A0..BF   80..BF
+ * U+1000..U+CFFF     E1..EC   80..BF   80..BF
+ * U+D000..U+D7FF     ED       80..9F   80..BF
+ * U+E000..U+FFFF     EE..EF   80..BF   80..BF
+ * U+10000..U+3FFFF   F0       90..BF   80..BF   80..BF
+ * U+40000..U+FFFFF   F1..F3   80..BF   80..BF   80..BF
+ * U+100000..U+10FFFF F4       80..8F   80..BF   80..BF
+ *
+ */
+
+// all byte values must be no larger than 0xF4
+
+using namespace simd;
+
+// State carried from one chunk to the next by utf8_checker::check_utf8_bytes.
+struct processed_utf_bytes {
+  // The bytes of the chunk that was checked last.
+  simd8<uint8_t> raw_bytes;
+  // Per byte of that chunk: legal character length minus 1 when the byte is treated as a first
+  // byte (0 for 00~BF, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF).
+  simd8<uint8_t> first_len;
+};
+
+struct utf8_checker {
+  // Accumulated comparison results; errors() reports UTF8_ERROR once any lane is set.
+  simd8<bool> has_error;
+  // Raw bytes and first_len of the last chunk passed to check_utf8_bytes.
+  processed_utf_bytes previous;
+
+  // Flag an error if the previously checked chunk ended with a lead byte whose continuation
+  // bytes would have to come from the next chunk. Called when the next input is pure ASCII and
+  // therefore cannot supply them.
+  simdjson_really_inline void check_carried_continuations() {
+    // Highest first_len (character length minus 1) each lane may hold without spilling into the
+    // next chunk: 2 for the third lane from the end, 1 for the second, 0 for the last. All other
+    // lanes use 9, which first_len never exceeds. The register is loaded from the tail of the
+    // table so those three entries always land in its last three lanes.
+    static const int8_t max_carried_len[32] = {
+      9, 9, 9, 9, 9, 9, 9, 9,
+      9, 9, 9, 9, 9, 9, 9, 9,
+      9, 9, 9, 9, 9, 9, 9, 9,
+      9, 9, 9, 9, 9, 2, 1, 0
+    };
+    const int8_t * const tail = max_carried_len + 32 - sizeof(simd8<int8_t>);
+    const simd8<int8_t> limits(tail);
+    const simd8<int8_t> carried(this->previous.first_len);
+    this->has_error |= carried > limits;
+  }
+
+  // Check whether the current bytes are valid UTF-8, using this->previous for the bytes that
+  // came just before them. Any violation is accumulated into this->has_error.
+  // At the end of the function, previous gets updated to describe current_bytes.
+  simdjson_really_inline void check_utf8_bytes(const simd8<uint8_t> current_bytes) {
+
+    /* high_nibbles = input >> 4 */
+    const simd8<uint8_t> high_nibbles = current_bytes.shr<4>();
+
+    /*
+    * Map high nibble of "First Byte" to legal character length minus 1
+    * 0x00 ~ 0xBF --> 0
+    * 0xC0 ~ 0xDF --> 1
+    * 0xE0 ~ 0xEF --> 2
+    * 0xF0 ~ 0xFF --> 3
+    */
+    /* first_len = legal character length minus 1 */
+    /* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */
+    /* first_len = first_len_tbl[high_nibbles] */
+    simd8<uint8_t> first_len = high_nibbles.lookup_16<uint8_t>(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3);
+
+    /* Map "First Byte" to 8-th item of range table (0xC2 ~ 0xF4) */
+    /* First Byte: set range index to 8 for bytes within 0xC0 ~ 0xFF */
+    /* range = first_range_tbl[high_nibbles] */
+    simd8<uint8_t> range     = high_nibbles.lookup_16<uint8_t>(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8);
+
+    /* Second Byte: set range index to first_len */
+    /* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */
+    /* range |= (first_len, previous->first_len) << 1 byte */
+    range |= first_len.prev(this->previous.first_len);
+
+    /* Third Byte: set range index to saturate_sub(first_len, 1) */
+    /* 0 for 00~7F, 0 for C0~DF, 1 for E0~EF, 2 for F0~FF */
+    /* range |= (first_len - 1) << 2 bytes */
+    range |= first_len.saturating_sub(1).prev<2>(this->previous.first_len.saturating_sub(1));
+
+    /* Fourth Byte: set range index to saturate_sub(first_len, 2) */
+    /* 0 for 00~7F, 0 for C0~DF, 0 for E0~EF, 1 for F0~FF */
+    /* range |= (first_len - 2) << 3 bytes */
+    range |= first_len.saturating_sub(2).prev<3>(this->previous.first_len.saturating_sub(2));
+
+    /*
+      * Now we have below range indices calculated
+      * Correct cases:
+      * - 8 for C0~FF
+      * - 3 for 1st byte after F0~FF
+      * - 2 for 1st byte after E0~EF or 2nd byte after F0~FF
+      * - 1 for 1st byte after C0~DF or 2nd byte after E0~EF or
+      *         3rd byte after F0~FF
+      * - 0 for others
+      * Error cases:
+      *   9,10,11 if non ascii First Byte overlaps
+      *   E.g., F1 80 C2 90 --> 8 3 10 2, where 10 indicates error
+      */
+
+    /* Adjust Second Byte range for special First Bytes(E0,ED,F0,F4) */
+    /* Overlaps lead to index 9~15, which are illegal in range table */
+    /* shift1 = (input, previous->input) << 1 byte */
+    simd8<uint8_t> shift1 = current_bytes.prev(this->previous.raw_bytes);
+    /*
+      * shift1:  | EF  F0 ... FE | FF  00  ... ...  DE | DF  E0 ... EE |
+      * pos:     | 0   1      15 | 16  17           239| 240 241    255|
+      * pos-240: | 0   0      0  | 0   0            0  | 0   1      15 |
+      * pos+112: | 112 113    127|       >= 128        |     >= 128    |
+      */
+    simd8<uint8_t> pos = shift1 - 0xEF;
+
+    /*
+    * Tables for fast handling of four special First Bytes(E0,ED,F0,F4), after
+    * which the Second Byte are not 80~BF. It contains "range index adjustment".
+    * +------------+---------------+------------------+----------------+
+    * | First Byte | original range| range adjustment | adjusted range |
+    * +------------+---------------+------------------+----------------+
+    * | E0         | 2             | 2                | 4              |
+    * +------------+---------------+------------------+----------------+
+    * | ED         | 2             | 3                | 5              |
+    * +------------+---------------+------------------+----------------+
+    * | F0         | 3             | 3                | 6              |
+    * +------------+---------------+------------------+----------------+
+    * | F4         | 3             | 4                | 7              |
+    * +------------+---------------+------------------+----------------+
+    */
+    /* index1 -> E0, index14 -> ED */
+    simd8<uint8_t> range2 = pos.saturating_sub(240).lookup_16<uint8_t>(0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0);
+    /* index1 -> F0, index5 -> F4 */
+    /* NOTE(review): pos+112 is 112..127 for EF..FE, so this presumably relies on lookup_16 */
+    /* indexing by the low nibble and yielding 0 once the high bit is set -- confirm per target. */
+    range2 += pos.saturating_add(112).lookup_16<uint8_t>(0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+    range += range2;
+
+    /* Load min and max values per calculated range index */
+    /*
+    * Range table, map range index to min and max values
+    * Index 0    : 00 ~ 7F (First Byte, ascii)
+    * Index 1,2,3: 80 ~ BF (Second, Third, Fourth Byte)
+    * Index 4    : A0 ~ BF (Second Byte after E0)
+    * Index 5    : 80 ~ 9F (Second Byte after ED)
+    * Index 6    : 90 ~ BF (Second Byte after F0)
+    * Index 7    : 80 ~ 8F (Second Byte after F4)
+    * Index 8    : C2 ~ F4 (First Byte, non ascii)
+    * Index 9~15 : illegal: i >= 127 && i <= -128
+    */
+    simd8<uint8_t> minv = range.lookup_16<uint8_t>(
+      0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80,
+      0xC2, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F
+    );
+    simd8<uint8_t> maxv = range.lookup_16<uint8_t>(
+      0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F,
+      0xF4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
+    );
+
+    // We're fine with high-bit wraparound here, so we use int comparison since it's faster on Intel
+    // A byte is in error when it lies below the minimum or above the maximum of its range.
+    this->has_error |= simd8<int8_t>(minv) > simd8<int8_t>(current_bytes);
+    this->has_error |= simd8<int8_t>(current_bytes) > simd8<int8_t>(maxv);
+
+    // Remember this chunk so the next call can look back across the chunk boundary.
+    this->previous.raw_bytes = current_bytes;
+    this->previous.first_len = first_len;
+  }
+
+  // Validate a single chunk. Pure-ASCII chunks only need the cheap check that the previous
+  // chunk did not leave a character unfinished; anything else gets the full range check.
+  simdjson_really_inline void check_next_input(const simd8<uint8_t> in) {
+    const bool has_non_ascii = in.any_bits_set_anywhere(0x80u);
+    if (simdjson_likely(!has_non_ascii)) {
+      this->check_carried_continuations();
+      return;
+    }
+    this->check_utf8_bytes(in);
+  }
+
+  // Validate a whole block of chunks. If no byte in the block has its high bit set, only the
+  // carried-continuation check runs; otherwise every chunk is fully checked, in order.
+  simdjson_really_inline void check_next_input(const simd8x64<uint8_t>& in) {
+    const simd8<uint8_t> combined = in.reduce_or();
+    const bool has_non_ascii = combined.any_bits_set_anywhere(0x80u);
+    if (simdjson_likely(!has_non_ascii)) {
+      // All ASCII: the only thing that can be wrong is an unfinished character from before.
+      this->check_carried_continuations();
+      return;
+    }
+    // Not ASCII, so each chunk has to go through the heavy path.
+    for (int chunk = 0; chunk < simd8x64<uint8_t>::NUM_CHUNKS; chunk++) {
+      this->check_utf8_bytes(in.chunks[chunk]);
+    }
+  }
+
+  // Report the validation result accumulated so far: UTF8_ERROR if any lane of has_error is
+  // set, SUCCESS otherwise.
+  simdjson_really_inline error_code errors() {
+    if (this->has_error.any()) {
+      return simdjson::UTF8_ERROR;
+    }
+    return simdjson::SUCCESS;
+  }
+}; // struct utf8_checker
+
+} // namespace SIMDJSON_IMPLEMENTATION
+} // namespace simdjson
--- a/src/generic/stage1/utf8_validator.h
+++ b/src/generic/stage1/utf8_validator.h
@ -1,5 +1,5 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace stage1 {

 /**
@ -28,5 +28,5 @@ bool generic_validate_utf8(const char * input, size_t length) {
 }

 } // namespace stage1
-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/stage1/utf8_zwegner_algorithm.h
+++ b/src/generic/stage1/utf8_zwegner_algorithm.h
@ -0,0 +1,365 @@
+namespace simdjson {
+namespace SIMDJSON_IMPLEMENTATION {
+//
+// Detect UTF-8 errors.
+//
+// Copied and adapted from algorithm by @zwegner: https://github.com/zwegner/faster-utf8-validator
+//
+// UTF-8 Refresher
+// ---------------
+//
+// UTF-8 is designed to allow multiple bytes and be compatible with ASCII. It's a fairly basic
+// encoding that uses the first few bits on each byte to denote a "byte type", and all other bits
+// are straight up concatenated into the final value. The first byte of a multibyte character is a
+// "leading byte" and starts with N 1's, where N is the total number of bytes (110_____ = 2 byte
+// lead). The remaining bytes of a multibyte character all start with 10. 1-byte characters just
+// start with 0, because that's what ASCII looks like. Here's what each size looks like:
+//
+// | Character Length            | UTF-8 Byte Sequence                   |
+// |-----------------------------|---------------------------------------|
+// | ASCII (7 bits):             | `0_______`                            |
+// | 2 byte character (11 bits)  | `110_____ 10______`                   |
+// | 3 byte character (16 bits)  | `1110____ 10______ 10______`          |
+// | 4 byte character (21 bits)  | `11110___ 10______ 10______ 10______` |
+// | 5+ byte character (illegal) | `11111___` <illegal>                  |
+//
+// UTF-8 Error Classes
+// -------------------
+//
+// There are 6 classes of error that can happen in UTF-8:
+//
+// ### Too short (missing continuations)
+//
+// TOO_SHORT: when you have a multibyte character with too few bytes (i.e. missing continuation).
+// We detect this by looking for new characters (lead bytes) inside the range of a multibyte
+// character.
+//
+// e.g. `11000000 01100001` (2-byte character where second byte is ASCII)
+//
+// ### Too long (stray continuations)
+//
+// TOO_LONG: when there are more bytes in your character than you need (i.e. extra continuation).
+// We detect this by requiring that the next byte after your multibyte character be a new
+// character--so a continuation after your character is wrong.
+//
+// e.g. `11011111 10111111 10111111` (2-byte character followed by *another* continuation byte)
+//
+// ### Too large (out of range for unicode)
+//
+// TOO_LARGE: Unicode only goes up to U+10FFFF. These characters are too large.
+//
+// e.g. `11110111 10111111 10111111 10111111` (bigger than 10FFFF).
+//
+// ### Overlong encoding (used more bytes than needed)
+//
+// Multibyte characters with a bunch of leading zeroes, where you could have
+// used fewer bytes to make the same character, are considered *overlong encodings*. They are
+// disallowed in UTF-8 to ensure there is only one way to write a single Unicode codepoint, making strings
+// easier to search. Like encoding an ASCII character in 2 bytes is technically possible, but UTF-8
+// disallows it so that you only have to search for the ASCII character `a` to find it.
+//
+// e.g. `11000001 10100001` (2-byte encoding of "a", which only requires 1 byte: 01100001)
+//
+// ### Surrogate characters
+//
+// Unicode U+D800-U+DFFF are *surrogate* code points, reserved for use in UTF-16 (and WTF-8) to
+// encode code points above U+FFFF. These are illegal in pure UTF-8.
+//
+// e.g. `11101101 10100000 10000000` (U+D800)
+//
+// ### 5+ byte characters
+// 
+// INVALID_5_BYTE: 5-byte, 6-byte, 7-byte and 8-byte characters are unsupported; Unicode does not
+// support values with more than 21 bits (which a 4-byte character supports).
+//
+// Even if these were supported, anything with 5 bytes would be either too large (bigger than the
+// Unicode max value), or overlong (could fit in 4 or fewer bytes).
+//
+// e.g. `11111000 10100000 10000000 10000000 10000000` (U+800000)
+//   
+// Legal UTF-8 byte sequences per http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94:
+//
+//  |  Code Points       |  1st   |  2nd   |  3rd   |  4th   |
+//  |--------------------|--------|--------|--------|--------|
+//  | U+0000..U+007F     | 00..7F |        |        |        |
+//  | U+0080..U+07FF     | C2..DF | 80..BF |        |        |
+//  | U+0800..U+0FFF     | E0     | A0..BF | 80..BF |        |
+//  | U+1000..U+CFFF     | E1..EC | 80..BF | 80..BF |        |
+//  | U+D000..U+D7FF     | ED     | 80..9F | 80..BF |        |
+//  | U+E000..U+FFFF     | EE..EF | 80..BF | 80..BF |        |
+//  | U+10000..U+3FFFF   | F0     | 90..BF | 80..BF | 80..BF |
+//  | U+40000..U+FFFFF   | F1..F3 | 80..BF | 80..BF | 80..BF |
+//  | U+100000..U+10FFFF | F4     | 80..8F | 80..BF | 80..BF |
+//
+// Algorithm
+// ---------
+//
+// This validator works in two basic steps: checking continuation bytes, and
+// handling special cases. Each step works on one vector's worth of input
+// bytes at a time.
+//
+using namespace simd;
+
+using vmask_t = simd8<bool>::bitmask_t;
+using vmask2_t = simd8<bool>::bitmask2_t;
+
+struct utf8_checker {
+  simd8<uint8_t> special_case_errors;
+  simd8<uint8_t> prev_bytes;
+  vmask2_t last_cont;
+  vmask_t length_errors;
+
+  //
+  // Check for missing / extra continuation bytes.
+  //
+  // The continuation bytes are handled in a fairly straightforward manner in
+  // the scalar domain. A mask is created from the input byte vector for each
+  // of the highest four bits of every byte. The first mask allows us to quickly
+  // skip pure ASCII input vectors, which have no bits set. The first and
+  // (inverted) second masks together give us every continuation byte (10xxxxxx).
+  // The other masks are used to find prefixes of multi-byte code points (110,
+  // 1110, 11110). For these, we keep a "required continuation" mask, by shifting
+  // these masks 1, 2, and 3 bits respectively forward in the byte stream. That
+  // is, we take a mask of all bytes that start with 11, and shift it left one
+  // bit forward to get the mask of all the first continuation bytes, then do the
+  // same for the second and third continuation bytes. Here's an example input
+  // sequence along with the corresponding masks:
+  //
+  //   bytes:        61 C3 80 62 E0 A0 80 63 F0 90 80 80 00
+  //   code points:  61|C3 80|62|E0 A0 80|63|F0 90 80 80|00
+  //   # of bytes:   1 |2  - |1 |3  -  - |1 |4  -  -  - |1
+  //   cont. mask 1: -  -  1  -  -  1  -  -  -  1  -  -  -
+  //   cont. mask 2: -  -  -  -  -  -  1  -  -  -  1  -  -
+  //   cont. mask 3: -  -  -  -  -  -  -  -  -  -  -  1  -
+  //   cont. mask *: 0  0  1  0  0  1  1  0  0  1  1  1  0
+  //
+  // The final required continuation mask is then compared with the mask of
+  // actual continuation bytes, and must match exactly in valid UTF-8. The only
+  // complication in this step is that the shifted masks can cross vector
+  // boundaries, so we need to keep a "carry" mask of the bits that were shifted
+  // past the boundary in the last loop iteration.
+  //
+  simdjson_really_inline void check_length_errors(const simd8<uint8_t> bytes, const vmask_t bit_7) {
+    // Compute the continuation byte mask by finding bytes that start with
+    // 11x, 111x, and 1111. For each of these prefixes, we get a bitmask
+    // and shift it forward by 1, 2, or 3. (The original algorithm did this
+    // in a loop and relied on the compiler to unroll it.)
+    //
+    // NOTE (@jkeiser): I unrolled the for(i=1..3) loop because I don't trust compiler unrolling
+    // anymore. This should be exactly equivalent and yield the same optimizations (and also lets
+    // us rearrange statements if we so desire).
+
+    // We add the shifted masks below instead of ORing them, which would
+    // be the more natural operation, so that each addition can be done
+    // with one lea. While adding could give a different result due
+    // to carries, this will only happen for invalid UTF-8 sequences,
+    // and in a way that won't cause it to pass validation. Reasoning:
+    // Any bits for required continuation bytes come after the bits
+    // for their leader bytes, and are all contiguous. For a carry to
+    // happen, two of these bit sequences would have to overlap. If
+    // this is the case, there is a leader byte before the second set
+    // of required continuation bytes (and thus before the bit that
+    // will be cleared by a carry). This leader byte will not be
+    // in the continuation mask, despite being required. QEDish.
+    // cont_required: which bytes are required to be continuation bytes (starts with the carry).
+    vmask2_t cont_required = this->last_cont;
+
+    // 2+ byte lead: 11______
+    const vmask_t bit_6 = bytes.get_bit<6>();
+    const vmask_t lead_2_plus = bit_7 & bit_6;       // 11______
+    cont_required += vmask2_t(lead_2_plus) << 1;
+
+    // 3+ byte lead: 111_____
+    const vmask_t bit_5 = bytes.get_bit<5>();
+    const vmask_t lead_3_plus = lead_2_plus & bit_5; // 111_____
+    cont_required += vmask2_t(lead_3_plus) << 2;
+
+    // 4+ byte lead: 1111____
+    const vmask_t bit_4 = bytes.get_bit<4>();
+    const vmask_t lead_4_plus = lead_3_plus & bit_4;
+    cont_required += vmask2_t(lead_4_plus) << 3;
+
+    const vmask_t cont = bit_7 ^ lead_2_plus;        // 10______ TODO &~ bit_6 might be fine, and involve less data dependency
+
+    // Check that continuation bytes match. We must cast cont_required from vmask2_t
+    // (which holds the carry mask in the upper half) to vmask_t, which
+    // zeroes out the upper bits
+    //
+    // NOTE (@jkeiser): I turned the if() statement here into an accumulation into
+    // this->length_errors for performance in success cases: instead of spending time testing the
+    // result and introducing a branch (which can affect performance even if it's easily
+    // predictable), we test once at the end. The ^ is equivalent to !=, however, leaving a 1
+    // where the bits are different and 0 where they are the same.
+    this->length_errors |= cont ^ vmask_t(cont_required);
+
+    this->last_cont = cont_required >> sizeof(simd8<uint8_t>);
+  }
+
+  //
+  // These constants define the set of error flags in check_special_cases().
+  //
+  static const uint8_t OVERLONG_2  = 0x01; // 1100000_         ________         Could have been encoded in 1 byte
+  static const uint8_t OVERLONG_3  = 0x02; // 11100000         100_____         Could have been encoded in 2 bytes
+  static const uint8_t SURROGATE   = 0x04; // 11101101         101_____         Surrogate pairs
+  static const uint8_t TOO_LARGE   = 0x08; // 11110100         (1001|101_)____ > U+10FFFF
+  static const uint8_t TOO_LARGE_2 = 0x10; // 1111(0101..1111) ________       > U+10FFFF
+  static const uint8_t OVERLONG_4  = 0x20; // 11110000         1000____         Could have been encoded in 3 bytes
+
+  //
+  // Check for special-case errors with table lookups on the first 3 nibbles (first 2 bytes).
+  //
+  // Besides the basic prefix coding of UTF-8, there are several invalid byte
+  // sequences that need special handling. These are due to three factors:
+  // code points that could be described in fewer bytes, code points that are
+  // part of a surrogate pair (which are only valid in UTF-16), and code points
+  // that are past the highest valid code point U+10FFFF.
+  //
+  // All of the invalid sequences can be detected by independently observing
+  // the first three nibbles of each code point. Since AVX2 can do a 4-bit/16-byte
+  // lookup in parallel for all 32 bytes in a vector, we can create bit masks
+  // for all of these error conditions, look up the bit masks for the three
+  // nibbles for all input bytes, and AND them together to get a final error mask,
+  // that must be all zero for valid UTF-8. This is somewhat complicated by
+  // needing to shift the error masks from the first and second nibbles forward in
+  // the byte stream to line up with the third nibble.
+  //
+  // We have these possible values for valid UTF-8 sequences, broken down
+  // by the first three nibbles:
+  //
+  //   1st   2nd   3rd   comment
+  //   0..7  0..F        ASCII
+  //   8..B  0..F        continuation bytes
+  //   C     2..F  8..B  C0 xx and C1 xx can be encoded in 1 byte
+  //   D     0..F  8..B  D0..DF are valid with a continuation byte
+  //   E     0     A..B  E0 8x and E0 9x can be encoded with 2 bytes
+  //         1..C  8..B  E1..EC are valid with continuation bytes
+  //         D     8..9  ED Ax and ED Bx correspond to surrogate pairs
+  //         E..F  8..B  EE..EF are valid with continuation bytes
+  //   F     0     9..B  F0 8x can be encoded with 3 bytes
+  //         1..3  8..B  F1..F3 are valid with continuation bytes
+  //         4     8     F4 8F BF BF is the maximum valid code point
+  //
+  // That leaves us with these invalid sequences, which would otherwise fit
+  // into UTF-8's prefix encoding. Each of these invalid sequences needs to
+  // be detected separately, with their own bits in the error mask.
+  //
+  //   1st   2nd   3rd   error bit
+  //   C     0..1  0..F  0x01 (OVERLONG_2)
+  //   E     0     8..9  0x02 (OVERLONG_3)
+  //         D     A..B  0x04 (SURROGATE)
+  //   F     4     9..B  0x08 (TOO_LARGE)
+  //         5..F  0..F  0x10 (TOO_LARGE_2)
+  //         0     8     0x20 (OVERLONG_4)
+  //
+  // For every possible value of the first, second, and third nibbles, we keep
+  // a lookup table that contains the bitwise OR of all errors that that nibble
+  // value can cause. For example, the first nibble has zeroes in every entry
+  // except for C, E, and F, and the third nibble lookup has the 0x11 bits
+  // (OVERLONG_2 | TOO_LARGE_2) in every entry, since those errors don't depend
+  // on the third nibble. After doing a parallel lookup of the first/second/third
+  // nibble values for all bytes, we AND them together. Only when all three have
+  // an error bit in common do we fail validation.
+  //
+  simdjson_really_inline void check_special_cases(const simd8<uint8_t> bytes) {
+    const simd8<uint8_t> shifted_bytes = bytes.prev<1>(this->prev_bytes);
+    this->prev_bytes = bytes;
+
+    // Look up error masks for three consecutive nibbles. Each index is
+    // reduced to a nibble first (shr<4> or & 0x0F), because vpshufb has the
+    // neat "feature" that negative values in an index byte will result in
+    // a zero.
+    simd8<uint8_t> nibble_1_error = shifted_bytes.shr<4>().lookup_16<uint8_t>(
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        OVERLONG_2,                          // [1100]000_         ________        Could have been encoded in 1 byte
+        0,
+        OVERLONG_3 | SURROGATE,              // [1110]0000         100_____        Could have been encoded in 2 bytes
+                                             // [1110]1101         101_____        Surrogate pairs
+        OVERLONG_4 | TOO_LARGE | TOO_LARGE_2 // [1111]0000         1000____        Could have been encoded in 3 bytes
+                                             // [1111]0100         (1001|101_)____ > U+10FFFF
+    );
+
+    simd8<uint8_t> nibble_2_error = (shifted_bytes & 0x0F).lookup_16<uint8_t>(
+      OVERLONG_2 | OVERLONG_3 | OVERLONG_4,  // 1100[000_]       ________        Could have been encoded in 1 byte
+                                             // 1110[0000]       100_____        Could have been encoded in 2 bytes
+                                             // 1111[0000]       1000____        Could have been encoded in 3 bytes
+      OVERLONG_2,
+      0,
+      0,
+
+      TOO_LARGE,                             // 1111[0100]       (1001|101_)____ > U+10FFFF
+      TOO_LARGE_2,                           // 1111[0101..1111] ________        > U+10FFFF
+      TOO_LARGE_2,
+      TOO_LARGE_2,
+      
+      TOO_LARGE_2,
+      TOO_LARGE_2,
+      TOO_LARGE_2,
+      TOO_LARGE_2,
+
+      TOO_LARGE_2,
+      TOO_LARGE_2 | SURROGATE,               // 1110[1101]       101_____        Surrogate pairs
+      TOO_LARGE_2, TOO_LARGE_2
+    );
+
+    // Errors that apply no matter what the third byte is
+    const uint8_t CARRY = OVERLONG_2 | TOO_LARGE_2; // 1100000_         [____]____        Could have been encoded in 1 byte
+                                                    // 1111(0101..1111) [____]____        > U+10FFFF
+    simd8<uint8_t> nibble_3_error = bytes.shr<4>().lookup_16<uint8_t>(
+      CARRY, CARRY, CARRY, CARRY,
+
+      CARRY, CARRY, CARRY, CARRY,
+
+      CARRY | OVERLONG_3 | OVERLONG_4,        // 11100000       [100_]____       Could have been encoded in 2 bytes
+                                              // 11110000       [1000]____       Could have been encoded in 3 bytes
+      CARRY | OVERLONG_3 | TOO_LARGE,         // 11100000       [100_]____       Could have been encoded in 2 bytes
+                                              // 11110100       [1001|101_]____  > U+10FFFF
+      CARRY | SURROGATE | TOO_LARGE,          // 11101101       [101_]____       Surrogate pairs
+      CARRY | SURROGATE | TOO_LARGE,
+
+      CARRY, CARRY, CARRY, CARRY
+    );
+
+    // Check if any bits are set in all three error masks
+    //
+    // NOTE (@jkeiser): I turned the if() statement here into an accumulation into
+    // this->special_case_errors for performance in success cases: instead of testing the result and
+    // introducing a branch (which can affect performance even if it's easily predictable), we test once at the end.
+    this->special_case_errors |= nibble_1_error & nibble_2_error & nibble_3_error;
+  }
+
+  // Check whether the current bytes are valid UTF-8, accumulating any errors found.
+  // As a side effect, the carried state (last_cont and prev_bytes) is updated for the next call.
+  simdjson_really_inline void check_utf8_bytes(const simd8<uint8_t> bytes, const vmask_t bit_7) {
+    this->check_length_errors(bytes, bit_7);
+    this->check_special_cases(bytes);
+  }
+
+  simdjson_really_inline void check_next_input(const simd8<uint8_t> bytes) {
+    vmask_t bit_7 = bytes.get_bit<7>();
+    if (simdjson_unlikely(bit_7)) {
+      // TODO (@jkeiser): To work with simdjson's caller model, I moved the calculation of
+      // shifted_bytes inside check_utf8_bytes. I believe this adds an extra instruction to the hot
+      // path (saving prev_bytes), which is undesirable, though 2 register accesses vs. 1 memory
+      // access might be a wash. Come back and try the other way.
+      this->check_utf8_bytes(bytes, bit_7);
+    } else {
+      this->length_errors |= this->last_cont;
+    }
+  }
+
+  simdjson_really_inline void check_next_input(const simd8x64<uint8_t>& in) {
+    for (int i=0; i<simd8x64<uint8_t>::NUM_CHUNKS; i++) {
+      this->check_next_input(in.chunks[i]);
+    }
+  }
+
+  simdjson_really_inline error_code errors() {
+    return (this->special_case_errors.any_bits_set_anywhere() | this->length_errors) ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
+  }
+}; // struct utf8_checker
+
+} // namespace SIMDJSON_IMPLEMENTATION
+} // namespace simdjson
--- a/src/generic/stage2/allocate.h
+++ b/src/generic/stage2/allocate.h
@ -1,5 +1,5 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace stage2 {
 namespace allocate {

@ -18,5 +18,5 @@ simdjson_really_inline error_code set_max_depth(dom_parser_implementation &parse

 } // namespace allocate
 } // namespace stage2
-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/stage2/atomparsing.h
+++ b/src/generic/stage2/atomparsing.h
@ -1,5 +1,5 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace stage2 {
 namespace atomparsing {

@ -60,5 +60,5 @@ simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) {

 } // namespace atomparsing
 } // namespace stage2
-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/stage2/json_iterator.h
+++ b/src/generic/stage2/json_iterator.h
@ -1,7 +1,7 @@
 #include "generic/stage2/logger.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace stage2 {

 class json_iterator {
@ -311,5 +311,5 @@ SIMDJSON_WARN_UNUSED simdjson_really_inline error_code json_iterator::visit_prim
 }

 } // namespace stage2
-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/stage2/jsoncharutils.h
+++ b/src/generic/stage2/jsoncharutils.h
@ -1,5 +1,5 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace stage2 {

 // return non-zero if not a structural or whitespace char
@ -105,5 +105,5 @@ simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t va
 }

 } // namespace stage2
-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/stage2/logger.h
+++ b/src/generic/stage2/logger.h
@ -1,7 +1,7 @@
 // This is for an internal-only stage 2 specific logger.
 // Set LOG_ENABLED = true to log what stage 2 is doing!
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace logger {

  static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------";
@ -82,5 +82,5 @@ namespace logger {
  }

 } // namespace logger
-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/stage2/numberparsing.h
+++ b/src/generic/stage2/numberparsing.h
@ -1,8 +1,8 @@
 #include <cmath>
 #include <limits>

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace stage2 {
 namespace numberparsing {

@ -766,5 +766,5 @@ SIMDJSON_UNUSED simdjson_really_inline simdjson_result<double> parse_double(cons

 } // namespace numberparsing
 } // namespace stage2
-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/stage2/stringparsing.h
+++ b/src/generic/stage2/stringparsing.h
@ -1,8 +1,8 @@
 // This file contains the common code every implementation uses
 // It is intended to be included multiple times and compiled multiple times

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace stage2 {
 namespace stringparsing {

@ -129,5 +129,5 @@ SIMDJSON_UNUSED SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_str

 } // namespace stringparsing
 } // namespace stage2
-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/stage2/structural_iterator.h
+++ b/src/generic/stage2/structural_iterator.h
@ -1,5 +1,5 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace stage2 {

 class structural_iterator {
@ -48,5 +48,5 @@ public:
 };

 } // namespace stage2
-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/stage2/tape_builder.h
+++ b/src/generic/stage2/tape_builder.h
@ -2,8 +2,8 @@
 #include "generic/stage2/tape_writer.h"
 #include "generic/stage2/atomparsing.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace stage2 {

 struct tape_builder {
@ -279,5 +279,5 @@ simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept {
 }

 } // namespace stage2
-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/generic/stage2/tape_writer.h
+++ b/src/generic/stage2/tape_writer.h
@ -1,5 +1,5 @@
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace stage2 {

 struct tape_writer {
@ -99,5 +99,5 @@ simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val,
 }

 } // namespace stage2
-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION
--- a/src/haswell/bitmanipulation.h
+++ b/src/haswell/bitmanipulation.h
@ -1,8 +1,8 @@
 #ifndef SIMDJSON_HASWELL_BITMANIPULATION_H
 #define SIMDJSON_HASWELL_BITMANIPULATION_H

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {

 // We sometimes call trailing_zero on inputs that are zero,
 // but the algorithms do not end up using the returned value.
@ -53,7 +53,7 @@ simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2,
 #endif
 }

-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #endif // SIMDJSON_HASWELL_BITMANIPULATION_H
--- a/src/haswell/bitmask.h
+++ b/src/haswell/bitmask.h
@ -1,8 +1,8 @@
 #ifndef SIMDJSON_HASWELL_BITMASK_H
 #define SIMDJSON_HASWELL_BITMASK_H

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {

 //
 // Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered.
@ -17,7 +17,7 @@ simdjson_really_inline uint64_t prefix_xor(const uint64_t bitmask) {
  return _mm_cvtsi128_si64(result);
 }

-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #endif // SIMDJSON_HASWELL_BITMASK_H
--- a/src/haswell/dom_parser_implementation.cpp
+++ b/src/haswell/dom_parser_implementation.cpp
@ -6,8 +6,8 @@
 // Stage 1
 //

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {

 using namespace simd;

@ -102,8 +102,8 @@ simdjson_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t>
  return simd8<int8_t>(is_third_byte | is_fourth_byte) > int8_t(0);
 }

-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #include "generic/stage1/utf8_lookup4_algorithm.h"
 #include "generic/stage1/json_structural_indexer.h"
@ -119,8 +119,8 @@ simdjson_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t>
 //
 // Implementation-specific overrides
 //
-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace stage1 {

 simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) {
@ -129,6 +129,7 @@ simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backs
 }

 } // namespace stage1
+} // unnamed namespace

 SIMDJSON_WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
  return haswell::stage1::json_minifier::minify<128>(buf, len, dst, dst_len);
@ -159,7 +160,6 @@ SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *
 }

 } // namespace SIMDJSON_IMPLEMENTATION
-} // unnamed namespace

 #include "generic/ondemand.h"

--- a/src/haswell/implementation.cpp
+++ b/src/haswell/implementation.cpp
@ -1,7 +1,6 @@
 #include "haswell/begin_implementation.h"
 #include "haswell/dom_parser_implementation.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {

 SIMDJSON_WARN_UNUSED error_code implementation::create_dom_parser_implementation(
@ -17,7 +16,6 @@ SIMDJSON_WARN_UNUSED error_code implementation::create_dom_parser_implementation
 }

 } // namespace SIMDJSON_IMPLEMENTATION
-} // unnamed namespace

 #include "haswell/end_implementation.h"

--- a/src/haswell/implementation.h
+++ b/src/haswell/implementation.h
@ -5,7 +5,6 @@
 #include "isadetection.h"

 // The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_REGION
-namespace {
 namespace haswell {

 using namespace simdjson;
@ -27,6 +26,5 @@ public:
 };

 } // namespace haswell
-} // unnamed namespace

 #endif // SIMDJSON_HASWELL_IMPLEMENTATION_H
--- a/src/haswell/numberparsing.h
+++ b/src/haswell/numberparsing.h
@ -1,8 +1,8 @@
 #ifndef SIMDJSON_HASWELL_NUMBERPARSING_H
 #define SIMDJSON_HASWELL_NUMBERPARSING_H

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {

 static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) {
  // this actually computes *16* values so we are being wasteful.
@ -22,8 +22,8 @@ static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t
      t4); // only captures the sum of the first 8 digits, drop the rest
 }

-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #define SWAR_NUMBER_PARSING

--- a/src/haswell/simd.h
+++ b/src/haswell/simd.h
@ -3,8 +3,8 @@

 #include "simdprune_tables.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace simd {

  // Forward-declared so they can be used by splat and friends.
@ -355,7 +355,7 @@ namespace simd {

 } // namespace simd

-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #endif // SIMDJSON_HASWELL_SIMD_H
--- a/src/haswell/stringparsing.h
+++ b/src/haswell/stringparsing.h
@ -5,8 +5,8 @@
 #include "haswell/simd.h"
 #include "haswell/bitmanipulation.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {

 using namespace simd;

@ -38,8 +38,8 @@ simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(co
  };
 }

-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #include "generic/stage2/stringparsing.h"

--- a/src/westmere/bitmanipulation.h
+++ b/src/westmere/bitmanipulation.h
@ -1,8 +1,8 @@
 #ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H
 #define SIMDJSON_WESTMERE_BITMANIPULATION_H

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {

 // We sometimes call trailing_zero on inputs that are zero,
 // but the algorithms do not end up using the returned value.
@ -62,7 +62,7 @@ simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2,
 #endif
 }

-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #endif // SIMDJSON_WESTMERE_BITMANIPULATION_H
--- a/src/westmere/bitmask.h
+++ b/src/westmere/bitmask.h
@ -1,8 +1,8 @@
 #ifndef SIMDJSON_WESTMERE_BITMASK_H
 #define SIMDJSON_WESTMERE_BITMASK_H

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {

 //
 // Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered.
@ -17,7 +17,7 @@ simdjson_really_inline uint64_t prefix_xor(const uint64_t bitmask) {
  return _mm_cvtsi128_si64(result);
 }

-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #endif // SIMDJSON_WESTMERE_BITMASK_H
--- a/src/westmere/dom_parser_implementation.cpp
+++ b/src/westmere/dom_parser_implementation.cpp
@ -6,8 +6,8 @@
 // Stage 1
 //

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {

 using namespace simd;

@ -100,8 +100,8 @@ simdjson_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t>
  return simd8<int8_t>(is_third_byte | is_fourth_byte) > int8_t(0);
 }

-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #include "generic/stage1/utf8_lookup4_algorithm.h"
 #include "generic/stage1/json_structural_indexer.h"
@ -118,8 +118,8 @@ simdjson_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t>
 // Implementation-specific overrides
 //

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace stage1 {

 simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) {
@ -128,6 +128,7 @@ simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backs
 }

 } // namespace stage1
+} // unnamed namespace

 SIMDJSON_WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
  return westmere::stage1::json_minifier::minify<64>(buf, len, dst, dst_len);
@ -158,6 +159,5 @@ SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *
 }

 } // namespace SIMDJSON_IMPLEMENTATION
-} // unnamed namespace

 #include "westmere/end_implementation.h"
--- a/src/westmere/implementation.cpp
+++ b/src/westmere/implementation.cpp
@ -1,8 +1,8 @@
 #include "westmere/begin_implementation.h"
 #include "westmere/dom_parser_implementation.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {

 SIMDJSON_WARN_UNUSED error_code implementation::create_dom_parser_implementation(
  size_t capacity,
@ -16,7 +16,7 @@ SIMDJSON_WARN_UNUSED error_code implementation::create_dom_parser_implementation
  return SUCCESS;
 }

-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #include "westmere/end_implementation.h"
--- a/src/westmere/implementation.h
+++ b/src/westmere/implementation.h
@ -6,8 +6,8 @@
 #include "isadetection.h"

 // The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_REGION
-namespace {
 namespace westmere {
+namespace {

 using namespace simdjson;
 using namespace simdjson::dom;
@ -24,7 +24,7 @@ public:
  SIMDJSON_WARN_UNUSED bool validate_utf8(const char *buf, size_t len) const noexcept final;
 };

-} // namespace westmere
 } // unnamed namespace
+} // namespace westmere

 #endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H
--- a/src/westmere/numberparsing.h
+++ b/src/westmere/numberparsing.h
@ -1,8 +1,8 @@
 #ifndef SIMDJSON_WESTMERE_NUMBERPARSING_H
 #define SIMDJSON_WESTMERE_NUMBERPARSING_H

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {

 static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) {
  // this actually computes *16* values so we are being wasteful.
@ -22,8 +22,8 @@ static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t
      t4); // only captures the sum of the first 8 digits, drop the rest
 }

-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #define SWAR_NUMBER_PARSING

--- a/src/westmere/simd.h
+++ b/src/westmere/simd.h
@ -3,8 +3,8 @@

 #include "simdprune_tables.h"

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {
 namespace simd {

  template<typename Child>
@ -326,7 +326,7 @@ namespace simd {
  }; // struct simd8x64<T>

 } // namespace simd
-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #endif // SIMDJSON_WESTMERE_SIMD_INPUT_H
--- a/src/westmere/stringparsing.h
+++ b/src/westmere/stringparsing.h
@ -1,8 +1,8 @@
 #ifndef SIMDJSON_WESTMERE_STRINGPARSING_H
 #define SIMDJSON_WESTMERE_STRINGPARSING_H

-namespace {
 namespace SIMDJSON_IMPLEMENTATION {
+namespace {

 using namespace simd;

@ -36,8 +36,8 @@ simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(co
  };
 }

-} // namespace SIMDJSON_IMPLEMENTATION
 } // unnamed namespace
+} // namespace SIMDJSON_IMPLEMENTATION

 #include "generic/stage2/stringparsing.h"