Merge pull request #882 from simdjson/jkeiser/move-cpp-files

Split stage2 into files-per-class
2020-05-19 14:34:01 -07:00 · 2020-05-19 14:34:01 -07:00 · 603b6596af
parent e7e6ac5bb3 64abc3e86c
commit 603b6596af
49 changed files with 6265 additions and 6069 deletions
--- a/HACKING.md
+++ b/HACKING.md
@ -29,7 +29,7 @@ simdjson's source structure, from the top level, looks like this:
    ```c++
    namespace simdjson {
      namespace haswell {
-        #include "generic/stage1_find_marks.h"
+        #include "generic/stage1/json_structural_indexer.h"
      }
    }
    ```
--- a/include/simdjson/document.h
+++ b/include/simdjson/document.h
@ -89,6 +89,12 @@ public:
  size_t json_index;
 };

+#ifdef SIMDJSON_USE_COMPUTED_GOTO
+typedef void* ret_address; // computed-goto builds store a label address (stage 2 jumps with `goto *address`)
+#else
+typedef char ret_address; // otherwise a one-character tag is stored and stage 2 dispatches on it
+#endif
+
 } // namespace internal

 namespace dom {
@ -977,13 +983,8 @@ public:
  /** @private Tape location of each open { or [ */
  std::unique_ptr<scope_descriptor[]> containing_scope{};

-#ifdef SIMDJSON_USE_COMPUTED_GOTO
  /** @private Return address of each open { or [ */
-  std::unique_ptr<void*[]> ret_address{};
-#else
-  /** @private Return address of each open { or [ */
-  std::unique_ptr<char[]> ret_address{};
-#endif
+  std::unique_ptr<internal::ret_address[]> ret_address{};

  /** @private Use `if (parser.parse(...).error())` instead */
  bool valid{false};
--- a/include/simdjson/inline/document.h
+++ b/include/simdjson/inline/document.h
@ -509,11 +509,7 @@ inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept {
    // Initialize stage 2 state
    //
    containing_scope.reset(new (std::nothrow) scope_descriptor[max_depth]); // TODO realloc
-  #ifdef SIMDJSON_USE_COMPUTED_GOTO
-    ret_address.reset(new (std::nothrow) void *[max_depth]);
-  #else
-    ret_address.reset(new (std::nothrow) char[max_depth]);
-  #endif
+    ret_address.reset(new (std::nothrow) internal::ret_address[max_depth]);

    if (!ret_address || !containing_scope) {
      // Could not allocate memory
--- a/include/simdjson/inline/error.h
+++ b/include/simdjson/inline/error.h
@ -1,8 +1,9 @@
 #ifndef SIMDJSON_INLINE_ERROR_H
 #define SIMDJSON_INLINE_ERROR_H

-#include "simdjson/error.h"
+#include <cstring>
 #include <string>
+#include "simdjson/error.h"

 namespace simdjson {
 namespace internal {
--- a/singleheader/amalgamate.sh
+++ b/singleheader/amalgamate.sh
@ -7,14 +7,14 @@ set -e


 SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
-PROJECTPATH=$SCRIPTPATH"/.."
+PROJECTPATH="$(dirname "$SCRIPTPATH")"  # inner quotes keep a checkout path containing spaces as one argument
 echo "Project at "$PROJECTPATH

 echo "We are about to amalgamate all simdjson files into one source file. "
 echo "See https://www.sqlite.org/amalgamation.html and https://en.wikipedia.org/wiki/Single_Compilation_Unit for rationale. "

-if [ -z "$AMALGAMATE_SOURCE_PATH" ]; then AMALGAMATE_SOURCE_PATH="$SCRIPTPATH/../src"; fi
-if [ -z "$AMALGAMATE_INCLUDE_PATH" ]; then AMALGAMATE_INCLUDE_PATH="$SCRIPTPATH/../include"; fi
+if [ -z "$AMALGAMATE_SOURCE_PATH" ]; then AMALGAMATE_SOURCE_PATH="$PROJECTPATH/src"; fi
+if [ -z "$AMALGAMATE_INCLUDE_PATH" ]; then AMALGAMATE_INCLUDE_PATH="$PROJECTPATH/include"; fi
 if [ -z "$AMALGAMATE_OUTPUT_PATH" ]; then AMALGAMATE_OUTPUT_PATH="$SCRIPTPATH"; fi

 # this list excludes the "src/generic headers"
--- a/singleheader/amalgamate_demo.cpp
+++ b/singleheader/amalgamate_demo.cpp
@ -1,4 +1,4 @@
-/* auto-generated on Tue May  5 20:03:59 EDT 2020. Do not edit! */
+/* auto-generated on Tue May 19 13:32:53 PDT 2020. Do not edit! */

 #include <iostream>
 #include "simdjson.h"
--- a/singleheader/simdjson.cpp
+++ b/singleheader/simdjson.cpp
--- a/singleheader/simdjson.h
+++ b/singleheader/simdjson.h
@ -1,4 +1,4 @@
-/* auto-generated on Tue May  5 20:03:59 EDT 2020. Do not edit! */
+/* auto-generated on Tue May 19 13:32:53 PDT 2020. Do not edit! */
 /* begin file include/simdjson.h */
 #ifndef SIMDJSON_H
 #define SIMDJSON_H
@ -328,12 +328,19 @@ constexpr size_t DEFAULT_MAX_DEPTH = 1024;
  #define unlikely(x) x
  #endif

-  #include <CppCoreCheck\Warnings.h>
  #define SIMDJSON_PUSH_DISABLE_WARNINGS __pragma(warning( push ))
  #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS __pragma(warning( push, 0 ))
  #define SIMDJSON_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER ))
  // Get rid of Intellisense-only warnings (Code Analysis)
+  // Though __has_include is C++17, it looks like it is supported in Visual Studio 2017 or better.
+  // We are probably not supporting earlier versions of Visual Studio in any case.
+  #if __has_include(<CppCoreCheck\Warnings.h>)
+  #include <CppCoreCheck\Warnings.h>
  #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS)
+  #else
+  #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS
+  #endif
+
  #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_VS_WARNING(4996)
  #define SIMDJSON_POP_DISABLE_WARNINGS __pragma(warning( pop ))

@ -2482,6 +2489,8 @@ public:
  really_inline uint32_t scope_count() const noexcept;
  template<typename T>
  really_inline T next_tape_value() const noexcept;
+  really_inline uint32_t get_string_length() const noexcept;
+  really_inline const char * get_c_str() const noexcept;
  inline std::string_view get_string_view() const noexcept;

  /** The document this element references. */
@ -2491,6 +2500,12 @@ public:
  size_t json_index;
 };

+#ifdef SIMDJSON_USE_COMPUTED_GOTO
+typedef void* ret_address;
+#else
+typedef char ret_address;
+#endif
+
 } // namespace internal

 namespace dom {
@ -2623,7 +2638,22 @@ public:
     * Get the key of this key/value pair.
     */
    inline std::string_view key() const noexcept;
-
+    /**
+     * Get the length (in bytes) of the key in this key/value pair.
+     * You should expect this function to be faster than key().size().
+     */
+    inline uint32_t key_length() const noexcept;
+    /**
+     * Returns true if the key in this key/value pair is equal
+     * to the provided string_view.
+     */
+    inline bool key_equals(const std::string_view & o) const noexcept;
+    /**
+     * Returns true if the key in this key/value pair is equal
+     * to the provided string_view in a case-insensitive manner.
+     * Case comparisons may only be handled correctly for ASCII strings.
+     */
+    inline bool key_equals_case_insensitive(const std::string_view & o) const noexcept;
    /**
     * Get the key of this key/value pair.
     */
@ -3364,16 +3394,8 @@ public:
  /** @private Tape location of each open { or [ */
  std::unique_ptr<scope_descriptor[]> containing_scope{};

-#ifdef SIMDJSON_USE_COMPUTED_GOTO
  /** @private Return address of each open { or [ */
-  std::unique_ptr<void*[]> ret_address{};
-#else
-  /** @private Return address of each open { or [ */
-  std::unique_ptr<char[]> ret_address{};
-#endif
-
-  /** @private Next write location in the string buf for stage 2 parsing */
-  uint8_t *current_string_buf_loc{};
+  std::unique_ptr<internal::ret_address[]> ret_address{};

  /** @private Use `if (parser.parse(...).error())` instead */
  bool valid{false};
@ -3405,32 +3427,6 @@ public:
  /** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()` instead */
  inline bool dump_raw_tape(std::ostream &os) const noexcept;

-  //
-  // Parser callbacks: these are internal!
-  //
-
-  /** @private this should be called when parsing (right before writing the tapes) */
-  inline void init_stage2() noexcept;
-  really_inline error_code on_error(error_code new_error_code) noexcept; ///< @private
-  really_inline error_code on_success(error_code success_code) noexcept; ///< @private
-  really_inline bool on_start_document(uint32_t depth) noexcept; ///< @private
-  really_inline bool on_start_object(uint32_t depth) noexcept; ///< @private
-  really_inline bool on_start_array(uint32_t depth) noexcept; ///< @private
-  // TODO we're not checking this bool
-  really_inline bool on_end_document(uint32_t depth) noexcept; ///< @private
-  really_inline bool on_end_object(uint32_t depth) noexcept; ///< @private
-  really_inline bool on_end_array(uint32_t depth) noexcept; ///< @private
-  really_inline bool on_true_atom() noexcept; ///< @private
-  really_inline bool on_false_atom() noexcept; ///< @private
-  really_inline bool on_null_atom() noexcept; ///< @private
-  really_inline uint8_t *on_start_string() noexcept; ///< @private
-  really_inline bool on_end_string(uint8_t *dst) noexcept; ///< @private
-  really_inline bool on_number_s64(int64_t value) noexcept; ///< @private
-  really_inline bool on_number_u64(uint64_t value) noexcept; ///< @private
-  really_inline bool on_number_double(double value) noexcept; ///< @private
-
-  really_inline void increment_count(uint32_t depth) noexcept; ///< @private
-  really_inline void end_scope(uint32_t depth) noexcept; ///< @private
 private:
  /**
   * The maximum document length this parser will automatically support.
@ -3475,8 +3471,6 @@ private:
  //
  //

-  inline void write_tape(uint64_t val, internal::tape_type t) noexcept;
-
  /**
   * Ensure we have enough capacity to handle at least desired_capacity bytes,
   * and auto-allocate if not.
@ -5130,11 +5124,7 @@ inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept {
    // Initialize stage 2 state
    //
    containing_scope.reset(new (std::nothrow) scope_descriptor[max_depth]); // TODO realloc
-  #ifdef SIMDJSON_USE_COMPUTED_GOTO
-    ret_address.reset(new (std::nothrow) void *[max_depth]);
-  #else
-    ret_address.reset(new (std::nothrow) char[max_depth]);
-  #endif
+    ret_address.reset(new (std::nothrow) internal::ret_address[max_depth]);

    if (!ret_address || !containing_scope) {
      // Could not allocate memory
@ -5297,7 +5287,7 @@ inline simdjson_result<element> object::at(const std::string_view &json_pointer)
 inline simdjson_result<element> object::at_key(const std::string_view &key) const noexcept {
  iterator end_field = end();
  for (iterator field = begin(); field != end_field; ++field) {
-    if (key == field.key()) {
+    if (field.key_equals(key)) {
      return field.value();
    }
  }
@ -5309,13 +5299,8 @@ inline simdjson_result<element> object::at_key(const std::string_view &key) cons
 inline simdjson_result<element> object::at_key_case_insensitive(const std::string_view &key) const noexcept {
  iterator end_field = end();
  for (iterator field = begin(); field != end_field; ++field) {
-    auto field_key = field.key();
-    if (key.length() == field_key.length()) {
-      // See For case-insensitive string comparisons, avoid char-by-char functions
-      // https://lemire.me/blog/2020/04/30/for-case-insensitive-string-comparisons-avoid-char-by-char-functions/
-      // Note that it might be worth rolling our own strncasecmp function, with vectorization.
-      const bool equal = (simdjson_strncasecmp(key.data(), field_key.data(), key.length()) == 0);
-      if (equal) { return field.value(); }
+    if (field.key_equals_case_insensitive(key)) {
+      return field.value();
    }
  }
  return NO_SUCH_FIELD;
@ -5337,13 +5322,10 @@ inline object::iterator& object::iterator::operator++() noexcept {
  return *this;
 }
 inline std::string_view object::iterator::key() const noexcept {
-  size_t string_buf_index = size_t(tape_value());
-  uint32_t len;
-  memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len));
-  return std::string_view(
-    reinterpret_cast<const char *>(&doc->string_buf[string_buf_index + sizeof(uint32_t)]),
-    len
-  );
+  return get_string_view();
+}
+inline uint32_t object::iterator::key_length() const noexcept {
+  return get_string_length();
 }
 inline const char* object::iterator::key_c_str() const noexcept {
  return reinterpret_cast<const char *>(&doc->string_buf[size_t(tape_value()) + sizeof(uint32_t)]);
@ -5352,6 +5334,42 @@ inline element object::iterator::value() const noexcept {
  return element(doc, json_index + 1);
 }

+/**
+ * Design notes:
+ * Instead of constructing a string_view and then comparing it with a
+ * user-provided string, it is probably more performant to have dedicated
+ * functions that take the string we want to compare against as a parameter
+ * and return true when they are equal. That avoids the creation of a temporary
+ * std::string_view. Though it is possible for the compiler to avoid entirely
+ * any overhead due to string_view, relying too much on compiler magic is
+ * problematic: compiler magic sometimes fails, and then what do you do?
+ * Also, enticing users to rely on high-performance functions is probably better
+ * in the long run.
+ */
+
+inline bool object::iterator::key_equals(const std::string_view & o) const noexcept {
+  // Compare lengths first: key_length() reads only the length prefix stored
+  // ahead of the key bytes, so mismatched sizes are rejected without a memcmp.
+  const uint32_t len = key_length();
+  if(o.size() == len) {
+    // We avoid construction of a temporary string_view instance.
+    return (memcmp(o.data(), key_c_str(), len) == 0);
+  }
+  return false;
+}
+
+inline bool object::iterator::key_equals_case_insensitive(const std::string_view & o) const noexcept {
+  // Compare lengths first: key_length() reads only the length prefix stored
+  // ahead of the key bytes, so mismatched sizes skip the comparison entirely.
+  const uint32_t len = key_length();
+  if(o.size() == len) {
+      // See For case-insensitive string comparisons, avoid char-by-char functions
+      // https://lemire.me/blog/2020/04/30/for-case-insensitive-string-comparisons-avoid-char-by-char-functions/
+      // Note that it might be worth rolling our own strncasecmp function, with vectorization.
+      return (simdjson_strncasecmp(o.data(), key_c_str(), len) == 0);
+  }
+  return false;
+}
 //
 // key_value_pair inline implementation
 //
@ -5386,8 +5404,7 @@ template<>
 inline simdjson_result<const char *> element::get<const char *>() const noexcept {
  switch (tape_ref_type()) {
    case internal::tape_type::STRING: {
-      size_t string_buf_index = size_t(tape_value());
-      return reinterpret_cast<const char *>(&doc->string_buf[string_buf_index + sizeof(uint32_t)]);
+      return get_c_str();
    }
    default:
      return INCORRECT_TYPE;
@ -5786,13 +5803,23 @@ really_inline T tape_ref::next_tape_value() const noexcept {
  memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t));
  return x;
 }
-inline std::string_view internal::tape_ref::get_string_view() const noexcept {
-  size_t string_buf_index = size_t(tape_value());
+
+really_inline uint32_t internal::tape_ref::get_string_length() const noexcept {
+  uint64_t string_buf_index = size_t(tape_value()); // the tape value is used as an offset into doc->string_buf
+  uint32_t len;
+  memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); // the length prefix is copied out bytewise instead of being read through a cast pointer
+  return len;
+}
+
+really_inline const char * internal::tape_ref::get_c_str() const noexcept {
+  uint64_t string_buf_index = size_t(tape_value()); // the tape value is used as an offset into doc->string_buf
+  return reinterpret_cast<const char *>(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); // characters begin right after the uint32_t length prefix
+}
+
+inline std::string_view internal::tape_ref::get_string_view() const noexcept {
  return std::string_view(
-    reinterpret_cast<const char *>(&doc->string_buf[string_buf_index + sizeof(uint32_t)]),
-    len
+      get_c_str(),
+      get_string_length()
  );
 }

@ -6092,6 +6119,7 @@ inline error_code document_stream::json_parse() noexcept {
 #ifndef SIMDJSON_INLINE_ERROR_H
 #define SIMDJSON_INLINE_ERROR_H

+#include <cstring>
 #include <string>

 namespace simdjson {
--- a/src/arm64/numberparsing.h
+++ b/src/arm64/numberparsing.h
@ -31,7 +31,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {

 #define SWAR_NUMBER_PARSING

-#include "generic/numberparsing.h"
+#include "generic/stage2/numberparsing.h"

 } // namespace arm64
 } // namespace simdjson
--- a/src/arm64/stage1_find_marks.h
+++ b/src/arm64/stage1_find_marks.h
@ -1,6 +1,3 @@
-#ifndef SIMDJSON_ARM64_STAGE1_FIND_MARKS_H
-#define SIMDJSON_ARM64_STAGE1_FIND_MARKS_H
-
 #include "simdjson.h"
 #include "arm64/bitmask.h"
 #include "arm64/simd.h"
@ -71,22 +68,20 @@ really_inline simd8<bool> must_be_continuation(simd8<uint8_t> prev1, simd8<uint8
    return is_second_byte ^ is_third_byte ^ is_fourth_byte;
 }

-#include "generic/buf_block_reader.h"
-#include "generic/json_string_scanner.h"
-#include "generic/json_scanner.h"
+#include "generic/stage1/buf_block_reader.h"
+#include "generic/stage1/json_string_scanner.h"
+#include "generic/stage1/json_scanner.h"

-#include "generic/json_minifier.h"
+#include "generic/stage1/json_minifier.h"
 WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
  return arm64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len);
 }

-#include "generic/utf8_lookup2_algorithm.h"
-#include "generic/json_structural_indexer.h"
+#include "generic/stage1/utf8_lookup2_algorithm.h"
+#include "generic/stage1/json_structural_indexer.h"
 WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept {
  return arm64::stage1::json_structural_indexer::index<64>(buf, len, parser, streaming);
 }

 } // namespace arm64
 } // namespace simdjson
-
-#endif // SIMDJSON_ARM64_STAGE1_FIND_MARKS_H
--- a/src/arm64/stage2.cpp
+++ b/src/arm64/stage2.cpp
@ -0,0 +1,20 @@
+#ifndef SIMDJSON_ARM64_STAGE2_H
+#define SIMDJSON_ARM64_STAGE2_H
+
+#include "simdjson.h"
+#include "arm64/implementation.h"
+#include "arm64/stringparsing.h"
+#include "arm64/numberparsing.h"
+
+namespace simdjson {
+namespace arm64 {
+
+#include "generic/stage2/atomparsing.h"
+#include "generic/stage2/structural_iterator.h"
+#include "generic/stage2/structural_parser.h"
+#include "generic/stage2/streaming_structural_parser.h"
+
+} // namespace arm64
+} // namespace simdjson
+
+#endif // SIMDJSON_ARM64_STAGE2_H
--- a/src/arm64/stage2_build_tape.h
+++ b/src/arm64/stage2_build_tape.h
@ -1,19 +0,0 @@
-#ifndef SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
-#define SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
-
-#include "simdjson.h"
-#include "arm64/implementation.h"
-#include "arm64/stringparsing.h"
-#include "arm64/numberparsing.h"
-
-namespace simdjson {
-namespace arm64 {
-
-#include "generic/atomparsing.h"
-#include "generic/stage2_build_tape.h"
-#include "generic/stage2_streaming_build_tape.h"
-
-} // namespace arm64
-} // namespace simdjson
-
-#endif // SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
--- a/src/arm64/stringparsing.h
+++ b/src/arm64/stringparsing.h
@ -45,7 +45,7 @@ really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8
  };
 }

-#include "generic/stringparsing.h"
+#include "generic/stage2/stringparsing.h"

 } // namespace arm64
 } // namespace simdjson
--- a/src/error.cpp
+++ b/src/error.cpp
@ -1,4 +1,4 @@
-#include "simdjson/error.h"
+#include "simdjson.h"

 namespace simdjson {
 namespace internal {
--- a/src/fallback/numberparsing.h
+++ b/src/fallback/numberparsing.h
@ -26,7 +26,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {

 #define SWAR_NUMBER_PARSING

-#include "generic/numberparsing.h"
+#include "generic/stage2/numberparsing.h"

 } // namespace fallback

--- a/src/fallback/stage1_find_marks.h
+++ b/src/fallback/stage1_find_marks.h
@ -1,6 +1,3 @@
-#ifndef SIMDJSON_FALLBACK_STAGE1_FIND_MARKS_H
-#define SIMDJSON_FALLBACK_STAGE1_FIND_MARKS_H
-
 #include "simdjson.h"
 #include "fallback/implementation.h"

@ -210,5 +207,3 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui

 } // namespace fallback
 } // namespace simdjson
-
-#endif // SIMDJSON_FALLBACK_STAGE1_FIND_MARKS_H
--- a/src/fallback/stage2.cpp
+++ b/src/fallback/stage2.cpp
@ -0,0 +1,16 @@
+#include "simdjson.h"
+
+#include "fallback/implementation.h"
+#include "fallback/stringparsing.h"
+#include "fallback/numberparsing.h"
+
+namespace simdjson {
+namespace fallback {
+
+#include "generic/stage2/atomparsing.h"
+#include "generic/stage2/structural_iterator.h"
+#include "generic/stage2/structural_parser.h"
+#include "generic/stage2/streaming_structural_parser.h"
+
+} // namespace fallback
+} // namespace simdjson
--- a/src/fallback/stage2_build_tape.h
+++ b/src/fallback/stage2_build_tape.h
@ -1,20 +0,0 @@
-#ifndef SIMDJSON_FALLBACK_STAGE2_BUILD_TAPE_H
-#define SIMDJSON_FALLBACK_STAGE2_BUILD_TAPE_H
-
-#include "simdjson.h"
-
-#include "fallback/implementation.h"
-#include "fallback/stringparsing.h"
-#include "fallback/numberparsing.h"
-
-namespace simdjson {
-namespace fallback {
-
-#include "generic/atomparsing.h"
-#include "generic/stage2_build_tape.h"
-#include "generic/stage2_streaming_build_tape.h"
-
-} // namespace fallback
-} // namespace simdjson
-
-#endif // SIMDJSON_FALLBACK_STAGE2_BUILD_TAPE_H
--- a/src/fallback/stringparsing.h
+++ b/src/fallback/stringparsing.h
@ -27,7 +27,7 @@ really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8
  return { src[0] };
 }

-#include "generic/stringparsing.h"
+#include "generic/stage2/stringparsing.h"

 } // namespace fallback
 } // namespace simdjson
--- a/src/generic/stage1/buf_block_reader.h
+++ b/src/generic/stage1/buf_block_reader.h
--- a/src/generic/stage1/json_minifier.h
+++ b/src/generic/stage1/json_minifier.h
@ -1,7 +1,7 @@
 // This file contains the common code every implementation uses in stage1
 // It is intended to be included multiple times and compiled multiple times
 // We assume the file in which it is included already includes
-// "simdjson/stage1_find_marks.h" (this simplifies amalgation)
+// "simdjson/stage1.h" (this simplifies amalgamation)

 namespace stage1 {

--- a/src/generic/stage1/json_scanner.h
+++ b/src/generic/stage1/json_scanner.h
--- a/src/generic/stage1/json_string_scanner.h
+++ b/src/generic/stage1/json_string_scanner.h
--- a/src/generic/stage1/json_structural_indexer.h
+++ b/src/generic/stage1/json_structural_indexer.h
@ -1,7 +1,7 @@
 // This file contains the common code every implementation uses in stage1
 // It is intended to be included multiple times and compiled multiple times
 // We assume the file in which it is included already includes
-// "simdjson/stage1_find_marks.h" (this simplifies amalgation)
+// "simdjson/stage1.h" (this simplifies amalgamation)

 namespace stage1 {

--- a/src/generic/stage1/utf8_fastvalidate_algorithm.h
+++ b/src/generic/stage1/utf8_fastvalidate_algorithm.h
--- a/src/generic/stage1/utf8_lookup2_algorithm.h
+++ b/src/generic/stage1/utf8_lookup2_algorithm.h
--- a/src/generic/stage1/utf8_lookup_algorithm.h
+++ b/src/generic/stage1/utf8_lookup_algorithm.h
--- a/src/generic/stage1/utf8_range_algorithm.h
+++ b/src/generic/stage1/utf8_range_algorithm.h
--- a/src/generic/stage1/utf8_zwegner_algorithm.h
+++ b/src/generic/stage1/utf8_zwegner_algorithm.h
--- a/src/generic/stage2/atomparsing.h
+++ b/src/generic/stage2/atomparsing.h
@ -1,3 +1,4 @@
+namespace stage2 {
 namespace atomparsing {

 really_inline uint32_t string_to_uint32(const char* str) { return *reinterpret_cast<const uint32_t *>(str); }
@ -46,4 +47,5 @@ really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) {
  else { return false; }
 }

-} // namespace atomparsing
+} // namespace atomparsing
+} // namespace stage2
--- a/src/generic/stage2/numberparsing.h
+++ b/src/generic/stage2/numberparsing.h
@ -1,3 +1,4 @@
+namespace stage2 {
 namespace numberparsing {

 // Attempts to compute i * 10^(power) exactly; and if "negative" is
@ -568,3 +569,4 @@ really_inline bool parse_number(UNUSED const uint8_t *const src,
 }

 } // namespace numberparsing
+} // namespace stage2
--- a/src/generic/stage2/streaming_structural_parser.h
+++ b/src/generic/stage2/streaming_structural_parser.h
--- a/src/generic/stage2/stringparsing.h
+++ b/src/generic/stage2/stringparsing.h
@ -3,6 +3,7 @@
 // We assume the file in which it is include already includes
 // "stringparsing.h" (this simplifies amalgation)

+namespace stage2 {
 namespace stringparsing {

 // begin copypasta
@ -118,4 +119,5 @@ WARN_UNUSED really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst
  return nullptr;
 }

-} // namespace stringparsing
+} // namespace stringparsing
+} // namespace stage2
--- a/src/generic/stage2/structural_iterator.h
+++ b/src/generic/stage2/structural_iterator.h
@ -0,0 +1,69 @@
+namespace stage2 {
+
+class structural_iterator { // cursor over structural_indexes, i.e. the positions of structural characters in buf
+public:
+  really_inline structural_iterator(const uint8_t* _buf, size_t _len, const uint32_t *_structural_indexes, size_t next_structural_index)
+    : buf{_buf},
+     len{_len},
+     structural_indexes{_structural_indexes},
+     next_structural{next_structural_index}
+    {}
+  really_inline char advance_char() { // moves to the next structural index and returns the byte found there
+    idx = structural_indexes[next_structural];
+    next_structural++;
+    c = *current(); // cached so current_char() does not have to touch buf again
+    return c;
+  }
+  really_inline char current_char() { // byte cached by the last advance_char() call (0 before the first call)
+    return c;
+  }
+  really_inline const uint8_t* current() { // pointer to the current structural character inside buf
+    return &buf[idx];
+  }
+  really_inline size_t remaining_len() { // bytes from the current structural character to the end of the input
+    return len - idx;
+  }
+  template<typename F>
+  really_inline bool with_space_terminated_copy(const F& f) { // runs f(copy, idx) on a space-padded copy of buf and returns f's result
+    /**
+    * We need to make a copy to make sure that the string is space terminated.
+    * This is not about padding the input, which should already be padded up
+    * to len + SIMDJSON_PADDING. However, we have no control at this stage
+    * on how the padding was done. What if the input string was padded with nulls?
+    * It is quite common for an input string to have an extra null character (C string).
+    * We do not want to allow 9\0 (where \0 is the null character) inside a JSON
+    * document, but the string "9\0" by itself is fine. So we make a copy and
+    * pad the input with spaces when we know that there is just one input element.
+    * This copy is relatively expensive, but it will almost never be called in
+    * practice unless you are in the strange scenario where you have many JSON
+    * documents made of single atoms.
+    */
+    char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
+    if (copy == nullptr) {
+      return true; // NOTE(review): allocation failure yields true, so true presumably means "error" to callers; confirm
+    }
+    memcpy(copy, buf, len);
+    memset(copy + len, ' ', SIMDJSON_PADDING); // the padding bytes become spaces, never nulls
+    bool result = f(reinterpret_cast<const uint8_t*>(copy), idx);
+    free(copy);
+    return result;
+  }
+  really_inline bool past_end(uint32_t n_structural_indexes) { // true once next_structural + 1 exceeds the number of structural indexes
+    return next_structural+1 > n_structural_indexes;
+  }
+  really_inline bool at_end(uint32_t n_structural_indexes) { // true when next_structural + 1 equals the number of structural indexes
+    return next_structural+1 == n_structural_indexes;
+  }
+  really_inline size_t next_structural_index() { // position in structural_indexes that the next advance_char() will read
+    return next_structural;
+  }
+
+  const uint8_t* const buf;
+  const size_t len;
+  const uint32_t* const structural_indexes;
+  size_t next_structural; // next structural index
+  size_t idx{0}; // location of the structural character in the input (buf)
+  uint8_t c{0};  // used to track the (structural) character we are looking at
+};
+
+} // namespace stage2
--- a/src/generic/stage2/structural_parser.h
+++ b/src/generic/stage2/structural_parser.h
@ -1,17 +1,17 @@
 // This file contains the common code every implementation uses for stage2
 // It is intended to be included multiple times and compiled multiple times
 // We assume the file in which it is include already includes
-// "simdjson/stage2_build_tape.h" (this simplifies amalgation)
+// "simdjson/stage2.h" (this simplifies amalgamation)

 namespace stage2 {

+using internal::ret_address;
+
 #ifdef SIMDJSON_USE_COMPUTED_GOTO
-typedef void* ret_address;
 #define INIT_ADDRESSES() { &&array_begin, &&array_continue, &&error, &&finish, &&object_begin, &&object_continue }
 #define GOTO(address) { goto *(address); }
 #define CONTINUE(address) { goto *(address); }
-#else
-typedef char ret_address;
+#else // SIMDJSON_USE_COMPUTED_GOTO
 #define INIT_ADDRESSES() { '[', 'a', 'e', 'f', '{', 'o' };
 #define GOTO(address)                 \
  {                                   \
@ -33,7 +33,7 @@ typedef char ret_address;
      case 'f': goto finish;          \
    }                                 \
  }
-#endif
+#endif // SIMDJSON_USE_COMPUTED_GOTO

 struct unified_machine_addresses {
  ret_address array_begin;
@ -47,72 +47,6 @@ struct unified_machine_addresses {
 #undef FAIL_IF
 #define FAIL_IF(EXPR) { if (EXPR) { return addresses.error; } }

-class structural_iterator {
-public:
-  really_inline structural_iterator(const uint8_t* _buf, size_t _len, const uint32_t *_structural_indexes, size_t next_structural_index)
-    : buf{_buf},
-     len{_len},
-     structural_indexes{_structural_indexes},
-     next_structural{next_structural_index}
-    {}
-  really_inline char advance_char() {
-    idx = structural_indexes[next_structural];
-    next_structural++;
-    c = *current();
-    return c;
-  }
-  really_inline char current_char() {
-    return c;
-  }
-  really_inline const uint8_t* current() {
-    return &buf[idx];
-  }
-  really_inline size_t remaining_len() {
-    return len - idx;
-  }
-  template<typename F>
-  really_inline bool with_space_terminated_copy(const F& f) {
-    /**
-    * We need to make a copy to make sure that the string is space terminated.
-    * This is not about padding the input, which should already padded up
-    * to len + SIMDJSON_PADDING. However, we have no control at this stage
-    * on how the padding was done. What if the input string was padded with nulls?
-    * It is quite common for an input string to have an extra null character (C string).
-    * We do not want to allow 9\0 (where \0 is the null character) inside a JSON
-    * document, but the string "9\0" by itself is fine. So we make a copy and
-    * pad the input with spaces when we know that there is just one input element.
-    * This copy is relatively expensive, but it will almost never be called in
-    * practice unless you are in the strange scenario where you have many JSON
-    * documents made of single atoms.
-    */
-    char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
-    if (copy == nullptr) {
-      return true;
-    }
-    memcpy(copy, buf, len);
-    memset(copy + len, ' ', SIMDJSON_PADDING);
-    bool result = f(reinterpret_cast<const uint8_t*>(copy), idx);
-    free(copy);
-    return result;
-  }
-  really_inline bool past_end(uint32_t n_structural_indexes) {
-    return next_structural+1 > n_structural_indexes;
-  }
-  really_inline bool at_end(uint32_t n_structural_indexes) {
-    return next_structural+1 == n_structural_indexes;
-  }
-  really_inline size_t next_structural_index() {
-    return next_structural;
-  }
-
-  const uint8_t* const buf;
-  const size_t len;
-  const uint32_t* const structural_indexes;
-  size_t next_structural; // next structural index
-  size_t idx{0}; // location of the structural character in the input (buf)
-  uint8_t c{0};  // used to track the (structural) character we are looking at
-};
-
 struct number_writer {
  parser &doc_parser;
  
--- a/src/haswell/numberparsing.h
+++ b/src/haswell/numberparsing.h
@ -39,7 +39,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {

 #define SWAR_NUMBER_PARSING

-#include "generic/numberparsing.h"
+#include "generic/stage2/numberparsing.h"

 } // namespace haswell

--- a/src/haswell/stage1_find_marks.h
+++ b/src/haswell/stage1_find_marks.h
@ -1,6 +1,3 @@
-#ifndef SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
-#define SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
-
 #include "simdjson.h"

 #include "haswell/bitmask.h"
@ -60,17 +57,17 @@ really_inline simd8<bool> must_be_continuation(simd8<uint8_t> prev1, simd8<uint8
  return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0);
 }

-#include "generic/buf_block_reader.h"
-#include "generic/json_string_scanner.h"
-#include "generic/json_scanner.h"
+#include "generic/stage1/buf_block_reader.h"
+#include "generic/stage1/json_string_scanner.h"
+#include "generic/stage1/json_scanner.h"

-#include "generic/json_minifier.h"
+#include "generic/stage1/json_minifier.h"
 WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
  return haswell::stage1::json_minifier::minify<128>(buf, len, dst, dst_len);
 }

-#include "generic/utf8_lookup2_algorithm.h"
-#include "generic/json_structural_indexer.h"
+#include "generic/stage1/utf8_lookup2_algorithm.h"
+#include "generic/stage1/json_structural_indexer.h"
 WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept {
  return haswell::stage1::json_structural_indexer::index<128>(buf, len, parser, streaming);
 }
@ -79,5 +76,3 @@ WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, pa

 } // namespace simdjson
 UNTARGET_REGION
-
-#endif // SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
--- a/src/haswell/stage2.cpp
+++ b/src/haswell/stage2.cpp
@ -0,0 +1,17 @@
+#include "simdjson.h"
+#include "haswell/implementation.h"
+#include "haswell/stringparsing.h"
+#include "haswell/numberparsing.h"
+
+TARGET_HASWELL
+namespace simdjson {
+namespace haswell {
+
+#include "generic/stage2/atomparsing.h"
+#include "generic/stage2/structural_iterator.h"
+#include "generic/stage2/structural_parser.h"
+#include "generic/stage2/streaming_structural_parser.h"
+
+} // namespace haswell
+} // namespace simdjson
+UNTARGET_REGION
--- a/src/haswell/stage2_build_tape.h
+++ b/src/haswell/stage2_build_tape.h
@ -1,21 +0,0 @@
-#ifndef SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
-#define SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
-
-#include "simdjson.h"
-#include "haswell/implementation.h"
-#include "haswell/stringparsing.h"
-#include "haswell/numberparsing.h"
-
-TARGET_HASWELL
-namespace simdjson {
-namespace haswell {
-
-#include "generic/atomparsing.h"
-#include "generic/stage2_build_tape.h"
-#include "generic/stage2_streaming_build_tape.h"
-
-} // namespace haswell
-} // namespace simdjson
-UNTARGET_REGION
-
-#endif // SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
--- a/src/haswell/stringparsing.h
+++ b/src/haswell/stringparsing.h
@ -41,7 +41,7 @@ really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8
  };
 }

-#include "generic/stringparsing.h"
+#include "generic/stage2/stringparsing.h"

 } // namespace haswell
 } // namespace simdjson
--- a/src/jsoncharutils.h
+++ b/src/jsoncharutils.h
@ -3,6 +3,12 @@

 #include "simdjson.h"

+#ifdef JSON_TEST_STRINGS
+void found_string(const uint8_t *buf, const uint8_t *parsed_begin,
+                  const uint8_t *parsed_end);
+void found_bad_string(const uint8_t *buf);
+#endif
+
 namespace simdjson {
 // structural chars here are
 // they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL)
@ -1328,4 +1334,4 @@ const uint64_t mantissa_128[] = {

 } // namespace simdjson

-#endif
+#endif // SIMDJSON_JSONCHARUTILS_H
--- a/src/simdjson.cpp
+++ b/src/simdjson.cpp
@ -5,7 +5,28 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS

 #include "error.cpp"
 #include "implementation.cpp"
-#include "stage1_find_marks.cpp"
-#include "stage2_build_tape.cpp"
+
+// Anything in the top level directory MUST be included outside of the #if statements
+// below, or amalgamation will screw them up!
+#include "isadetection.h"
+#include "jsoncharutils.h"
+#include "simdprune_tables.h"
+
+#if SIMDJSON_IMPLEMENTATION_ARM64
+#include "arm64/stage1.cpp"
+#include "arm64/stage2.cpp"
+#endif
+#if SIMDJSON_IMPLEMENTATION_FALLBACK
+#include "fallback/stage1.cpp"
+#include "fallback/stage2.cpp"
+#endif
+#if SIMDJSON_IMPLEMENTATION_HASWELL
+#include "haswell/stage1.cpp"
+#include "haswell/stage2.cpp"
+#endif
+#if SIMDJSON_IMPLEMENTATION_WESTMERE
+#include "westmere/stage1.cpp"
+#include "westmere/stage2.cpp"
+#endif

 SIMDJSON_POP_DISABLE_WARNINGS
--- a/src/stage1_find_marks.cpp
+++ b/src/stage1_find_marks.cpp
@ -1,12 +0,0 @@
-#if SIMDJSON_IMPLEMENTATION_ARM64
-#include "arm64/stage1_find_marks.h"
-#endif
-#if SIMDJSON_IMPLEMENTATION_FALLBACK
-#include "fallback/stage1_find_marks.h"
-#endif
-#if SIMDJSON_IMPLEMENTATION_HASWELL
-#include "haswell/stage1_find_marks.h"
-#endif
-#if SIMDJSON_IMPLEMENTATION_WESTMERE
-#include "westmere/stage1_find_marks.h"
-#endif
--- a/src/stage2_build_tape.cpp
+++ b/src/stage2_build_tape.cpp
@ -1,25 +0,0 @@
-#include "simdjson.h"
-#include <cassert>
-#include <cstring>
-#include "jsoncharutils.h"
-
-using namespace simdjson;
-
-#ifdef JSON_TEST_STRINGS
-void found_string(const uint8_t *buf, const uint8_t *parsed_begin,
-                  const uint8_t *parsed_end);
-void found_bad_string(const uint8_t *buf);
-#endif
-
-#if SIMDJSON_IMPLEMENTATION_ARM64
-#include "arm64/stage2_build_tape.h"
-#endif
-#if SIMDJSON_IMPLEMENTATION_FALLBACK
-#include "fallback/stage2_build_tape.h"
-#endif
-#if SIMDJSON_IMPLEMENTATION_HASWELL
-#include "haswell/stage2_build_tape.h"
-#endif
-#if SIMDJSON_IMPLEMENTATION_WESTMERE
-#include "westmere/stage2_build_tape.h"
-#endif
--- a/src/westmere/numberparsing.h
+++ b/src/westmere/numberparsing.h
@ -40,7 +40,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {

 #define SWAR_NUMBER_PARSING

-#include "generic/numberparsing.h"
+#include "generic/stage2/numberparsing.h"

 } // namespace westmere

--- a/src/westmere/stage1_find_marks.h
+++ b/src/westmere/stage1_find_marks.h
@ -1,6 +1,3 @@
-#ifndef SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
-#define SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
-
 #include "simdjson.h"
 #include "westmere/bitmask.h"
 #include "westmere/simd.h"
@ -59,17 +56,17 @@ really_inline simd8<bool> must_be_continuation(simd8<uint8_t> prev1, simd8<uint8
  return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0);
 }

-#include "generic/buf_block_reader.h"
-#include "generic/json_string_scanner.h"
-#include "generic/json_scanner.h"
+#include "generic/stage1/buf_block_reader.h"
+#include "generic/stage1/json_string_scanner.h"
+#include "generic/stage1/json_scanner.h"

-#include "generic/json_minifier.h"
+#include "generic/stage1/json_minifier.h"
 WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
  return westmere::stage1::json_minifier::minify<64>(buf, len, dst, dst_len);
 }

-#include "generic/utf8_lookup2_algorithm.h"
-#include "generic/json_structural_indexer.h"
+#include "generic/stage1/utf8_lookup2_algorithm.h"
+#include "generic/stage1/json_structural_indexer.h"
 WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept {
  return westmere::stage1::json_structural_indexer::index<64>(buf, len, parser, streaming);
 }
@ -78,5 +75,3 @@ WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, pa

 } // namespace simdjson
 UNTARGET_REGION
-
-#endif // SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
--- a/src/westmere/stage2.cpp
+++ b/src/westmere/stage2.cpp
@ -0,0 +1,17 @@
+#include "simdjson.h"
+#include "westmere/implementation.h"
+#include "westmere/stringparsing.h"
+#include "westmere/numberparsing.h"
+
+TARGET_WESTMERE
+namespace simdjson {
+namespace westmere {
+
+#include "generic/stage2/atomparsing.h"
+#include "generic/stage2/structural_iterator.h"
+#include "generic/stage2/structural_parser.h"
+#include "generic/stage2/streaming_structural_parser.h"
+
+} // namespace westmere
+} // namespace simdjson
+UNTARGET_REGION
--- a/src/westmere/stage2_build_tape.h
+++ b/src/westmere/stage2_build_tape.h
@ -1,20 +0,0 @@
-#ifndef SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
-#define SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
-
-#include "simdjson.h"
-#include "westmere/implementation.h"
-#include "westmere/stringparsing.h"
-#include "westmere/numberparsing.h"
-
-TARGET_WESTMERE
-namespace simdjson {
-namespace westmere {
-
-#include "generic/atomparsing.h"
-#include "generic/stage2_build_tape.h"
-#include "generic/stage2_streaming_build_tape.h"
-
-} // namespace westmere
-} // namespace simdjson
-UNTARGET_REGION
-#endif // SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
--- a/src/westmere/stringparsing.h
+++ b/src/westmere/stringparsing.h
@ -43,7 +43,7 @@ really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8
  };
 }

-#include "generic/stringparsing.h"
+#include "generic/stage2/stringparsing.h"

 } // namespace westmere
 } // namespace simdjson