diff --git a/Makefile b/Makefile
index 2ccb4ef8..72b976eb 100644
--- a/Makefile
+++ b/Makefile
@@ -8,11 +8,11 @@
 
 CXXFLAGS =  -std=c++11 -g2 -O2 -march=native -Wall -Wextra -Wshadow -Iinclude  -Ibenchmark/linux -Idependencies/double-conversion -Idependencies/rapidjson/include -Ldependencies/double-conversion/release
 LIBFLAGS = -ldouble-conversion
-EXECUTABLES=parse jsoncheck minifiercompetition parsingcompetition parseunified 
+EXECUTABLES=parse jsoncheck minifiercompetition parsingcompetition
 DOUBLEEXECUTABLES=parsedouble jsoncheckdouble parsingcompetitiondouble
 
-HEADERS=include/jsonparser/jsonparser.h include/jsonparser/common_defs.h include/jsonparser/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/jsonparser/simdjson_internal.h include/jsonparser/stage1_find_marks.h include/jsonparser/stage2_flatten.h include/jsonparser/stage3_ape_machine.h include/jsonparser/stage4_shovel_machine.h include/jsonparser/stage34_unified.h
-LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp     src/stage2_flatten.cpp        src/stage3_ape_machine.cpp    src/stage4_shovel_machine.cpp src/stage34_unified.cpp
+HEADERS=include/jsonparser/jsonparser.h include/jsonparser/common_defs.h include/jsonparser/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/jsonparser/simdjson_internal.h include/jsonparser/stage1_find_marks.h include/jsonparser/stage2_flatten.h include/jsonparser/stage34_unified.h
+LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp     src/stage2_flatten.cpp        src/stage34_unified.cpp
 MINIFIERHEADERS=include/jsonparser/jsonminifier.h include/jsonparser/simdprune_tables.h
 MINIFIERLIBFILES=src/jsonminifier.cpp
 
@@ -39,9 +39,6 @@ bench: benchmarks/bench.cpp $(RAPIDJSON_INCLUDE) $(HEADERS)
 	$(CXX) -std=c++11 -O3 -o $@ benchmarks/bench.cpp -I$(RAPIDJSON_INCLUDE) -Iinclude  -march=native -lm -Wall -Wextra -Wno-narrowing
 
 
-parseunified: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
-	$(CXX) $(CXXFLAGS) -o parseunified $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS) -DTEST_UNIFIED
-
 parse: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
 	$(CXX) $(CXXFLAGS) -o parse $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
 
diff --git a/benchmark/parse.cpp b/benchmark/parse.cpp
index 2a8aeb94..22dc0968 100644
--- a/benchmark/parse.cpp
+++ b/benchmark/parse.cpp
@@ -22,8 +22,6 @@
 #include <vector>
 #include <x86intrin.h>
 
-//#define TEST_UNIFIED
-
 /// Fixme: enable doube conv
 // #define DOUBLECONV
 #ifdef DOUBLECONV
@@ -39,8 +37,6 @@ using namespace double_conversion;
 #include "jsonparser/simdjson_internal.h"
 #include "jsonparser/stage1_find_marks.h"
 #include "jsonparser/stage2_flatten.h"
-#include "jsonparser/stage3_ape_machine.h"
-#include "jsonparser/stage4_shovel_machine.h"
 #include "jsonparser/stage34_unified.h"
 using namespace std;
 
@@ -129,7 +125,6 @@ int main(int argc, char *argv[]) {
     cerr << "Currently only support JSON files < 16MB\n";
     exit(1);
   }
-  init_state_machine();
 
   pj.n_structural_indexes = 0;
   // we have potentially 1 structure per byte of input
@@ -159,8 +154,8 @@ int main(int argc, char *argv[]) {
   LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
   vector<u64> results;
   results.resize(evts.size());
-  unsigned long cy1 = 0, cy2 = 0, cy3 = 0, cy4 = 0;
-  unsigned long cl1 = 0, cl2 = 0, cl3 = 0, cl4 = 0;
+  unsigned long cy1 = 0, cy2 = 0, cy3 = 0;
+  unsigned long cl1 = 0, cl2 = 0, cl3 = 0;
 #endif
   bool isok = true;
   for (u32 i = 0; i < iterations; i++) {
@@ -191,31 +186,6 @@ int main(int argc, char *argv[]) {
     unified.start();
 #endif
 
-#ifndef TEST_UNIFIED
-
-    isok = ape_machine(p.first, p.second, pj);
-#ifndef SQUASH_COUNTERS
-    unified.end(results);
-    cy3 += results[0];
-    cl3 += results[1];
-    if (!isok) {
-      cout << "Failed out during stage 3\n";
-      break;
-    }
-    unified.start();
-#endif
-    isok = shovel_machine(p.first, p.second, pj);
-#ifndef SQUASH_COUNTERS
-    unified.end(results);
-    cy4 += results[0];
-    cl4 += results[1];
-#endif
-    if (!isok) {
-      cout << "Failed out during stage 4\n";
-      break;
-    }
-#else
-
     isok = unified_machine(p.first, p.second, pj);
 #ifndef SQUASH_COUNTERS
     unified.end(results);
@@ -227,7 +197,6 @@ int main(int argc, char *argv[]) {
     }
 #endif
 
-#endif
     auto end = std::chrono::steady_clock::now();
     std::chrono::duration<double> secs = end - start;
     res[i] = secs.count();
@@ -237,7 +206,7 @@ int main(int argc, char *argv[]) {
   printf("number of bytes %ld number of structural chars %d ratio %.3f\n",
          p.second, pj.n_structural_indexes,
          (double)pj.n_structural_indexes / p.second);
-  unsigned long total = cy1 + cy2 + cy3 + cy4;
+  unsigned long total = cy1 + cy2 + cy3;
 
   printf(
       "stage 1 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
@@ -261,14 +230,6 @@ int main(int argc, char *argv[]) {
   printf("%.2f cycles per structural character.\n",
          (double)cy3 / (iterations * pj.n_structural_indexes));
 
-  printf(
-      "stage 4 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
-      cl4, cy4, 100. * cy4 / total, (double)cl4 / cy4);
-  printf(" stage 4 runs at %.2f cycles per input byte and ",
-         (double)cy4 / (iterations * p.second));
-  printf("%.2f cycles per structural character.\n",
-         (double)cy4 / (iterations * pj.n_structural_indexes));
-
   printf(" all stages: %.2f cycles per input byte.\n",
          (double)total / (iterations * p.second));
 #endif
diff --git a/benchmark/parsingcompetition.cpp b/benchmark/parsingcompetition.cpp
index ba4dad71..dca069e3 100644
--- a/benchmark/parsingcompetition.cpp
+++ b/benchmark/parsingcompetition.cpp
@@ -46,7 +46,6 @@ int main(int argc, char *argv[]) {
   int repeat = 10;
   int volume = p.second;
   BEST_TIME(json_parse(p.first, p.second, pj), true, , repeat, volume, true);
-  BEST_TIME(json_parse_4stages(p.first, p.second, pj), true, , repeat, volume, true);
 
   rapidjson::Document d;
 
diff --git a/include/jsonparser/jsonparser.h b/include/jsonparser/jsonparser.h
index 33d5d0db..6b138e24 100644
--- a/include/jsonparser/jsonparser.h
+++ b/include/jsonparser/jsonparser.h
@@ -5,8 +5,6 @@
 #include "simdjson_internal.h"
 #include "stage1_find_marks.h"
 #include "stage2_flatten.h"
-#include "stage3_ape_machine.h"
-#include "stage4_shovel_machine.h"
 #include "stage34_unified.h"
 
 // Allocate a ParsedJson structure that can support document
@@ -22,6 +20,3 @@ void deallocate_ParsedJson(ParsedJson *pj_ptr);
 // Parse a document found in buf, need to preallocate ParsedJson.
 // Return false in case of a failure.
 bool json_parse(const u8 *buf, size_t len, ParsedJson &pj);
-
-// like json_parse but users 4 stages, slower.
-bool json_parse_4stages(const u8 *buf, size_t len, ParsedJson &pj);
diff --git a/include/jsonparser/stage3_ape_machine.h b/include/jsonparser/stage3_ape_machine.h
deleted file mode 100644
index 4afd1a0b..00000000
--- a/include/jsonparser/stage3_ape_machine.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#pragma once
-
-#include "common_defs.h"
-#include "simdjson_internal.h"
-
-void init_state_machine();
-bool ape_machine(const u8 *buf, size_t len, ParsedJson &pj);
diff --git a/include/jsonparser/stage4_shovel_machine.h b/include/jsonparser/stage4_shovel_machine.h
deleted file mode 100644
index 44132f1d..00000000
--- a/include/jsonparser/stage4_shovel_machine.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#pragma once
-
-#include "common_defs.h"
-#include "simdjson_internal.h"
-
-bool shovel_machine(const u8 *buf, size_t len, ParsedJson &pj);
diff --git a/src/jsonparser.cpp b/src/jsonparser.cpp
index def9f436..9ad4daea 100644
--- a/src/jsonparser.cpp
+++ b/src/jsonparser.cpp
@@ -44,27 +44,6 @@ void deallocate_ParsedJson(ParsedJson *pj_ptr) {
   delete pj_ptr;
 }
 
-// parse a document found in buf, need to preallocate ParsedJson. 
-// this can probably be considered a legacy function at this point.
-bool json_parse_4stages(const u8 *buf, size_t len, ParsedJson &pj) {
-  if (pj.bytecapacity < len) {
-    std::cerr << "Your ParsedJson cannot support documents that big: " << len
-              << std::endl;
-    return false;
-  }
-  bool isok = find_structural_bits(buf, len, pj);
-  if (isok) {
-    isok = flatten_indexes(len, pj);
-  }
-  if (isok) {
-    isok = ape_machine(buf, len, pj);
-  }
-  if (isok) {
-    isok = shovel_machine(buf, len, pj);
-  }
-  return isok;
-}
-
 // parse a document found in buf, need to preallocate ParsedJson.
 bool json_parse(const u8 *buf, size_t len, ParsedJson &pj) {
   if (pj.bytecapacity < len) {
diff --git a/src/stage3_ape_machine.cpp b/src/stage3_ape_machine.cpp
deleted file mode 100644
index 592e71ed..00000000
--- a/src/stage3_ape_machine.cpp
+++ /dev/null
@@ -1,338 +0,0 @@
-#ifdef _MSC_VER
-/* Microsoft C/C++-compatible compiler */
-#include <intrin.h>
-#else
-#include <immintrin.h>
-#include <x86intrin.h>
-#endif
-
-#include <cassert>
-#include <cstring>
-
-#include "jsonparser/common_defs.h"
-#include "jsonparser/simdjson_internal.h"
-
-// the ape machine consists of two parts:
-//
-// 1) The "state machine", which is a multiple channel per-level state machine
-//    It is a conventional DFA except in that it 'changes track' on {}[]
-//    characters
-//
-// 2) The "tape machine": this records offsets of various structures as they go
-// by
-//    These structures are either u32 offsets of other tapes or u32 offsets into
-//    our input or structures.
-//
-// The state machine doesn't record ouput.
-// The tape machine doesn't validate.
-//
-// The output of the tape machine is meaningful only if the state machine is in
-// non-error states.
-
-// depth adjustment is strictly based on whether we are {[ or }]
-
-// depth adjustment is a pre-increment which, in effect, means that a {[
-// contained in an object is in the level one deeper, while the corresponding }]
-// is at the level
-
-// TAPE MACHINE DEFINITIONS
-
-const u32 DEPTH_PLUS_ONE = 0x01000000;
-const u32 DEPTH_ZERO = 0x00000000;
-const u32 DEPTH_MINUS_ONE = 0xff000000;
-const u32 WRITE_ZERO = 0x0;
-const u32 WRITE_FOUR = 0x1;
-
-const u32 CDF = DEPTH_ZERO | WRITE_ZERO; // default 'control'
-const u32 C04 = DEPTH_ZERO | WRITE_FOUR;
-const u32 CP4 = DEPTH_PLUS_ONE | WRITE_FOUR;
-const u32 CM4 = DEPTH_MINUS_ONE | WRITE_FOUR;
-
-inline s8 get_depth_adjust(u32 control) { return (s8)(((s32)control) >> 24); }
-inline size_t get_write_size(u32 control) { return control & 0xff; }
-
-const u32 char_control[256] = {
-    // nothing interesting from 0x00-0x20
-    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
-    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
-    CDF, CDF,
-
-    // " is 0x22, - is 0x2d
-    CDF, CDF, C04, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, C04, CDF,
-    CDF,
-
-    // numbers are 0x30-0x39
-    C04, C04, C04, C04, C04, C04, C04, C04, C04, C04, CDF, CDF, CDF, CDF, CDF,
-    CDF,
-
-    // nothing interesting from 0x40-0x49
-    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
-    CDF,
-
-    // 0x5b/5d are []
-    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CP4, CDF, CM4, CDF,
-    CDF,
-
-    // f is 0x66 n is 0x6e
-    CDF, CDF, CDF, CDF, CDF, CDF, C04, CDF, CDF, CDF, CDF, CDF, CDF, CDF, C04,
-    CDF,
-
-    // 0x7b/7d are {}, 74 is t
-    CDF, CDF, CDF, CDF, C04, CDF, CDF, CDF, CDF, CDF, CDF, CP4, CDF, CM4, CDF,
-    CDF,
-
-    // nothing interesting from 0x80-0xff
-    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
-    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
-    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
-    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
-    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
-    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
-    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
-    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
-    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF};
-
-// all of this stuff needs to get moved somewhere reasonable
-// like our ParsedJson structure
-/*
-u64 tape[MAX_TAPE];
-u32 tape_locs[MAX_DEPTH];
-u8 string_buf[512*1024];
-u8 * current_string_buf_loc;
-u8 number_buf[512*1024]; // holds either doubles or longs, really
-u8 * current_number_buf_loc;
-*/
-
-// STATE MACHINE DECLARATIONS
-const u32 MAX_STATES = 16;
-
-/**
- * It is annoying to have to call init_state_machine each time.
- * Better to precompute the (small) result into a header file.
- */
-// u32 trans[MAX_STATES][256];
-#include "jsonparser/transitions.h"
-
-u32 states[MAX_DEPTH];
-const int START_STATE = 1;
-
-u32 valid_end_states[MAX_STATES] = {
-    0, // 0 state is by definition an error
-    1, // ok to still be in start state
-    1, // state 2: we've seen an { - if we left this level it's ok
-    0, // state 3 is abolished, we shouldn't be in it
-
-    0, // state 4 means we saw a string inside an object. We can't end like
-       // this!
-    0, // similarly state 5 means we saw a string followed by a colon.
-    0, // state 6 is abolished
-    1, // it's ok to finish on 7
-
-    0, // state 8 we've seen a comma inside an object - can't finish here
-    1, // state 9 is like state 2 only for arrays, so ok
-    0, // state 10 abolished
-    1, // state 11 is ok to finish on, we just saw a unary inside a array
-
-    0, // state 12 we've just seen a comma inside an array - can't finish
-    0, // state 13 is our weird start state. I think we shouldn't end on it as
-       // we need to see something
-    1, // state 14 is ok. Its an error to see something *more* here but not to
-       // be in this state
-    0, // we don't use state 15
-};
-
-// weird sub-machine for starting depth only
-// we start at 13 and go to 14 on a single UNARY
-// 14 doesn't have to have any transitions. Anything
-// else arrives after the single thing it's an error
-const int START_DEPTH_START_STATE = 13;
-
-// ANYTHING_IS_ERROR_STATE is useful both as a target
-// for a transition at the start depth and also as
-// a good initial value for "red line" depths; that
-// is, depths that are maintained strictly to avoid
-// undefined behavior (e.g. depths below the starting
-// depth).
-const int ANYTHING_IS_ERROR_STATE = 14;
-
-void init_state_machine() {
-  // states 10 and 6 eliminated
-
-  trans[1][(int)'{'] = 2;
-  trans[2][(int)'"'] = 4;
-  trans[4][(int)':'] = 5;
-  // 5->7 on all values ftn0123456789-"
-  trans[7][(int)','] = 8;
-  trans[8][(int)'"'] = 4;
-
-  trans[1][(int)'['] = 9;
-  // 9->11 on all values ftn0123456789-"
-  trans[11][(int)','] = 12;
-  // 12->11 on all values ftn0123456789-"
-
-  const char *UNARIES = "}]ftn0123456789-\"";
-  for (u32 i = 0; i < strlen(UNARIES); i++) {
-    trans[5][(u32)UNARIES[i]] = 7;
-    trans[9][(u32)UNARIES[i]] = 11;
-    trans[12][(u32)UNARIES[i]] = 11;
-#ifdef PERMIT_RANDOM_UNARIES_AT_TOP_LEVEL
-    // NOTE: if we permit JSON documents that
-    // contain a single number or string, then we
-    // allow all the unaries at the top level
-    trans[13][(u32)UNARIES[i]] = 14;
-#endif
-  }
-
-#ifndef PERMIT_RANDOM_UNARIES_AT_TOP_LEVEL
-  // NOTE: if we don't permit JSON documents that
-  // that contain a single number or string, we must
-  // make sure we accept the top-level closing braces
-  // that are delivered to the start depth only
-  trans[13][(int)'}'] = 14;
-  trans[13][(int)']'] = 14;
-#endif
-
-  // back transitions when new things are open
-  trans[2][(int)'{'] = 2;
-  trans[7][(int)'{'] = 2;
-  trans[9][(int)'{'] = 2;
-  trans[11][(int)'{'] = 2;
-  trans[2][(int)'['] = 9;
-  trans[7][(int)'['] = 9;
-  trans[9][(int)'['] = 9;
-  trans[11][(int)'['] = 9;
-}
-
-bool ape_machine(const u8 *buf, UNUSED size_t len, ParsedJson &pj) {
-
-  // NOTE - our depth is used by both the tape machine and the state machine
-  // Further, in production we will set it to a largish value in a generous
-  // buffer as a rogue input could consist of many {[ characters or many }]
-  // characters. We aren't busily checking errors (and in fact, a aggressive
-  // sequence of [ characters is actually valid input!) so something that blows
-  // out maximum depth will need to be periodically checked for, as will
-  // something that tries to set depth very low. If we set our starting depth,
-  // say, to 256, we can tolerate 256 bogus close brace characters without
-  // aggressively going wrong and writing to bad memory Note that any specious
-  // depth can have a specious tape associated with and all these specious
-  // depths can share a region of the tape - it's harmless. Since tape is
-  // one-way, any movement in a specious tape is an error (so we can detect
-  // max_depth violations by making sure that specious tape locations haven't
-  // moved from their starting values)
-
-  u32 depth = START_DEPTH;
-
-  for (u32 i = 0; i < MAX_DEPTH; i++) {
-    pj.tape_locs[i] = i * MAX_TAPE_ENTRIES;
-    if (i == START_DEPTH) {
-      states[i] = START_DEPTH_START_STATE;
-    } else if ((i < START_DEPTH) || (i >= REDLINE_DEPTH)) {
-      states[i] = ANYTHING_IS_ERROR_STATE;
-    } else {
-      states[i] = START_STATE;
-    }
-  }
-
-  pj.current_string_buf_loc = pj.string_buf;
-  pj.current_number_buf_loc = pj.number_buf;
-
-  u32 error_sump = 0;
-  u32 old_tape_loc = pj.tape_locs[depth]; // need to initialize for first write
-
-  u32 next_idx = pj.structural_indexes[0];
-  u8 next_c = buf[next_idx];
-  u32 next_control = char_control[next_c];
-
-  for (u32 i = 0; i < pj.n_structural_indexes; i++) {
-
-    // very periodic safety checking. This does NOT guarantee that we
-    // haven't been in our dangerous zones above or below our normal
-    // depths. It ONLY checks to be sure that we don't manage to leave
-    // these zones and write completely off our tape.
-    if (!(i % DEPTH_SAFETY_MARGIN)) {
-      if (depth < START_DEPTH || depth >= REDLINE_DEPTH) {
-        error_sump |= 1;
-        break;
-      }
-    }
-
-    u32 idx = next_idx;
-    u8 c = next_c;
-    u32 control = next_control;
-
-    next_idx = pj.structural_indexes[i + 1];
-    next_c = buf[next_idx];
-    next_control = char_control[next_c];
-
-    // TAPE MACHINE
-    s8 depth_adjust = get_depth_adjust(control);
-    u8 write_size = get_write_size(control);
-    u32 write_val = (depth_adjust != 0) ? old_tape_loc : idx;
-    depth += depth_adjust;
-#ifdef DEBUG
-    cout << "i: " << i << " idx: " << idx << " c " << c << "\n";
-    cout << "TAPE MACHINE: depth change " << (s32)depth_adjust << " write_size "
-         << (u32)write_size << " current_depth: " << depth << "\n";
-#endif
-
-    // STATE MACHINE - hoisted here to fill in during the tape machine's
-    // latencies
-#ifdef DEBUG
-    cout << "STATE MACHINE: state[depth] pre " << states[depth] << " ";
-#endif
-    states[depth] = trans[states[depth]][c];
-#ifdef DEBUG
-    cout << "post " << states[depth] << "\n";
-#endif
-    // TAPE MACHINE, again
-    pj.tape[pj.tape_locs[depth]] = write_val | (((u64)c) << 56);
-    old_tape_loc = pj.tape_locs[depth] += write_size;
-  }
-
-  if (depth != START_DEPTH) {
-    // We haven't returned to our start depth, so our braces can't possibly
-    // match Note this doesn't exclude the possibility that we have improperly
-    // matched { } or [] pairs
-    return false;
-  }
-
-  for (u32 i = 0; i < MAX_DEPTH; i++) {
-    if (!valid_end_states[states[i]]) {
-#ifdef DEBUG
-      printf("Invalid ending state: states[%d] == %d\n", states[i]);
-#endif
-      return false;
-    }
-  }
-
-#define DUMP_TAPES
-#ifdef DEBUG
-  for (u32 i = 0; i < MAX_DEPTH; i++) {
-    u32 start_loc = i * MAX_TAPE_ENTRIES;
-    cout << " tape section i " << i;
-    if (i == START_DEPTH) {
-      cout << "   (START) ";
-    } else if ((i < START_DEPTH) || (i >= REDLINE_DEPTH)) {
-      cout << " (REDLINE) ";
-    } else {
-      cout << "  (NORMAL) ";
-    }
-
-    cout << " from: " << start_loc << " to: " << tape_locs[i] << " "
-         << " size: " << (tape_locs[i] - start_loc) << "\n";
-    cout << " state: " << states[i] << "\n";
-#ifdef DUMP_TAPES
-    for (u32 j = start_loc; j < tape_locs[i]; j++) {
-      if (tape[j]) {
-        cout << "j: " << j << " tape[j] char " << (char)(tape[j] >> 56)
-             << " tape[j][0..55]: " << (tape[j] & 0xffffffffffffffULL) << "\n";
-      }
-    }
-#endif
-  }
-#endif
-  if (error_sump) {
-    return false;
-  }
-  return true;
-}
diff --git a/src/stage4_shovel_machine.cpp b/src/stage4_shovel_machine.cpp
deleted file mode 100644
index 2048718a..00000000
--- a/src/stage4_shovel_machine.cpp
+++ /dev/null
@@ -1,654 +0,0 @@
-#ifdef _MSC_VER
-/* Microsoft C/C++-compatible compiler */
-#include <intrin.h>
-#else
-#include <immintrin.h>
-#include <x86intrin.h>
-#endif
-
-#include <cassert>
-#include <cstring>
-
-#include "jsonparser/common_defs.h"
-#include "jsonparser/simdjson_internal.h"
-
-// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
-// these go into the first 3 buckets of the comparison (1/2/4)
-
-// we are also interested in the four whitespace characters
-// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
-
-const u32 structural_or_whitespace_negated[256] = {
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
-
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
-
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
-
-// return non-zero if not a structural or whitespace char
-// zero otherwise
-really_inline u32 is_not_structural_or_whitespace(u8 c) {
-  return structural_or_whitespace_negated[c];
-}
-
-// These chars yield themselves: " \ /
-// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab
-// u not handled in this table as it's complex
-const u8 escape_map[256] = {
-    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0, // 0x0.
-    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
-    0, 0, 0x22, 0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0x2f,
-    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
-
-    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0, // 0x4.
-    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0x5c, 0, 0,    0, // 0x5.
-    0, 0, 0x08, 0, 0,    0, 0x12, 0, 0, 0, 0, 0, 0,    0, 0x0a, 0, // 0x6.
-    0, 0, 0x0d, 0, 0x09, 0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0, // 0x7.
-
-    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
-    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
-    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
-    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
-
-    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
-    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
-    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
-    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
-};
-
-const u32 leading_zeros_to_utf_bytes[33] = {
-    1, 1, 1, 1, 1, 1, 1, 1,           // 7 bits for first one
-    2, 2, 2, 2,                       // 11 bits for next
-    3, 3, 3, 3, 3,                    // 16 bits for next
-    4, 4, 4, 4, 4,                    // 21 bits for next
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; // error
-
-const u32 UTF_PDEP_MASK[5] = {0x00, // error
-                              0x7f, 0x1f3f, 0x0f3f3f, 0x073f3f3f};
-
-const u32 UTF_OR_MASK[5] = {0x00, // error
-                            0x00, 0xc080, 0xe08080, 0xf0808080};
-
-bool is_hex_digit(u8 v) {
-  if (v >= '0' && v <= '9')
-    return true;
-  v &= 0xdf;
-  if (v >= 'A' && v <= 'F')
-    return true;
-  return false;
-}
-
-u8 digit_to_val(u8 v) {
-  if (v >= '0' && v <= '9')
-    return v - '0';
-  v &= 0xdf;
-  return v - 'A' + 10;
-}
-
-bool hex_to_u32(const u8 *src, u32 *res) {
-  u8 v1 = src[0];
-  u8 v2 = src[1];
-  u8 v3 = src[2];
-  u8 v4 = src[3];
-  if (!is_hex_digit(v1) || !is_hex_digit(v2) || !is_hex_digit(v3) ||
-      !is_hex_digit(v4)) {
-    return false;
-  }
-  *res = digit_to_val(v1) << 24 | digit_to_val(v2) << 16 |
-         digit_to_val(v3) << 8 | digit_to_val(v4);
-  return true;
-}
-
-// handle a unicode codepoint
-// write appropriate values into dest
-// src will always advance 6 bytes
-// dest will advance a variable amount (return via pointer)
-// return true if the unicode codepoint was valid
-// We work in little-endian then swap at write time
-really_inline bool handle_unicode_codepoint(const u8 **src_ptr, u8 **dst_ptr) {
-  u32 code_point = 0; // read the hex, potentially reading another \u beyond if
-                      // it's a // wacky one
-  if (!hex_to_u32(*src_ptr + 2, &code_point)) {
-    return false;
-  }
-  *src_ptr += 6;
-  // check for the weirdo double-UTF-16 nonsense for things outside Basic
-  // Multilingual Plane.
-  if (code_point >= 0xd800 && code_point < 0xdc00) {
-    // TODO: sanity check and clean up; snippeted from RapidJSON and poorly
-    // understood at the moment
-    if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') {
-      return false;
-    }
-    u32 code_point_2 = 0;
-    if (!hex_to_u32(*src_ptr + 2, &code_point_2)) {
-      return false;
-    }
-    if (code_point_2 < 0xdc00 || code_point_2 > 0xdfff) {
-      return false;
-    }
-    code_point =
-        (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000;
-    *src_ptr += 6;
-  }
-  // TODO: check to see whether the below code is nonsense (it's really only a
-  // sketch at this point)
-  u32 lz = __builtin_clz(code_point);
-  u32 utf_bytes = leading_zeros_to_utf_bytes[lz];
-  u32 tmp =
-      _pdep_u32(code_point, UTF_PDEP_MASK[utf_bytes]) | UTF_OR_MASK[utf_bytes];
-  // swap and move to the other side of the register
-  tmp = __builtin_bswap32(tmp);
-  tmp >>= ((4 - utf_bytes) * 8) & 31; // if utf_bytes, this could become a shift
-                                      // by 32, hence the mask with 31
-  // use memcpy to avoid undefined behavior:
-  std::memcpy(*(u32 **)dst_ptr, &tmp, sizeof(u32)); //**(u32 **)dst_ptr = tmp;
-  *dst_ptr += utf_bytes;
-  return true;
-}
-
-really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
-                                ParsedJson &pj, u32 tape_loc) {
-  u32 offset = pj.tape[tape_loc] & 0xffffff;
-  const u8 *src = &buf[offset + 1]; // we know that buf at offset is a "
-  u8 *dst = pj.current_string_buf_loc;
-#ifdef DEBUG
-  cout << "Entering parse string with offset " << offset << "\n";
-#endif
-  // basic non-sexy parsing code
-  while (1) {
-#ifdef DEBUG
-    for (u32 j = 0; j < 32; j++) {
-      char c = *(src + j);
-      if (isprint(c)) {
-        cout << c;
-      } else {
-        cout << '_';
-      }
-    }
-    cout << "|  ... string handling input\n";
-#endif
-    m256 v = _mm256_loadu_si256((const m256 *)(src));
-    u32 bs_bits =
-        (u32)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\')));
-    dumpbits32(bs_bits, "backslash bits 2");
-    u32 quote_bits =
-        (u32)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('"')));
-    dumpbits32(quote_bits, "quote_bits");
-    u32 quote_dist = __builtin_ctz(quote_bits);
-    u32 bs_dist = __builtin_ctz(bs_bits);
-    // store to dest unconditionally - we can overwrite the bits we don't like
-    // later
-    _mm256_storeu_si256((m256 *)(dst), v);
-#ifdef DEBUG
-    cout << "quote dist: " << quote_dist << " bs dist: " << bs_dist << "\n";
-#endif
-
-    if (quote_dist < bs_dist) {
-#ifdef DEBUG
-      cout << "Found end, leaving!\n";
-#endif
-      // we encountered quotes first. Move dst to point to quotes and exit
-      dst[quote_dist] = 0; // null terminate and get out
-      pj.current_string_buf_loc = dst + quote_dist + 1;
-      pj.tape[tape_loc] =
-          ((u32)'"') << 24 |
-          (pj.current_string_buf_loc -
-           pj.string_buf); // assume 2^24 will hold all strings for now
-      return true;
-    } else if (quote_dist > bs_dist) {
-      u8 escape_char = src[bs_dist + 1];
-#ifdef DEBUG
-      cout << "Found escape char: " << escape_char << "\n";
-#endif
-      // we encountered backslash first. Handle backslash
-      if (escape_char == 'u') {
-        // move src/dst up to the start; they will be further adjusted
-        // within the unicode codepoint handling code.
-        src += bs_dist;
-        dst += bs_dist;
-        if (!handle_unicode_codepoint(&src, &dst)) {
-          return false;
-        }
-        return true;
-      } else {
-        // simple 1:1 conversion. Will eat bs_dist+2 characters in input and
-        // write bs_dist+1 characters to output
-        // note this may reach beyond the part of the buffer we've actually
-        // seen. I think this is ok
-        u8 escape_result = escape_map[escape_char];
-        if (!escape_result)
-          return false; // bogus escape value is an error
-        dst[bs_dist] = escape_result;
-        src += bs_dist + 2;
-        dst += bs_dist + 1;
-      }
-    } else {
-      // they are the same. Since they can't co-occur, it means we encountered
-      // neither.
-      src += 32;
-      dst += 32;
-    }
-    return true;
-  }
-  // later extensions -
-  // if \\ we could detect whether it's a substantial run of \ or just eat 2
-  // chars and write 1 handle anything short of \u or \\\ (as a prefix) with
-  // clever PSHUFB stuff and don't leave SIMD
-  return true;
-}
-
-#ifdef DOUBLECONV
-#include "double-conversion/double-conversion.h"
-#include "double-conversion/ieee.h"
-using namespace double_conversion;
-static StringToDoubleConverter
-    converter(StringToDoubleConverter::ALLOW_TRAILING_JUNK, 2000000.0,
-              Double::NaN(), NULL, NULL);
-#endif
-
-
-// does not validation whatsoever, assumes that all digit
-// this is CS 101
-u64 naivestrtoll(const char *p, const char *end) {
-    if(p == end) return 0; // should be an error?
-    // this code could get a whole lot smarter if we have many long ints:
-    // e.g., see http://0x80.pl/articles/simd-parsing-int-sequences.html
-    u64 x = *p - '0';
-    p++;
-    for(;p < end;p++) {
-      x = (x*10) + (*p - '0');
-    }
-    return x;
-}
-// put a parsed version of number (either as a double or a signed long) into the
-// number buffer, put a 'tag' indicating which type and where it is back onto
-// the tape at that location return false if we can't parse the number which
-// means either (a) the number isn't valid, or (b) the number is followed by
-// something that isn't whitespace, comma or a close }] character which are the
-// only things that should follow a number at this stage bools to detect what we
-// found in our initial character already here - we are already switching on 0
-// vs 1-9 vs - so we may as well keep separate paths where that's useful
-
-// TODO: see if we really need a separate number_buf or whether we should just
-//       have a generic scratch - would need to align before using for this
-really_inline bool parse_number(const u8 *buf, UNUSED size_t len,
-                                UNUSED ParsedJson &pj, u32 tape_loc,
-                                UNUSED bool found_zero, bool found_minus) {
-  u32 offset = pj.tape[tape_loc] & 0xffffff;
-////////////////
-// This is temporary... but it illustrates how one could use Google's double
-// conv.
-///
-#ifdef DOUBLECONV
-  // Maybe surprisingly, StringToDouble does not parse according to the JSON
-  // spec (e.g., it will happily parse 012 as 12).
-  int processed_characters_count;
-  double result_double_conv = converter.StringToDouble(
-      (const char *)(buf + offset), 10, &processed_characters_count);
-  *((double *)pj.current_number_buf_loc) = result_double_conv;
-  pj.tape[tape_loc] =
-        ((u32)'d') << 24 |
-        (pj.current_number_buf_loc -
-         pj.number_buf); // assume 2^24 will hold all numbers for now
-  pj.current_number_buf_loc += 8;
-  return result_double_conv == result_double_conv;
-#endif
-  ////////////////
-  // end of double conv temporary stuff.
-  ////////////////
-  if (found_minus) {
-    offset++;
-  }
-  const u8 *src = &buf[offset];
-  m256 v = _mm256_loadu_si256((const m256 *)(src));
-  u64 error_sump = 0;
-#ifdef DEBUG
-  for (u32 j = 0; j < 32; j++) {
-    char c = *(src + j);
-    if (isprint(c)) {
-      cout << c;
-    } else {
-      cout << '_';
-    }
-  }
-  cout << "|  ... number handling input\n";
-#endif
-
-  // categories to extract
-  // Digits:
-  // 0 (0x30) - bucket 0
-  // 1-9 (never any distinction except if we didn't get the free kick at 0 due
-  // to the leading minus) (0x31-0x39) - bucket 1
-  // . (0x2e) - bucket 2
-  // E or e - no distinction (0x45/0x65) - bucket 3
-  // + (0x2b) - bucket 4
-  // - (0x2d) - bucket 4
-  // Terminators
-  // Whitespace: 0x20, 0x09, 0x0a, 0x0d - bucket 5+6
-  // Comma and the closes: 0x2c is comma, } is 0x5d, ] is 0x7d - bucket 5+7
-
-  // Another shufti - also a bit hand-hacked. Need to make a better construction
-  const m256 low_nibble_mask = _mm256_setr_epi8(
-      //  0   1   2   3   4   5   6   7   8   9   a   b   c   d   e   f
-      33, 2, 2, 2, 2, 10, 2, 2, 2, 66, 64, 16, 32, 0xd0, 4, 0, 33, 2, 2, 2, 2,
-      10, 2, 2, 2, 66, 64, 16, 32, 0xd0, 4, 0);
-  const m256 high_nibble_mask = _mm256_setr_epi8(
-      //  0   1   2   3   4   5   6   7   8   9   a   b   c   d   e   f
-      64, 0, 52, 3, 8, -128, 8, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 52, 3, 8,
-      -128, 8, 0x80, 0, 0, 0, 0, 0, 0, 0, 0);
-
-  m256 tmp = _mm256_and_si256(
-      _mm256_shuffle_epi8(low_nibble_mask, v),
-      _mm256_shuffle_epi8(
-          high_nibble_mask,
-          _mm256_and_si256(_mm256_srli_epi32(v, 4), _mm256_set1_epi8(0x7f))));
-#ifdef DEBUG
-  // let us print out the magic:
-  uint8_t buffer[32];
-  _mm256_storeu_si256((__m256i *)buffer,tmp);
-  for(int k = 0; k < 32; k++)
-  printf("%.2x ",buffer[k]);
-  printf("\n");
-#endif
-  m256 enders_mask = _mm256_set1_epi8(0xe0);
-  m256 tmp_enders = _mm256_cmpeq_epi8(_mm256_and_si256(tmp, enders_mask),
-                                      _mm256_set1_epi8(0));
-  u32 enders = ~(u32)_mm256_movemask_epi8(tmp_enders);
-  dumpbits32(enders, "ender characters");
-//dumpbits32_always(enders, "ender characters");
-
-  if (enders == 0) {
-    error_sump = 1;
-    //  if enders == 0  we have
-    // a heroically long number string or some garbage
-  }
-  // TODO: make a mask that indicates where our digits are // DANIEL: Isn't that digit_characters?
-  u32 number_mask = ~enders & (enders - 1);
-  dumpbits32(number_mask, "number mask");
-//dumpbits32_always(number_mask, "number mask");
-  m256 n_mask = _mm256_set1_epi8(0x1f);
-  m256 tmp_n =
-      _mm256_cmpeq_epi8(_mm256_and_si256(tmp, n_mask), _mm256_set1_epi8(0));
-  u32 number_characters = ~(u32)_mm256_movemask_epi8(tmp_n);
-
-  // put something into our error sump if we have something
-  // before our ending characters that isn't a valid character
-  // for the inside of our JSON
-  number_characters &= number_mask;
-  error_sump |= number_characters ^ number_mask;
-  dumpbits32(number_characters, "number characters");
-
-  m256 d_mask = _mm256_set1_epi8(0x03);
-  m256 tmp_d =
-      _mm256_cmpeq_epi8(_mm256_and_si256(tmp, d_mask), _mm256_set1_epi8(0));
-  u32 digit_characters = ~(u32)_mm256_movemask_epi8(tmp_d);
-  digit_characters &= number_mask;
-  dumpbits32(digit_characters, "digit characters");
-  //  dumpbits32_always(digit_characters, "digit characters");
-
-
-  m256 p_mask = _mm256_set1_epi8(0x04);
-  m256 tmp_p =
-      _mm256_cmpeq_epi8(_mm256_and_si256(tmp, p_mask), _mm256_set1_epi8(0));
-  u32 decimal_characters = ~(u32)_mm256_movemask_epi8(tmp_p);
-  decimal_characters &= number_mask;
-  dumpbits32(decimal_characters, "decimal characters");
-
-  m256 e_mask = _mm256_set1_epi8(0x08);
-  m256 tmp_e =
-      _mm256_cmpeq_epi8(_mm256_and_si256(tmp, e_mask), _mm256_set1_epi8(0));
-  u32 exponent_characters = ~(u32)_mm256_movemask_epi8(tmp_e);
-  exponent_characters &= number_mask;
-  dumpbits32(exponent_characters, "exponent characters");
-
-
-  m256 zero_mask = _mm256_set1_epi8(0x1);
-  m256 tmp_zero =
-      _mm256_cmpeq_epi8(tmp, zero_mask);
-  u32 zero_characters = (u32)_mm256_movemask_epi8(tmp_zero);
-  dumpbits32(zero_characters, "zero characters");
-
-  // if the  zero character is in first position, it
-  // needs to be followed by decimal or exponent or ender (note: we
-  // handle found_minus separately)
-  u32 expo_or_decimal_or_ender = exponent_characters | decimal_characters | enders;
-  error_sump |= zero_characters & 0x01 & (~(expo_or_decimal_or_ender >> 1));
-
-  m256 s_mask = _mm256_set1_epi8(0x10);
-  m256 tmp_s =
-      _mm256_cmpeq_epi8(_mm256_and_si256(tmp, s_mask), _mm256_set1_epi8(0));
-  u32 sign_characters = ~(u32)_mm256_movemask_epi8(tmp_s);
-  sign_characters &= number_mask;
-  dumpbits32(sign_characters, "sign characters");
-
-  u32 digit_edges = ~(digit_characters << 1) & digit_characters;
-  dumpbits32(digit_edges, "digit_edges");
-
-  // check that we have 1-3 'edges' only
-  u32 t = digit_edges;
-  t &= t - 1;
-  t &= t - 1;
-  t &= t - 1;
-  error_sump |= t;
-
-  // check that we start with a digit
-  error_sump |= ~digit_characters & 0x1;
-
-  // having done some checks, get lazy and fall back
-  // to strtoll or strtod
-  // TODO: handle the easy cases ourselves; these are
-  // expensive and we've done a lot of the prepwork.
-  // return errors if strto* fail, otherwise fill in a code on the tape
-  // 'd' for floating point and 'l' for long and put a pointer to the
-  // spot in the buffer.
-  if ( digit_edges == 1) {
-  //if (__builtin_popcount(digit_edges) == 1) { // DANIEL :  shouldn't we have digit_edges == 1
-#define NAIVEINTPARSING // naive means "faster" in this case
-#ifdef NAIVEINTPARSING
-    // this is faster, maybe, because we use a naive strtoll
-    // should be all digits?
-    error_sump |= number_characters ^ digit_characters;
-    int stringlength = __builtin_ctz(~digit_characters);
-    const char *end = (const char *)src + stringlength;
-    u64 result = naivestrtoll((const char *)src,end);
-    if (found_minus) { // unfortunate that it is a branch?
-      result = -result;
-    }
-#else
-    // try a strtoll (this is likely slower because it revalidates)
-    char *end;
-    u64 result = strtoll((const char *)src, &end, 10);
-    if ((errno != 0) || (end == (const char *)src)) {
-      error_sump |= 1;
-    }
-    error_sump |= is_not_structural_or_whitespace(*end);
-    if (found_minus) {
-      result = -result;
-    }
-#endif
-#ifdef DEBUG
-    cout << "Found number " << result << "\n";
-#endif
-    *((u64 *)pj.current_number_buf_loc) = result;
-    pj.tape[tape_loc] =
-        ((u32)'l') << 24 |
-        (pj.current_number_buf_loc -
-         pj.number_buf); // assume 2^24 will hold all numbers for now
-    pj.current_number_buf_loc += 8;
-  } else {
-    // try a strtod
-    char *end;
-    double result = strtod((const char *)src, &end);
-    if ((errno != 0) || (end == (const char *)src)) {
-      error_sump |= 1;
-    }
-    error_sump |= is_not_structural_or_whitespace(*end);
-    if (found_minus) {
-      result = -result;
-    }
-#ifdef DEBUG
-    cout << "Found number " << result << "\n";
-#endif
-    *((double *)pj.current_number_buf_loc) = result;
-    pj.tape[tape_loc] =
-        ((u32)'d') << 24 |
-        (pj.current_number_buf_loc -
-         pj.number_buf); // assume 2^24 will hold all numbers for now
-    pj.current_number_buf_loc += 8;
-  }
-  // TODO: check the MSB element is a digit
-
-  // TODO: a whole bunch of checks
-
-  // TODO:  <=1 decimal point, eE mark, +- construct
-
-  // TODO: first and last character in mask region must be
-  // digit
-
-  // TODO: if it exists,
-  // Decimal point is after the first cluster of numbers only
-  // and before the second cluster of numbers only. It must
-  // be digit_or_zero . digit_or_zero strictly
-
-  // TODO: eE mark and +- construct are adjacent with eE first
-  // eE mark preceeds final cluster of numbers only
-  // and immediately follows second-last cluster of numbers only (not
-  // necessarily second, as we may have 4e10).
-  // it may suffice to insist that eE is preceeded immediately
-  // by a digit of any kind and that it's followed locally by
-  // a digit immediately or a +- construct then a digit.
-
-  // TODO: if we have both . and the eE mark then the . must
-  // precede the eE mark
-
-  if (error_sump)
-    return false;
-  return true;
-}
-
-bool tape_disturbed(u32 i, ParsedJson &pj) {
-  u32 start_loc = i * MAX_TAPE_ENTRIES;
-  u32 end_loc = pj.tape_locs[i];
-  return start_loc != end_loc;
-}
-
-bool shovel_machine(const u8 *buf, size_t len, ParsedJson &pj) {
-  // fixup the mess made by the ape_machine
-  // as such it does a bunch of miscellaneous things on the tapes
-  u32 error_sump = 0;
-  u64 tv = *(const u64 *)"true    ";
-  u64 nv = *(const u64 *)"null    ";
-  u64 fv = *(const u64 *)"false   ";
-  u64 mask4 = 0x00000000ffffffff;
-  u64 mask5 = 0x000000ffffffffff;
-
-  // if the tape has been touched at all at the depths outside the safe
-  // zone we need to quit. Note that our periodic checks to see that we're
-  // inside our safe zone in stage 3 don't guarantee that the system did
-  // not get into the danger area briefly.
-  if (tape_disturbed(START_DEPTH - 1, pj) ||
-      tape_disturbed(REDLINE_DEPTH, pj)) {
-    return false;
-  }
-
-  // walk over each tape
-  for (u32 i = START_DEPTH; i < MAX_DEPTH; i++) {
-    u32 start_loc = i * MAX_TAPE_ENTRIES;
-    u32 end_loc = pj.tape_locs[i];
-    if (start_loc == end_loc) {
-      break;
-    }
-    for (u32 j = start_loc; j < end_loc; j++) {
-      switch (pj.tape[j] >> 56) {
-      case '{':
-      case '[': {
-        // pivot our tapes
-        // point the enclosing structural char (}]) to the head marker ({[) and
-        // put the end of the sequence on the tape at the head marker
-        // we start with head marker pointing at the enclosing structural char
-        // and the enclosing structural char pointing at the end. Just swap
-        // them. also check the balanced-{} or [] property here
-        u8 head_marker_c = pj.tape[j] >> 56;
-        u32 head_marker_loc = pj.tape[j] & 0xffffffffffffffULL;
-        u64 tape_enclosing = pj.tape[head_marker_loc];
-        u8 enclosing_c = tape_enclosing >> 56;
-        pj.tape[head_marker_loc] = pj.tape[j];
-        pj.tape[j] = tape_enclosing;
-        error_sump |= (enclosing_c - head_marker_c -
-                       2); // [] and {} only differ by 2 chars
-        break;
-      }
-      case '"': {
-        error_sump |= !parse_string(buf, len, pj, j);
-        break;
-      }
-      case '1':
-      case '2':
-      case '3':
-      case '4':
-      case '5':
-      case '6':
-      case '7':
-      case '8':
-      case '9':
-        error_sump |= !parse_number(buf, len, pj, j, false, false);
-        break;
-      case '0':
-        error_sump |= !parse_number(buf, len, pj, j, true, false);
-        break;
-      case '-':
-        error_sump |= !parse_number(buf, len, pj, j, false, true);
-        break;
-      case 't': {
-        u32 offset = pj.tape[j] & 0xffffffffffffffULL;
-        const u8 *loc = buf + offset;
-        u64 locval; // we want to avoid unaligned 64-bit loads (undefined in
-                    // C/C++)
-        std::memcpy(&locval, loc, sizeof(u64));
-        error_sump |= (locval & mask4) ^ tv;
-        error_sump |= is_not_structural_or_whitespace(loc[4]);
-        break;
-      }
-      case 'f': {
-        u32 offset = pj.tape[j] & 0xffffffffffffffULL;
-        const u8 *loc = buf + offset;
-        u64 locval; // we want to avoid unaligned 64-bit loads (undefined in
-                    // C/C++)
-        std::memcpy(&locval, loc, sizeof(u64));
-        error_sump |= (locval & mask5) ^ fv;
-        error_sump |= is_not_structural_or_whitespace(loc[5]);
-        break;
-      }
-      case 'n': {
-        u32 offset = pj.tape[j] & 0xffffffffffffffULL;
-        const u8 *loc = buf + offset;
-        u64 locval; // we want to avoid unaligned 64-bit loads (undefined in
-                    // C/C++)
-        std::memcpy(&locval, loc, sizeof(u64));
-        error_sump |= (locval & mask4) ^ nv;
-        error_sump |= is_not_structural_or_whitespace(loc[4]);
-        break;
-      }
-      default:
-        break;
-      }
-    }
-  }
-  /*
-  if (error_sump) {
-    return false;
-  }
-  */
-  return true;
-}