Lots and lots of cleaning.

2018-11-27 14:37:59 -05:00 · 2018-11-27 14:37:59 -05:00 · a43b0772e1
parent 5fae7b2100
commit a43b0772e1
15 changed files with 521 additions and 465 deletions
--- a/benchmark/linux/linux-perf-events.h
+++ b/benchmark/linux/linux-perf-events.h
@ -21,7 +21,7 @@ template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
  std::vector<uint64_t> ids;
 public:
-  LinuxEvents(std::vector<int> config_vec) : fd(0) {
+  explicit LinuxEvents(std::vector<int> config_vec) : fd(0) {
    memset(&attribs, 0, sizeof(attribs));
    attribs.type = TYPE;
    attribs.size = sizeof(attribs);
--- a/benchmark/minifiercompetition.cpp
+++ b/benchmark/minifiercompetition.cpp
@ -1,3 +1,4 @@
 #include <unistd.h>
 #include <iostream>
 #include "benchmark.h"
@ -13,6 +14,7 @@
 #include "rapidjson/writer.h"
 #include "sajson.h"
 using namespace rapidjson;
 using namespace std;
@ -43,17 +45,29 @@ std::string rapidstringme(char *json) {
 }
 int main(int argc, char *argv[]) {
-  if (argc < 2) {
+  int c;
-    cerr << "Usage: " << argv[0] << " <jsonfile>\n";
+  bool verbose = false;
-    cerr << "Or " << argv[0] << " -v <jsonfile>\n";
+  while ((c = getopt (argc, argv, "v")) != -1)
    switch (c)
      {
      case 'v':
        verbose = true;
        break;
      default:
        abort ();
      }
  if (optind >= argc) {
    cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
    exit(1);
  }
-  bool verbose = false;
+  const char * filename = argv[optind];
-  if (argc > 2) {
+  pair<u8 *, size_t> p;
-    if (strcmp(argv[1], "-v"))
+  try {
-      verbose = true;
+    p = get_corpus(filename);
  } catch (const std::exception& e) { // caught by reference to base
    std::cout << "Could not load the file " << filename << std::endl;
    return EXIT_FAILURE;
  }
  pair<u8 *, size_t> p = get_corpus(argv[argc - 1]);
  if (verbose) {
    std::cout << "Input has ";
    if (p.second > 1024 * 1024)
--- a/benchmark/parse.cpp
+++ b/benchmark/parse.cpp
@ -31,79 +31,14 @@
 #include "jsonparser/stage34_unified.h"
 using namespace std;
 // https://stackoverflow.com/questions/2616906/how-do-i-output-coloured-text-to-a-linux-terminal
 // Terminal colour support: numeric colour codes plus a small stream
 // manipulator that writes the matching escape sequence.
 namespace Color {
 // Colour codes, listed in ascending numeric order.
 // FG_* select a foreground colour, BG_* a background colour.
 enum Code {
   FG_BLACK = 30,
   FG_RED = 31,
   FG_GREEN = 32,
   FG_YELLOW = 33,
   FG_BLUE = 34,
   FG_MAGENTA = 35,
   FG_CYAN = 36,
   FG_LIGHT_GRAY = 37,
   FG_DEFAULT = 39,
   BG_RED = 41,
   BG_GREEN = 42,
   BG_BLUE = 44,
   BG_DEFAULT = 49,
   FG_DARK_GRAY = 90,
   FG_LIGHT_RED = 91,
   FG_LIGHT_GREEN = 92,
   FG_LIGHT_YELLOW = 93,
   FG_LIGHT_BLUE = 94,
   FG_LIGHT_MAGENTA = 95,
   FG_LIGHT_CYAN = 96,
   FG_WHITE = 97
 };
 // Holds a single Code; inserting a Modifier into a stream writes
 // "\033[" followed by the numeric code and a closing "m".
 class Modifier {
 public:
   Modifier(Code pCode) : selected(pCode) {}
   friend std::ostream &operator<<(std::ostream &os, const Modifier &mod) {
     os << "\033[";
     os << mod.selected;
     os << "m";
     return os;
   }

 private:
   Code selected; // the colour code emitted by operator<<
 };
 } // namespace Color
 // Writes the bytes of buf to std::cout, walking pj.structural_indexes:
 // each structural character is coloured (green for { [ } ], yellow for
 // anything else) and the bytes between two consecutive structural
 // indexes are printed uncoloured. A final std::endl terminates the output.
 void colorfuldisplay(ParsedJson &pj, const u8 *buf) {
   const Color::Modifier bracket_color(Color::FG_GREEN);
   const Color::Modifier other_color(Color::FG_YELLOW);
   const Color::Modifier reset_color(Color::FG_DEFAULT);

   // Skip leading entries whose structural index equals the next one.
   size_t cursor = 0;
   while ((cursor + 1 < pj.n_structural_indexes) &&
          (pj.structural_indexes[cursor] ==
           pj.structural_indexes[cursor + 1])) {
     ++cursor;
   }

   for (; cursor < pj.n_structural_indexes; ++cursor) {
     const u32 here = pj.structural_indexes[cursor];
     // Clearing bit 0x20 folds '{' (0x7b) onto '[' (0x5b) and
     // '}' (0x7d) onto ']' (0x5d), so one comparison covers each pair.
     const int folded = buf[here] & 0xdf;
     const bool is_bracket = (folded == 0x5b) || (folded == 0x5d);
     std::cout << (is_bracket ? bracket_color : other_color) << buf[here]
               << reset_color;

     // Echo whatever lies strictly between this structural character and
     // the next one; nothing is echoed after the last entry.
     if (cursor + 1 < pj.n_structural_indexes) {
       const u32 stop = pj.structural_indexes[cursor + 1];
       for (u32 pos = here + 1; pos < stop; ++pos) {
         std::cout << buf[pos];
       }
     }
   }
   std::cout << std::endl;
 }
 int main(int argc, char *argv[]) {
  bool verbose = false;
  bool dump = false;
  bool forceoneiteration = false;
  int c;
-  while ((c = getopt (argc, argv, "vd")) != -1)
+  while ((c = getopt (argc, argv, "1vd")) != -1)
    switch (c)
      {
      case 'v':
@ -112,6 +47,9 @@ int main(int argc, char *argv[]) {
      case 'd':
        dump = true;
        break;
      case '1':
        forceoneiteration = true;
        break;
      default:
        abort ();
      }
@ -124,7 +62,13 @@ int main(int argc, char *argv[]) {
    cerr << "warning: ignoring everything after " << argv[optind  + 1] << endl;
  }
  if(verbose) cout << "[verbose] loading " << filename << endl;
-  pair<u8 *, size_t> p = get_corpus(filename);
+  pair<u8 *, size_t> p;
  try {
    p = get_corpus(filename);
  } catch (const std::exception& e) { // caught by reference to base
    std::cout << "Could not load the file " << filename << std::endl;
    return EXIT_FAILURE;
  }
  if(verbose) cout << "[verbose] loaded " << filename << " ("<< p.second << " bytes)" << endl;
  ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
  ParsedJson &pj(*pj_ptr);
@ -133,7 +77,7 @@ int main(int argc, char *argv[]) {
 #if defined(DEBUG)
  const u32 iterations = 1;
 #else
-  const u32 iterations = p.second < 1 * 1000 * 1000? 1000 : 10;
+  const u32 iterations = forceoneiteration ? 1 : ( p.second < 1 * 1000 * 1000? 1000 : 10);
 #endif
  vector<double> res;
  res.resize(iterations);
@ -174,7 +118,7 @@ int main(int argc, char *argv[]) {
    }
    unified.start();
 #endif
-    isok = flatten_indexes(p.second, pj);
+    isok = isok && flatten_indexes(p.second, pj);
 #ifndef SQUASH_COUNTERS
    unified.end(results);
    cy2 += results[0];
@ -187,7 +131,7 @@ int main(int argc, char *argv[]) {
    unified.start();
 #endif
-    isok = unified_machine(p.first, p.second, pj);
+    isok = isok && unified_machine(p.first, p.second, pj);
 #ifndef SQUASH_COUNTERS
    unified.end(results);
    cy3 += results[0];
--- a/benchmark/parsingcompetition.cpp
+++ b/benchmark/parsingcompetition.cpp
@ -31,7 +31,6 @@ void on_json_error( void *, const fastjson::ErrorContext& ec) {
 // Runs fastjson's DOM parser over `input` (presumably a NUL-terminated
 // JSON text -- no length is passed; confirm against fastjson's API).
 // The token and chunk objects only live for the duration of the call, so
 // the parsed document is discarded; on_json_error is handed to the parser
 // as its callback argument.
 // Returns whatever fastjson::dom::parse_string returns.
 bool fastjson_parse(const char *input) {
   fastjson::Token token;
   fastjson::dom::Chunk chunk;
   return fastjson::dom::parse_string(input, &token, &chunk, 0, &on_json_error, NULL);
 }
 // end of fastjson stuff
@ -62,7 +61,14 @@ int main(int argc, char *argv[]) {
  if(optind + 1 < argc) {
    cerr << "warning: ignoring everything after " << argv[optind  + 1] << endl;
  }
-  pair<u8 *, size_t> p = get_corpus(filename);
+  pair<u8 *, size_t> p;
  try {
    p = get_corpus(filename);
  } catch (const std::exception& e) { // caught by reference to base
    std::cout << "Could not load the file " << filename << std::endl;
    return EXIT_FAILURE;
  }
  if (verbose) {
    std::cout << "Input has ";
    if (p.second > 1024 * 1024)
--- a/include/jsonparser/jsonioutil.h
+++ b/include/jsonparser/jsonioutil.h
@ -20,6 +20,13 @@ char * allocate_aligned_buffer(size_t length);
 // The first element of the pair is a null-terminated string,
 // whereas the second element is its length.
 // The caller is responsible for freeing the buffer
 // (call free() on std::pair<u8 *, size_t>.first).
 //
 // Throws an exception if the file cannot be opened; use try/catch:
 //      try {
 //        p = get_corpus(filename);
 //      } catch (const std::exception& e) {
 //        std::cout << "Could not load the file " << filename << std::endl;
 //      }
 std::pair<u8 *, size_t> get_corpus(std::string filename);
 #endif
--- a/include/jsonparser/numberparsing.h
+++ b/include/jsonparser/numberparsing.h
@ -128,7 +128,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
  const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1);
  const __m128i mul_1_10000 =
      _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1);
-  const __m128i input = _mm_sub_epi8(_mm_loadu_si128((__m128i *)chars), ascii0);
+  const __m128i input = _mm_sub_epi8(_mm_loadu_si128((const __m128i *)chars), ascii0);
  const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10);
  const __m128i t2 = _mm_madd_epi16(t1, mul_1_100);
  const __m128i t3 = _mm_packus_epi32(t2, t2);
@ -149,7 +149,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
 //
 static never_inline bool
 parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
-                          ParsedJson &pj, const u32 depth, const u32 offset,
+                          ParsedJson &pj, UNUSED const u32 depth, const u32 offset,
                          UNUSED bool found_zero, bool found_minus) {
  const char *p = (const char *)(buf + offset);
@ -193,7 +193,6 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
    }
    exponent = firstafterperiod - p;
  }
  int64_t expnumber = 0; // exponential part
  if (('e' == *p) || ('E' == *p)) {
    ++p;
    bool negexp = false;
@ -210,7 +209,7 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
      return false;
    }
    unsigned char digit = *p - '0';
-    expnumber = digit;
+    int64_t expnumber = digit; // exponential part
    p++;
    if (is_integer(*p)) {
      digit = *p - '0';
@ -270,7 +269,7 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
 //
 static never_inline bool parse_large_integer(const u8 *const buf,
                                             UNUSED size_t len, ParsedJson &pj,
-                                             const u32 depth, const u32 offset,
+                                             UNUSED const u32 depth, const u32 offset,
                                             UNUSED bool found_zero,
                                             bool found_minus) {
  const char *p = (const char *)(buf + offset);
@ -340,10 +339,12 @@ static never_inline bool parse_large_integer(const u8 *const buf,
 #define unlikely(x) __builtin_expect(!!(x), 0)
 #endif
 // parse the number at buf + offset
 // define JSON_TEST_NUMBERS for unit testing
 static really_inline bool parse_number(const u8 *const buf, UNUSED size_t len,
-                                       ParsedJson &pj, const u32 depth,
+                                       ParsedJson &pj, UNUSED const u32 depth,
                                       const u32 offset, UNUSED bool found_zero,
                                       bool found_minus) {
  const char *p = (const char *)(buf + offset);
--- a/include/jsonparser/simdjson_internal.h
+++ b/include/jsonparser/simdjson_internal.h
@ -105,14 +105,14 @@ public:
    void write_tape_s64(s64 i) {
-        *((s64 *)current_number_buf_loc) = i;
+        *((s64 *)current_number_buf_loc) = i;// safe because array will be 8-byte aligned, could use memcpy
-        current_number_buf_loc += 8;
+        current_number_buf_loc += sizeof(s64);
        write_tape(current_number_buf_loc - number_buf, 'l');
    }
    void write_tape_double(double d) {
-        *((double *)current_number_buf_loc) = d;
+        *((double *)current_number_buf_loc) = d;// safe because array will be 8-byte aligned, could use memcpy
-        current_number_buf_loc += 8;
+        current_number_buf_loc += sizeof(double);
        write_tape(current_number_buf_loc - number_buf, 'd');
    }
@ -137,7 +137,7 @@ public:
        u32 scope_header; // the start of our current scope that contains our current location
        u32 location;     // our current location on a tape
-        ParsedJsonHandle(ParsedJson & pj_) : pj(pj_), depth(0), scope_header(0), location(0) {}
+        explicit ParsedJsonHandle(ParsedJson & pj_) : pj(pj_), depth(0), scope_header(0), location(0) {}
        // OK with default copy constructor as the way to clone the POD structure
        // some placeholder navigation. Will convert over to a more native C++-ish way of doing
@ -167,7 +167,7 @@ public:
 #ifdef DEBUG
-inline void dump256(m256 d, std::string msg) {
+inline void dump256(m256 d, const std::string msg) {
  for (u32 i = 0; i < 32; i++) {
    std::cout << std::setw(3) << (int)*(((u8 *)(&d)) + i);
    if (!((i + 1) % 8))
@ -181,14 +181,14 @@ inline void dump256(m256 d, std::string msg) {
 }
 // dump bits low to high
-inline void dumpbits(u64 v, std::string msg) {
+inline void dumpbits(u64 v, const std::string msg) {
  for (u32 i = 0; i < 64; i++) {
    std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_");
  }
  std::cout << " " << msg << "\n";
 }
-inline void dumpbits32(u32 v, std::string msg) {
+inline void dumpbits32(u32 v, const std::string msg) {
  for (u32 i = 0; i < 32; i++) {
    std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_");
  }
@ -201,14 +201,14 @@ inline void dumpbits32(u32 v, std::string msg) {
 #endif
 // dump bits low to high
-inline void dumpbits_always(u64 v, std::string msg) {
+inline void dumpbits_always(u64 v, const std::string msg) {
  for (u32 i = 0; i < 64; i++) {
    std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_");
  }
  std::cout << " " << msg << "\n";
 }
-inline void dumpbits32_always(u32 v, std::string msg) {
+inline void dumpbits32_always(u32 v, const std::string msg) {
  for (u32 i = 0; i < 32; i++) {
    std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_");
  }
--- a/include/jsonparser/stringparsing.h
+++ b/include/jsonparser/stringparsing.h
@ -58,7 +58,7 @@ really_inline bool handle_unicode_codepoint(const u8 **src_ptr, u8 **dst_ptr) {
 }
 really_inline  bool parse_string(const u8 *buf, UNUSED size_t len,
-                                ParsedJson &pj, u32 depth, u32 offset) {
+                                ParsedJson &pj, UNUSED const u32 depth, u32 offset) {
  using namespace std;
  const u8 *src = &buf[offset + 1]; // we know that buf at offset is a "
  u8 *dst = pj.current_string_buf_loc;
--- a/src/jsonminifier.cpp
+++ b/src/jsonminifier.cpp
@ -137,7 +137,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
      uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
          _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
      quote_mask ^= prev_iter_inside_quote;
-      prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);
+      prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// right-shifting a negative signed value is implementation-defined before C++20 (not undefined behavior)
      const __m256i low_nibble_mask = _mm256_setr_epi8(
          //  0                           9  a   b  c  d
          16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
@ -220,7 +220,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
    uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
        _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
    quote_mask ^= prev_iter_inside_quote;
-    prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);
+    // prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// we don't need this anymore
    __m256i mask_20 = _mm256_set1_epi8(0x20); // c==32
    __m256i mask_70 =
--- a/src/stage34_unified.cpp
+++ b/src/stage34_unified.cpp
@ -10,9 +10,9 @@
 #include <cstring>
 #include "jsonparser/common_defs.h"
 #include "jsonparser/simdjson_internal.h"
 #include "jsonparser/jsoncharutils.h"
 #include "jsonparser/numberparsing.h"
 #include "jsonparser/simdjson_internal.h"
 #include "jsonparser/stringparsing.h"
 #include <iostream>
@ -20,390 +20,430 @@
 #define PATH_SEP '/'
 #if defined(DEBUG) && !defined(DEBUG_PRINTF)
 #include <string.h>
 #include <stdio.h>
-#define DEBUG_PRINTF(format, ...) printf("%s:%s:%d:" format, \
+#include <string.h>
-                                         strrchr(__FILE__, PATH_SEP) + 1, \
+#define DEBUG_PRINTF(format, ...)                                              \
-                                         __func__, __LINE__,  ## __VA_ARGS__)
+  printf("%s:%s:%d:" format, strrchr(__FILE__, PATH_SEP) + 1, __func__,        \
         __LINE__, ##__VA_ARGS__)
 #elif !defined(DEBUG_PRINTF)
-#define DEBUG_PRINTF(format, ...) do { } while(0)
+#define DEBUG_PRINTF(format, ...)                                              \
  do {                                                                         \
  } while (0)
 #endif
 using namespace std;
 WARN_UNUSED
-really_inline bool is_valid_true_atom(const u8 * loc) {
+really_inline bool is_valid_true_atom(const u8 *loc) {
-    u64 tv = *(const u64 *)"true    ";
+  u64 tv = *(const u64 *)"true    ";
-    u64 mask4 = 0x00000000ffffffff;
+  u64 mask4 = 0x00000000ffffffff;
-    u32 error = 0;
+  u32 error = 0;
-    u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
+  u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
-    std::memcpy(&locval, loc, sizeof(u64));
+  std::memcpy(&locval, loc, sizeof(u64));
-    error = (locval & mask4) ^ tv;
+  error = (locval & mask4) ^ tv;
-    error |= is_not_structural_or_whitespace(loc[4]);
+  error |= is_not_structural_or_whitespace(loc[4]);
-    return error == 0;
+  return error == 0;
 }
 WARN_UNUSED
-really_inline bool is_valid_false_atom(const u8 * loc) {
+really_inline bool is_valid_false_atom(const u8 *loc) {
-    u64 fv = *(const u64 *)"false   ";
+  u64 fv = *(const u64 *)"false   ";
-    u64 mask5 = 0x000000ffffffffff;
+  u64 mask5 = 0x000000ffffffffff;
-    u32 error = 0;
+  u32 error = 0;
-    u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
+  u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
-    std::memcpy(&locval, loc, sizeof(u64));
+  std::memcpy(&locval, loc, sizeof(u64));
-    error = (locval & mask5) ^ fv;
+  error = (locval & mask5) ^ fv;
-    error |= is_not_structural_or_whitespace(loc[5]);
+  error |= is_not_structural_or_whitespace(loc[5]);
-    return error == 0;
+  return error == 0;
 }
 WARN_UNUSED
-really_inline bool is_valid_null_atom(const u8 * loc) {
+really_inline bool is_valid_null_atom(const u8 *loc) {
-    u64 nv = *(const u64 *)"null    ";
+  u64 nv = *(const u64 *)"null    ";
-    u64 mask4 = 0x00000000ffffffff;
+  u64 mask4 = 0x00000000ffffffff;
-    u32 error = 0;
+  u32 error = 0;
-    u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
+  u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
-    std::memcpy(&locval, loc, sizeof(u64));
+  std::memcpy(&locval, loc, sizeof(u64));
-    error = (locval & mask4) ^ nv;
+  error = (locval & mask4) ^ nv;
-    error |= is_not_structural_or_whitespace(loc[4]);
+  error |= is_not_structural_or_whitespace(loc[4]);
-    return error == 0;
+  return error == 0;
 }
-// Implemented using Labels as Values which works in GCC and CLANG (and maybe also in Intel's compiler),
+// Implemented using Labels as Values which works in GCC and CLANG (and maybe
-// but won't work in MSVC. This would need to be reimplemented differently
+// also in Intel's compiler), but won't work in MSVC. This would need to be
-// if one wants to be standard compliant.
+// reimplemented differently if one wants to be standard compliant.
 WARN_UNUSED
 bool unified_machine(const u8 *buf, size_t len, ParsedJson &pj) {
-    u32 i = 0; // index of the structural character (0,1,2,3...)
+  u32 i = 0; // index of the structural character (0,1,2,3...)
-    u32 idx; // location of the structural character in the input (buf)
+  u32 idx;   // location of the structural character in the input (buf)
-    u8 c; // used to track the (structural) character we are looking at, updated by UPDATE_CHAR macro
+  u8 c; // used to track the (structural) character we are looking at, updated
-    u32 depth = 0;//START_DEPTH; // an arbitrary starting depth
+        // by UPDATE_CHAR macro
-    //void * ret_address[MAX_DEPTH]; // used to store "labels as value" (non-standard compiler extension)
+  u32 depth = 0; // could have an arbitrary starting depth
-
+  pj.init();
    // a call site is the start of either an object or an array ('[' or '{')
    // this is the location of the previous call site 
    // (in the tape, at the given depth); 
    // we only need one.
    // We should also track the tape address of our containing
    // scope for two reasons. First, we will need to put an 
    // up pointer there at each call site so we can navigate
    // upwards. Second, when we encounter the end of the scope
    // we can put the current offset into a record for the 
    // scope so we know where it is
    //u32 containing_scope_offset[MAX_DEPTH];
    pj.init();
    // add a sentinel to the end to avoid premature exit
    // need to be able to find the \0 at the 'padded length' end of the buffer
    // FIXME: TERRIFYING!
    //size_t j;
    //for (j = len; buf[j] != 0; j++)
    //    ;
    //pj.structural_indexes[pj.n_structural_indexes++] = j;
 // this macro reads the next structural character, updating idx, i and c.
-#define UPDATE_CHAR() { idx = pj.structural_indexes[i++]; c = buf[idx]; DEBUG_PRINTF("Got %c at %d (%d offset)\n", c, idx, i-1);}
+#define UPDATE_CHAR()                                                          \
  {                                                                            \
    idx = pj.structural_indexes[i++];                                          \
    c = buf[idx];                                                              \
    DEBUG_PRINTF("Got %c at %d (%d offset) (depth %d)\n", c, idx, i - 1,       \
                 depth);                                                       \
  }
-
+  ////////////////////////////// START STATE /////////////////////////////
-
+  DEBUG_PRINTF("at start\n");
-
+  pj.ret_address[depth] = &&start_continue;
-
+  pj.containing_scope_offset[depth] = pj.get_current_loc();
-////////////////////////////// START STATE /////////////////////////////
+  pj.write_tape(0, 'r'); // r for root, 0 is going to get overwritten
-printf("at start\n");
+  depth++; // everything starts at depth = 1, depth = 0 is just for the root
-    DEBUG_PRINTF("at start\n");
+  if (depth > pj.depthcapacity) {
-    pj.ret_address[depth] = &&start_continue; 
+    goto fail;
-    pj.containing_scope_offset[depth] = pj.get_current_loc(); 
+  }
-    pj.write_tape(0, 'r'); // r for root, 0 is going to get overwritten
+  UPDATE_CHAR();
-    depth++;// everything starts at depth = 1, depth = 0 is just for the root
+  switch (c) {
-    if(depth > pj.depthcapacity) {
+  case '{':
-        goto fail;
+    goto object_begin;
-    }
+  case '[':
-    printf("got char %c \n",c);
+    goto array_begin;
    UPDATE_CHAR();
    switch (c) {
        case '{': goto object_begin;
        case '[': goto array_begin;
 #define SIMDJSON_ALLOWANYTHINGINROOT
-   // A JSON text is a serialized value.  Note that certain previous
+    // A JSON text is a serialized value.  Note that certain previous
-   // specifications of JSON constrained a JSON text to be an object or an
+    // specifications of JSON constrained a JSON text to be an object or an
-   // array.  Implementations that generate only objects or arrays where a
+    // array.  Implementations that generate only objects or arrays where a
-   // JSON text is called for will be interoperable in the sense that all
+    // JSON text is called for will be interoperable in the sense that all
-   // implementations will accept these as conforming JSON texts.
+    // implementations will accept these as conforming JSON texts.
-   // https://tools.ietf.org/html/rfc8259
+    // https://tools.ietf.org/html/rfc8259
 #ifdef SIMDJSON_ALLOWANYTHINGINROOT
-        case '"': {
+  case '"': {
-            if (!parse_string(buf, len, pj, depth, idx)) {
+    if (!parse_string(buf, len, pj, depth, idx)) {
-                goto fail;
+      goto fail;
            }
            goto start_continue;
        }
        case 't': 
            if (!is_valid_true_atom(buf + idx)) {
                goto fail;
            }
            pj.write_tape(0, c);
            goto start_continue;
        case 'f': 
            if (!is_valid_false_atom(buf + idx)) {
                goto fail;
            }
            pj.write_tape(0, c);
            goto start_continue;
        case 'n': 
            if (!is_valid_null_atom(buf + idx)) {
                goto fail;
            }
            pj.write_tape(0, c);
            goto start_continue;
        case '0': {
            if (!parse_number(buf, len, pj, depth, idx, true, false)) {
                goto fail;
            }
            goto start_continue;
        }
        case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':  {
            if (!parse_number(buf, len, pj, depth, idx, false, false)) {
                goto fail;
            }
            goto start_continue;
        }
        case '-': {
            if (!parse_number(buf, len, pj, depth, idx, false, true)) {
                goto fail;
            }
            goto start_continue;
        }
 #endif // ALLOWANYTHINGINROOT
        default: goto fail;
    }
    break;
  }
  case 't':
    if (!is_valid_true_atom(buf + idx)) {
      goto fail;
    }
    pj.write_tape(0, c);
    break;
  case 'f':
    if (!is_valid_false_atom(buf + idx)) {
      goto fail;
    }
    pj.write_tape(0, c);
    break;
  case 'n':
    if (!is_valid_null_atom(buf + idx)) {
      goto fail;
    }
    pj.write_tape(0, c);
    break;
  case '0': {
    if (!parse_number(buf, len, pj, depth, idx, true, false)) {
      goto fail;
    }
    break;
  }
  case '1':
  case '2':
  case '3':
  case '4':
  case '5':
  case '6':
  case '7':
  case '8':
  case '9': {
    if (!parse_number(buf, len, pj, depth, idx, false, false)) {
      goto fail;
    }
    break;
  }
  case '-': {
    if (!parse_number(buf, len, pj, depth, idx, false, true)) {
      goto fail;
    }
    break;
  }
 #endif // ALLOWANYTHINGINROOT
  default:
    goto fail;
  }
 #ifdef SIMDJSON_ALLOWANYTHINGINROOT
  depth--; // for fall-through cases (e.g., documents containing just a string)
 #endif     // ALLOWANYTHINGINROOT
 start_continue:
-    DEBUG_PRINTF("in start_object_close\n");
+  DEBUG_PRINTF("in start_object_close\n");
-    UPDATE_CHAR();
+  UPDATE_CHAR();
-    switch (c) {
+  switch (c) {
-        case 0: goto succeed;
+  case 0:
-        default: goto fail;
+    goto succeed;
-    }
+  default:
    goto fail;
  }
-////////////////////////////// OBJECT STATES /////////////////////////////
+  ////////////////////////////// OBJECT STATES /////////////////////////////
 object_begin:
-    printf("in object_begin %c \n",c);
+  DEBUG_PRINTF("in object_begin\n");
-    DEBUG_PRINTF("in object_begin\n");
+  pj.containing_scope_offset[depth] = pj.get_current_loc();
-    pj.containing_scope_offset[depth] = pj.get_current_loc();
+  pj.write_tape(0, c);
-    pj.write_tape(0, c); 
+
-    depth ++;
+  UPDATE_CHAR();
-    if(depth > pj.depthcapacity) {
+  switch (c) {
-        goto fail;
+  case '"': {
-    }
+    if (!parse_string(buf, len, pj, depth, idx)) {
-    UPDATE_CHAR();
+      goto fail;
    switch (c) {
        case '"': {
            if (!parse_string(buf, len, pj, depth, idx)) {
                goto fail;
            }
            goto object_key_state;
        }
        case '}': goto scope_end;
        default: goto fail;
    }
    goto object_key_state;
  }
  case '}':
    goto scope_end; // could also go to object_continue
  default:
    goto fail;
  }
 object_key_state:
-    printf("in object_key_state %c \n",c);
+  DEBUG_PRINTF("in object_key_state\n");
  UPDATE_CHAR();
  if (c != ':') {
    goto fail;
  }
  UPDATE_CHAR();
  switch (c) {
  case '"': {
    if (!parse_string(buf, len, pj, depth, idx)) {
      goto fail;
    }
    break;
  }
  case 't':
    if (!is_valid_true_atom(buf + idx)) {
      goto fail;
    }
    pj.write_tape(0, c);
    break;
  case 'f':
    if (!is_valid_false_atom(buf + idx)) {
      goto fail;
    }
    pj.write_tape(0, c);
    break;
  case 'n':
    if (!is_valid_null_atom(buf + idx)) {
      goto fail;
    }
    pj.write_tape(0, c);
    break;
  case '0': {
    if (!parse_number(buf, len, pj, depth, idx, true, false)) {
      goto fail;
    }
    break;
  }
  case '1':
  case '2':
  case '3':
  case '4':
  case '5':
  case '6':
  case '7':
  case '8':
  case '9': {
    if (!parse_number(buf, len, pj, depth, idx, false, false)) {
      goto fail;
    }
    break;
  }
  case '-': {
    if (!parse_number(buf, len, pj, depth, idx, false, true)) {
      goto fail;
    }
    break;
  }
  case '{': {
    // we have not yet encountered } so we need to come back for it
    pj.ret_address[depth] = &&object_continue;
    // we found an object inside an object, so we need to increment the depth
    depth++;
    if (depth > pj.depthcapacity) {
      goto fail;
    }
-    DEBUG_PRINTF("in object_key_state\n");
+    goto object_begin;
-    UPDATE_CHAR();
+  }
-    if (c != ':') {
+  case '[': {
-        goto fail;
+    // we have not yet encountered } so we need to come back for it
-    }
+    pj.ret_address[depth] = &&object_continue;
-    UPDATE_CHAR();
+    // we found an array inside an object, so we need to increment the depth
-    switch (c) {
+    depth++;
-        case '"': {
+    if (depth > pj.depthcapacity) {
-            if (!parse_string(buf, len, pj, depth, idx)) {
+      goto fail;
                goto fail;
            }
            break;
        }
        case 't': if (!is_valid_true_atom(buf + idx)) {
                    goto fail;
                  }
                  pj.write_tape(0, c);
                  break;
        case 'f': if (!is_valid_false_atom(buf + idx)) {
                    goto fail;
                  }
                  pj.write_tape(0, c);
                  break;
        case 'n': if (!is_valid_null_atom(buf + idx)) {
                    goto fail;
                  }
                  pj.write_tape(0, c);
                  break;
        case '0': {
            if (!parse_number(buf, len, pj, depth, idx, true, false)) {
                goto fail;
            }
            break;
        }
        case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':  {
            if (!parse_number(buf, len, pj, depth, idx, false, false)) {
                goto fail;
            }
            break;
        }
        case '-': {
            if (!parse_number(buf, len, pj, depth, idx, false, true)) {
                goto fail;
            }
            break;
        }
        case '{': {
            pj.ret_address[depth] = &&object_continue; 
            goto object_begin;
        }
        case '[': {
            pj.ret_address[depth] = &&object_continue; 
            goto array_begin;
        }
        default: goto fail;
    }
    goto array_begin;
  }
  default:
    goto fail;
  }
 object_continue:
-    printf("in object_continue %c \n",c);
+  DEBUG_PRINTF("in object_continue\n");
-
+  UPDATE_CHAR();
-    DEBUG_PRINTF("in object_continue\n");
+  switch (c) {
  case ',':
    UPDATE_CHAR();
-    switch (c) {
+    if (c != '"') {
-        case ',': 
+      goto fail;
-            UPDATE_CHAR();
+    } else {
-            if (c != '"') {
+      if (!parse_string(buf, len, pj, depth, idx)) {
-                goto fail;
+        goto fail;
-            } else {
+      }
-                if (!parse_string(buf, len, pj, depth, idx)) {
+      goto object_key_state;
                    goto fail;
                }
                goto object_key_state;
            }
        case '}': goto scope_end;
        default: goto fail;
    }
  case '}':
    goto scope_end;
  default:
    goto fail;
  }
-////////////////////////////// COMMON STATE /////////////////////////////
+  ////////////////////////////// COMMON STATE /////////////////////////////
 scope_end:
-    // write our tape location to the header scope
+  // write our tape location to the header scope
-    depth--;
+  depth--;
-    pj.write_tape(pj.containing_scope_offset[depth], c);
+  pj.write_tape(pj.containing_scope_offset[depth], c);
-    pj.annotate_previousloc(pj.containing_scope_offset[depth], pj.get_current_loc());
+  pj.annotate_previousloc(pj.containing_scope_offset[depth],
-    // goto saved_state
+                          pj.get_current_loc());
-    goto *pj.ret_address[depth];
+  // goto saved_state
-
+  goto *pj.ret_address[depth];
 ////////////////////////////// ARRAY STATES /////////////////////////////
  ////////////////////////////// ARRAY STATES /////////////////////////////
 array_begin:
-    printf("in array_begin %c \n",c);
+  DEBUG_PRINTF("in array_begin\n");
-
+  pj.containing_scope_offset[depth] = pj.get_current_loc();
-    DEBUG_PRINTF("in array_begin\n");
+  pj.write_tape(0, c);
-    pj.containing_scope_offset[depth] = pj.get_current_loc();
+  UPDATE_CHAR();
-    pj.write_tape(0, c); 
+  if (c == ']') {
-    depth ++;
+    goto scope_end; // could also go to array_continue
-    if(depth > pj.depthcapacity) {
+  }
        goto fail;
    }
    UPDATE_CHAR();
    if (c == ']') {
        goto scope_end;
    }
 main_array_switch:
-    // we call update char on all paths in, so we can peek at c on the
+  // we call update char on all paths in, so we can peek at c on the
-    // on paths that can accept a close square brace (post-, and at start)
+  // on paths that can accept a close square brace (post-, and at start)
-    switch (c) {
+  switch (c) {
-        case '"': {
+  case '"': {
-            if (!parse_string(buf, len, pj, depth, idx)) {
+    if (!parse_string(buf, len, pj, depth, idx)) {
-                goto fail;
+      goto fail;
            }
            goto array_continue;
        }
        case 't': if (!is_valid_true_atom(buf + idx)) {
                    goto fail;
                  }
                  pj.write_tape(0, c);
                  break;
        case 'f': if (!is_valid_false_atom(buf + idx)) {
                    goto fail;
                  }
                  pj.write_tape(0, c);
                  break;
        case 'n': if (!is_valid_null_atom(buf + idx)) {
                    goto fail;
                  }
                  pj.write_tape(0, c);
                  break;
        case '0': {
            if (!parse_number(buf, len, pj, depth, idx, true, false)) {
                goto fail;
            }
            break;
        }
        case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':  {
            if (!parse_number(buf, len, pj, depth, idx, false, false)) {
                goto fail;
            }
            break;
        }
        case '-': {
            if (!parse_number(buf, len, pj, depth, idx, false, true)) {
                goto fail;
            }
            break;
        }
        case '{': {
            pj.ret_address[depth] = &&array_continue; 
            goto object_begin;
        }
        case '[': {
            pj.ret_address[depth] = &&array_continue; 
            goto array_begin;
        }
        default: goto fail;
    }
    break;
  }
  case 't':
    if (!is_valid_true_atom(buf + idx)) {
      goto fail;
    }
    pj.write_tape(0, c);
    break; 
  case 'f':
    if (!is_valid_false_atom(buf + idx)) {
      goto fail;
    }
    pj.write_tape(0, c);
    break; 
  case 'n':
    if (!is_valid_null_atom(buf + idx)) {
      goto fail;
    }
    pj.write_tape(0, c);
    break; // goto array_continue;
  case '0': {
    if (!parse_number(buf, len, pj, depth, idx, true, false)) {
      goto fail;
    }
    break; // goto array_continue;
  }
  case '1':
  case '2':
  case '3':
  case '4':
  case '5':
  case '6':
  case '7':
  case '8':
  case '9': {
    if (!parse_number(buf, len, pj, depth, idx, false, false)) {
      goto fail;
    }
    break; // goto array_continue;
  }
  case '-': {
    if (!parse_number(buf, len, pj, depth, idx, false, true)) {
      goto fail;
    }
    break; // goto array_continue;
  }
  case '{': {
    // we have not yet encountered ] so we need to come back for it
    pj.ret_address[depth] = &&array_continue;
    // we found an object inside an array, so we need to increment the depth
    depth++;
    if (depth > pj.depthcapacity) {
      goto fail;
    }
    goto object_begin;
  }
  case '[': {
    // we have not yet encountered ] so we need to come back for it
    pj.ret_address[depth] = &&array_continue;
    // we found an array inside an array, so we need to increment the depth
    depth++;
    if (depth > pj.depthcapacity) {
      goto fail;
    }
    goto array_begin;
  }
  default:
    goto fail;
  }
 array_continue:
-    printf("in array_begin %c \n",c);
+  DEBUG_PRINTF("in array_continue\n");
-
+  UPDATE_CHAR();
-    DEBUG_PRINTF("in array_continue\n");
+  switch (c) {
  case ',':
    UPDATE_CHAR();
-    switch (c) {
+    goto main_array_switch;
-        case ',': UPDATE_CHAR(); goto main_array_switch;
+  case ']':
-        case ']': goto scope_end;
+    goto scope_end;
-        default: goto fail;
+  default:
-    }
+    goto fail;
  }
-////////////////////////////// FINAL STATES /////////////////////////////
+  ////////////////////////////// FINAL STATES /////////////////////////////
 succeed:
-    DEBUG_PRINTF("in succeed\n");
+  DEBUG_PRINTF("in succeed, depth = %d \n", depth);
-    // we annotate the root node
+  // we annotate the root node
-    depth--;
+  // depth--;
-    // next line allows us to go back to the start
+  // next line allows us to go back to the start
-    pj.write_tape(pj.containing_scope_offset[depth], 'r');// r is root
+  pj.write_tape(pj.containing_scope_offset[depth], 'r'); // r is root
-    // next line tells the root node how to go to the end
+  // next line tells the root node how to go to the end
-    pj.annotate_previousloc(pj.containing_scope_offset[depth], pj.get_current_loc());
+  pj.annotate_previousloc(pj.containing_scope_offset[depth],
                          pj.get_current_loc());
 #ifdef DEBUG
-    pj.dump_tapes();
+  pj.dump_tapes();
 #endif
-    return true;
+  return true;
 fail:
-    DEBUG_PRINTF("in fail\n");
+  DEBUG_PRINTF("in fail\n");
 #ifdef DEBUG
-    pj.dump_tapes();
+  pj.dump_tapes();
 #endif
-    return false;    
+  return false;
 }
--- a/tests/allparserscheckfile.cpp
+++ b/tests/allparserscheckfile.cpp
@ -1,3 +1,4 @@
 #include <unistd.h>
 #include "jsonparser/jsonparser.h"
@ -30,7 +31,6 @@ void on_json_error( void *, const fastjson::ErrorContext& ec) {
 // Runs fastjson's DOM parser over the NUL-terminated text `input` and returns
 // whatever fastjson::dom::parse_string reports. The token and chunk that the
 // parser fills in are locals and are discarded on return, so only the boolean
 // outcome reaches the caller. on_json_error is handed to the parser as its
 // error callback.
 // NOTE(review): the literal 0 and NULL arguments are presumably the parse mode
 // and the callback's user-data pointer -- confirm against the fastjson headers.
 bool fastjson_parse(const char *input) {
  fastjson::Token token;
  fastjson::dom::Chunk chunk;
  return fastjson::dom::parse_string(input, &token, &chunk, 0, &on_json_error, NULL);
 }
 // end of fastjson stuff
@ -41,17 +41,30 @@ using namespace rapidjson;
 using namespace std;
 int main(int argc, char *argv[]) {
-  if (argc < 2) {
+  bool verbose = false;
    int c;
  while ((c = getopt (argc, argv, "v")) != -1)
    switch (c)
      {
      case 'v':
        verbose = true;
        break;
      default:
        abort ();
      }
  if (optind >= argc) {
    cerr << "Usage: " << argv[0] << " <jsonfile>\n";
    cerr << "Or " << argv[0] << " -v <jsonfile>\n";
    exit(1);
  }
-  bool verbose = false;
+  const char * filename = argv[optind];
-  if (argc > 2) {
+  std::pair<u8 *, size_t> p;
-    if (strcmp(argv[1], "-v"))
+  try {
-      verbose = true;
+    p = get_corpus(filename);
  } catch (const std::exception& e) { // caught by reference to base
    std::cout << "Could not load the file " << filename << std::endl;
    return EXIT_FAILURE;
  }
  pair<u8 *, size_t> p = get_corpus(argv[argc - 1]);
  if (verbose) {
    std::cout << "Input has ";
    if (p.second > 1024 * 1024)
--- a/tests/jsoncheck.cpp
+++ b/tests/jsoncheck.cpp
@ -5,6 +5,7 @@
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include "jsonparser/jsonparser.h"
@ -41,6 +42,8 @@ bool validate(const char *dirname) {
    printf("nothing in dir %s \n", dirname);
    return false;
  }
  bool * isfileasexpected = new bool[c];
  for(int i = 0; i < c; i++) isfileasexpected[i] = true;
  size_t howmany = 0;
  bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
  for (int i = 0; i < c; i++) {
@ -56,7 +59,13 @@ bool validate(const char *dirname) {
      } else {
        strcpy(fullpath + dirlen, name);
      }
-      std::pair<u8 *, size_t> p = get_corpus(fullpath);
+      std::pair<u8 *, size_t> p;
      try {
        p = get_corpus(fullpath);
      } catch (const std::exception& e) { 
        std::cout << "Could not load the file " << fullpath << std::endl;
        return EXIT_FAILURE;
      }
      ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
      if(pj_ptr == NULL) {
        std::cerr<< "can't allocate memory"<<std::endl;
@ -70,11 +79,13 @@ bool validate(const char *dirname) {
        howmany--;
      } else if (startsWith("pass", name)) {
        if (!isok) {
          isfileasexpected[i] = false;
          printf("warning: file %s should pass but it fails.\n", name);
          everythingfine = false;
        }
      } else if (startsWith("fail", name)) {
        if (isok) {
          isfileasexpected[i] = false;
          printf("warning: file %s should fail but it passes.\n", name);
          everythingfine = false;
        }
@ -87,11 +98,20 @@ bool validate(const char *dirname) {
      deallocate_ParsedJson(pj_ptr);
    }
  }
  printf("%zu files checked.\n", howmany);
  if(everythingfine) {
    printf("All ok!\n");
  } else {
    printf("There were problems! Consider reviewing the following files:\n");
    for(int i = 0; i < c; i++) {
      if(!isfileasexpected[i]) printf("%s \n", entry_list[i]->d_name);
    }
  }
  for (int i = 0; i < c; ++i)
    free(entry_list[i]);
  free(entry_list);
-  printf("%zu files checked.\n", howmany);
+  delete[] isfileasexpected;
-  if(everythingfine) printf("All ok!\n");
+
  return everythingfine;
 }
--- a/tests/numberparsingcheck.cpp
+++ b/tests/numberparsingcheck.cpp
@ -28,7 +28,7 @@ bool startsWith(const char *pre, const char *str) {
  size_t lenpre = strlen(pre), lenstr = strlen(str);
  return lenstr < lenpre ? false : strncmp(pre, str, lenpre) == 0;
 }
-bool is_in_bad_list(char *buf) {
+bool is_in_bad_list(const char *buf) {
  for (size_t i = 0; i < sizeof(really_bad) / sizeof(really_bad[0]); i++)
    if (startsWith(really_bad[i], buf))
      return true;
@ -38,9 +38,9 @@ bool is_in_bad_list(char *buf) {
 inline void foundInvalidNumber(const u8 *buf) {
  invalid_count++;
  char *endptr;
-  double expected = strtod((char *)buf, &endptr);
+  double expected = strtod((const char *)buf, &endptr);
-  if (endptr != (char *)buf) {
+  if (endptr != (const char *)buf) {
-    if (!is_in_bad_list((char *)buf)) {
+    if (!is_in_bad_list((const char *)buf)) {
      printf(
          "Warning: foundInvalidNumber %.32s whereas strtod parses it to %f, ",
          buf, expected);
@ -53,8 +53,8 @@ inline void foundInvalidNumber(const u8 *buf) {
 inline void foundInteger(int64_t result, const u8 *buf) {
  int_count++;
  char *endptr;
-  long long expected = strtoll((char *)buf, &endptr, 10);
+  long long expected = strtoll((const char *)buf, &endptr, 10);
-  if ((endptr == (char *)buf) || (expected != result)) {
+  if ((endptr == (const char *)buf) || (expected != result)) {
    printf("Error: parsed %" PRId64 " out of %.32s, ", result, buf);
    printf(" while parsing %s \n", fullpath);
    parse_error |= PARSE_ERROR;
@ -64,8 +64,8 @@ inline void foundInteger(int64_t result, const u8 *buf) {
 inline void foundFloat(double result, const u8 *buf) {
  char *endptr;
  float_count++;
-  double expected = strtod((char *)buf, &endptr);
+  double expected = strtod((const char *)buf, &endptr);
-  if (endptr == (char *)buf) {
+  if (endptr == (const char *)buf) {
    printf("parsed %f from %.32s whereas strtod refuses to parse a float, ",
           result, buf);
    printf(" while parsing %s \n", fullpath);
@ -123,7 +123,13 @@ bool validate(const char *dirname) {
      } else {
        strcpy(fullpath + dirlen, name);
      }
-      std::pair<u8 *, size_t> p = get_corpus(fullpath);
+      std::pair<u8 *, size_t> p;
      try {
        p = get_corpus(fullpath);
      } catch (const std::exception& e) { 
        std::cout << "Could not load the file " << fullpath << std::endl;
        return EXIT_FAILURE;
      }
      // terrible hack but just to get it working
      ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
      if (pj_ptr == NULL) {
--- a/tests/stringparsingcheck.cpp
+++ b/tests/stringparsingcheck.cpp
@ -241,7 +241,7 @@ inline void foundString(const u8 *buf, const u8 *parsed_begin,
    // we have a zero-length string
    if (parsed_begin != parsed_end) {
      printf("WARNING: We have a zero-length but gap is %zu \n",
-             parsed_end - parsed_begin);
+             (size_t)(parsed_end - parsed_begin));
      probable_bug = true;
    }
    empty_string++;
@ -252,12 +252,12 @@ inline void foundString(const u8 *buf, const u8 *parsed_begin,
    printf("WARNING: lengths on parsed strings disagree %zu %zu \n", thislen,
           len);
    printf("\nour parsed string  : '%*s'\n\n", (int)thislen,
-           (char *)parsed_begin);
+           (const char *)parsed_begin);
-    print_hex((char *)parsed_begin, thislen);
+    print_hex((const char *)parsed_begin, thislen);
    printf("\n");
    printf("reference parsing   :'%*s'\n\n", (int)len, bigbuffer);
-    print_hex((char *)bigbuffer, len);
+    print_hex((const char *)bigbuffer, len);
    printf("\n");
    probable_bug = true;
@ -267,15 +267,15 @@ inline void foundString(const u8 *buf, const u8 *parsed_begin,
    printf("Lengths %zu %zu  \n", thislen, len);
    printf("\nour parsed string  : '%*s'\n", (int)thislen,
-           (char *)parsed_begin);
+           (const char *)parsed_begin);
-    print_hex((char *)parsed_begin, thislen);
+    print_hex((const char *)parsed_begin, thislen);
    printf("\n");
    printf("reference parsing   :'%*s'\n", (int)len, bigbuffer);
-    print_hex((char *)bigbuffer, len);
+    print_hex((const char *)bigbuffer, len);
    printf("\n");
-    print_cmp_hex((char *)parsed_begin, bigbuffer, thislen);
+    print_cmp_hex((const char *)parsed_begin, bigbuffer, thislen);
    probable_bug = true;
  }
@ -325,8 +325,13 @@ bool validate(const char *dirname) {
      } else {
        strcpy(fullpath + dirlen, name);
      }
-      std::pair<u8 *, size_t> p = get_corpus(fullpath);
+      std::pair<u8 *, size_t> p;
-      // terrible hack but just to get it working
+      try {
        p = get_corpus(fullpath);
      } catch (const std::exception& e) { 
        std::cout << "Could not load the file " << fullpath << std::endl;
        return EXIT_FAILURE;
      }      
      ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
      if (pj_ptr == NULL) {
        std::cerr << "can't allocate memory" << std::endl;