Lots and lots of cleaning.

2018-11-27 14:37:59 -05:00 · 2018-11-27 14:37:59 -05:00 · a43b0772e1
parent 5fae7b2100
commit a43b0772e1
15 changed files with 521 additions and 465 deletions
--- a/2
+++ b/2
@ -7,7 +7,7 @@
 .PHONY: clean cleandist

 DEPSINCLUDE = -Idependencies/rapidjson/include -Idependencies/sajson/include -Idependencies/json11 -Idependencies/fastjson/src -Idependencies/fastjson/include -Idependencies/gason/src -Idependencies/ujson4c/3rdparty -Idependencies/ujson4c/src
-CXXFLAGS =  -std=c++11  -march=native -Wall -Wextra -Wshadow -Iinclude  -Ibenchmark/linux  $(DEPSINCLUDE) 
+CXXFLAGS =  -std=c++11  -march=native -Wall -Wextra -Wshadow -Iinclude  -Ibenchmark/linux  $(DEPSINCLUDE)    
 CFLAGS = -march=native  -Idependencies/ujson4c/3rdparty -Idependencies/ujson4c/src
 ifeq ($(SANITIZE),1)
 	CXXFLAGS += -g3 -O0  -fsanitize=address -fno-omit-frame-pointer -fsanitize=undefined
--- a/benchmark/linux/linux-perf-events.h
+++ b/benchmark/linux/linux-perf-events.h
@ -21,7 +21,7 @@ template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
  std::vector<uint64_t> ids;

 public:
-  LinuxEvents(std::vector<int> config_vec) : fd(0) {
+  explicit LinuxEvents(std::vector<int> config_vec) : fd(0) {
    memset(&attribs, 0, sizeof(attribs));
    attribs.type = TYPE;
    attribs.size = sizeof(attribs);
--- a/benchmark/minifiercompetition.cpp
+++ b/benchmark/minifiercompetition.cpp
@ -1,3 +1,4 @@
+#include <unistd.h>
 #include <iostream>

 #include "benchmark.h"
@ -13,6 +14,7 @@
 #include "rapidjson/writer.h"
 #include "sajson.h"

+
 using namespace rapidjson;
 using namespace std;

@ -43,17 +45,29 @@ std::string rapidstringme(char *json) {
 }

 int main(int argc, char *argv[]) {
-  if (argc < 2) {
-    cerr << "Usage: " << argv[0] << " <jsonfile>\n";
-    cerr << "Or " << argv[0] << " -v <jsonfile>\n";
+  int c;
+  bool verbose = false;
+  while ((c = getopt (argc, argv, "v")) != -1)
+    switch (c)
+      {
+      case 'v':
+        verbose = true;
+        break;
+      default:
+        abort ();
+      }
+  if (optind >= argc) {
+    cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
    exit(1);
  }
-  bool verbose = false;
-  if (argc > 2) {
-    if (strcmp(argv[1], "-v"))
-      verbose = true;
+  const char * filename = argv[optind];
+  pair<u8 *, size_t> p;
+  try {
+    p = get_corpus(filename);
+  } catch (const std::exception& e) { // caught by reference to base
+    std::cout << "Could not load the file " << filename << std::endl;
+    return EXIT_FAILURE;
  }
-  pair<u8 *, size_t> p = get_corpus(argv[argc - 1]);
  if (verbose) {
    std::cout << "Input has ";
    if (p.second > 1024 * 1024)
--- a/benchmark/parse.cpp
+++ b/benchmark/parse.cpp
@ -31,79 +31,14 @@
 #include "jsonparser/stage34_unified.h"
 using namespace std;

-// https://stackoverflow.com/questions/2616906/how-do-i-output-coloured-text-to-a-linux-terminal
-namespace Color {
-enum Code {
-  FG_DEFAULT = 39,
-  FG_BLACK = 30,
-  FG_RED = 31,
-  FG_GREEN = 32,
-  FG_YELLOW = 33,
-  FG_BLUE = 34,
-  FG_MAGENTA = 35,
-  FG_CYAN = 36,
-  FG_LIGHT_GRAY = 37,
-  FG_DARK_GRAY = 90,
-  FG_LIGHT_RED = 91,
-  FG_LIGHT_GREEN = 92,
-  FG_LIGHT_YELLOW = 93,
-  FG_LIGHT_BLUE = 94,
-  FG_LIGHT_MAGENTA = 95,
-  FG_LIGHT_CYAN = 96,
-  FG_WHITE = 97,
-  BG_RED = 41,
-  BG_GREEN = 42,
-  BG_BLUE = 44,
-  BG_DEFAULT = 49
-};
-class Modifier {
-  Code code;
-
-public:
-  Modifier(Code pCode) : code(pCode) {}
-  friend std::ostream &operator<<(std::ostream &os, const Modifier &mod) {
-    return os << "\033[" << mod.code << "m";
-  }
-};
-} // namespace Color
-
-void colorfuldisplay(ParsedJson &pj, const u8 *buf) {
-  Color::Modifier greenfg(Color::FG_GREEN);
-  Color::Modifier yellowfg(Color::FG_YELLOW);
-  Color::Modifier deffg(Color::FG_DEFAULT);
-  size_t i = 0;
-  // skip initial fluff
-  while ((i + 1 < pj.n_structural_indexes) &&
-         (pj.structural_indexes[i] == pj.structural_indexes[i + 1])) {
-    i++;
-  }
-  for (; i < pj.n_structural_indexes; i++) {
-    u32 idx = pj.structural_indexes[i];
-    u8 c = buf[idx];
-    if (((c & 0xdf) == 0x5b)) { // meaning 7b or 5b, { or [
-      std::cout << greenfg << buf[idx] << deffg;
-    } else if (((c & 0xdf) == 0x5d)) { // meaning 7d or 5d, } or ]
-      std::cout << greenfg << buf[idx] << deffg;
-    } else {
-      std::cout << yellowfg << buf[idx] << deffg;
-    }
-    if (i + 1 < pj.n_structural_indexes) {
-      u32 nextidx = pj.structural_indexes[i + 1];
-      for (u32 pos = idx + 1; pos < nextidx; pos++) {
-        std::cout << buf[pos];
-      }
-    }
-  }
-  std::cout << std::endl;
-}
-
 int main(int argc, char *argv[]) {
  bool verbose = false;
  bool dump = false;
+  bool forceoneiteration = false;

  int c;

-  while ((c = getopt (argc, argv, "vd")) != -1)
+  while ((c = getopt (argc, argv, "1vd")) != -1)
    switch (c)
      {
      case 'v':
@ -112,6 +47,9 @@ int main(int argc, char *argv[]) {
      case 'd':
        dump = true;
        break;
+      case '1':
+        forceoneiteration = true;
+        break;
      default:
        abort ();
      }
@ -124,7 +62,13 @@ int main(int argc, char *argv[]) {
    cerr << "warning: ignoring everything after " << argv[optind  + 1] << endl;
  }
  if(verbose) cout << "[verbose] loading " << filename << endl;
-  pair<u8 *, size_t> p = get_corpus(filename);
+  pair<u8 *, size_t> p;
+  try {
+    p = get_corpus(filename);
+  } catch (const std::exception& e) { // caught by reference to base
+    std::cout << "Could not load the file " << filename << std::endl;
+    return EXIT_FAILURE;
+  }
  if(verbose) cout << "[verbose] loaded " << filename << " ("<< p.second << " bytes)" << endl;
  ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
  ParsedJson &pj(*pj_ptr);
@ -133,7 +77,7 @@ int main(int argc, char *argv[]) {
 #if defined(DEBUG)
  const u32 iterations = 1;
 #else
-  const u32 iterations = p.second < 1 * 1000 * 1000? 1000 : 10;
+  const u32 iterations = forceoneiteration ? 1 : ( p.second < 1 * 1000 * 1000? 1000 : 10);
 #endif
  vector<double> res;
  res.resize(iterations);
@ -174,7 +118,7 @@ int main(int argc, char *argv[]) {
    }
    unified.start();
 #endif
-    isok = flatten_indexes(p.second, pj);
+    isok = isok && flatten_indexes(p.second, pj);
 #ifndef SQUASH_COUNTERS
    unified.end(results);
    cy2 += results[0];
@ -187,7 +131,7 @@ int main(int argc, char *argv[]) {
    unified.start();
 #endif

-    isok = unified_machine(p.first, p.second, pj);
+    isok = isok && unified_machine(p.first, p.second, pj);
 #ifndef SQUASH_COUNTERS
    unified.end(results);
    cy3 += results[0];
--- a/benchmark/parsingcompetition.cpp
+++ b/benchmark/parsingcompetition.cpp
@ -31,7 +31,6 @@ void on_json_error( void *, const fastjson::ErrorContext& ec) {
 bool fastjson_parse(const char *input) {
  fastjson::Token token;
  fastjson::dom::Chunk chunk;
-  std::string error_message;
  return fastjson::dom::parse_string(input, &token, &chunk, 0, &on_json_error, NULL);
 }
 // end of fastjson stuff
@ -62,7 +61,14 @@ int main(int argc, char *argv[]) {
  if(optind + 1 < argc) {
    cerr << "warning: ignoring everything after " << argv[optind  + 1] << endl;
  }
-  pair<u8 *, size_t> p = get_corpus(filename);
+  pair<u8 *, size_t> p;
+  try {
+    p = get_corpus(filename);
+  } catch (const std::exception& e) { // caught by reference to base
+    std::cout << "Could not load the file " << filename << std::endl;
+    return EXIT_FAILURE;
+  }
+  
  if (verbose) {
    std::cout << "Input has ";
    if (p.second > 1024 * 1024)
--- a/include/jsonparser/jsonioutil.h
+++ b/include/jsonparser/jsonioutil.h
@ -20,6 +20,13 @@ char * allocate_aligned_buffer(size_t length);
 // first element of the pair is a string (null terminated)
 // whereas the second element is the length.
 // caller is responsible to free (free std::pair<u8 *, size_t>.first)
+// 
+// Throws an exception if the file cannot be opened; wrap the call in try/catch:
+//      try {
+//        p = get_corpus(filename);
+//      } catch (const std::exception& e) { 
+//        std::cout << "Could not load the file " << filename << std::endl;
+//      }
 std::pair<u8 *, size_t> get_corpus(std::string filename);

 #endif
--- a/include/jsonparser/numberparsing.h
+++ b/include/jsonparser/numberparsing.h
@ -128,7 +128,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
  const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1);
  const __m128i mul_1_10000 =
      _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1);
-  const __m128i input = _mm_sub_epi8(_mm_loadu_si128((__m128i *)chars), ascii0);
+  const __m128i input = _mm_sub_epi8(_mm_loadu_si128((const __m128i *)chars), ascii0);
  const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10);
  const __m128i t2 = _mm_madd_epi16(t1, mul_1_100);
  const __m128i t3 = _mm_packus_epi32(t2, t2);
@ -149,7 +149,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
 //
 static never_inline bool
 parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
-                          ParsedJson &pj, const u32 depth, const u32 offset,
+                          ParsedJson &pj, UNUSED const u32 depth, const u32 offset,
                          UNUSED bool found_zero, bool found_minus) {
  const char *p = (const char *)(buf + offset);

@ -193,7 +193,6 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
    }
    exponent = firstafterperiod - p;
  }
-  int64_t expnumber = 0; // exponential part
  if (('e' == *p) || ('E' == *p)) {
    ++p;
    bool negexp = false;
@ -210,7 +209,7 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
      return false;
    }
    unsigned char digit = *p - '0';
-    expnumber = digit;
+    int64_t expnumber = digit; // exponential part
    p++;
    if (is_integer(*p)) {
      digit = *p - '0';
@ -270,7 +269,7 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
 //
 static never_inline bool parse_large_integer(const u8 *const buf,
                                             UNUSED size_t len, ParsedJson &pj,
-                                             const u32 depth, const u32 offset,
+                                             UNUSED const u32 depth, const u32 offset,
                                             UNUSED bool found_zero,
                                             bool found_minus) {
  const char *p = (const char *)(buf + offset);
@ -340,10 +339,12 @@ static never_inline bool parse_large_integer(const u8 *const buf,
 #define unlikely(x) __builtin_expect(!!(x), 0)
 #endif

+
+
 // parse the number at buf + offset
 // define JSON_TEST_NUMBERS for unit testing
 static really_inline bool parse_number(const u8 *const buf, UNUSED size_t len,
-                                       ParsedJson &pj, const u32 depth,
+                                       ParsedJson &pj, UNUSED const u32 depth,
                                       const u32 offset, UNUSED bool found_zero,
                                       bool found_minus) {
  const char *p = (const char *)(buf + offset);
--- a/include/jsonparser/simdjson_internal.h
+++ b/include/jsonparser/simdjson_internal.h
@ -105,14 +105,14 @@ public:


    void write_tape_s64(s64 i) {
-        *((s64 *)current_number_buf_loc) = i;
-        current_number_buf_loc += 8;
+        *((s64 *)current_number_buf_loc) = i;// safe because array will be 8-byte aligned, could use memcpy
+        current_number_buf_loc += sizeof(s64);
        write_tape(current_number_buf_loc - number_buf, 'l');
    }

    void write_tape_double(double d) {
-        *((double *)current_number_buf_loc) = d;
-        current_number_buf_loc += 8;
+        *((double *)current_number_buf_loc) = d;// safe because array will be 8-byte aligned, could use memcpy
+        current_number_buf_loc += sizeof(double);
        write_tape(current_number_buf_loc - number_buf, 'd');
    }

@ -137,7 +137,7 @@ public:
        u32 scope_header; // the start of our current scope that contains our current location
        u32 location;     // our current location on a tape

-        ParsedJsonHandle(ParsedJson & pj_) : pj(pj_), depth(0), scope_header(0), location(0) {}
+        explicit ParsedJsonHandle(ParsedJson & pj_) : pj(pj_), depth(0), scope_header(0), location(0) {}
        // OK with default copy constructor as the way to clone the POD structure

        // some placeholder navigation. Will convert over to a more native C++-ish way of doing
@ -167,7 +167,7 @@ public:


 #ifdef DEBUG
-inline void dump256(m256 d, std::string msg) {
+inline void dump256(m256 d, const std::string msg) {
  for (u32 i = 0; i < 32; i++) {
    std::cout << std::setw(3) << (int)*(((u8 *)(&d)) + i);
    if (!((i + 1) % 8))
@ -181,14 +181,14 @@ inline void dump256(m256 d, std::string msg) {
 }

 // dump bits low to high
-inline void dumpbits(u64 v, std::string msg) {
+inline void dumpbits(u64 v, const std::string msg) {
  for (u32 i = 0; i < 64; i++) {
    std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_");
  }
  std::cout << " " << msg << "\n";
 }

-inline void dumpbits32(u32 v, std::string msg) {
+inline void dumpbits32(u32 v, const std::string msg) {
  for (u32 i = 0; i < 32; i++) {
    std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_");
  }
@ -201,14 +201,14 @@ inline void dumpbits32(u32 v, std::string msg) {
 #endif

 // dump bits low to high
-inline void dumpbits_always(u64 v, std::string msg) {
+inline void dumpbits_always(u64 v, const std::string msg) {
  for (u32 i = 0; i < 64; i++) {
    std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_");
  }
  std::cout << " " << msg << "\n";
 }

-inline void dumpbits32_always(u32 v, std::string msg) {
+inline void dumpbits32_always(u32 v, const std::string msg) {
  for (u32 i = 0; i < 32; i++) {
    std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_");
  }
--- a/include/jsonparser/stringparsing.h
+++ b/include/jsonparser/stringparsing.h
@ -58,7 +58,7 @@ really_inline bool handle_unicode_codepoint(const u8 **src_ptr, u8 **dst_ptr) {
 }

 really_inline  bool parse_string(const u8 *buf, UNUSED size_t len,
-                                ParsedJson &pj, u32 depth, u32 offset) {
+                                ParsedJson &pj, UNUSED const u32 depth, u32 offset) {
  using namespace std;
  const u8 *src = &buf[offset + 1]; // we know that buf at offset is a "
  u8 *dst = pj.current_string_buf_loc;
--- a/src/jsonminifier.cpp
+++ b/src/jsonminifier.cpp
@ -137,7 +137,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
      uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
          _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
      quote_mask ^= prev_iter_inside_quote;
-      prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);
+      prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// implementation-defined (not undefined) before C++20; relies on arithmetic right shift
      const __m256i low_nibble_mask = _mm256_setr_epi8(
          //  0                           9  a   b  c  d
          16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
@ -220,7 +220,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
    uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
        _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
    quote_mask ^= prev_iter_inside_quote;
-    prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);
+    // prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// we don't need this anymore

    __m256i mask_20 = _mm256_set1_epi8(0x20); // c==32
    __m256i mask_70 =
--- a/src/stage34_unified.cpp
+++ b/src/stage34_unified.cpp
@ -10,9 +10,9 @@
 #include <cstring>

 #include "jsonparser/common_defs.h"
-#include "jsonparser/simdjson_internal.h"
 #include "jsonparser/jsoncharutils.h"
 #include "jsonparser/numberparsing.h"
+#include "jsonparser/simdjson_internal.h"
 #include "jsonparser/stringparsing.h"

 #include <iostream>
@ -20,390 +20,430 @@
 #define PATH_SEP '/'

 #if defined(DEBUG) && !defined(DEBUG_PRINTF)
-#include <string.h>
 #include <stdio.h>
-#define DEBUG_PRINTF(format, ...) printf("%s:%s:%d:" format, \
-                                         strrchr(__FILE__, PATH_SEP) + 1, \
-                                         __func__, __LINE__,  ## __VA_ARGS__)
+#include <string.h>
+#define DEBUG_PRINTF(format, ...)                                              \
+  printf("%s:%s:%d:" format, strrchr(__FILE__, PATH_SEP) + 1, __func__,        \
+         __LINE__, ##__VA_ARGS__)
 #elif !defined(DEBUG_PRINTF)
-#define DEBUG_PRINTF(format, ...) do { } while(0)
+#define DEBUG_PRINTF(format, ...)                                              \
+  do {                                                                         \
+  } while (0)
 #endif

 using namespace std;

 WARN_UNUSED
-really_inline bool is_valid_true_atom(const u8 * loc) {
-    u64 tv = *(const u64 *)"true    ";
-    u64 mask4 = 0x00000000ffffffff;
-    u32 error = 0;
-    u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
-    std::memcpy(&locval, loc, sizeof(u64));
-    error = (locval & mask4) ^ tv;
-    error |= is_not_structural_or_whitespace(loc[4]);
-    return error == 0;
+really_inline bool is_valid_true_atom(const u8 *loc) {
+  u64 tv = *(const u64 *)"true    ";
+  u64 mask4 = 0x00000000ffffffff;
+  u32 error = 0;
+  u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
+  std::memcpy(&locval, loc, sizeof(u64));
+  error = (locval & mask4) ^ tv;
+  error |= is_not_structural_or_whitespace(loc[4]);
+  return error == 0;
 }

 WARN_UNUSED
-really_inline bool is_valid_false_atom(const u8 * loc) {
-    u64 fv = *(const u64 *)"false   ";
-    u64 mask5 = 0x000000ffffffffff;
-    u32 error = 0;
-    u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
-    std::memcpy(&locval, loc, sizeof(u64));
-    error = (locval & mask5) ^ fv;
-    error |= is_not_structural_or_whitespace(loc[5]);
-    return error == 0;
+really_inline bool is_valid_false_atom(const u8 *loc) {
+  u64 fv = *(const u64 *)"false   ";
+  u64 mask5 = 0x000000ffffffffff;
+  u32 error = 0;
+  u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
+  std::memcpy(&locval, loc, sizeof(u64));
+  error = (locval & mask5) ^ fv;
+  error |= is_not_structural_or_whitespace(loc[5]);
+  return error == 0;
 }

 WARN_UNUSED
-really_inline bool is_valid_null_atom(const u8 * loc) {
-    u64 nv = *(const u64 *)"null    ";
-    u64 mask4 = 0x00000000ffffffff;
-    u32 error = 0;
-    u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
-    std::memcpy(&locval, loc, sizeof(u64));
-    error = (locval & mask4) ^ nv;
-    error |= is_not_structural_or_whitespace(loc[4]);
-    return error == 0;
+really_inline bool is_valid_null_atom(const u8 *loc) {
+  u64 nv = *(const u64 *)"null    ";
+  u64 mask4 = 0x00000000ffffffff;
+  u32 error = 0;
+  u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
+  std::memcpy(&locval, loc, sizeof(u64));
+  error = (locval & mask4) ^ nv;
+  error |= is_not_structural_or_whitespace(loc[4]);
+  return error == 0;
 }

-// Implemented using Labels as Values which works in GCC and CLANG (and maybe also in Intel's compiler),
-// but won't work in MSVC. This would need to be reimplemented differently
-// if one wants to be standard compliant.
+// Implemented using Labels as Values which works in GCC and CLANG (and maybe
+// also in Intel's compiler), but won't work in MSVC. This would need to be
+// reimplemented differently if one wants to be standard compliant.
 WARN_UNUSED
 bool unified_machine(const u8 *buf, size_t len, ParsedJson &pj) {
-    u32 i = 0; // index of the structural character (0,1,2,3...)
-    u32 idx; // location of the structural character in the input (buf)
-    u8 c; // used to track the (structural) character we are looking at, updated by UPDATE_CHAR macro
-    u32 depth = 0;//START_DEPTH; // an arbitrary starting depth
-    //void * ret_address[MAX_DEPTH]; // used to store "labels as value" (non-standard compiler extension)
-
-    // a call site is the start of either an object or an array ('[' or '{')
-    // this is the location of the previous call site 
-    // (in the tape, at the given depth); 
-    // we only need one.
-
-    // We should also track the tape address of our containing
-    // scope for two reasons. First, we will need to put an 
-    // up pointer there at each call site so we can navigate
-    // upwards. Second, when we encounter the end of the scope
-    // we can put the current offset into a record for the 
-    // scope so we know where it is
-
-    //u32 containing_scope_offset[MAX_DEPTH];
-
-    pj.init();
-
-    // add a sentinel to the end to avoid premature exit
-    // need to be able to find the \0 at the 'padded length' end of the buffer
-    // FIXME: TERRIFYING!
-    //size_t j;
-    //for (j = len; buf[j] != 0; j++)
-    //    ;
-    //pj.structural_indexes[pj.n_structural_indexes++] = j;
-
+  u32 i = 0; // index of the structural character (0,1,2,3...)
+  u32 idx;   // location of the structural character in the input (buf)
+  u8 c; // used to track the (structural) character we are looking at, updated
+        // by UPDATE_CHAR macro
+  u32 depth = 0; // could have an arbitrary starting depth
+  pj.init();
 // this macro reads the next structural character, updating idx, i and c.
-#define UPDATE_CHAR() { idx = pj.structural_indexes[i++]; c = buf[idx]; DEBUG_PRINTF("Got %c at %d (%d offset)\n", c, idx, i-1);}
+#define UPDATE_CHAR()                                                          \
+  {                                                                            \
+    idx = pj.structural_indexes[i++];                                          \
+    c = buf[idx];                                                              \
+    DEBUG_PRINTF("Got %c at %d (%d offset) (depth %d)\n", c, idx, i - 1,       \
+                 depth);                                                       \
+  }


-
-
-
-
-////////////////////////////// START STATE /////////////////////////////
-printf("at start\n");
-    DEBUG_PRINTF("at start\n");
-    pj.ret_address[depth] = &&start_continue; 
-    pj.containing_scope_offset[depth] = pj.get_current_loc(); 
-    pj.write_tape(0, 'r'); // r for root, 0 is going to get overwritten
-    depth++;// everything starts at depth = 1, depth = 0 is just for the root
-    if(depth > pj.depthcapacity) {
-        goto fail;
-    }
-    printf("got char %c \n",c);
-    UPDATE_CHAR();
-    switch (c) {
-        case '{': goto object_begin;
-        case '[': goto array_begin;
+  ////////////////////////////// START STATE /////////////////////////////
+  DEBUG_PRINTF("at start\n");
+  pj.ret_address[depth] = &&start_continue;
+  pj.containing_scope_offset[depth] = pj.get_current_loc();
+  pj.write_tape(0, 'r'); // r for root, 0 is going to get overwritten
+  depth++; // everything starts at depth = 1, depth = 0 is just for the root
+  if (depth > pj.depthcapacity) {
+    goto fail;
+  }
+  UPDATE_CHAR();
+  switch (c) {
+  case '{':
+    goto object_begin;
+  case '[':
+    goto array_begin;
 #define SIMDJSON_ALLOWANYTHINGINROOT
-   // A JSON text is a serialized value.  Note that certain previous
-   // specifications of JSON constrained a JSON text to be an object or an
-   // array.  Implementations that generate only objects or arrays where a
-   // JSON text is called for will be interoperable in the sense that all
-   // implementations will accept these as conforming JSON texts.
-   // https://tools.ietf.org/html/rfc8259
+    // A JSON text is a serialized value.  Note that certain previous
+    // specifications of JSON constrained a JSON text to be an object or an
+    // array.  Implementations that generate only objects or arrays where a
+    // JSON text is called for will be interoperable in the sense that all
+    // implementations will accept these as conforming JSON texts.
+    // https://tools.ietf.org/html/rfc8259
 #ifdef SIMDJSON_ALLOWANYTHINGINROOT
-        case '"': {
-            if (!parse_string(buf, len, pj, depth, idx)) {
-                goto fail;
-            }
-            goto start_continue;
-        }
-        case 't': 
-            if (!is_valid_true_atom(buf + idx)) {
-                goto fail;
-            }
-            pj.write_tape(0, c);
-            goto start_continue;
-        case 'f': 
-            if (!is_valid_false_atom(buf + idx)) {
-                goto fail;
-            }
-            pj.write_tape(0, c);
-            goto start_continue;
-        case 'n': 
-            if (!is_valid_null_atom(buf + idx)) {
-                goto fail;
-            }
-            pj.write_tape(0, c);
-            goto start_continue;
-        case '0': {
-            if (!parse_number(buf, len, pj, depth, idx, true, false)) {
-                goto fail;
-            }
-            goto start_continue;
-        }
-        case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':  {
-            if (!parse_number(buf, len, pj, depth, idx, false, false)) {
-                goto fail;
-            }
-            goto start_continue;
-        }
-        case '-': {
-            if (!parse_number(buf, len, pj, depth, idx, false, true)) {
-                goto fail;
-            }
-            goto start_continue;
-        }
-#endif // ALLOWANYTHINGINROOT
-        default: goto fail;
+  case '"': {
+    if (!parse_string(buf, len, pj, depth, idx)) {
+      goto fail;
    }
+    break;
+  }
+  case 't':
+    if (!is_valid_true_atom(buf + idx)) {
+      goto fail;
+    }
+    pj.write_tape(0, c);
+    break;
+  case 'f':
+    if (!is_valid_false_atom(buf + idx)) {
+      goto fail;
+    }
+    pj.write_tape(0, c);
+    break;
+  case 'n':
+    if (!is_valid_null_atom(buf + idx)) {
+      goto fail;
+    }
+    pj.write_tape(0, c);
+    break;
+  case '0': {
+    if (!parse_number(buf, len, pj, depth, idx, true, false)) {
+      goto fail;
+    }
+
+    break;
+  }
+  case '1':
+  case '2':
+  case '3':
+  case '4':
+  case '5':
+  case '6':
+  case '7':
+  case '8':
+  case '9': {
+    if (!parse_number(buf, len, pj, depth, idx, false, false)) {
+      goto fail;
+    }
+    break;
+  }
+  case '-': {
+    if (!parse_number(buf, len, pj, depth, idx, false, true)) {
+      goto fail;
+    }
+    break;
+  }
+#endif // ALLOWANYTHINGINROOT
+  default:
+    goto fail;
+  }
+#ifdef SIMDJSON_ALLOWANYTHINGINROOT
+  depth--; // for fall-through cases (e.g., documents containing just a string)
+#endif     // ALLOWANYTHINGINROOT

 start_continue:
-    DEBUG_PRINTF("in start_object_close\n");
-    UPDATE_CHAR();
-    switch (c) {
-        case 0: goto succeed;
-        default: goto fail;
-    }
+  DEBUG_PRINTF("in start_object_close\n");
+  UPDATE_CHAR();
+  switch (c) {
+  case 0:
+    goto succeed;
+  default:
+    goto fail;
+  }

-////////////////////////////// OBJECT STATES /////////////////////////////
+  ////////////////////////////// OBJECT STATES /////////////////////////////

 object_begin:
-    printf("in object_begin %c \n",c);
-    DEBUG_PRINTF("in object_begin\n");
-    pj.containing_scope_offset[depth] = pj.get_current_loc();
-    pj.write_tape(0, c); 
-    depth ++;
-    if(depth > pj.depthcapacity) {
-        goto fail;
-    }
-    UPDATE_CHAR();
-    switch (c) {
-        case '"': {
-            if (!parse_string(buf, len, pj, depth, idx)) {
-                goto fail;
-            }
-            goto object_key_state;
-        }
-        case '}': goto scope_end;
-        default: goto fail;
+  DEBUG_PRINTF("in object_begin\n");
+  pj.containing_scope_offset[depth] = pj.get_current_loc();
+  pj.write_tape(0, c);
+
+  UPDATE_CHAR();
+  switch (c) {
+  case '"': {
+    if (!parse_string(buf, len, pj, depth, idx)) {
+      goto fail;
    }
+    goto object_key_state;
+  }
+  case '}':
+    goto scope_end; // could also go to object_continue
+  default:
+    goto fail;
+  }

 object_key_state:
-    printf("in object_key_state %c \n",c);
+  DEBUG_PRINTF("in object_key_state\n");
+  UPDATE_CHAR();
+  if (c != ':') {
+    goto fail;
+  }
+  UPDATE_CHAR();
+  switch (c) {
+  case '"': {
+    if (!parse_string(buf, len, pj, depth, idx)) {
+      goto fail;
+    }
+    break;
+  }
+  case 't':
+    if (!is_valid_true_atom(buf + idx)) {
+      goto fail;
+    }
+    pj.write_tape(0, c);
+    break;
+  case 'f':
+    if (!is_valid_false_atom(buf + idx)) {
+      goto fail;
+    }
+    pj.write_tape(0, c);
+    break;
+  case 'n':
+    if (!is_valid_null_atom(buf + idx)) {
+      goto fail;
+    }
+    pj.write_tape(0, c);
+    break;
+  case '0': {
+    if (!parse_number(buf, len, pj, depth, idx, true, false)) {
+      goto fail;
+    }
+    break;
+  }
+  case '1':
+  case '2':
+  case '3':
+  case '4':
+  case '5':
+  case '6':
+  case '7':
+  case '8':
+  case '9': {
+    if (!parse_number(buf, len, pj, depth, idx, false, false)) {
+      goto fail;
+    }
+    break;
+  }
+  case '-': {
+    if (!parse_number(buf, len, pj, depth, idx, false, true)) {
+      goto fail;
+    }
+    break;
+  }
+  case '{': {
+    // we have not yet encountered } so we need to come back for it
+    pj.ret_address[depth] = &&object_continue;
+    // we found an object inside an object, so we need to increment the depth
+    depth++;
+    if (depth > pj.depthcapacity) {
+      goto fail;
+    }

-    DEBUG_PRINTF("in object_key_state\n");
-    UPDATE_CHAR();
-    if (c != ':') {
-        goto fail;
-    }
-    UPDATE_CHAR();
-    switch (c) {
-        case '"': {
-            if (!parse_string(buf, len, pj, depth, idx)) {
-                goto fail;
-            }
-            break;
-        }
-        case 't': if (!is_valid_true_atom(buf + idx)) {
-                    goto fail;
-                  }
-                  pj.write_tape(0, c);
-                  break;
-        case 'f': if (!is_valid_false_atom(buf + idx)) {
-                    goto fail;
-                  }
-                  pj.write_tape(0, c);
-                  break;
-        case 'n': if (!is_valid_null_atom(buf + idx)) {
-                    goto fail;
-                  }
-                  pj.write_tape(0, c);
-                  break;
-        case '0': {
-            if (!parse_number(buf, len, pj, depth, idx, true, false)) {
-                goto fail;
-            }
-            break;
-        }
-        case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':  {
-            if (!parse_number(buf, len, pj, depth, idx, false, false)) {
-                goto fail;
-            }
-            break;
-        }
-        case '-': {
-            if (!parse_number(buf, len, pj, depth, idx, false, true)) {
-                goto fail;
-            }
-            break;
-        }
-        case '{': {
-            pj.ret_address[depth] = &&object_continue; 
-            goto object_begin;
-        }
-        case '[': {
-            pj.ret_address[depth] = &&object_continue; 
-            goto array_begin;
-        }
-        default: goto fail;
+    goto object_begin;
+  }
+  case '[': {
+    // we have not yet encountered } so we need to come back for it
+    pj.ret_address[depth] = &&object_continue;
+    // we found an array inside an object, so we need to increment the depth
+    depth++;
+    if (depth > pj.depthcapacity) {
+      goto fail;
    }
+    goto array_begin;
+  }
+  default:
+    goto fail;
+  }

 object_continue:
-    printf("in object_continue %c \n",c);
-
-    DEBUG_PRINTF("in object_continue\n");
+  DEBUG_PRINTF("in object_continue\n");
+  UPDATE_CHAR();
+  switch (c) {
+  case ',':
    UPDATE_CHAR();
-    switch (c) {
-        case ',': 
-            UPDATE_CHAR();
-            if (c != '"') {
-                goto fail;
-            } else {
-                if (!parse_string(buf, len, pj, depth, idx)) {
-                    goto fail;
-                }
-                goto object_key_state;
-            }
-        case '}': goto scope_end;
-        default: goto fail;
-    }
-
-////////////////////////////// COMMON STATE /////////////////////////////
-
-scope_end: 
-    // write our tape location to the header scope
-    depth--;
-    pj.write_tape(pj.containing_scope_offset[depth], c);
-    pj.annotate_previousloc(pj.containing_scope_offset[depth], pj.get_current_loc());
-    // goto saved_state
-    goto *pj.ret_address[depth];
-
-    
-////////////////////////////// ARRAY STATES /////////////////////////////
-
-array_begin:
-    printf("in array_begin %c \n",c);
-
-    DEBUG_PRINTF("in array_begin\n");
-    pj.containing_scope_offset[depth] = pj.get_current_loc();
-    pj.write_tape(0, c); 
-    depth ++;
-    if(depth > pj.depthcapacity) {
+    if (c != '"') {
+      goto fail;
+    } else {
+      if (!parse_string(buf, len, pj, depth, idx)) {
        goto fail;
+      }
+      goto object_key_state;
    }
-    UPDATE_CHAR();
-    if (c == ']') {
-        goto scope_end;
-    }
+  case '}':
+    goto scope_end;
+  default:
+    goto fail;
+  }
+
+  ////////////////////////////// COMMON STATE /////////////////////////////
+
+scope_end:
+  // write our tape location to the header scope
+  depth--;
+  pj.write_tape(pj.containing_scope_offset[depth], c);
+  pj.annotate_previousloc(pj.containing_scope_offset[depth],
+                          pj.get_current_loc());
+  // goto saved_state
+  goto *pj.ret_address[depth];
+
+  ////////////////////////////// ARRAY STATES /////////////////////////////
+array_begin:
+  DEBUG_PRINTF("in array_begin\n");
+  pj.containing_scope_offset[depth] = pj.get_current_loc();
+  pj.write_tape(0, c);
+  UPDATE_CHAR();
+  if (c == ']') {
+    goto scope_end; // could also go to array_continue
+  }

 main_array_switch:
-    // we call update char on all paths in, so we can peek at c on the
-    // on paths that can accept a close square brace (post-, and at start)
-    switch (c) {
-        case '"': {
-            if (!parse_string(buf, len, pj, depth, idx)) {
-                goto fail;
-            }
-            goto array_continue;
-        }
-        case 't': if (!is_valid_true_atom(buf + idx)) {
-                    goto fail;
-                  }
-                  pj.write_tape(0, c);
-                  break;
-        case 'f': if (!is_valid_false_atom(buf + idx)) {
-                    goto fail;
-                  }
-                  pj.write_tape(0, c);
-                  break;
-        case 'n': if (!is_valid_null_atom(buf + idx)) {
-                    goto fail;
-                  }
-                  pj.write_tape(0, c);
-                  break;
-        
-        case '0': {
-            if (!parse_number(buf, len, pj, depth, idx, true, false)) {
-                goto fail;
-            }
-            break;
-        }
-        case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':  {
-            if (!parse_number(buf, len, pj, depth, idx, false, false)) {
-                goto fail;
-            }
-            break;
-        }
-        case '-': {
-            if (!parse_number(buf, len, pj, depth, idx, false, true)) {
-                goto fail;
-            }
-            break;
-        }
-        case '{': {
-            pj.ret_address[depth] = &&array_continue; 
-            goto object_begin;
-        }
-        case '[': {
-            pj.ret_address[depth] = &&array_continue; 
-            goto array_begin;
-        }
-        default: goto fail;
+  // we call update char on all paths in, so we can peek at c on the
+  // paths that can accept a close square brace (post-, and at start)
+  switch (c) {
+  case '"': {
+    if (!parse_string(buf, len, pj, depth, idx)) {
+      goto fail;
    }
+    break;
+  }
+  case 't':
+    if (!is_valid_true_atom(buf + idx)) {
+      goto fail;
+    }
+    pj.write_tape(0, c);
+    break; 
+  case 'f':
+    if (!is_valid_false_atom(buf + idx)) {
+      goto fail;
+    }
+    pj.write_tape(0, c);
+    break; 
+  case 'n':
+    if (!is_valid_null_atom(buf + idx)) {
+      goto fail;
+    }
+    pj.write_tape(0, c);
+    break; // goto array_continue;
+
+  case '0': {
+    if (!parse_number(buf, len, pj, depth, idx, true, false)) {
+      goto fail;
+    }
+    break; // goto array_continue;
+  }
+  case '1':
+  case '2':
+  case '3':
+  case '4':
+  case '5':
+  case '6':
+  case '7':
+  case '8':
+  case '9': {
+    if (!parse_number(buf, len, pj, depth, idx, false, false)) {
+      goto fail;
+    }
+    break; // goto array_continue;
+  }
+  case '-': {
+    if (!parse_number(buf, len, pj, depth, idx, false, true)) {
+      goto fail;
+    }
+    break; // goto array_continue;
+  }
+  case '{': {
+    // we have not yet encountered ] so we need to come back for it
+    pj.ret_address[depth] = &&array_continue;
+
+    // we found an object inside an array, so we need to increment the depth
+    depth++;
+    if (depth > pj.depthcapacity) {
+      goto fail;
+    }
+
+    goto object_begin;
+  }
+  case '[': {
+    // we have not yet encountered ] so we need to come back for it
+    pj.ret_address[depth] = &&array_continue;
+
+    // we found an array inside an array, so we need to increment the depth
+    depth++;
+    if (depth > pj.depthcapacity) {
+      goto fail;
+    }
+
+    goto array_begin;
+  }
+  default:
+    goto fail;
+  }

 array_continue:
-    printf("in array_begin %c \n",c);
-
-    DEBUG_PRINTF("in array_continue\n");
+  DEBUG_PRINTF("in array_continue\n");
+  UPDATE_CHAR();
+  switch (c) {
+  case ',':
    UPDATE_CHAR();
-    switch (c) {
-        case ',': UPDATE_CHAR(); goto main_array_switch;
-        case ']': goto scope_end;
-        default: goto fail;
-    }
+    goto main_array_switch;
+  case ']':
+    goto scope_end;
+  default:
+    goto fail;
+  }

-////////////////////////////// FINAL STATES /////////////////////////////
+  ////////////////////////////// FINAL STATES /////////////////////////////

 succeed:
-    DEBUG_PRINTF("in succeed\n");
-    // we annotate the root node
-    depth--;
-    // next line allows us to go back to the start
-    pj.write_tape(pj.containing_scope_offset[depth], 'r');// r is root
-    // next line tells the root node how to go to the end
-    pj.annotate_previousloc(pj.containing_scope_offset[depth], pj.get_current_loc());
+  DEBUG_PRINTF("in succeed, depth = %d \n", depth);
+  // we annotate the root node
+  // depth--;
+  // next line allows us to go back to the start
+  pj.write_tape(pj.containing_scope_offset[depth], 'r'); // r is root
+  // next line tells the root node how to go to the end
+  pj.annotate_previousloc(pj.containing_scope_offset[depth],
+                          pj.get_current_loc());

 #ifdef DEBUG
-    pj.dump_tapes();
+  pj.dump_tapes();
 #endif
-    return true;
-    
+  return true;
+
 fail:
-    DEBUG_PRINTF("in fail\n");
+  DEBUG_PRINTF("in fail\n");
 #ifdef DEBUG
-    pj.dump_tapes();
+  pj.dump_tapes();
 #endif
-    return false;    
+  return false;
 }
--- a/tests/allparserscheckfile.cpp
+++ b/tests/allparserscheckfile.cpp
@ -1,3 +1,4 @@
+#include <unistd.h>

 #include "jsonparser/jsonparser.h"

@ -30,7 +31,6 @@ void on_json_error( void *, const fastjson::ErrorContext& ec) {
 bool fastjson_parse(const char *input) {
  fastjson::Token token;
  fastjson::dom::Chunk chunk;
-  std::string error_message;
  return fastjson::dom::parse_string(input, &token, &chunk, 0, &on_json_error, NULL);
 }
 // end of fastjson stuff
@ -41,17 +41,30 @@ using namespace rapidjson;
 using namespace std;

 int main(int argc, char *argv[]) {
-  if (argc < 2) {
+  bool verbose = false;
+    int c;
+  while ((c = getopt (argc, argv, "v")) != -1)
+    switch (c)
+      {
+      case 'v':
+        verbose = true;
+        break;
+      default:
+        abort ();
+      }
+  if (optind >= argc) {
    cerr << "Usage: " << argv[0] << " <jsonfile>\n";
    cerr << "Or " << argv[0] << " -v <jsonfile>\n";
    exit(1);
  }
-  bool verbose = false;
-  if (argc > 2) {
-    if (strcmp(argv[1], "-v"))
-      verbose = true;
+  const char * filename = argv[optind];
+  std::pair<u8 *, size_t> p;
+  try {
+    p = get_corpus(filename);
+  } catch (const std::exception& e) { // caught by reference to base
+    std::cout << "Could not load the file " << filename << std::endl;
+    return EXIT_FAILURE;
  }
-  pair<u8 *, size_t> p = get_corpus(argv[argc - 1]);
  if (verbose) {
    std::cout << "Input has ";
    if (p.second > 1024 * 1024)
--- a/tests/jsoncheck.cpp
+++ b/tests/jsoncheck.cpp
@ -5,6 +5,7 @@
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>

 #include "jsonparser/jsonparser.h"

@ -41,6 +42,8 @@ bool validate(const char *dirname) {
    printf("nothing in dir %s \n", dirname);
    return false;
  }
+  bool * isfileasexpected = new bool[c];
+  for(int i = 0; i < c; i++) isfileasexpected[i] = true;
  size_t howmany = 0;
  bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
  for (int i = 0; i < c; i++) {
@ -56,7 +59,13 @@ bool validate(const char *dirname) {
      } else {
        strcpy(fullpath + dirlen, name);
      }
-      std::pair<u8 *, size_t> p = get_corpus(fullpath);
+      std::pair<u8 *, size_t> p;
+      try {
+        p = get_corpus(fullpath);
+      } catch (const std::exception& e) { 
+        std::cout << "Could not load the file " << fullpath << std::endl;
+        return false; // validate() returns bool: EXIT_FAILURE (non-zero) would convert to true, i.e. "all ok"
+      }
      ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
      if(pj_ptr == NULL) {
        std::cerr<< "can't allocate memory"<<std::endl;
@ -70,11 +79,13 @@ bool validate(const char *dirname) {
        howmany--;
      } else if (startsWith("pass", name)) {
        if (!isok) {
+          isfileasexpected[i] = false;
          printf("warning: file %s should pass but it fails.\n", name);
          everythingfine = false;
        }
      } else if (startsWith("fail", name)) {
        if (isok) {
+          isfileasexpected[i] = false;
          printf("warning: file %s should fail but it passes.\n", name);
          everythingfine = false;
        }
@ -87,11 +98,20 @@ bool validate(const char *dirname) {
      deallocate_ParsedJson(pj_ptr);
    }
  }
+  printf("%zu files checked.\n", howmany);
+  if(everythingfine) {
+    printf("All ok!\n");
+  } else {
+    printf("There were problems! Consider reviewing the following files:\n");
+    for(int i = 0; i < c; i++) {
+      if(!isfileasexpected[i]) printf("%s \n", entry_list[i]->d_name);
+    }
+  }
  for (int i = 0; i < c; ++i)
    free(entry_list[i]);
  free(entry_list);
-  printf("%zu files checked.\n", howmany);
-  if(everythingfine) printf("All ok!\n");
+  delete[] isfileasexpected;
+
  return everythingfine;
 }

--- a/tests/numberparsingcheck.cpp
+++ b/tests/numberparsingcheck.cpp
@ -28,7 +28,7 @@ bool startsWith(const char *pre, const char *str) {
  size_t lenpre = strlen(pre), lenstr = strlen(str);
  return lenstr < lenpre ? false : strncmp(pre, str, lenpre) == 0;
 }
-bool is_in_bad_list(char *buf) {
+bool is_in_bad_list(const char *buf) {
  for (size_t i = 0; i < sizeof(really_bad) / sizeof(really_bad[0]); i++)
    if (startsWith(really_bad[i], buf))
      return true;
@ -38,9 +38,9 @@ bool is_in_bad_list(char *buf) {
 inline void foundInvalidNumber(const u8 *buf) {
  invalid_count++;
  char *endptr;
-  double expected = strtod((char *)buf, &endptr);
-  if (endptr != (char *)buf) {
-    if (!is_in_bad_list((char *)buf)) {
+  double expected = strtod((const char *)buf, &endptr);
+  if (endptr != (const char *)buf) {
+    if (!is_in_bad_list((const char *)buf)) {
      printf(
          "Warning: foundInvalidNumber %.32s whereas strtod parses it to %f, ",
          buf, expected);
@ -53,8 +53,8 @@ inline void foundInvalidNumber(const u8 *buf) {
 inline void foundInteger(int64_t result, const u8 *buf) {
  int_count++;
  char *endptr;
-  long long expected = strtoll((char *)buf, &endptr, 10);
-  if ((endptr == (char *)buf) || (expected != result)) {
+  long long expected = strtoll((const char *)buf, &endptr, 10);
+  if ((endptr == (const char *)buf) || (expected != result)) {
    printf("Error: parsed %" PRId64 " out of %.32s, ", result, buf);
    printf(" while parsing %s \n", fullpath);
    parse_error |= PARSE_ERROR;
@ -64,8 +64,8 @@ inline void foundInteger(int64_t result, const u8 *buf) {
 inline void foundFloat(double result, const u8 *buf) {
  char *endptr;
  float_count++;
-  double expected = strtod((char *)buf, &endptr);
-  if (endptr == (char *)buf) {
+  double expected = strtod((const char *)buf, &endptr);
+  if (endptr == (const char *)buf) {
    printf("parsed %f from %.32s whereas strtod refuses to parse a float, ",
           result, buf);
    printf(" while parsing %s \n", fullpath);
@ -123,7 +123,13 @@ bool validate(const char *dirname) {
      } else {
        strcpy(fullpath + dirlen, name);
      }
-      std::pair<u8 *, size_t> p = get_corpus(fullpath);
+      std::pair<u8 *, size_t> p;
+      try {
+        p = get_corpus(fullpath);
+      } catch (const std::exception& e) { 
+        std::cout << "Could not load the file " << fullpath << std::endl;
+        return false; // validate() returns bool: EXIT_FAILURE (non-zero) would convert to true, i.e. success
+      }
      // terrible hack but just to get it working
      ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
      if (pj_ptr == NULL) {
--- a/tests/stringparsingcheck.cpp
+++ b/tests/stringparsingcheck.cpp
@ -241,7 +241,7 @@ inline void foundString(const u8 *buf, const u8 *parsed_begin,
    // we have a zero-length string
    if (parsed_begin != parsed_end) {
      printf("WARNING: We have a zero-length but gap is %zu \n",
-             parsed_end - parsed_begin);
+             (size_t)(parsed_end - parsed_begin));
      probable_bug = true;
    }
    empty_string++;
@ -252,12 +252,12 @@ inline void foundString(const u8 *buf, const u8 *parsed_begin,
    printf("WARNING: lengths on parsed strings disagree %zu %zu \n", thislen,
           len);
    printf("\nour parsed string  : '%*s'\n\n", (int)thislen,
-           (char *)parsed_begin);
-    print_hex((char *)parsed_begin, thislen);
+           (const char *)parsed_begin);
+    print_hex((const char *)parsed_begin, thislen);
    printf("\n");

    printf("reference parsing   :'%*s'\n\n", (int)len, bigbuffer);
-    print_hex((char *)bigbuffer, len);
+    print_hex((const char *)bigbuffer, len);
    printf("\n");

    probable_bug = true;
@ -267,15 +267,15 @@ inline void foundString(const u8 *buf, const u8 *parsed_begin,
    printf("Lengths %zu %zu  \n", thislen, len);

    printf("\nour parsed string  : '%*s'\n", (int)thislen,
-           (char *)parsed_begin);
-    print_hex((char *)parsed_begin, thislen);
+           (const char *)parsed_begin);
+    print_hex((const char *)parsed_begin, thislen);
    printf("\n");

    printf("reference parsing   :'%*s'\n", (int)len, bigbuffer);
-    print_hex((char *)bigbuffer, len);
+    print_hex((const char *)bigbuffer, len);
    printf("\n");

-    print_cmp_hex((char *)parsed_begin, bigbuffer, thislen);
+    print_cmp_hex((const char *)parsed_begin, bigbuffer, thislen);

    probable_bug = true;
  }
@ -325,8 +325,13 @@ bool validate(const char *dirname) {
      } else {
        strcpy(fullpath + dirlen, name);
      }
-      std::pair<u8 *, size_t> p = get_corpus(fullpath);
-      // terrible hack but just to get it working
+      std::pair<u8 *, size_t> p;
+      try {
+        p = get_corpus(fullpath);
+      } catch (const std::exception& e) { 
+        std::cout << "Could not load the file " << fullpath << std::endl;
+        return false; // validate() returns bool: EXIT_FAILURE (non-zero) would convert to true, i.e. success
+      }      
      ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
      if (pj_ptr == NULL) {
        std::cerr << "can't allocate memory" << std::endl;