Ok. Looks complete.

2018-12-14 21:32:42 -05:00 · 2018-12-14 21:32:42 -05:00 · 0769c39e27
parent c127570c83
commit 0769c39e27
8 changed files with 321 additions and 191 deletions
--- a/33
+++ b/33
@ -5,9 +5,9 @@
 .PHONY: clean cleandist
-
+COREDEPSINCLUDE = -Idependencies/rapidjson/include -Idependencies/sajson/include 
-DEPSINCLUDE = -Idependencies/rapidjson/include -Idependencies/sajson/include -Idependencies/json11 -Idependencies/fastjson/src -Idependencies/fastjson/include -Idependencies/gason/src -Idependencies/ujson4c/3rdparty -Idependencies/ujson4c/src
+EXTRADEPSINCLUDE =  -Idependencies/json11 -Idependencies/fastjson/src -Idependencies/fastjson/include -Idependencies/gason/src -Idependencies/ujson4c/3rdparty -Idependencies/ujson4c/src
-CXXFLAGS =  -std=c++17  -march=native -Wall -Wextra -Wshadow -Iinclude  -Ibenchmark/linux  $(DEPSINCLUDE)    
+CXXFLAGS =  -std=c++17  -march=native -Wall -Wextra -Wshadow -Iinclude  -Ibenchmark/linux      
 CFLAGS = -march=native  -Idependencies/ujson4c/3rdparty -Idependencies/ujson4c/src
 ifeq ($(SANITIZE),1)
 	CXXFLAGS += -g3 -O0  -fsanitize=address -fno-omit-frame-pointer -fsanitize=undefined
@ -24,7 +24,7 @@ endif
 MAINEXECUTABLES=parse minify json2json
 TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck 
-COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition  allparserscheckfile
+COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition distinctuseridcompetition allparserscheckfile
 HEADERS= include/simdjson/simdutf8check.h include/simdjson/stringparsing.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_flatten.h include/simdjson/stage34_unified.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h
 LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp     src/stage2_flatten.cpp        src/stage34_unified.cpp
@ -40,9 +40,11 @@ GASON_INCLUDE:=dependencies/gason/src/gason.h
 UJSON4C_INCLUDE:=dependencies/ujson4c/src/ujdecode.c
 LIBS=$(RAPIDJSON_INCLUDE) $(SAJSON_INCLUDE) $(JSON11_INCLUDE) $(FASTJSON_INCLUDE) $(GASON_INCLUDE) $(UJSON4C_INCLUDE)
-OBJECTS=ujdecode.o
+EXTRAOBJECTS=ujdecode.o
 all:  $(MAINEXECUTABLES)
 competition:  $(COMPARISONEXECUTABLES)
 test: jsoncheck numberparsingcheck stringparsingcheck
 	./numberparsingcheck
 	./stringparsingcheck
@ -91,7 +93,7 @@ stringparsingcheck:tests/stringparsingcheck.cpp $(HEADERS) $(LIBFILES)
 minifiercompetition: benchmark/minifiercompetition.cpp $(HEADERS) $(MINIFIERHEADERS) $(LIBFILES) $(MINIFIERLIBFILES)
-	$(CXX) $(CXXFLAGS) -o minifiercompetition $(LIBFILES) $(MINIFIERLIBFILES) benchmark/minifiercompetition.cpp -I. $(LIBFLAGS)
+	$(CXX) $(CXXFLAGS) -o minifiercompetition $(LIBFILES) $(MINIFIERLIBFILES) benchmark/minifiercompetition.cpp -I. $(LIBFLAGS) $(COREDEPSINCLUDE)
 minify: tools/minify.cpp $(HEADERS) $(MINIFIERHEADERS) $(LIBFILES) $(MINIFIERLIBFILES)
 	$(CXX) $(CXXFLAGS) -o minify $(MINIFIERLIBFILES) $(LIBFILES) tools/minify.cpp -I. 
@ -103,15 +105,18 @@ json2json: tools/json2json.cpp $(HEADERS) $(LIBFILES)
 ujdecode.o: $(UJSON4C_INCLUDE)
 	$(CC) $(CFLAGS) -c dependencies/ujson4c/src/ujdecode.c 
-parseandstatcompetition: benchmark/parseandstatcompetition.cpp $(HEADERS) $(LIBFILES) $(OBJECTS)
+parseandstatcompetition: benchmark/parseandstatcompetition.cpp $(HEADERS) $(LIBFILES)  
-	$(CXX) $(CXXFLAGS)  -o parseandstatcompetition $(LIBFILES) benchmark/parseandstatcompetition.cpp $(OBJECTS) -I. $(LIBFLAGS)
+	$(CXX) $(CXXFLAGS)  -o parseandstatcompetition $(LIBFILES) benchmark/parseandstatcompetition.cpp -I. $(LIBFLAGS) $(COREDEPSINCLUDE)
 distinctuseridcompetition: benchmark/distinctuseridcompetition.cpp $(HEADERS) $(LIBFILES) 
 	$(CXX) $(CXXFLAGS)  -o distinctuseridcompetition $(LIBFILES) benchmark/distinctuseridcompetition.cpp  -I. $(LIBFLAGS) $(COREDEPSINCLUDE)
-parsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES) $(OBJECTS)
+parsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES) $(EXTRAOBJECTS) 
-	$(CXX) $(CXXFLAGS)  -o parsingcompetition $(LIBFILES) benchmark/parsingcompetition.cpp $(OBJECTS) -I. $(LIBFLAGS)
+	$(CXX) $(CXXFLAGS)  -o parsingcompetition $(LIBFILES) benchmark/parsingcompetition.cpp $(EXTRAOBJECTS) -I. $(LIBFLAGS) $(COREDEPSINCLUDE) $(EXTRADEPSINCLUDE)
-allparserscheckfile: tests/allparserscheckfile.cpp $(HEADERS) $(LIBFILES) $(OBJECTS)
+allparserscheckfile: tests/allparserscheckfile.cpp $(HEADERS) $(LIBFILES) $(EXTRAOBJECTS) 
-	$(CXX) $(CXXFLAGS) -o allparserscheckfile $(LIBFILES) tests/allparserscheckfile.cpp $(OBJECTS) -I. $(LIBFLAGS)
+	$(CXX) $(CXXFLAGS) -o allparserscheckfile $(LIBFILES) tests/allparserscheckfile.cpp $(EXTRAOBJECTS) -I. $(LIBFLAGS) $(COREDEPSINCLUDE) $(EXTRADEPSINCLUDE)
 parsehisto: benchmark/parse.cpp  $(HEADERS) $(LIBFILES)
 	$(CXX) $(CXXFLAGS) -o parsehisto benchmark/parse.cpp $(LIBFILES) $(LIBFLAGS) -DBUILDHISTOGRAM
@ -121,7 +126,7 @@ cppcheck:
 clean:
-	rm -f $(OBJECTS) $(MAINEXECUTABLES) $(EXTRA_EXECUTABLES) $(TESTEXECUTABLES) $(COMPARISONEXECUTABLES)
+	rm -f $(EXTRAOBJECTS) $(MAINEXECUTABLES) $(EXTRA_EXECUTABLES) $(TESTEXECUTABLES) $(COMPARISONEXECUTABLES)
 cleandist:
-	rm -f $(OBJECTS) $(MAINEXECUTABLES) $(EXTRA_EXECUTABLES) $(TESTEXECUTABLES) $(COMPARISONEXECUTABLES)
+	rm -f $(EXTRAOBJECTS) $(MAINEXECUTABLES) $(EXTRA_EXECUTABLES) $(TESTEXECUTABLES) $(COMPARISONEXECUTABLES)
--- a/README.md
+++ b/README.md
@ -90,6 +90,7 @@ To simplify the engineering, we make some assumptions.
 - We assume AVX2 support which is available in all recent mainstream x86 processors produced by AMD and Intel. No support for non-x86 processors is included though it can be done. We plan to support ARM processors (help is invited).
 - We only support GNU GCC and LLVM Clang at this time. There is no support for Microsoft Visual Studio, though it should not be difficult (help is invited).
 - In cases of failure, we just report a failure without any indication as to the nature of the problem. (This can be easily improved without affecting performance.)
 - As allowed by the specification, we allow repeated keys within an object (other parsers like sajson do the same).
 *We do not aim to provide a general-purpose JSON library.* A library like RapidJSON offers much more than just parsing, it helps you generate JSON and offers various other convenient functions. We merely parse the document.
@ -97,7 +98,7 @@ To simplify the engineering, we make some assumptions.
 ## Features
 - The input string is unmodified. (Parsers like sajson and RapidJSON use the input string as a buffer.)
- We parse integers and floating-point numbers as separate types which allows us to support large 64-bit integers.
+- We parse integers and floating-point numbers as separate types, which allows us to support large 64-bit integers in [-9223372036854775808,9223372036854775808). Among the parsers that differentiate between integers and floating-point numbers, not all support 64-bit integers. (For example, sajson stores integers larger than 2147483648 as floating-point numbers.)
 - We do full UTF-8 validation as part of the parsing. (Parsers like fastjson, gason and dropbox json11 do not do UTF-8 validation.)
 - We fully validate the numbers. (Parsers like gason and ultrajson will accept `[0e+]` as valid JSON.)
 - We validate string content for unescaped characters. (Parsers like fastjson and ultrajson accept unescaped line breaks and tags in strings.)
@ -111,6 +112,102 @@ The parser works in three stages:
 - Stage 3. (Structure building) Involves constructing a "tree" of sorts to navigate through the data. Strings and numbers are parsed at this stage.
 ## Navigating the parsed document
 Here is a code sample to dump back the parsed JSON to a string:
 ```c
    ParsedJson::iterator pjh(pj);
    if (!pjh.isOk()) {
      std::cerr << " Could not iterate parsed result. " << std::endl;
      return EXIT_FAILURE;
    }
    compute_dump(pjh);
    //
    // where compute_dump is :
 void compute_dump(ParsedJson::iterator &pjh) {
  if (pjh.is_object()) {
    std::cout << "{";
    if (pjh.down()) {
      pjh.print(std::cout); // must be a string
      std::cout << ":";
      pjh.next();
      compute_dump(pjh); // let us recurse
      while (pjh.next()) {
        std::cout << ",";
        pjh.print(std::cout);
        std::cout << ":";
        pjh.next();
        compute_dump(pjh); // let us recurse
      }
      pjh.up();
    }
    std::cout << "}";
  } else if (pjh.is_array()) {
    std::cout << "[";
    if (pjh.down()) {
      compute_dump(pjh); // let us recurse
      while (pjh.next()) {
        std::cout << ",";
        compute_dump(pjh); // let us recurse
      }
      pjh.up();
    }
    std::cout << "]";
  } else {
    pjh.print(std::cout); // just print the lone value
  }
 }
 ```
 The following function will find all user.id integers:
 ```C
 void simdjson_traverse(std::vector<int64_t> &answer, ParsedJson::iterator &i) {
  switch (i.get_type()) {
  case '{':
    if (i.down()) {
      do {
        bool founduser = equals(i.get_string(), "user");
        i.next(); // move to value
        if (i.is_object()) {
          if (founduser && i.move_to_key("id")) {
            if (i.is_integer()) {
              answer.push_back(i.get_integer());
            }
            i.up();
          }
          simdjson_traverse(answer, i);
        } else if (i.is_array()) {
          simdjson_traverse(answer, i);
        }
      } while (i.next());
      i.up();
    }
    break;
  case '[':
    if (i.down()) {
      do {
        if (i.is_object_or_array()) {
          simdjson_traverse(answer, i);
        }
      } while (i.next());
      i.up();
    }
    break;
  case 'l':
  case 'd':
  case 'n':
  case 't':
  case 'f':
  default:
    break;
  }
 }
 ```
 ## Various References
 - [Google double-conv](https://github.com/google/double-conversion/)
--- a/benchmark/distinctuseridcompetition.cpp
+++ b/benchmark/distinctuseridcompetition.cpp
@ -1,5 +1,7 @@
 #include "simdjson/jsonparser.h"
 #include <algorithm>
 #include <unistd.h>
 #include <vector>
 #include "benchmark.h"
@ -26,108 +28,112 @@ name;
 #include "sajson.h"
 #include "fastjson.cpp"
 #include "fastjson_dom.cpp"
 #include "gason.cpp"
 #include "json11.cpp"
 #include "sajson.h"
 extern "C" {
 #include "ujdecode.h"
 #include "ultrajsondec.c"
 }
 using namespace rapidjson;
 using namespace std;
 // Returns true when the two NUL-terminated C strings hold identical contents.
 bool equals(const char *s1, const char *s2) {
  const int order = strcmp(s1, s2);
  return order == 0;
 }
 // Sorts `v` in ascending order and drops repeated values in place, so that
 // each value remains exactly once.
 void remove_duplicates(vector<int64_t> &v) {
  std::sort(v.begin(), v.end());
  // std::unique compacts the sorted range; erase trims the leftover tail.
  v.erase(std::unique(v.begin(), v.end()), v.end());
 }
 // Writes every element of `v` to std::cout, each followed by a single space,
 // then terminates the line (and flushes) with std::endl.
 void print_vec(vector<int64_t> &v) {
  for (auto it = v.begin(); it != v.end(); ++it) {
    std::cout << *it << " ";
  }
  std::cout << std::endl;
 }
 // Recursively walks the node under iterator `i` and appends to `answer` the
 // integer value found under the "id" key of every object that is itself the
 // value of a "user" key. Nodes that are neither objects nor arrays are
 // ignored.
 void simdjson_traverse(std::vector<int64_t> &answer, ParsedJson::iterator &i) {
  const auto kind = i.get_type();
  if (kind == '{') {
    if (!i.down()) {
      return; // could not descend: nothing to visit
    }
    do {
      // we are on a key: remember whether it is "user" before stepping on
      const bool key_is_user = equals(i.get_string(), "user");
      i.next(); // move to value
      if (i.is_object()) {
        if (key_is_user && i.move_to_key("id")) {
          // move_to_key left us on the value of "id"
          if (i.is_integer()) {
            answer.push_back(i.get_integer());
          }
          i.up(); // back to the "user" object itself
        }
        simdjson_traverse(answer, i);
      } else if (i.is_array()) {
        simdjson_traverse(answer, i);
      }
    } while (i.next());
    i.up();
  } else if (kind == '[') {
    if (!i.down()) {
      return; // could not descend: nothing to visit
    }
    do {
      // only nested scopes can contain a "user" object
      if (i.is_object_or_array()) {
        simdjson_traverse(answer, i);
      }
    } while (i.next());
    i.up();
  }
  // every other node type ('l', 'd', 'n', 't', 'f', ...) needs no work
 }
 std::vector<int64_t> simdjson_computestats(const std::string_view &p) {
  std::vector<int64_t> answer;
  ParsedJson pj = build_parsed_json(p);
-  answer.valid = pj.isValid();
+  if (!pj.isValid()) {
  if (!answer.valid) {
    return answer;
  }
-  answer.number_count = 0;
+  ParsedJson::iterator i(pj);
-  answer.object_count = 0;
+
-  answer.array_count = 0;
+  simdjson_traverse(answer, i);
-  answer.null_count = 0;
+  remove_duplicates(answer);
  answer.true_count = 0;
  answer.false_count = 0;
  size_t tapeidx = 0;
  u64 tape_val = pj.tape[tapeidx++];
  u8 type = (tape_val >> 56);
  size_t howmany = 0;
  assert(type == 'r');
  howmany = tape_val & JSONVALUEMASK;
  for (; tapeidx < howmany; tapeidx++) {
    tape_val = pj.tape[tapeidx];
    // u64 payload = tape_val & JSONVALUEMASK;
    type = (tape_val >> 56);
    switch (type) {
    case 'l': // we have a long int
      answer.number_count++;
      tapeidx++; // skipping the integer
      break;
    case 'd': // we have a double
      answer.number_count++;
      tapeidx++; // skipping the double
      break;
    case 'n': // we have a null
      answer.null_count++;
      break;
    case 't': // we have a true
      answer.true_count++;
      break;
    case 'f': // we have a false
      answer.false_count++;
      break;
    case '{': // we have an object
      answer.object_count++;
      break;
    case '}': // we end an object
      break;
    case '[': // we start an array
      answer.array_count++;
      break;
    case ']': // we end an array
      break;
    default:
      break; // ignore
    }
  }
  return answer;
 }
-
+void sajson_traverse(std::vector<int64_t> &answer, const sajson::value &node) {
 void sajson_traverse(stat_t &stats, const sajson::value &node) {
  using namespace sajson;
  switch (node.get_type()) {
  case TYPE_ARRAY: {
    stats.array_count++;
    auto length = node.get_length();
    for (size_t i = 0; i < length; ++i) {
-      sajson_traverse(stats, node.get_array_element(i));
+      sajson_traverse(answer, node.get_array_element(i));
    }
    break;
  }
  case TYPE_OBJECT: {
    stats.object_count++;
    auto length = node.get_length();
    for (auto i = 0u; i < length; ++i) {
-      if(strcmp(node.get_object_key(i), "user") == 0) {
+      if (equals(node.get_object_key(i).data(), "user")) { // found a user!!!
-          auto child = node.get_object_value(i);
+        auto uservalue = node.get_object_value(i);         // get the value
-          if(child.get_type() == TYPE_OBJECT) {
+        if (uservalue.get_type() ==
-              for (auto j = 0u; j < length; ++j) {
+            TYPE_OBJECT) { // the value should be an object
-                  if(strcmp(node.get_object_key(i), "user") == 0) {
+          auto uservaluelength = uservalue.get_length();
-                  }
+          for (auto j = 0u; j < uservaluelength;
-
+               ++j) { // go through the children
            if (equals(uservalue.get_object_key(j).data(),
                       "id")) { // ah ah found id
              auto v = uservalue.get_object_value(j);
              if (v.get_type() == TYPE_INTEGER) { // check that it is an integer
                answer.push_back(v.get_integer_value()); // record it!
              } else if (v.get_type() == TYPE_DOUBLE) {
                answer.push_back((int64_t)v.get_double_value()); // record it!
              }
            }
          }
        }
      }
-      sajson_traverse(stats, node.get_object_value(i));
+      sajson_traverse(answer, node.get_object_value(i));
    }
    break;
  }
@ -143,82 +149,72 @@ void sajson_traverse(stat_t &stats, const sajson::value &node) {
  }
 }
-stat_t sasjon_computestats(const std::string_view &p) {
+std::vector<int64_t> sasjon_computestats(const std::string_view &p) {
-  stat_t answer;
+  std::vector<int64_t> answer;
  char *buffer = (char *)malloc(p.size());
  memcpy(buffer, p.data(), p.size());
  auto d = sajson::parse(sajson::dynamic_allocation(),
                         sajson::mutable_string_view(p.size(), buffer));
-  answer.valid = d.is_valid();
+  if (!d.is_valid()) {
  if (!answer.valid) {
    return answer;
  }
  answer.number_count = 0;
  answer.object_count = 0;
  answer.array_count = 0;
  answer.null_count = 0;
  answer.true_count = 0;
  answer.false_count = 0;
  sajson_traverse(answer, d.get_root());
  free(buffer);
  remove_duplicates(answer);
  return answer;
 }
-void rapid_traverse(stat_t &stats, const rapidjson::Value &v) {
+void rapid_traverse(std::vector<int64_t> &answer, const rapidjson::Value &v) {
  switch (v.GetType()) {
  case kNullType:
    stats.null_count++;
    break;
  case kFalseType:
    stats.false_count++;
    break;
  case kTrueType:
    stats.true_count++;
    break;
  case kObjectType:
    for (Value::ConstMemberIterator m = v.MemberBegin(); m != v.MemberEnd();
         ++m) {
-      rapid_traverse(stats, m->value);
+      if (equals(m->name.GetString(), "user")) {
        const rapidjson::Value &child = m->value;
        if (child.GetType() == kObjectType) {
          for (Value::ConstMemberIterator k = child.MemberBegin();
               k != child.MemberEnd(); ++k) {
            if (equals(k->name.GetString(), "id")) {
              const rapidjson::Value &val = k->value;
              if (val.GetType() == kNumberType) {
                answer.push_back(val.GetInt64());
              }
            }
          }
        }
      }
      rapid_traverse(answer, m->value);
    }
    stats.object_count++;
    break;
  case kArrayType:
    for (Value::ConstValueIterator i = v.Begin(); i != v.End();
         ++i) { // v.Size();
-      rapid_traverse(stats, *i);
+      rapid_traverse(answer, *i);
    }
    stats.array_count++;
    break;
-
+  case kNullType:
  case kFalseType:
  case kTrueType:
  case kStringType:
    break;
  case kNumberType:
-    stats.number_count++;
+  default:
    break;
  }
 }
-stat_t rapid_computestats(const std::string_view &p) {
+std::vector<int64_t> rapid_computestats(const std::string_view &p) {
-  stat_t answer;
+  std::vector<int64_t> answer;
  char *buffer = (char *)malloc(p.size() + 1);
  memcpy(buffer, p.data(), p.size());
  buffer[p.size()] = '\0';
  rapidjson::Document d;
  d.ParseInsitu<kParseValidateEncodingFlag>(buffer);
-  answer.valid = !d.HasParseError();
+  if (d.HasParseError()) {
  if (!answer.valid) {
    return answer;
  }
  answer.number_count = 0;
  answer.object_count = 0;
  answer.array_count = 0;
  answer.null_count = 0;
  answer.true_count = 0;
  answer.false_count = 0;
  rapid_traverse(answer, d);
  free(buffer);
  remove_duplicates(answer);
  return answer;
 }
@ -262,29 +258,32 @@ int main(int argc, char *argv[]) {
      std::cout << p.size() << " B ";
    std::cout << std::endl;
  }
-  stat_t s1 = simdjson_computestats(p);
+  std::vector<int64_t> s1 = simdjson_computestats(p);
  if (verbose) {
    printf("simdjson: ");
-    print_stat(s1);
+    print_vec(s1);
  }
-  stat_t s2 = rapid_computestats(p);
+  std::vector<int64_t> s2 = rapid_computestats(p);
  if (verbose) {
    printf("rapid:    ");
-    print_stat(s2);
+    print_vec(s2);
  }
-  stat_t s3 = sasjon_computestats(p);
+  std::vector<int64_t> s3 = sasjon_computestats(p);
  if (verbose) {
    printf("sasjon:   ");
-    print_stat(s3);
+    print_vec(s3);
  }
-  assert(stat_equal(s1, s2));
+  assert(s1 == s2);
-  assert(stat_equal(s1, s3));
+  assert(s1 == s3);
  size_t size = s1.size();
  int repeat = 10;
  int volume = p.size();
-  BEST_TIME("simdjson  ", simdjson_computestats(p).valid, true, , repeat,
+  BEST_TIME("simdjson  ", simdjson_computestats(p).size(), size, , repeat,
            volume, true);
-  BEST_TIME("rapid  ", rapid_computestats(p).valid, true, , repeat, volume,
+
  BEST_TIME("rapid  ", rapid_computestats(p).size(), size, , repeat, volume,
            true);
-  BEST_TIME("sasjon  ", sasjon_computestats(p).valid, true, , repeat, volume,
+  BEST_TIME("sasjon  ", sasjon_computestats(p).size(), size, , repeat, volume,
            true);
 }
--- a/benchmark/parseandstatcompetition.cpp
+++ b/benchmark/parseandstatcompetition.cpp
@ -26,16 +26,6 @@ name;
 #include "sajson.h"
 #include "fastjson.cpp"
 #include "fastjson_dom.cpp"
 #include "gason.cpp"
 #include "json11.cpp"
 #include "sajson.h"
 extern "C" {
 #include "ujdecode.h"
 #include "ultrajsondec.c"
 }
 using namespace rapidjson;
 using namespace std;
--- a/include/simdjson/parsedjson.h
+++ b/include/simdjson/parsedjson.h
@ -395,7 +395,6 @@ public:
    }
    // move forward in document order
    WARN_UNUSED
    bool move_forward() {
      if(location + 1 >= tape_length) {
        return false; // we are at the end!
@ -427,13 +426,11 @@ public:
    // retrieve the character code of what we're looking at:
    // [{"sltfn are the possibilities
    WARN_UNUSED
    really_inline u8 get_type()  const {
      return current_type;
    }
    // get the s64 value at this node; valid only if we're at "l"
    WARN_UNUSED
    really_inline s64 get_integer()  const {
       if(location + 1 >= tape_length) return 0;// default value in case of error
       return (s64) pj.tape[location + 1];
@ -441,7 +438,6 @@ public:
    // get the double value at this node; valid only if
    // we're at "d"
    WARN_UNUSED
    really_inline double get_double()  const {
       if(location + 1 >= tape_length) return NAN;// default value in case of error
       double answer;
@ -449,10 +445,54 @@ public:
       return answer;
    } 
    bool is_object_or_array() const {
      return is_object_or_array(get_type());
    }
    bool is_object() const {
      return get_type() == '{';
    }
    bool is_array() const {
      return get_type() == '[';
    }
    bool is_string() const {
      return get_type() == '"';
    }
    bool is_integer() const {
      return get_type() == 'l';
    }
    bool is_double() const {
      return get_type() == 'd';
    }
    // true when the given type code is '[' or '{'
    static bool is_object_or_array(u8 type) {
      switch (type) {
      case '[':
      case '{':
        return true;
      default:
        return false;
      }
    }
    // when at {, go one level deep, looking for a given key
    // if successful, we are left pointing at the value,
    // if not, we are still pointing at the object ({)
    // (in case of repeated keys, this only finds the first one)
    //
    // key: NUL-terminated string compared against each key with strcmp
    // returns: true if the key was found, false otherwise
    bool move_to_key(const char * key) {
      if(down()) {
        do {
          assert(is_string());
          bool rightkey = (strcmp(get_string(),key)==0);
          next(); // step from the key to its value
          if(rightkey) return true;
        } while(next());
        // not found: climb back to the object. The call to up() must live
        // outside of assert(): with NDEBUG defined the assert expression is
        // not evaluated, and the iterator would be left inside the object.
        bool moved_up = up();
        assert(moved_up);
        (void) moved_up; // avoids an unused-variable warning under NDEBUG
      }
      return false;
    }
    // get the string value at this node (NULL ended); valid only if we're at "
    // note that tabs, and line endings are escaped in the returned value (see print_with_escapes)
    // return value is valid UTF-8
    WARN_UNUSED
    really_inline const char * get_string() const {
      return  (const char *)(pj.string_buf + (current_val & JSONVALUEMASK)) ;
    }
@ -465,7 +505,6 @@ public:
    // Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, { and [.
    // At the object ({) or at the array ([), you can issue a "down" to visit their content.
    // valid if we're not at the end of a scope (returns true).
    WARN_UNUSED
    really_inline bool next() { 
      if ((current_type == '[') || (current_type == '{')){
        // we need to jump
@ -498,12 +537,12 @@ public:
    }
    // Within a given scope (series of nodes at the same depth within either an
    // array or an object), we move backward.
    // Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true when starting at the end
    // of the scope.
    // At the object ({) or at the array ([), you can issue a "down" to visit their content.    
    WARN_UNUSED
    really_inline bool prev() {
      if(location - 1 < depthindex[depth].start_of_scope) return false;
      location -= 1;
@ -526,7 +565,6 @@ public:
    // within a contained scope.
    // Valid unless we are at the first level of the document
    //
    WARN_UNUSED
    really_inline bool up() {
      if(depth == 1) {
        return false; // don't allow moving back to root
@ -545,7 +583,6 @@ public:
    // that deeper scope if it is not empty.
    // Thus, given [true, null, {"a":1}, [1,2]], if we are at the { node, we would move to the
    // "a" node.
    WARN_UNUSED
    really_inline bool down() {
      if(location + 1 >= tape_length) return false;
      if ((current_type == '[') || (current_type == '{')) {
--- a/jsonchecker/fail39_EXCLUDE.json
+++ b/jsonchecker/fail39_EXCLUDE.json
@ -0,0 +1 @@
 {"name":1,"name":2, "this is allowable as per the json spec": true}
--- a/scripts/parseandstat.sh
+++ b/scripts/parseandstat.sh
@ -2,6 +2,7 @@
 SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
 cd $SCRIPTPATH/..
 make parseandstatcompetition
 echo "parsing and collecting basic stats on json documents as quickly as possible"
 echo 
 for i in $SCRIPTPATH/../jsonexamples/*.json; do
    [ -f "$i" ] || break
@ -9,3 +10,15 @@ for i in $SCRIPTPATH/../jsonexamples/*.json; do
    $SCRIPTPATH/../parseandstatcompetition $i
    echo
 done
 make distinctuseridcompetition
 echo "parsing and finding all user.id"
 echo 
 for i in $SCRIPTPATH/../jsonexamples/twitter.json; do
    [ -f "$i" ] || break
    echo $i
    $SCRIPTPATH/../distinctuseridcompetition  jsonexamples/twitter.json
    echo
 done
--- a/tools/json2json.cpp
+++ b/tools/json2json.cpp
@ -7,49 +7,37 @@
 using namespace std;
 void compute_dump(ParsedJson::iterator &pjh) {
-  bool inobject = (pjh.get_type() == '{');
+  if (pjh.is_object()) {
  bool inarray = (pjh.get_type() == '[');
  if ((!inobject) && (!inarray)) {
    pjh.print(std::cout); // just print the lone value
    return; // we are done
  }
  // we have either an array or an object
  bool goingdown = pjh.down();
  if(!goingdown) {
      // we have an empty scope
      if(inobject) std::cout<<"{}";
      else std::cout<<"[]";
      return;
  }
  // we have a non-empty scope and we are at the beginning of it
  if (inobject) {
    assert(pjh.get_scope_type() == '{');
    std::cout << "{";
-    assert(pjh.get_type() == '"');
+    if (pjh.down()) {
-    pjh.print(std::cout); // must be a string
+      pjh.print(std::cout); // must be a string
    std::cout << ":";
    assert(pjh.next());
    compute_dump(pjh); // let us recurse
    while (pjh.next()) {
      std::cout << ",";
      assert(pjh.get_type() == '"');
      pjh.print(std::cout);
      std::cout << ":";
-      assert(pjh.next());
+      pjh.next();
      compute_dump(pjh); // let us recurse
      while (pjh.next()) {
        std::cout << ",";
        pjh.print(std::cout);
        std::cout << ":";
        pjh.next();
        compute_dump(pjh); // let us recurse
      }
      pjh.up();
    }
    std::cout << "}";
-  } else {
+  } else if (pjh.is_array()) {
    assert(pjh.get_scope_type() == '[');
    std::cout << "[";
-    compute_dump(pjh); // let us recurse
+    if (pjh.down()) {
    while (pjh.next()) {
      std::cout << ",";
      compute_dump(pjh); // let us recurse
      while (pjh.next()) {
        std::cout << ",";
        compute_dump(pjh); // let us recurse
      }
      pjh.up();
    }
    std::cout << "]";
  } else {
    pjh.print(std::cout); // just print the lone value
  }
  assert(pjh.up()); 
 }
 int main(int argc, char *argv[]) {
@ -93,7 +81,7 @@ int main(int argc, char *argv[]) {
    return EXIT_FAILURE;
  }
  bool is_ok = json_parse(p, pj); // do the parsing, return false on error
-  free((void*)p.data());
+  free((void *)p.data());
  if (!is_ok) {
    std::cerr << " Parsing failed. " << std::endl;
    return EXIT_FAILURE;
		`@ -0,0 +1 @@`
							`{"name":1,"name":2, "this is allowable as per the json spec": true}`