diff --git a/Makefile b/Makefile index 63f0fc24..26dbc56f 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ else CFLAGS += -O3 endif -MAINEXECUTABLES=parse minify +MAINEXECUTABLES=parse minify json2json TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck COMPARISONEXECUTABLES=minifiercompetition parsingcompetition allparserscheckfile @@ -42,6 +42,7 @@ test: jsoncheck numberparsingcheck stringparsingcheck ./numberparsingcheck ./stringparsingcheck ./jsoncheck + ./scripts/testjson2json.sh @echo @tput setaf 2 @echo "It looks like the code is good!" @@ -90,6 +91,10 @@ minifiercompetition: benchmark/minifiercompetition.cpp $(HEADERS) $(MINIFIERHEAD minify: tools/minify.cpp $(HEADERS) $(MINIFIERHEADERS) $(LIBFILES) $(MINIFIERLIBFILES) $(CXX) $(CXXFLAGS) -o minify $(MINIFIERLIBFILES) $(LIBFILES) tools/minify.cpp -I. +json2json: tools/json2json.cpp $(HEADERS) $(LIBFILES) + $(CXX) $(CXXFLAGS) -o json2json $ tools/json2json.cpp $(LIBFILES) -I. + + ujdecode.o: $(UJSON4C_INCLUDE) $(CC) $(CFLAGS) -c dependencies/ujson4c/src/ujdecode.c diff --git a/include/simdjson/jsonformatutils.h b/include/simdjson/jsonformatutils.h index 13386ec7..be7aedd8 100644 --- a/include/simdjson/jsonformatutils.h +++ b/include/simdjson/jsonformatutils.h @@ -9,6 +9,10 @@ static inline void print_with_escapes(const unsigned char *src) { putchar('\\'); putchar('n'); break; + case '\r': + putchar('\\'); + putchar('r'); + break; case '\"': putchar('\\'); putchar('"'); @@ -23,7 +27,7 @@ static inline void print_with_escapes(const unsigned char *src) { break; default: if (*src <= 0x1F) { - printf("\\u%x", *src); + printf("\\u%04x", *src); } else putchar(*src); } diff --git a/include/simdjson/parsedjson.h b/include/simdjson/parsedjson.h index 25947bba..a20709e5 100644 --- a/include/simdjson/parsedjson.h +++ b/include/simdjson/parsedjson.h @@ -123,6 +123,7 @@ public: // print the json to stdout (should be valid) // return false if the tape is likely wrong (e.g., you did not parse a valid // 
JSON).
+  WARN_UNUSED
   bool printjson() {
     size_t tapeidx = 0;
     u64 tape_val = tape[tapeidx];
@@ -224,6 +225,79 @@ public:
     return true;
   }
 
+  // Dump the raw tape to stdout for debugging, one tape element per line.
+  // Strings, integers and floats are printed with a label; null, true, false
+  // and the object/array delimiters are printed as their bare token.
+  // Returns false if the tape does not start with a root node, if a number
+  // lacks its payload word, or if an unexpected element (including a root
+  // node before the end of the walk) is found.
+  WARN_UNUSED
+  bool dump_raw_tape() {
+    size_t tapeidx = 0;
+    u64 tape_val = tape[tapeidx++];
+    u8 type = (tape_val >> 56); // the top byte of a tape word is its type
+    if (type != 'r') {
+      printf("Error: no starting root node?\n");
+      return false;
+    }
+    // the payload of the starting root node bounds the walk
+    const size_t howmany = tape_val & JSONVALUEMASK;
+    for (; tapeidx < howmany; tapeidx++) {
+      tape_val = tape[tapeidx];
+      const u64 payload = tape_val & JSONVALUEMASK;
+      type = (tape_val >> 56);
+      switch (type) {
+      case '"': // we have a string, payload is an offset into string_buf
+        printf("string: ");
+        putchar('"');
+        print_with_escapes((const unsigned char *)(string_buf + payload));
+        putchar('"');
+        printf("\n");
+        break;
+      case 'l': // we have a long int, stored in the next tape word
+        if (tapeidx + 1 >= howmany)
+          return false;
+        printf("integer: %" PRId64 "\n", (int64_t)tape[++tapeidx]);
+        break;
+      case 'd': // we have a double, stored in the next tape word
+        // check first so that a truncated tape leaves no dangling label
+        if (tapeidx + 1 >= howmany)
+          return false;
+        // NOTE(review): the double is read through a pointer cast; memcpy
+        // would be the strictly conforming way if <cstring> is available here
+        printf("float: %f\n", *((double *)&tape[++tapeidx]));
+        break;
+      case 'n': // we have a null
+        printf("null\n");
+        break;
+      case 't': // we have a true
+        printf("true\n");
+        break;
+      case 'f': // we have a false
+        printf("false\n");
+        break;
+      case '{': // we start an object
+        printf("{\n");
+        break;
+      case '}': // we end an object
+        printf("}\n");
+        break;
+      case '[': // we start an array
+        printf("[\n");
+        break;
+      case ']': // we end an array
+        printf("]\n");
+        break;
+      case 'r': // the root node must not show up inside the walk
+        printf("end of root\n");
+        return false;
+      default:
+        return false;
+      }
+    }
+    return true;
+  }
+
   // all elements are stored on the tape using a 64-bit word.
//
   // strings, double and ints are stored as
diff --git a/scripts/testjson2json.sh b/scripts/testjson2json.sh
new file mode 100755
index 00000000..c01ef828
--- /dev/null
+++ b/scripts/testjson2json.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# Round-trip test for the json2json and minify tools, run from the repository
+# root. For every jsonexamples/*.json file x it checks that
+#   json2json(json2json(x))         == json2json(x)
+#   minify(json2json(json2json(x))) == minify(json2json(x))
+#   json2json(minify(json2json(x))) == json2json(x)
+# and exits with status 1 after printing "code is wrong" on the first failure.
+
+# A template ending in X's is accepted by both GNU and BSD mktemp, whereas
+# GNU mktemp rejects "-t simdjson1" because the template has no X's.
+TMPDIR1=$(mktemp -d "${TMPDIR:-/tmp}/simdjson1.XXXXXX") || exit 1
+TMPDIR2=$(mktemp -d "${TMPDIR:-/tmp}/simdjson2.XXXXXX") || exit 1
+
+trap "exit 1" HUP INT PIPE QUIT TERM
+trap 'rm -rf "$TMPDIR1" "$TMPDIR2"' EXIT
+
+function founderror() {
+  echo "code is wrong"
+  exit 1
+}
+
+make minify json2json || exit 1
+for f in jsonexamples/*.json; do
+  [ -e "$f" ] || continue # the glob matched nothing
+  i=$(basename "$f")
+  echo "$i"
+  # json2json exits non-zero on failure; without these checks two empty
+  # outputs would compare equal and the failure would go unnoticed
+  ./json2json "$f" > "$TMPDIR1/$i" || founderror
+  ./json2json "$TMPDIR1/$i" > "$TMPDIR2/$i" || founderror
+  cmp "$TMPDIR1/$i" "$TMPDIR2/$i" || founderror
+  ./minify "$TMPDIR1/$i" > "$TMPDIR1/minify$i"
+  ./minify "$TMPDIR2/$i" > "$TMPDIR2/minify$i"
+  cmp "$TMPDIR1/minify$i" "$TMPDIR2/minify$i" || founderror
+  ./json2json "$TMPDIR1/minify$i" > "$TMPDIR2/bisminify$i" || founderror
+  cmp "$TMPDIR1/$i" "$TMPDIR2/bisminify$i" || founderror
+done
+echo "test successful"
+
+exit 0
diff --git a/src/stage34_unified.cpp b/src/stage34_unified.cpp
index 309ce86a..ce09582f 100644
--- a/src/stage34_unified.cpp
+++ b/src/stage34_unified.cpp
@@ -170,6 +170,8 @@ bool unified_machine(const u8 *buf, size_t len, ParsedJson &pj) {
   }
 #ifdef SIMDJSON_ALLOWANYTHINGINROOT
   depth--; // for fall-through cases (e.g., documents containing just a string)
+  pj.annotate_previousloc(pj.containing_scope_offset[depth],
+                          pj.get_current_loc());
 #endif // ALLOWANYTHINGINROOT
 
 start_continue:
diff --git a/tools/json2json.cpp b/tools/json2json.cpp
new file mode 100644
index 00000000..d383cc55
--- /dev/null
+++ b/tools/json2json.cpp
@@ -0,0 +1,65 @@
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <string_view>
+#include <unistd.h>
+
+#include "simdjson/jsonparser.h"
+#include "simdjson/jsonioutil.h"
+
+// Print the command-line synopsis on stderr.
+static void print_usage(const char *progname) {
+  std::cerr << "Reads json in, out the result of the parsing. " << std::endl;
+  std::cerr << "Usage: " << progname << " [-d] <jsonfile>" << std::endl;
+}
+
+// Parse the JSON file named on the command line and print it on stdout:
+// pj.printjson() by default, pj.dump_raw_tape() when -d is given.
+// Failures detected here are reported on stderr and yield EXIT_FAILURE.
+int main(int argc, char *argv[]) {
+  bool rawdump = false;
+  int c;
+  while ((c = getopt(argc, argv, "d")) != -1) {
+    switch (c) {
+    case 'd':
+      rawdump = true;
+      break;
+    default: // unknown option: getopt has already printed a diagnostic
+      print_usage(argv[0]);
+      return EXIT_FAILURE;
+    }
+  }
+  if (optind >= argc) {
+    print_usage(argv[0]);
+    return EXIT_FAILURE;
+  }
+  const char *filename = argv[optind];
+  if (optind + 1 < argc) {
+    std::cerr << "warning: ignoring everything after " << argv[optind + 1]
+              << std::endl;
+  }
+  std::string_view p;
+  try {
+    p = get_corpus(filename);
+  } catch (const std::exception &) { // caught by reference to base
+    std::cerr << "Could not load the file " << filename << std::endl;
+    return EXIT_FAILURE;
+  }
+  ParsedJson pj;
+  bool allocok = pj.allocateCapacity(p.size(), 1024);
+  if (!allocok) {
+    std::cerr << "failed to allocate memory" << std::endl;
+    return EXIT_FAILURE;
+  }
+  bool is_ok = json_parse(p, pj); // do the parsing, return false on error
+  if (!is_ok) {
+    std::cerr << " Parsing failed. " << std::endl;
+    return EXIT_FAILURE;
+  }
+  is_ok = rawdump ? pj.dump_raw_tape() : pj.printjson();
+  if (!is_ok) {
+    std::cerr << " Could not print out parsed result. " << std::endl;
+    return EXIT_FAILURE;
+  }
+  return EXIT_SUCCESS;
+}