Adding more tests.

This commit is contained in:
Daniel Lemire 2018-12-06 17:22:22 -05:00
parent 196c41e3bc
commit e2d2d2f8ff
6 changed files with 175 additions and 2 deletions

View File

@ -17,7 +17,7 @@ else
CFLAGS += -O3
endif
MAINEXECUTABLES=parse minify
MAINEXECUTABLES=parse minify json2json
TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition allparserscheckfile
@ -42,6 +42,7 @@ test: jsoncheck numberparsingcheck stringparsingcheck
./numberparsingcheck
./stringparsingcheck
./jsoncheck
./scripts/testjson2json.sh
@echo
@tput setaf 2
@echo "It looks like the code is good!"
@ -90,6 +91,10 @@ minifiercompetition: benchmark/minifiercompetition.cpp $(HEADERS) $(MINIFIERHEAD
minify: tools/minify.cpp $(HEADERS) $(MINIFIERHEADERS) $(LIBFILES) $(MINIFIERLIBFILES)
$(CXX) $(CXXFLAGS) -o minify $(MINIFIERLIBFILES) $(LIBFILES) tools/minify.cpp -I.
# Build the json2json tool from tools/json2json.cpp and the library sources.
json2json: tools/json2json.cpp $(HEADERS) $(LIBFILES)
	$(CXX) $(CXXFLAGS) -o json2json tools/json2json.cpp $(LIBFILES) -I.
ujdecode.o: $(UJSON4C_INCLUDE)
$(CC) $(CFLAGS) -c dependencies/ujson4c/src/ujdecode.c

View File

@ -9,6 +9,10 @@ static inline void print_with_escapes(const unsigned char *src) {
putchar('\\');
putchar('n');
break;
case '\r':
putchar('\\');
putchar('r');
break;
case '\"':
putchar('\\');
putchar('"');
@ -23,7 +27,7 @@ static inline void print_with_escapes(const unsigned char *src) {
break;
default:
if (*src <= 0x1F) {
printf("\\u%x", *src);
printf("\\u%04x", *src);
} else
putchar(*src);
}

View File

@ -123,6 +123,7 @@ public:
// print the json to stdout (should be valid)
// return false if the tape is likely wrong (e.g., you did not parse a valid
// JSON).
WARN_UNUSED
bool printjson() {
size_t tapeidx = 0;
u64 tape_val = tape[tapeidx];
@ -224,6 +225,73 @@ public:
return true;
}
bool dump_raw_tape() {
size_t tapeidx = 0;
u64 tape_val = tape[tapeidx++];
u8 type = (tape_val >> 56);
size_t howmany = 0;
if (type == 'r') {
howmany = tape_val & JSONVALUEMASK;
} else {
printf("Error: no starting root node?");
return false;
}
for (; tapeidx < howmany; tapeidx++) {
tape_val = tape[tapeidx];
u64 payload = tape_val & JSONVALUEMASK;
type = (tape_val >> 56);
switch (type) {
case '"': // we have a string
printf("string: ");
putchar('"');
print_with_escapes((const unsigned char *)(string_buf + payload));
putchar('"');
printf("\n");
break;
case 'l': // we have a long int
if (tapeidx + 1 >= howmany)
return false;
printf("integer: ");
printf("%" PRId64, (int64_t)tape[++tapeidx]);
break;
case 'd': // we have a double
printf("float: ");
if (tapeidx + 1 >= howmany)
return false;
printf("%f", *((double *)&tape[++tapeidx]));
break;
case 'n': // we have a null
printf("null");
break;
case 't': // we have a true
printf("true");
break;
case 'f': // we have a false
printf("false");
break;
case '{': // we have an object
printf("{");
break;
case '}': // we end an object
printf("}");
break;
case '[': // we start an array
printf("[");
break;
case ']': // we end an array
printf("]");
break;
case 'r': // we start and end with the root node
printf("end of root");
return false;
default:
return false;
}
}
return true;
}
// all elements are stored on the tape using a 64-bit word.
//
// strings, double and ints are stored as

40
scripts/testjson2json.sh Executable file
View File

@ -0,0 +1,40 @@
#!/bin/bash
# Round-trip test for the json2json and minify tools.
#
# For every *.json file in jsonexamples/, check that:
#   1. json2json is a fixed point: running it on its own output gives the
#      same bytes;
#   2. minify gives the same bytes for both json2json outputs;
#   3. json2json applied to the minified file reproduces the first output.
# Prints "code is wrong" and exits with status 1 on the first failure.

# The XXXXXX placeholder is required by GNU mktemp; BSD/macOS mktemp accepts
# it as part of the -t prefix.
TMPDIR1=$(mktemp -d -t simdjson1.XXXXXX) || exit 1
TMPDIR2=$(mktemp -d -t simdjson2.XXXXXX) || { rm -rf "$TMPDIR1"; exit 1; }

trap 'exit 1' HUP INT PIPE QUIT TERM
# Single quotes: expand the directory names when the trap fires, and keep
# them quoted so that paths containing spaces are removed correctly.
trap 'rm -rf "$TMPDIR1" "$TMPDIR2"' EXIT

function founderror() {
  echo "code is wrong"
  exit 1
}

# Without the tools there is nothing to test.
make minify json2json || exit 1

for path in jsonexamples/*.json; do
  # When the glob matches nothing it is left as a literal pattern; skip it.
  [ -e "$path" ] || continue
  i=${path##*/}
  echo "$i"

  # json2json exits with a failure status when loading, parsing or printing
  # fails; without these checks two empty outputs would compare as equal.
  ./json2json "jsonexamples/$i" > "$TMPDIR1/$i" || founderror
  ./json2json "$TMPDIR1/$i" > "$TMPDIR2/$i" || founderror
  cmp "$TMPDIR1/$i" "$TMPDIR2/$i" || founderror

  ./minify "$TMPDIR1/$i" > "$TMPDIR1/minify$i"
  ./minify "$TMPDIR2/$i" > "$TMPDIR2/minify$i"
  cmp "$TMPDIR1/minify$i" "$TMPDIR2/minify$i" || founderror

  ./json2json "$TMPDIR1/minify$i" > "$TMPDIR2/bisminify$i" || founderror
  cmp "$TMPDIR1/$i" "$TMPDIR2/bisminify$i" || founderror
done

echo "test successful"
exit 0

View File

@ -170,6 +170,8 @@ bool unified_machine(const u8 *buf, size_t len, ParsedJson &pj) {
}
#ifdef SIMDJSON_ALLOWANYTHINGINROOT
depth--; // for fall-through cases (e.g., documents containing just a string)
pj.annotate_previousloc(pj.containing_scope_offset[depth],
pj.get_current_loc());
#endif // ALLOWANYTHINGINROOT
start_continue:

54
tools/json2json.cpp Normal file
View File

@ -0,0 +1,54 @@
#include <unistd.h>
#include "simdjson/jsonparser.h"
#include "simdjson/jsonioutil.h"
using namespace std;
// Command-line entry point: load the JSON file named on the command line,
// parse it, and write the parsed document to stdout.
//   -d   dump the raw tape instead of printing JSON
// Returns EXIT_SUCCESS when the result was printed and EXIT_FAILURE on a
// usage, load, allocation, parsing or printing error. Every diagnostic goes
// to stderr so that stdout carries nothing but the result.
int main(int argc, char *argv[]) {
  bool rawdump = false;
  int c;
  while ((c = getopt(argc, argv, "d")) != -1) {
    switch (c) {
    case 'd':
      rawdump = true;
      break;
    default:
      // By default getopt has already reported the offending option.
      std::cerr << "Usage: " << argv[0] << " [-d] <jsonfile>" << std::endl;
      return EXIT_FAILURE;
    }
  }
  if (optind >= argc) {
    std::cerr << "Reads json in, out the result of the parsing. " << std::endl;
    std::cerr << "Usage: " << argv[0] << " [-d] <jsonfile>" << std::endl;
    return EXIT_FAILURE;
  }
  const char *filename = argv[optind];
  if (optind + 1 < argc) {
    std::cerr << "warning: ignoring everything after " << argv[optind + 1]
              << std::endl;
  }
  std::string_view p;
  try {
    p = get_corpus(filename);
  } catch (const std::exception &e) { // caught by reference to base
    // Report on stderr: stdout may be redirected to hold the parsed output.
    std::cerr << "Could not load the file " << filename << std::endl;
    return EXIT_FAILURE;
  }
  ParsedJson pj;
  bool allocok = pj.allocateCapacity(p.size(), 1024);
  if (!allocok) {
    std::cerr << "failed to allocate memory" << std::endl;
    return EXIT_FAILURE;
  }
  bool is_ok = json_parse(p, pj); // do the parsing, return false on error
  if (!is_ok) {
    std::cerr << " Parsing failed. " << std::endl;
    return EXIT_FAILURE;
  }
  is_ok = rawdump ? pj.dump_raw_tape() : pj.printjson();
  if (!is_ok) {
    std::cerr << " Could not print out parsed result. " << std::endl;
    return EXIT_FAILURE;
  }
  return EXIT_SUCCESS;
}