Removing parsers that are unfair.

This commit is contained in:
Daniel Lemire 2018-11-20 20:08:02 -05:00
parent 21ee490d18
commit 5bdf19bb18
4 changed files with 71 additions and 18 deletions

View File

@ -92,10 +92,10 @@ ujdecode.o: $(UJSON4C_INCLUDE)
$(CC) $(CFLAGS) -c dependencies/ujson4c/src/ujdecode.c $(CC) $(CFLAGS) -c dependencies/ujson4c/src/ujdecode.c
parsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES) $(OBJECTS) parsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES) $(OBJECTS)
$(CXX) $(CXXFLAGS) -o parsingcompetition $(LIBFILES) benchmark/parsingcompetition.cpp ujdecode.o -I. $(LIBFLAGS) $(CXX) $(CXXFLAGS) -o parsingcompetition $(LIBFILES) benchmark/parsingcompetition.cpp $(OBJECTS) -I. $(LIBFLAGS)
allparserscheckfile: tests/allparserscheckfile.cpp $(HEADERS) $(LIBFILES) allparserscheckfile: tests/allparserscheckfile.cpp $(HEADERS) $(LIBFILES) $(OBJECTS)
$(CXX) $(CXXFLAGS) -o allparserscheckfile $(LIBFILES) tests/allparserscheckfile.cpp -I. $(LIBFLAGS) $(CXX) $(CXXFLAGS) -o allparserscheckfile $(LIBFILES) tests/allparserscheckfile.cpp $(OBJECTS) -I. $(LIBFLAGS)
parsehisto: benchmark/parse.cpp $(HEADERS) $(LIBFILES) parsehisto: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parsehisto benchmark/parse.cpp $(LIBFILES) $(LIBFLAGS) -DBUILDHISTOGRAM $(CXX) $(CXXFLAGS) -o parsehisto benchmark/parse.cpp $(LIBFILES) $(LIBFLAGS) -DBUILDHISTOGRAM

View File

@ -59,7 +59,9 @@ To simplify the engineering, we make some assumptions.
## Features ## Features
- We parse integers and floating-point numbers as separate types which allows us to support large 64-bit integers. - We parse integers and floating-point numbers as separate types which allows us to support large 64-bit integers.
- We do full UTF-8 validation as part of the parsing. - We do full UTF-8 validation as part of the parsing. (Parsers like fastjson, gason and dropbox json11 do not do UTF-8 validation.)
- We fully validate the numbers. (Parsers like gason and ultrajson will accept `[0e+]` as valid JSON.)
- We validate string content for unescaped characters. (Parsers like fastjson and ultrajson accept unescaped line breaks and tabs in strings.)
## Architecture ## Architecture

View File

@ -1,4 +1,4 @@
#include <unistd.h>
#include "jsonparser/jsonparser.h" #include "jsonparser/jsonparser.h"
#include "benchmark.h" #include "benchmark.h"
@ -26,7 +26,7 @@ using namespace std;
// fastjson has a tricky interface // fastjson has a tricky interface
void on_json_error( void *, const fastjson::ErrorContext& ec) { void on_json_error( void *, const fastjson::ErrorContext& ec) {
std::cerr<<"ERROR: "<<ec.mesg<<std::endl; //std::cerr<<"ERROR: "<<ec.mesg<<std::endl;
} }
bool fastjson_parse(const char *input) { bool fastjson_parse(const char *input) {
fastjson::Token token; fastjson::Token token;
@ -37,17 +37,32 @@ bool fastjson_parse(const char *input) {
// end of fastjson stuff // end of fastjson stuff
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
if (argc < 2) { bool verbose = false;
bool all = false;
int c;
while ((c = getopt (argc, argv, "va")) != -1)
switch (c)
{
case 'v':
verbose = true;
break;
case 'a':
all = true;
break;
default:
abort ();
}
if (optind >= argc) {
cerr << "Usage: " << argv[0] << " <jsonfile>\n"; cerr << "Usage: " << argv[0] << " <jsonfile>\n";
cerr << "Or " << argv[0] << " -v <jsonfile>\n"; cerr << "Or " << argv[0] << " -v <jsonfile>\n";
cerr << "To enable parsers that are not standard compliant, use the -a flag\n";
exit(1); exit(1);
} }
bool verbose = false; const char * filename = argv[optind];
if (argc > 2) { if(optind + 1 < argc) {
if (strcmp(argv[1], "-v")) cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
verbose = true;
} }
pair<u8 *, size_t> p = get_corpus(argv[argc - 1]); pair<u8 *, size_t> p = get_corpus(filename);
if (verbose) { if (verbose) {
std::cout << "Input has "; std::cout << "Input has ";
if (p.second > 1024 * 1024) if (p.second > 1024 * 1024)
@ -88,15 +103,15 @@ int main(int argc, char *argv[]) {
BEST_TIME("sajson (static alloc)", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.second, buffer)).is_valid(), true, memcpy(buffer, p.first, p.second), repeat, volume, true); BEST_TIME("sajson (static alloc)", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.second, buffer)).is_valid(), true, memcpy(buffer, p.first, p.second), repeat, volume, true);
std::string json11err; std::string json11err;
BEST_TIME("dropbox (json11) ", json11::Json::parse(buffer,json11err).is_null(), false, memcpy(buffer, p.first, p.second), repeat, volume, true); if(all) BEST_TIME("dropbox (json11) ", (( json11::Json::parse(buffer,json11err).is_null() ) || ( ! json11err.empty() )), false, memcpy(buffer, p.first, p.second), repeat, volume, true);
BEST_TIME("fastjson ", fastjson_parse(buffer), true, memcpy(buffer, p.first, p.second), repeat, volume, true); if(all) BEST_TIME("fastjson ", fastjson_parse(buffer), true, memcpy(buffer, p.first, p.second), repeat, volume, true);
JsonValue value; JsonValue value;
JsonAllocator allocator; JsonAllocator allocator;
char *endptr; char *endptr;
BEST_TIME("gason ", jsonParse(buffer, &endptr, &value, allocator), JSON_OK, memcpy(buffer, p.first, p.second), repeat, volume, true); if(all) BEST_TIME("gason ", jsonParse(buffer, &endptr, &value, allocator), JSON_OK, memcpy(buffer, p.first, p.second), repeat, volume, true);
void *state; void *state;
BEST_TIME("ultrajson ", (UJDecode(buffer, p.second, NULL, &state) == NULL), false, memcpy(buffer, p.first, p.second), repeat, volume, true); if(all) BEST_TIME("ultrajson ", (UJDecode(buffer, p.second, NULL, &state) == NULL), false, memcpy(buffer, p.first, p.second), repeat, volume, true);
BEST_TIME("memcpy ", (memcpy(buffer, p.first, p.second) == buffer), true, , repeat, volume, true); BEST_TIME("memcpy ", (memcpy(buffer, p.first, p.second) == buffer), true, , repeat, volume, true);
free(p.first); free(p.first);
free(ast_buffer); free(ast_buffer);

View File

@ -8,9 +8,32 @@
#include "rapidjson/reader.h" // you have to check in the submodule #include "rapidjson/reader.h" // you have to check in the submodule
#include "rapidjson/stringbuffer.h" #include "rapidjson/stringbuffer.h"
#include "rapidjson/writer.h" #include "rapidjson/writer.h"
#include "json11.cpp"
#include "sajson.h" #include "sajson.h"
#include "fastjson.cpp"
#include "fastjson_dom.cpp"
#include "gason.cpp"
extern "C"
{
#include "ultrajsondec.c"
#include "ujdecode.h"
}
using namespace rapidjson;
using namespace std;
// fastjson has a tricky interface
// Error callback handed to fastjson::dom::parse_string (see fastjson_parse).
// Deliberately does nothing: the diagnostic print below is disabled, so a
// document rejected by fastjson produces no output on stderr.
void on_json_error( void *, const fastjson::ErrorContext& ec) {
// Reporting intentionally disabled; uncomment to print fastjson's message.
//std::cerr<<"ERROR: "<<ec.mesg<<std::endl;
}
// Runs fastjson's DOM parser over `input` and reports whether it accepted
// the document.
//
// input:   text to parse; passed without a length, so presumably it must be
//          NUL-terminated -- TODO confirm against fastjson's headers.
// returns: the result of fastjson::dom::parse_string, as a bool.
//
// Parse errors are routed to on_json_error, whose body is empty, so failures
// are only visible through the return value.
bool fastjson_parse(const char *input) {
  // Token and chunk are passed to the parser by address and discarded when
  // this function returns; only the verdict is kept.
  fastjson::Token token;
  fastjson::dom::Chunk chunk;
  // NOTE(review): the meaning of the literal 0 and of the trailing NULL
  // argument is not visible here -- confirm against fastjson_dom.
  return fastjson::dom::parse_string(input, &token, &chunk, 0, &on_json_error, NULL);
}
// end of fastjson stuff
@ -56,11 +79,24 @@ int main(int argc, char *argv[]) {
bool rapid_correct = (d.Parse((const char *)buffer).HasParseError() == false); bool rapid_correct = (d.Parse((const char *)buffer).HasParseError() == false);
bool rapid_correct_checkencoding = (d.Parse<kParseValidateEncodingFlag>((const char *)buffer).HasParseError() == false); bool rapid_correct_checkencoding = (d.Parse<kParseValidateEncodingFlag>((const char *)buffer).HasParseError() == false);
bool sajson_correct = sajson::parse(sajson::dynamic_allocation(), sajson::mutable_string_view(p.second, buffer)).is_valid(); bool sajson_correct = sajson::parse(sajson::dynamic_allocation(), sajson::mutable_string_view(p.second, buffer)).is_valid();
std::string json11err;
bool dropbox_correct = (( json11::Json::parse(buffer,json11err).is_null() ) || ( ! json11err.empty() )) == false;
bool fastjson_correct = fastjson_parse(buffer);
JsonValue value;
JsonAllocator allocator;
char *endptr;
bool gason_correct = (jsonParse(buffer, &endptr, &value, allocator) == JSON_OK);
void *state;
bool ultrajson_correct = ((UJDecode(buffer, p.second, NULL, &state) == NULL) == false);
printf("our parser : %s \n", ours_correct ? "correct":"invalid"); printf("our parser : %s \n", ours_correct ? "correct":"invalid");
printf("rapid : %s \n", rapid_correct ? "correct":"invalid"); printf("rapid : %s \n", rapid_correct ? "correct":"invalid");
printf("rapid (check encoding) : %s \n", rapid_correct_checkencoding ? "correct":"invalid"); printf("rapid (check encoding) : %s \n", rapid_correct_checkencoding ? "correct":"invalid");
printf("sajson : %s \n", sajson_correct ? "correct":"invalid"); printf("sajson : %s \n", sajson_correct ? "correct":"invalid");
printf("dropbox : %s \n", dropbox_correct ? "correct":"invalid");
printf("fastjson : %s \n", fastjson_correct ? "correct":"invalid");
printf("gason : %s \n", gason_correct ? "correct":"invalid");
printf("ultrajson : %s \n", ultrajson_correct ? "correct":"invalid");
free(buffer); free(buffer);
free(p.first); free(p.first);
deallocate_ParsedJson(pj_ptr); deallocate_ParsedJson(pj_ptr);