Getting there slowly.
This commit is contained in:
parent
f983703a2e
commit
751dce98f5
6
Makefile
6
Makefile
|
@ -24,7 +24,7 @@ endif
|
||||||
|
|
||||||
MAINEXECUTABLES=parse minify json2json
|
MAINEXECUTABLES=parse minify json2json
|
||||||
TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck
|
TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck
|
||||||
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition allparserscheckfile
|
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition allparserscheckfile
|
||||||
|
|
||||||
HEADERS= include/simdjson/simdutf8check.h include/simdjson/stringparsing.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_flatten.h include/simdjson/stage34_unified.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h
|
HEADERS= include/simdjson/simdutf8check.h include/simdjson/stringparsing.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_flatten.h include/simdjson/stage34_unified.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h
|
||||||
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage34_unified.cpp
|
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage34_unified.cpp
|
||||||
|
@ -103,6 +103,10 @@ json2json: tools/json2json.cpp $(HEADERS) $(LIBFILES)
|
||||||
ujdecode.o: $(UJSON4C_INCLUDE)
|
ujdecode.o: $(UJSON4C_INCLUDE)
|
||||||
$(CC) $(CFLAGS) -c dependencies/ujson4c/src/ujdecode.c
|
$(CC) $(CFLAGS) -c dependencies/ujson4c/src/ujdecode.c
|
||||||
|
|
||||||
|
parseandstatcompetition: benchmark/parseandstatcompetition.cpp $(HEADERS) $(LIBFILES) $(OBJECTS)
|
||||||
|
$(CXX) $(CXXFLAGS) -o parseandstatcompetition $(LIBFILES) benchmark/parseandstatcompetition.cpp $(OBJECTS) -I. $(LIBFLAGS)
|
||||||
|
|
||||||
|
|
||||||
parsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES) $(OBJECTS)
|
parsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES) $(OBJECTS)
|
||||||
$(CXX) $(CXXFLAGS) -o parsingcompetition $(LIBFILES) benchmark/parsingcompetition.cpp $(OBJECTS) -I. $(LIBFLAGS)
|
$(CXX) $(CXXFLAGS) -o parsingcompetition $(LIBFILES) benchmark/parsingcompetition.cpp $(OBJECTS) -I. $(LIBFLAGS)
|
||||||
|
|
||||||
|
|
|
@ -69,7 +69,10 @@ make parsingcompetition
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
## Limitations
|
## Scope
|
||||||
|
|
||||||
|
We provide a fast parser. It fully validates the input according to the various specifications.
|
||||||
|
The parser builds a useful immutable (read-only) DOM (document-object model) which can be later accessed.
|
||||||
|
|
||||||
To simplify the engineering, we make some assumptions.
|
To simplify the engineering, we make some assumptions.
|
||||||
|
|
||||||
|
@ -78,6 +81,9 @@ To simplify the engineering, we make some assumptions.
|
||||||
- We only support GNU GCC and LLVM Clang at this time. There is no support for Microsoft Visual Studio, though it should not be difficult (help is invited).
|
- We only support GNU GCC and LLVM Clang at this time. There is no support for Microsoft Visual Studio, though it should not be difficult (help is invited).
|
||||||
- In cases of failure, we just report a failure without any indication as to the nature of the problem. (This can be easily improved without affecting performance.)
|
- In cases of failure, we just report a failure without any indication as to the nature of the problem. (This can be easily improved without affecting performance.)
|
||||||
|
|
||||||
|
*We do not aim to provide a general-purpose JSON library.*
|
||||||
|
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- The input string is unmodified. (Parsers like sajson and RapidJSON use the input string as a buffer.)
|
- The input string is unmodified. (Parsers like sajson and RapidJSON use the input string as a buffer.)
|
||||||
|
|
|
@ -0,0 +1,196 @@
|
||||||
|
#include <unistd.h>
|
||||||
|
#include "simdjson/jsonparser.h"
|
||||||
|
|
||||||
|
#include "benchmark.h"
|
||||||
|
|
||||||
|
// #define RAPIDJSON_SSE2 // bad for performance
|
||||||
|
// #define RAPIDJSON_SSE42 // bad for performance
|
||||||
|
#include "rapidjson/document.h"
|
||||||
|
#include "rapidjson/reader.h"
|
||||||
|
#include "rapidjson/stringbuffer.h"
|
||||||
|
#include "rapidjson/writer.h"
|
||||||
|
|
||||||
|
#include "sajson.h"
|
||||||
|
|
||||||
|
using namespace rapidjson;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
// Tally of the node kinds found in one parsed JSON document.
// Every counter starts at zero and `valid` starts false, so a stat_t that is
// returned early (e.g. on a parse failure) never carries indeterminate values.
struct stat_s {
  size_t number_count = 0; // integer and floating-point values
  size_t object_count = 0; // '{' nodes
  size_t array_count = 0;  // '[' nodes
  size_t null_count = 0;
  size_t true_count = 0;
  size_t false_count = 0;
  bool valid = false;      // set by the compute functions once parsing succeeded
};

typedef struct stat_s stat_t;
|
||||||
|
|
||||||
|
// Parses `p` with simdjson and tallies the node kinds found on the tape.
//
// Returns a stat_t whose `valid` flag is false, with all counters at zero or
// partially filled, when parsing fails, when the tape does not start with a
// root ('r') node, or when an unexpected node type is met.
stat_t simdjson_computestats(const std::string_view & p) {
  stat_t answer{}; // value-initialized: counters are 0, valid is false
  ParsedJson pj = build_parsed_json(p);
  answer.valid = pj.isValid();
  if (!answer.valid) {
    return answer;
  }
  // tape[0] is the root node; its payload is the tape index where we stop.
  const u64 root_val = pj.tape[0];
  if (static_cast<u8>(root_val >> 56) != 'r') {
    answer.valid = false;
    return answer;
  }
  const size_t howmany = root_val & JSONVALUEMASK;
  // The first real node sits right after the root, at index 1.
  for (size_t tapeidx = 1; tapeidx < howmany; tapeidx++) {
    const u8 type = static_cast<u8>(pj.tape[tapeidx] >> 56);
    switch (type) {
    case 'l': // we have a long int
    case 'd': // we have a double
      answer.number_count++;
      tapeidx++; // the raw 64-bit value lives in the next tape word: skip it
      break;
    case 'n': // we have a null
      answer.null_count++;
      break;
    case 't': // we have a true
      answer.true_count++;
      break;
    case 'f': // we have a false
      answer.false_count++;
      break;
    case '{': // we start an object
      answer.object_count++;
      break;
    case '[': // we start an array
      answer.array_count++;
      break;
    case '"': // a string (value or object key): legal, but not tallied
    case '}': // we end an object
    case ']': // we end an array
      break;
    default: // unknown node type: the tape is not what we expect
      answer.valid = false;
      return answer;
    }
  }
  return answer;
}
|
||||||
|
|
||||||
|
// Parses `p` with RapidJSON and reports whether it is valid JSON.
//
// Returns a stat_t with `valid` set from the parse result and every counter
// at zero.
// TODO: walk the resulting document and fill in the counters so the result
// is comparable with simdjson_computestats.
stat_t rapid_computestats(const std::string_view & p) {
  stat_t answer{}; // value-initialized: counters are 0, valid is false
  rapidjson::Document d;
  // Parse() leaves its input untouched. ParseInsitu() rewrites the buffer it
  // is given, which is not allowed on the read-only data of a string_view.
  // NOTE(review): this overload reads up to a terminating '\0'; it assumes
  // the buffer behind `p` is null-terminated - confirm against the caller.
  d.Parse<kParseValidateEncodingFlag>(p.data());
  answer.valid = !d.HasParseError();
  return answer;
}
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
bool verbose = false;
|
||||||
|
bool all = false;
|
||||||
|
int c;
|
||||||
|
while ((c = getopt (argc, argv, "v")) != -1)
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
case 'v':
|
||||||
|
verbose = true;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
abort ();
|
||||||
|
}
|
||||||
|
if (optind >= argc) {
|
||||||
|
cerr << "Usage: " << argv[0] << " <jsonfile>\n";
|
||||||
|
cerr << "Or " << argv[0] << " -v <jsonfile>\n";
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
const char * filename = argv[optind];
|
||||||
|
if(optind + 1 < argc) {
|
||||||
|
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
|
||||||
|
}
|
||||||
|
std::string_view p;
|
||||||
|
try {
|
||||||
|
p = get_corpus(filename);
|
||||||
|
} catch (const std::exception& e) { // caught by reference to base
|
||||||
|
std::cout << "Could not load the file " << filename << std::endl;
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (verbose) {
|
||||||
|
std::cout << "Input has ";
|
||||||
|
if (p.size() > 1024 * 1024)
|
||||||
|
std::cout << p.size() / (1024 * 1024) << " MB ";
|
||||||
|
else if (p.size() > 1024)
|
||||||
|
std::cout << p.size() / 1024 << " KB ";
|
||||||
|
else
|
||||||
|
std::cout << p.size() << " B ";
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
ParsedJson pj;
|
||||||
|
bool allocok = pj.allocateCapacity(p.size(), 1024);
|
||||||
|
|
||||||
|
if (!allocok) {
|
||||||
|
std::cerr << "can't allocate memory" << std::endl;
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
int repeat = 10;
|
||||||
|
int volume = p.size();
|
||||||
|
BEST_TIME("simdjson (dynamic mem) ", build_parsed_json(p).isValid(), true, , repeat, volume, true);
|
||||||
|
|
||||||
|
BEST_TIME("simdjson (static alloc) ", json_parse(p, pj), true, , repeat, volume, true);
|
||||||
|
|
||||||
|
rapidjson::Document d;
|
||||||
|
|
||||||
|
char *buffer = (char *)malloc(p.size() + 1);
|
||||||
|
memcpy(buffer, p.data(), p.size());
|
||||||
|
buffer[p.size()] = '\0';
|
||||||
|
|
||||||
|
BEST_TIME("RapidJSON",
|
||||||
|
d.Parse<kParseValidateEncodingFlag>((const char *)buffer).HasParseError(),
|
||||||
|
false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
|
||||||
|
BEST_TIME("RapidJSON (insitu)", d.ParseInsitu<kParseValidateEncodingFlag>(buffer).HasParseError(), false,
|
||||||
|
memcpy(buffer, p.data(), p.size()), repeat, volume, true);
|
||||||
|
|
||||||
|
BEST_TIME("sajson (dynamic mem, insitu)", sajson::parse(sajson::dynamic_allocation(), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
|
||||||
|
|
||||||
|
size_t astbuffersize = p.size();
|
||||||
|
size_t * ast_buffer = (size_t *) malloc(astbuffersize * sizeof(size_t));
|
||||||
|
|
||||||
|
BEST_TIME("sajson (static alloc, insitu)", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
|
||||||
|
std::string json11err;
|
||||||
|
if(all) BEST_TIME("dropbox (json11) ", (( json11::Json::parse(buffer,json11err).is_null() ) || ( ! json11err.empty() )), false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
|
||||||
|
|
||||||
|
if(all) BEST_TIME("fastjson ", fastjson_parse(buffer), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
|
||||||
|
JsonValue value;
|
||||||
|
JsonAllocator allocator;
|
||||||
|
char *endptr;
|
||||||
|
if(all) BEST_TIME("gason ", jsonParse(buffer, &endptr, &value, allocator), JSON_OK, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
|
||||||
|
void *state;
|
||||||
|
if(all) BEST_TIME("ultrajson ", (UJDecode(buffer, p.size(), NULL, &state) == NULL), false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
|
||||||
|
BEST_TIME("memcpy ", (memcpy(buffer, p.data(), p.size()) == buffer), true, , repeat, volume, true);
|
||||||
|
free((void*)p.data());
|
||||||
|
free(ast_buffer);
|
||||||
|
free(buffer);
|
||||||
|
}
|
||||||
|
|
|
@ -97,7 +97,8 @@ int main(int argc, char *argv[]) {
|
||||||
char *buffer = (char *)malloc(p.size() + 1);
|
char *buffer = (char *)malloc(p.size() + 1);
|
||||||
memcpy(buffer, p.data(), p.size());
|
memcpy(buffer, p.data(), p.size());
|
||||||
buffer[p.size()] = '\0';
|
buffer[p.size()] = '\0';
|
||||||
|
//
|
||||||
|
// Todo: It is possible to preallocate a block of memory with RapidJSON using a MemoryAllocator.
|
||||||
BEST_TIME("RapidJSON",
|
BEST_TIME("RapidJSON",
|
||||||
d.Parse<kParseValidateEncodingFlag>((const char *)buffer).HasParseError(),
|
d.Parse<kParseValidateEncodingFlag>((const char *)buffer).HasParseError(),
|
||||||
false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
|
false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
|
||||||
|
|
|
@ -283,7 +283,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// all elements are stored on the tape using a 64-bit word.
|
// all nodes are stored on the tape using a 64-bit word.
|
||||||
//
|
//
|
||||||
// strings, double and ints are stored as
|
// strings, double and ints are stored as
|
||||||
// a 64-bit word with a pointer to the actual value
|
// a 64-bit word with a pointer to the actual value
|
||||||
|
@ -327,18 +327,20 @@ public:
|
||||||
explicit iterator(ParsedJson &pj_)
|
explicit iterator(ParsedJson &pj_)
|
||||||
: pj(pj_), depth(0), location(0), tape_length(0), depthindex(NULL) {
|
: pj(pj_), depth(0), location(0), tape_length(0), depthindex(NULL) {
|
||||||
if(pj.isValid()) {
|
if(pj.isValid()) {
|
||||||
depthindex = new size_t[pj.depthcapacity];
|
depthindex = new scopeindex_t[pj.depthcapacity];
|
||||||
if(depthindex == NULL) return;
|
if(depthindex == NULL) return;
|
||||||
depthindex[0] = 0;
|
depthindex[0].start_of_scope = location;
|
||||||
current_val = pj.tape[location++];
|
current_val = pj.tape[location++];
|
||||||
current_type = (current_val >> 56);
|
current_type = (current_val >> 56);
|
||||||
|
depthindex[0].scope_type = current_type;
|
||||||
if (current_type == 'r') {
|
if (current_type == 'r') {
|
||||||
tape_length = current_val & JSONVALUEMASK;
|
tape_length = current_val & JSONVALUEMASK;
|
||||||
if(location < tape_length) {
|
if(location < tape_length) {
|
||||||
current_val = pj.tape[location];
|
current_val = pj.tape[location];
|
||||||
current_type = (current_val >> 56);
|
current_type = (current_val >> 56);
|
||||||
depth++;
|
depth++;
|
||||||
depthindex[depth] = location;
|
depthindex[depth].start_of_scope = location;
|
||||||
|
depthindex[depth].scope_type = current_type;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -351,7 +353,7 @@ public:
|
||||||
pj(o.pj), depth(o.depth), location(o.location),
|
pj(o.pj), depth(o.depth), location(o.location),
|
||||||
tape_length(o.tape_length), current_type(o.current_type),
|
tape_length(o.tape_length), current_type(o.current_type),
|
||||||
current_val(o.current_val), depthindex(NULL) {
|
current_val(o.current_val), depthindex(NULL) {
|
||||||
depthindex = new size_t[pj.depthcapacity];
|
depthindex = new scopeindex_t[pj.depthcapacity];
|
||||||
if(depthindex != NULL) {
|
if(depthindex != NULL) {
|
||||||
memcpy(o.depthindex, depthindex, pj.depthcapacity * sizeof(depthindex[0]));
|
memcpy(o.depthindex, depthindex, pj.depthcapacity * sizeof(depthindex[0]));
|
||||||
} else {
|
} else {
|
||||||
|
@ -365,24 +367,104 @@ public:
|
||||||
current_val(o.current_val), depthindex(o.depthindex) {
|
current_val(o.current_val), depthindex(o.depthindex) {
|
||||||
o.depthindex = NULL;// we take ownship
|
o.depthindex = NULL;// we take ownship
|
||||||
}
|
}
|
||||||
|
|
||||||
WARN_UNUSED
|
WARN_UNUSED
|
||||||
bool isOk() const {
|
bool isOk() const {
|
||||||
return location < tape_length;
|
return location < tape_length;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// useful for debugging purposes
|
||||||
size_t get_tape_location() const {
|
size_t get_tape_location() const {
|
||||||
return location;
|
return location;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t get_tape_lenght() const {
|
// useful for debugging purposes
|
||||||
|
size_t get_tape_length() const {
|
||||||
return tape_length;
|
return tape_length;
|
||||||
}
|
}
|
||||||
|
|
||||||
// return true if we can do the navigation, false
|
// returns the current depth (starts at 1, with 0 reserved for the fictitious root node)
|
||||||
|
size_t get_depth() const {
|
||||||
|
return depth;
|
||||||
|
}
|
||||||
|
|
||||||
|
// A scope is a series of nodes at the same depth, typically it is either an object ({) or an array ([).
|
||||||
|
// The root node has type 'r'.
|
||||||
|
u8 get_scope_type() const {
|
||||||
|
return depthindex[depth].scope_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
// move forward in document order
|
||||||
|
WARN_UNUSED
|
||||||
|
bool move_forward() {
|
||||||
|
if(location + 1 >= tape_length) {
|
||||||
|
return false; // we are at the end!
|
||||||
|
}
|
||||||
|
// we are entering a new scope
|
||||||
|
if ((current_type == '[') || (current_type == '{')){
|
||||||
|
depth++;
|
||||||
|
depthindex[depth].start_of_scope = location;
|
||||||
|
depthindex[depth].scope_type = current_type;
|
||||||
|
}
|
||||||
|
location = location + 1;
|
||||||
|
current_val = pj.tape[location];
|
||||||
|
current_type = (current_val >> 56);
|
||||||
|
// if we encounter a scope closure, we need to move up
|
||||||
|
while ((current_type == ']') || (current_type == '}')) {
|
||||||
|
if(location + 1 >= tape_length) {
|
||||||
|
return false; // we are at the end!
|
||||||
|
}
|
||||||
|
depth--;
|
||||||
|
if(depth == 0) {
|
||||||
|
return false; // should not be necessary
|
||||||
|
}
|
||||||
|
location = location + 1;
|
||||||
|
current_val = pj.tape[location];
|
||||||
|
current_type = (current_val >> 56);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// retrieve the character code of what we're looking at:
|
||||||
|
// [{"sltfn are the possibilities
|
||||||
|
WARN_UNUSED
|
||||||
|
really_inline u8 get_type() const {
|
||||||
|
return current_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
// get the s64 value at this node; valid only if we're at "l"
|
||||||
|
WARN_UNUSED
|
||||||
|
really_inline s64 get_integer() const {
|
||||||
|
if(location + 1 >= tape_length) return 0;// default value in case of error
|
||||||
|
return (s64) pj.tape[location + 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
// get the double value at this node; valid only if
|
||||||
|
// we're at "d"
|
||||||
|
WARN_UNUSED
|
||||||
|
really_inline double get_double() const {
|
||||||
|
if(location + 1 >= tape_length) return NAN;// default value in case of error
|
||||||
|
double answer;
|
||||||
|
memcpy(&answer, & pj.tape[location + 1], sizeof(answer));
|
||||||
|
return answer;
|
||||||
|
}
|
||||||
|
|
||||||
|
// get the string value at this node (NULL ended); valid only if we're at "
|
||||||
|
// note that tabs, and line endings are escaped in the returned value (see print_with_escapes)
|
||||||
|
// return value is valid UTF-8
|
||||||
|
WARN_UNUSED
|
||||||
|
really_inline const char * get_string() const {
|
||||||
|
return (const char *)(pj.string_buf + (current_val & JSONVALUEMASK)) ;
|
||||||
|
}
|
||||||
|
|
||||||
|
// throughout return true if we can do the navigation, false
|
||||||
// otherwise
|
// otherwise
|
||||||
|
|
||||||
// withing a give scope, we move forward
|
// Within a given scope (series of nodes at the same depth within either an
|
||||||
// valid if we're not at the end of a scope (returns true)
|
// array or an object), we move forward.
|
||||||
|
// Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, { and [.
|
||||||
|
// At the object ({) or at the array ([), you can issue a "down" to visit their content.
|
||||||
|
// valid if we're not at the end of a scope (returns true).
|
||||||
WARN_UNUSED
|
WARN_UNUSED
|
||||||
really_inline bool next() {
|
really_inline bool next() {
|
||||||
if ((current_type == '[') || (current_type == '{')){
|
if ((current_type == '[') || (current_type == '{')){
|
||||||
|
@ -415,17 +497,22 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// valid if we're not at the start of a scope
|
|
||||||
|
// Within a given scope (series of nodes at the same depth within either an
|
||||||
|
// array or an object), we move backward.
|
||||||
|
// Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true when starting at the end
|
||||||
|
// of the scope.
|
||||||
|
// At the object ({) or at the array ([), you can issue a "down" to visit their content.
|
||||||
WARN_UNUSED
|
WARN_UNUSED
|
||||||
really_inline bool prev() {
|
really_inline bool prev() {
|
||||||
if(location - 1 < depthindex[depth]) return false;
|
if(location - 1 < depthindex[depth].start_of_scope) return false;
|
||||||
location -= 1;
|
location -= 1;
|
||||||
current_val = pj.tape[location];
|
current_val = pj.tape[location];
|
||||||
current_type = (current_val >> 56);
|
current_type = (current_val >> 56);
|
||||||
if ((current_type == ']') || (current_type == '}')){
|
if ((current_type == ']') || (current_type == '}')){
|
||||||
// we need to jump
|
// we need to jump
|
||||||
size_t new_location = ( current_val & JSONVALUEMASK);
|
size_t new_location = ( current_val & JSONVALUEMASK);
|
||||||
if(new_location < depthindex[depth]) {
|
if(new_location < depthindex[depth].start_of_scope) {
|
||||||
return false; // shoud never happen
|
return false; // shoud never happen
|
||||||
}
|
}
|
||||||
location = new_location;
|
location = new_location;
|
||||||
|
@ -435,8 +522,10 @@ public:
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Moves back to either the containing array or object (type { or [) from
|
||||||
// valid unless we are at the first level of the document
|
// within a contained scope.
|
||||||
|
// Valid unless we are at the first level of the document
|
||||||
|
//
|
||||||
WARN_UNUSED
|
WARN_UNUSED
|
||||||
really_inline bool up() {
|
really_inline bool up() {
|
||||||
if(depth == 1) {
|
if(depth == 1) {
|
||||||
|
@ -452,8 +541,10 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// valid if we're at a [ or { and it starts a non-empty scope; moves us to start of
|
// Valid if we're at a [ or { and it starts a non-empty scope; moves us to start of
|
||||||
// that deeper scope if it not empty
|
// that deeper scope if it is not empty.
|
||||||
|
// Thus, given [true, null, {"a":1}, [1,2]], if we are at the { node, we would move to the
|
||||||
|
// "a" node.
|
||||||
WARN_UNUSED
|
WARN_UNUSED
|
||||||
really_inline bool down() {
|
really_inline bool down() {
|
||||||
if(location + 1 >= tape_length) return false;
|
if(location + 1 >= tape_length) return false;
|
||||||
|
@ -464,7 +555,8 @@ public:
|
||||||
}
|
}
|
||||||
depth++;
|
depth++;
|
||||||
location = location + 1;
|
location = location + 1;
|
||||||
depthindex[depth] = location;
|
depthindex[depth].start_of_scope = location;
|
||||||
|
depthindex[depth].scope_type = current_type;
|
||||||
current_val = pj.tape[location];
|
current_val = pj.tape[location];
|
||||||
current_type = (current_val >> 56);
|
current_type = (current_val >> 56);
|
||||||
return true;
|
return true;
|
||||||
|
@ -472,9 +564,10 @@ public:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// move us to the start of our current scope
|
// move us to the start of our current scope,
|
||||||
|
// a scope is a series of nodes at the same level
|
||||||
void to_start_scope() {
|
void to_start_scope() {
|
||||||
location = depthindex[depth];
|
location = depthindex[depth].start_of_scope;
|
||||||
current_val = pj.tape[location];
|
current_val = pj.tape[location];
|
||||||
current_type = (current_val >> 56);
|
current_type = (current_val >> 56);
|
||||||
}
|
}
|
||||||
|
@ -522,33 +615,7 @@ public:
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// retrieve the character code of what we're looking at:
|
typedef struct {size_t start_of_scope; u8 scope_type;} scopeindex_t;
|
||||||
// [{"sltfn are the possibilities
|
|
||||||
really_inline u8 get_type() const {
|
|
||||||
return current_type;
|
|
||||||
}
|
|
||||||
|
|
||||||
// get the s64 value at this node; valid only if we're at "l"
|
|
||||||
really_inline s64 get_integer() const {
|
|
||||||
if(location + 1 >= tape_length) return 0;// default value in case of error
|
|
||||||
return (s64) pj.tape[location + 1];
|
|
||||||
}
|
|
||||||
|
|
||||||
// get the double value at this node; valid only if
|
|
||||||
// we're at "d"
|
|
||||||
really_inline double get_double() const {
|
|
||||||
if(location + 1 >= tape_length) return NAN;// default value in case of error
|
|
||||||
double answer;
|
|
||||||
memcpy(&answer, & pj.tape[location + 1], sizeof(answer));
|
|
||||||
return answer;
|
|
||||||
}
|
|
||||||
|
|
||||||
// get the string value at this node (NULL ended); valid only if we're at "
|
|
||||||
// note that tabs, and line endings are escaped in the returned value (see print_with_escapes)
|
|
||||||
// return value is valid UTF-8
|
|
||||||
really_inline const char * get_string() const {
|
|
||||||
return (const char *)(pj.string_buf + (current_val & JSONVALUEMASK)) ;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
|
@ -560,7 +627,7 @@ private:
|
||||||
size_t tape_length;
|
size_t tape_length;
|
||||||
u8 current_type;
|
u8 current_type;
|
||||||
u64 current_val;
|
u64 current_val;
|
||||||
size_t *depthindex;
|
scopeindex_t *depthindex;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -23,6 +23,7 @@ void compute_dump(ParsedJson::iterator &pjh) {
|
||||||
}
|
}
|
||||||
// we have a non-empty scope and we are at the beginning of it
|
// we have a non-empty scope and we are at the beginning of it
|
||||||
if (inobject) {
|
if (inobject) {
|
||||||
|
assert(pjh.get_scope_type() == '{');
|
||||||
std::cout << "{";
|
std::cout << "{";
|
||||||
assert(pjh.get_type() == '"');
|
assert(pjh.get_type() == '"');
|
||||||
pjh.print(std::cout); // must be a string
|
pjh.print(std::cout); // must be a string
|
||||||
|
@ -39,6 +40,7 @@ void compute_dump(ParsedJson::iterator &pjh) {
|
||||||
}
|
}
|
||||||
std::cout << "}";
|
std::cout << "}";
|
||||||
} else {
|
} else {
|
||||||
|
assert(pjh.get_scope_type() == '[');
|
||||||
std::cout << "[";
|
std::cout << "[";
|
||||||
compute_dump(pjh); // let us recurse
|
compute_dump(pjh); // let us recurse
|
||||||
while (pjh.next()) {
|
while (pjh.next()) {
|
||||||
|
|
Loading…
Reference in New Issue