Updating amalgamation script.

This commit is contained in:
Daniel Lemire 2019-02-22 15:42:44 -05:00
parent 3640ab9dd3
commit 44f39815d3
4 changed files with 749 additions and 661 deletions

View File

@ -18,6 +18,8 @@ $SCRIPTPATH/src/jsonminifier.cpp
$SCRIPTPATH/src/jsonparser.cpp
$SCRIPTPATH/src/stage1_find_marks.cpp
$SCRIPTPATH/src/stage2_build_tape.cpp
$SCRIPTPATH/src/parsedjson.cpp
$SCRIPTPATH/src/parsedjsoniterator.cpp
"
# order matters
@ -94,7 +96,7 @@ cat <<< '
#include "simdjson.h"
#include "simdjson.cpp"
int main(int argc, char *argv[]) {
const char * filename = argv[1];
const char * filename = argv[1];
std::string_view p = get_corpus(filename);
ParsedJson pj = build_parsed_json(p); // do the parsing
if( ! pj.isValid() ) {
@ -117,7 +119,7 @@ echo "Giving final instructions:"
CPPBIN=${DEMOCPP%%.*}
echo "Try :"
echo "c++ -march=native -O3 -std=c++11 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} jsonexamples/twitter.json "
echo "c++ -march=native -O3 -std=c++11 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} ../jsonexamples/twitter.json "
SINGLEHDR=$SCRIPTPATH/singleheader
echo "Copying files to $SCRIPTPATH/singleheader "
@ -126,9 +128,10 @@ echo "c++ -march=native -O3 -std=c++11 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} .
cp ${AMAL_C} ${AMAL_H} ${DEMOCPP} $SINGLEHDR
ls $SINGLEHDR
cd $SINGLEHDR && c++ -march=native -O3 -std=c++11 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} ../jsonexamples/twitter.json
# Print the first argument converted to lower case.
# POSIX character classes are used instead of the 'A-Z'/'a-z' ranges because
# range expansion depends on the current locale; printf is used instead of echo
# so that arguments starting with '-' or containing backslashes pass through
# unchanged.
lowercase(){
  printf '%s\n' "$1" | tr '[:upper:]' '[:lower:]'
}
OS=`lowercase \`uname\``

View File

@ -1,10 +1,10 @@
/* auto-generated on Fri 4 Jan 2019 20:12:48 EST. Do not edit! */
/* auto-generated on Fri 22 Feb 2019 15:42:34 EST. Do not edit! */
#include <iostream>
#include "simdjson.h"
#include "simdjson.cpp"
int main(int argc, char *argv[]) {
const char * filename = argv[1];
const char * filename = argv[1];
std::string_view p = get_corpus(filename);
ParsedJson pj = build_parsed_json(p); // do the parsing
if( ! pj.isValid() ) {

View File

@ -1,4 +1,4 @@
/* auto-generated on Fri 4 Jan 2019 20:12:48 EST. Do not edit! */
/* auto-generated on Fri 22 Feb 2019 15:42:34 EST. Do not edit! */
#include "simdjson.h"
/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */
@ -341,12 +341,6 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
}
}
bool isok = find_structural_bits(buf, len, pj);
/*if (isok) {
isok = flatten_indexes(len, pj);
} else {
if(reallocated) free((void*)buf);
return false;
}*/
if (isok) {
isok = unified_machine(buf, len, pj);
} else {
@ -377,35 +371,6 @@ ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneede
#define SIMDJSON_UTF8VALIDATE
#endif
#ifndef NO_PDEP_WIDTH
#define NO_PDEP_WIDTH 8
#endif
#define SET_BIT(i) \
base_ptr[base + i] = (uint32_t)idx - 64 + trailingzeroes(structurals); \
structurals = structurals & (structurals - 1);
#define SET_BIT1 SET_BIT(0)
#define SET_BIT2 SET_BIT1 SET_BIT(1)
#define SET_BIT3 SET_BIT2 SET_BIT(2)
#define SET_BIT4 SET_BIT3 SET_BIT(3)
#define SET_BIT5 SET_BIT4 SET_BIT(4)
#define SET_BIT6 SET_BIT5 SET_BIT(5)
#define SET_BIT7 SET_BIT6 SET_BIT(6)
#define SET_BIT8 SET_BIT7 SET_BIT(7)
#define SET_BIT9 SET_BIT8 SET_BIT(8)
#define SET_BIT10 SET_BIT9 SET_BIT(9)
#define SET_BIT11 SET_BIT10 SET_BIT(10)
#define SET_BIT12 SET_BIT11 SET_BIT(11)
#define SET_BIT13 SET_BIT12 SET_BIT(12)
#define SET_BIT14 SET_BIT13 SET_BIT(13)
#define SET_BIT15 SET_BIT14 SET_BIT(14)
#define SET_BIT16 SET_BIT15 SET_BIT(15)
#define CALL(macro, ...) macro(__VA_ARGS__)
#define SET_BITLOOPN(n) SET_BIT##n
// It seems that many parsers do UTF-8 validation.
// RapidJSON does not do it by default, but a flag
// allows it.
@ -413,6 +378,7 @@ ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneede
#endif
using namespace std;
// a straightforward comparison of a mask against input. 5 uops; would be
// cheaper in AVX512.
really_inline uint64_t cmp_mask_against_input(__m256i input_lo, __m256i input_hi,
@ -463,7 +429,7 @@ WARN_UNUSED
#ifndef _MSC_VER
__builtin_prefetch(buf + idx + 128);
#endif
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buf + idx + 0));
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buf + idx + 0));
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buf + idx + 32));
#ifdef SIMDJSON_UTF8VALIDATE
__m256i highbit = _mm256_set1_epi8(0x80);
@ -528,12 +494,23 @@ WARN_UNUSED
uint32_t cnt = hamming(structurals);
uint32_t next_base = base + cnt;
while (structurals) {
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
s = s & (s - 1);
}*/
base += NO_PDEP_WIDTH;
base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base += 8;
}
base = next_base;
@ -695,12 +672,23 @@ WARN_UNUSED
uint32_t cnt = hamming(structurals);
uint32_t next_base = base + cnt;
while (structurals) {
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
s = s & (s - 1);
}*/
base += NO_PDEP_WIDTH;
base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base += 8;
}
base = next_base;
// How do we build up a user traversable data structure
@ -788,12 +776,23 @@ WARN_UNUSED
uint32_t cnt = hamming(structurals);
uint32_t next_base = base + cnt;
while (structurals) {
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
s = s & (s - 1);
}*/
base += NO_PDEP_WIDTH;
base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base += 8;
}
base = next_base;
@ -1323,3 +1322,579 @@ fail:
return false;
}
/* end file /Users/lemire/CVS/github/simdjson/src/stage2_build_tape.cpp */
/* begin file /Users/lemire/CVS/github/simdjson/src/parsedjson.cpp */
// Default constructor: builds a container with zero capacity. Every counter is
// zero, every buffer pointer is null and the document is flagged as not valid.
ParsedJson::ParsedJson()
    : bytecapacity(0),
      depthcapacity(0),
      tapecapacity(0),
      stringcapacity(0),
      current_loc(0),
      n_structural_indexes(0),
      structural_indexes(NULL),
      tape(NULL),
      containing_scope_offset(NULL),
      ret_address(NULL),
      string_buf(NULL),
      current_string_buf_loc(NULL),
      isvalid(false) {
}
// Destructor: hands every owned buffer back through deallocate().
ParsedJson::~ParsedJson() { this->deallocate(); }
// Move constructor: takes over the buffers, capacities and parse state of p.
// p is left as an empty container (null buffers, zero capacities, not valid),
// so destroying it frees nothing and a later allocateCapacity() call on it
// cannot mistake stale capacity values for storage it no longer owns.
ParsedJson::ParsedJson(ParsedJson && p)
    : bytecapacity(p.bytecapacity),
      depthcapacity(p.depthcapacity),
      tapecapacity(p.tapecapacity),
      stringcapacity(p.stringcapacity),
      current_loc(p.current_loc),
      n_structural_indexes(p.n_structural_indexes),
      structural_indexes(p.structural_indexes),
      tape(p.tape),
      containing_scope_offset(p.containing_scope_offset),
      ret_address(p.ret_address),
      string_buf(p.string_buf),
      current_string_buf_loc(p.current_string_buf_loc),
      isvalid(p.isvalid) {
  // the buffers now belong to *this; detach them from the source
  p.structural_indexes = NULL;
  p.tape = NULL;
  p.containing_scope_offset = NULL;
  p.ret_address = NULL;
  p.string_buf = NULL;
  p.current_string_buf_loc = NULL;
  // a source without buffers has no capacity and holds no valid document
  p.bytecapacity = 0;
  p.depthcapacity = 0;
  p.tapecapacity = 0;
  p.stringcapacity = 0;
  p.current_loc = 0;
  p.n_structural_indexes = 0;
  p.isvalid = false;
}
// Makes sure the container can process documents of up to len bytes nested up
// to maxdepth levels.
// Returns true when the storage is available (either already large enough or
// freshly allocated) and false when len or maxdepth is zero or when an
// allocation is reported as failed.
// Any previously parsed document is invalidated whenever new storage is
// allocated.
WARN_UNUSED
bool ParsedJson::allocateCapacity(size_t len, size_t maxdepth) {
  if ((maxdepth == 0) || (len == 0)) {
    std::cerr << "capacities must be non-zero " << std::endl;
    return false;
  }
  // Existing storage can be reused only when it is large enough in BOTH
  // dimensions (bytes and depth).
  if ((len <= bytecapacity) && (maxdepth <= depthcapacity)) {
    return true;
  }
  deallocate();
  // Forget the released buffers so that nothing dangles if an allocation
  // below throws and the destructor later runs deallocate() again.
  structural_indexes = NULL;
  tape = NULL;
  containing_scope_offset = NULL;
  ret_address = NULL;
  string_buf = NULL;
  current_string_buf_loc = NULL;
  isvalid = false;
  bytecapacity = 0; // will only set it to len after allocations are a success
  n_structural_indexes = 0;
  // size_t (not uint32_t) so that the element count is not truncated
  size_t max_structures = ROUNDUP_N(len, 64) + 2 + 7;
  structural_indexes = new uint32_t[max_structures];
  size_t localtapecapacity = ROUNDUP_N(len, 64);
  size_t localstringcapacity = ROUNDUP_N(len + 32, 64);
  string_buf = new uint8_t[localstringcapacity];
  tape = new uint64_t[localtapecapacity];
  containing_scope_offset = new uint32_t[maxdepth];
#ifdef SIMDJSON_USE_COMPUTED_GOTO
  ret_address = new void *[maxdepth];
#else
  ret_address = new char[maxdepth];
#endif
  // NOTE: a plain new-expression throws std::bad_alloc instead of yielding a
  // null pointer, so this branch is only reachable if the allocations above
  // are switched to a non-throwing form.
  if ((string_buf == NULL) || (tape == NULL) ||
      (containing_scope_offset == NULL) || (ret_address == NULL) ||
      (structural_indexes == NULL)) {
    std::cerr << "Could not allocate memory" << std::endl;
    // delete[] on a null pointer is a no-op; null everything afterwards so the
    // destructor does not free the same buffers a second time
    delete[] ret_address;
    ret_address = NULL;
    delete[] containing_scope_offset;
    containing_scope_offset = NULL;
    delete[] tape;
    tape = NULL;
    delete[] string_buf;
    string_buf = NULL;
    delete[] structural_indexes;
    structural_indexes = NULL;
    return false;
  }
  bytecapacity = len;
  depthcapacity = maxdepth;
  tapecapacity = localtapecapacity;
  stringcapacity = localstringcapacity;
  return true;
}
// Reports the current value of the isvalid flag.
bool ParsedJson::isValid() const {
  const bool valid = isvalid;
  return valid;
}
void ParsedJson::deallocate() {
bytecapacity = 0;
depthcapacity = 0;
tapecapacity = 0;
stringcapacity = 0;
if(ret_address != NULL) delete[] ret_address;
if(containing_scope_offset != NULL) delete[] containing_scope_offset;
if(tape != NULL) delete[] tape;
if(string_buf != NULL) delete[] string_buf;
if(structural_indexes != NULL) delete[] structural_indexes;
isvalid = false;
}
// Resets the write state: the document is marked as not valid, the tape
// position goes back to 0 and the string cursor goes back to the start of
// string_buf.
void ParsedJson::init() {
  isvalid = false;
  current_loc = 0;
  current_string_buf_loc = string_buf;
}
// Writes the parsed document to os as JSON text.
// Returns false when no valid document is held or when the tape looks wrong
// (no root node, root length beyond the tape capacity, truncated number,
// unexpected node type, nesting that does not fit the depth capacity).
// Output produced before a problem is detected stays in os.
WARN_UNUSED
bool ParsedJson::printjson(std::ostream &os) {
  if (!isvalid) return false;
  size_t tapeidx = 0;
  uint64_t tape_val = tape[tapeidx];
  uint8_t type = (tape_val >> 56);
  if (type != 'r') {
    fprintf(stderr, "Error: no starting root node?");
    return false;
  }
  const size_t howmany = tape_val & JSONVALUEMASK;
  if (howmany > tapecapacity) {
    fprintf(stderr,
            "We may be exceeding the tape capacity. Is this a valid document?\n");
    return false;
  }
  tapeidx++;
  // Slot 0 stands for the root and the first real scope uses slot 1, so
  // depthcapacity + 1 slots are needed to index depths 1..depthcapacity.
  bool *inobject = new bool[depthcapacity + 1];
  size_t *inobjectidx = new size_t[depthcapacity + 1];
  size_t depth = 1; // only root at level 0
  inobjectidx[depth] = 0;
  inobject[depth] = false;
  // Single exit point: every failure clears ok and falls through to the
  // cleanup below, so the scratch arrays are never leaked.
  bool ok = true;
  for (; ok && (tapeidx < howmany); tapeidx++) {
    tape_val = tape[tapeidx];
    uint64_t payload = tape_val & JSONVALUEMASK;
    type = (tape_val >> 56);
    if (!inobject[depth]) {
      // array (or top level): comma between consecutive values
      if ((inobjectidx[depth] > 0) && (type != ']'))
        os << ",";
      inobjectidx[depth]++;
    } else {
      // object: entries alternate key, value, key, value...
      if ((inobjectidx[depth] > 0) && ((inobjectidx[depth] & 1) == 0) &&
          (type != '}'))
        os << ",";
      if (((inobjectidx[depth] & 1) == 1))
        os << ":";
      inobjectidx[depth]++;
    }
    switch (type) {
    case '"': // we have a string
      os << '"';
      // send the escaped text to os as well, not to stdout
      print_with_escapes((const char *)(string_buf + payload), os);
      os << '"';
      break;
    case 'l': // we have a long int, stored in the next tape slot
      if (tapeidx + 1 >= howmany) {
        ok = false;
        break;
      }
      os << (int64_t)tape[++tapeidx];
      break;
    case 'd': { // we have a double, stored in the next tape slot
      if (tapeidx + 1 >= howmany) {
        ok = false;
        break;
      }
      double answer;
      memcpy(&answer, &tape[++tapeidx], sizeof(answer));
      os << answer;
      break;
    }
    case 'n': // we have a null
      os << "null";
      break;
    case 't': // we have a true
      os << "true";
      break;
    case 'f': // we have a false
      os << "false";
      break;
    case '{': // we have an object
      if (depth >= depthcapacity) { // would not fit in the scratch arrays
        ok = false;
        break;
      }
      os << '{';
      depth++;
      inobject[depth] = true;
      inobjectidx[depth] = 0;
      break;
    case '}': // we end an object
      if (depth <= 1) { // closing a scope that was never opened
        ok = false;
        break;
      }
      depth--;
      os << '}';
      break;
    case '[': // we start an array
      if (depth >= depthcapacity) { // would not fit in the scratch arrays
        ok = false;
        break;
      }
      os << '[';
      depth++;
      inobject[depth] = false;
      inobjectidx[depth] = 0;
      break;
    case ']': // we end an array
      if (depth <= 1) { // closing a scope that was never opened
        ok = false;
        break;
      }
      depth--;
      os << ']';
      break;
    case 'r': // we start and end with the root node
      fprintf(stderr, "should we be hitting the root node?\n");
      ok = false;
      break;
    default:
      fprintf(stderr, "bug %c\n", type);
      ok = false;
      break;
    }
  }
  delete[] inobject;
  delete[] inobjectidx;
  return ok;
}
// Writes a listing of the raw tape to os, one tape entry per line, each line
// prefixed with its tape index.
// Returns false when no valid document is held or when the tape looks wrong
// (no root node, root length beyond the tape capacity, truncated number,
// unexpected node type). Output produced before the problem is detected stays
// in os.
WARN_UNUSED
bool ParsedJson::dump_raw_tape(std::ostream &os) {
  if (!isvalid) return false;
  size_t tapeidx = 0;
  uint64_t tape_val = tape[tapeidx];
  uint8_t type = (tape_val >> 56);
  os << tapeidx << " : " << type;
  tapeidx++;
  size_t howmany = 0;
  if (type == 'r') {
    howmany = tape_val & JSONVALUEMASK;
  } else {
    fprintf(stderr, "Error: no starting root node?");
    return false;
  }
  // tape[howmany] is read after the loop, so it has to be inside the tape
  if (howmany >= tapecapacity) {
    fprintf(stderr,
            "We may be exceeding the tape capacity. Is this a valid document?\n");
    return false;
  }
  os << "\t// pointing to " << howmany << " (right after last node)\n";
  uint64_t payload;
  for (; tapeidx < howmany; tapeidx++) {
    os << tapeidx << " : ";
    tape_val = tape[tapeidx];
    payload = tape_val & JSONVALUEMASK;
    type = (tape_val >> 56);
    switch (type) {
    case '"': // we have a string
      os << "string \"";
      // send the escaped text to os as well, not to stdout
      print_with_escapes((const char *)(string_buf + payload), os);
      os << '"';
      os << '\n';
      break;
    case 'l': // we have a long int, stored in the next tape slot
      if (tapeidx + 1 >= howmany)
        return false;
      os << "integer " << (int64_t)tape[++tapeidx] << "\n";
      break;
    case 'd': { // we have a double, stored in the next tape slot
      os << "float ";
      if (tapeidx + 1 >= howmany)
        return false;
      double answer;
      memcpy(&answer, &tape[++tapeidx], sizeof(answer));
      os << answer << '\n';
      break;
    }
    case 'n': // we have a null
      os << "null\n";
      break;
    case 't': // we have a true
      os << "true\n";
      break;
    case 'f': // we have a false
      os << "false\n";
      break;
    case '{': // we have an object
      os << "{\t// pointing to next tape location " << payload << " (first node after the scope) \n";
      break;
    case '}': // we end an object
      os << "}\t// pointing to previous tape location " << payload << " (start of the scope) \n";
      break;
    case '[': // we start an array
      os << "[\t// pointing to next tape location " << payload << " (first node after the scope) \n";
      break;
    case ']': // we end an array
      os << "]\t// pointing to previous tape location " << payload << " (start of the scope) \n";
      break;
    case 'r': // we start and end with the root node
      // diagnostic goes to stderr like the other error message of this function
      fprintf(stderr, "end of root\n");
      return false;
    default:
      return false;
    }
  }
  // closing root entry
  tape_val = tape[tapeidx];
  payload = tape_val & JSONVALUEMASK;
  type = (tape_val >> 56);
  os << tapeidx << " : " << type << "\t// pointing to " << payload << " (start root)\n";
  return true;
}
/* end file /Users/lemire/CVS/github/simdjson/src/parsedjson.cpp */
/* begin file /Users/lemire/CVS/github/simdjson/src/parsedjsoniterator.cpp */
// Builds an iterator over pj_.
// When pj_ holds a valid document the iterator records the root entry at
// depth 0 and, if the tape has an entry after the root, steps onto it at
// depth 1. Otherwise the iterator stays empty: tape_length is 0, so isOk()
// reports false.
// current_type and current_val are always initialized so that get_type() never
// reads an indeterminate value on an empty iterator.
ParsedJson::iterator::iterator(ParsedJson &pj_)
    : pj(pj_), depth(0), location(0), tape_length(0),
      current_type(0), current_val(0), depthindex(NULL) {
  if (!pj.isValid()) {
    return;
  }
  depthindex = new scopeindex_t[pj.depthcapacity];
  if (depthindex == NULL) return;
  depthindex[0].start_of_scope = location;
  current_val = pj.tape[location++];
  current_type = (current_val >> 56);
  depthindex[0].scope_type = current_type;
  if (current_type == 'r') {
    // the payload of the root entry is the tape length
    tape_length = current_val & JSONVALUEMASK;
    if (location < tape_length) {
      current_val = pj.tape[location];
      current_type = (current_val >> 56);
      depth++;
      depthindex[depth].start_of_scope = location;
      depthindex[depth].scope_type = current_type;
    }
  }
}
// Destructor: releases the depthindex array (a no-op when it is null).
ParsedJson::iterator::~iterator() { delete[] depthindex; }
// Copy constructor: the new iterator refers to the same ParsedJson, sits at
// the same position and owns its own copy of the scope stack of o.
ParsedJson::iterator::iterator(const iterator &o):
    pj(o.pj), depth(o.depth), location(o.location),
    tape_length(o.tape_length), current_type(o.current_type),
    current_val(o.current_val), depthindex(NULL) {
  depthindex = new scopeindex_t[pj.depthcapacity];
  if (o.depthindex != NULL) {
    // destination first, source second: copy FROM o INTO the fresh array
    memcpy(depthindex, o.depthindex, pj.depthcapacity * sizeof(depthindex[0]));
  } else {
    // nothing to copy from (o has no scope stack): make isOk() report false
    tape_length = 0;
  }
}
// Move constructor: copies the position of o and takes over its depthindex
// array; o is left with a null depthindex so it will not free the array.
ParsedJson::iterator::iterator(iterator &&o)
    : pj(o.pj),
      depth(o.depth),
      location(o.location),
      tape_length(o.tape_length),
      current_type(o.current_type),
      current_val(o.current_val),
      depthindex(o.depthindex) {
  // ownership of the array has been transferred to *this
  o.depthindex = NULL;
}
// True while the current tape location is before tape_length.
WARN_UNUSED
bool ParsedJson::iterator::isOk() const {
  const bool inside_tape = (location < tape_length);
  return inside_tape;
}
// Current index on the tape; useful for debugging purposes.
size_t ParsedJson::iterator::get_tape_location() const {
  const size_t here = location;
  return here;
}
// Tape length recorded by the iterator; useful for debugging purposes.
size_t ParsedJson::iterator::get_tape_length() const {
  const size_t length = tape_length;
  return length;
}
// Current depth. Depths start at 1; depth 0 is reserved for the fictitious
// root node.
size_t ParsedJson::iterator::get_depth() const {
  const size_t level = depth;
  return level;
}
// Type of the scope recorded for the current depth.
// A scope is a series of nodes at the same depth, typically either an object
// ({) or an array ([). The root node has type 'r'.
uint8_t ParsedJson::iterator::get_scope_type() const {
  const uint8_t scope = depthindex[depth].scope_type;
  return scope;
}
// Advances to the next tape entry in document order, entering scopes on the
// way and leaving them when a closing entry is met.
// Returns false when the end of the tape is reached or when depth drops to 0.
bool ParsedJson::iterator::move_forward() {
  const bool has_successor = (location + 1 < tape_length);
  if (!has_successor) {
    return false; // we are at the end!
  }
  // standing on an opening bracket/brace: we are entering a new scope
  const bool opens_scope = (current_type == '[') || (current_type == '{');
  if (opens_scope) {
    ++depth;
    depthindex[depth].start_of_scope = location;
    depthindex[depth].scope_type = current_type;
  }
  ++location;
  current_val = pj.tape[location];
  current_type = (current_val >> 56);
  // every scope closure met here moves us one level up and one slot further
  for (;;) {
    const bool closes_scope = (current_type == ']') || (current_type == '}');
    if (!closes_scope) {
      break;
    }
    if (location + 1 >= tape_length) {
      return false; // we are at the end!
    }
    --depth;
    if (depth == 0) {
      return false; // should not be necessary
    }
    ++location;
    current_val = pj.tape[location];
    current_type = (current_val >> 56);
  }
  return true;
}
// Type byte of the current tape entry.
uint8_t ParsedJson::iterator::get_type() const {
  const uint8_t kind = current_type;
  return kind;
}
// Reads the tape slot that follows the current location as a 64-bit signed
// integer. Returns 0 when that slot lies at or beyond tape_length.
int64_t ParsedJson::iterator::get_integer() const {
  const size_t payload_slot = location + 1;
  if (payload_slot < tape_length) {
    return (int64_t) pj.tape[payload_slot];
  }
  return 0; // default value in case of error
}
// Reads the tape slot that follows the current location as a double (the bits
// are copied, not converted). Returns NAN when that slot lies at or beyond
// tape_length.
double ParsedJson::iterator::get_double() const {
  const size_t payload_slot = location + 1;
  if (payload_slot >= tape_length) {
    return NAN; // default value in case of error
  }
  double value;
  memcpy(&value, &pj.tape[payload_slot], sizeof(value));
  return value;
}
// Pointer into pj.string_buf at the offset stored in the payload bits of the
// current tape entry.
const char * ParsedJson::iterator::get_string() const {
  const uint64_t offset = (current_val & JSONVALUEMASK);
  return (const char *)(pj.string_buf + offset);
}
bool ParsedJson::iterator::is_object_or_array() const {
return is_object_or_array(get_type());
}
// True when the current entry has type '{'.
bool ParsedJson::iterator::is_object() const {
  const uint8_t kind = get_type();
  return kind == '{';
}
// True when the current entry has type '['.
bool ParsedJson::iterator::is_array() const {
  const uint8_t kind = get_type();
  return kind == '[';
}
// True when the current entry has type '"'.
bool ParsedJson::iterator::is_string() const {
  const uint8_t kind = get_type();
  return kind == '"';
}
// True when the current entry has type 'l'.
bool ParsedJson::iterator::is_integer() const {
  const uint8_t kind = get_type();
  return kind == 'l';
}
// True when the current entry has type 'd'.
bool ParsedJson::iterator::is_double() const {
  const uint8_t kind = get_type();
  return kind == 'd';
}
// True when the given type byte is '[' or '{'.
bool ParsedJson::iterator::is_object_or_array(uint8_t type) {
  if (type == '[') {
    return true;
  }
  return type == '{';
}
// Looks for key inside the scope that starts at the current entry.
// On success the iterator is left on the entry that follows the matching key
// and true is returned. When the key is not found the iterator is moved back
// up to the entry it started from and false is returned. When the current
// entry cannot be entered (down() fails) the iterator does not move and false
// is returned.
bool ParsedJson::iterator::move_to_key(const char * key) {
  if (!down()) {
    return false;
  }
  do {
    assert(is_string());
    bool rightkey = (strcmp(get_string(), key) == 0);
    next(); // step from the key to the entry that follows it
    if (rightkey) return true;
  } while (next());
  // not found: go back up. The call is made outside of assert() because the
  // expression inside assert() is not evaluated when NDEBUG is defined, which
  // would leave the iterator stranded inside the scope.
  const bool restored = up();
  assert(restored);
  (void)restored; // avoid an unused-variable warning when NDEBUG is defined
  return false;
}
bool ParsedJson::iterator::next() {
if ((current_type == '[') || (current_type == '{')){
// we need to jump
size_t npos = ( current_val & JSONVALUEMASK);
if(npos >= tape_length) {
return false; // shoud never happen unless at the root
}
uint64_t nextval = pj.tape[npos];
uint8_t nexttype = (nextval >> 56);
if((nexttype == ']') || (nexttype == '}')) {
return false; // we reached the end of the scope
}
location = npos;
current_val = nextval;
current_type = nexttype;
return true;
} else {
size_t increment = (current_type == 'd' || current_type == 'l') ? 2 : 1;
if(location + increment >= tape_length) return false;
uint64_t nextval = pj.tape[location + increment];
uint8_t nexttype = (nextval >> 56);
if((nexttype == ']') || (nexttype == '}')) {
return false; // we reached the end of the scope
}
location = location + increment;
current_val = nextval;
current_type = nexttype;
return true;
}
}
// Moves to the previous entry of the current scope. When the previous tape
// slot is a scope closure (']' or '}'), jumps to the tape index stored in its
// payload.
// Returns false, without moving, when the iterator is already at (or before)
// the start of the current scope.
// NOTE(review): stepping back by a single slot from an entry that follows an
// integer or a double appears to land on the number's value slot rather than
// on its type slot (next() advances by two slots for 'l' and 'd') — confirm
// whether callers can hit this.
bool ParsedJson::iterator::prev() {
  // written as "location <= start" rather than "location - 1 < start" so that
  // location == 0 cannot wrap around to a huge unsigned value
  if (location <= depthindex[depth].start_of_scope) return false;
  location -= 1;
  current_val = pj.tape[location];
  current_type = (current_val >> 56);
  if ((current_type == ']') || (current_type == '}')) {
    // we need to jump
    size_t new_location = (current_val & JSONVALUEMASK);
    if (new_location < depthindex[depth].start_of_scope) {
      return false; // should never happen
    }
    location = new_location;
    current_val = pj.tape[location];
    current_type = (current_val >> 56);
  }
  return true;
}
// Leaves the current scope: goes to the recorded start of the scope, then one
// tape slot back, and decrements depth.
// Returns false, without moving, at depth 1 (moving back to the root is not
// allowed) and at depth 0 (there is nothing above, and decrementing depth or
// location would wrap around).
bool ParsedJson::iterator::up() {
  if (depth <= 1) {
    return false; // don't allow moving back to root
  }
  to_start_scope();
  // next we just move to the previous value
  depth--;
  location -= 1;
  current_val = pj.tape[location];
  current_type = (current_val >> 56);
  return true;
}
// Enters the scope opened by the current entry and stops on its first entry.
// Returns false, without moving, when there is no next tape slot, when the
// current entry is neither '[' nor '{', or when the scope is empty.
bool ParsedJson::iterator::down() {
  if (location + 1 >= tape_length) {
    return false;
  }
  const bool on_scope_start = (current_type == '[') || (current_type == '{');
  if (!on_scope_start) {
    return false;
  }
  const size_t after_scope = (current_val & JSONVALUEMASK);
  if (after_scope == location + 2) {
    return false; // we have an empty scope
  }
  ++depth;
  ++location;
  // remember where this scope starts and which kind of scope it is
  depthindex[depth].start_of_scope = location;
  depthindex[depth].scope_type = current_type;
  current_val = pj.tape[location];
  current_type = (current_val >> 56);
  return true;
}
// Moves to the location recorded as the start of the scope at the current
// depth and reloads the current entry from the tape.
void ParsedJson::iterator::to_start_scope() {
  const size_t scope_begin = depthindex[depth].start_of_scope;
  location = scope_begin;
  current_val = pj.tape[scope_begin];
  current_type = (current_val >> 56);
}
// Writes the current entry to os: strings between double quotes (escaped when
// escape_strings is true), numbers, null/true/false, or the single
// bracket/brace character for scope delimiters.
// Returns false, writing nothing, when isOk() is false or when the entry type
// is not one of the above.
bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const {
  if (!isOk()) {
    return false;
  }
  const uint8_t kind = current_type;
  if (kind == '"') { // we have a string
    os << '"';
    if (escape_strings) {
      print_with_escapes(get_string(), os);
    } else {
      os << get_string();
    }
    os << '"';
  } else if (kind == 'l') { // we have a long int
    os << get_integer();
  } else if (kind == 'd') { // we have a double
    os << get_double();
  } else if (kind == 'n') { // we have a null
    os << "null";
  } else if (kind == 't') { // we have a true
    os << "true";
  } else if (kind == 'f') { // we have a false
    os << "false";
  } else if ((kind == '{') || (kind == '}') || (kind == '[') || (kind == ']')) {
    // start or end of an object or of an array: print the delimiter itself
    os << (char) current_type;
  } else {
    return false;
  }
  return true;
}
/* end file /Users/lemire/CVS/github/simdjson/src/parsedjsoniterator.cpp */

View File

@ -1,4 +1,4 @@
/* auto-generated on Fri 4 Jan 2019 20:12:48 EST. Do not edit! */
/* auto-generated on Fri 22 Feb 2019 15:42:34 EST. Do not edit! */
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/portability.h */
#ifndef SIMDJSON_PORTABILITY_H
#define SIMDJSON_PORTABILITY_H
@ -56,7 +56,12 @@ static inline bool mul_overflow(uint64_t value1, uint64_t value2, uint64_t *re
/* Counts the trailing zero bits of input_num, using _tzcnt_u64 when __BMI__ is
 * defined and __builtin_ctzll otherwise (a compile-time warning is emitted in
 * that case).
 * The result might be undefined when input_num is zero. */
static inline int trailingzeroes(uint64_t input_num) {
#ifdef __BMI__
  const int count = (int)_tzcnt_u64(input_num);
#else
#warning "BMI is missing?"
  const int count = __builtin_ctzll(input_num);
#endif
  return count;
}
/* result might be undefined when input_num is zero */
@ -124,64 +129,64 @@ static inline void aligned_free(void *memblock) {
#endif /* end of include PORTABILITY_H */
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/portability.h */
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/common_defs.h */
#ifndef SIMDJSON_COMMON_DEFS_H
#define SIMDJSON_COMMON_DEFS_H
#include <cassert>
// the input buf should be readable up to buf + SIMDJSON_PADDING
#define SIMDJSON_PADDING sizeof(__m256i)
#ifndef _MSC_VER
// Implemented using Labels as Values which works in GCC and CLANG (and maybe
// also in Intel's compiler), but won't work in MSVC.
#define SIMDJSON_USE_COMPUTED_GOTO
#endif
// Align to N-byte boundary
#define ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1))
#define ROUNDDOWN_N(a, n) ((a) & ~((n)-1))
#define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0)
#ifdef _MSC_VER
#define really_inline inline
#define never_inline inline
#define UNUSED
#define WARN_UNUSED
#ifndef likely
#define likely(x) x
#endif
#ifndef unlikely
#define unlikely(x) x
#endif
#else
#define really_inline inline __attribute__((always_inline, unused))
#define never_inline inline __attribute__((noinline, unused))
#define UNUSED __attribute__((unused))
#define WARN_UNUSED __attribute__((warn_unused_result))
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#endif
#ifndef unlikely
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
#endif // MSC_VER
#endif // COMMON_DEFS_H
#ifndef SIMDJSON_COMMON_DEFS_H
#define SIMDJSON_COMMON_DEFS_H
#include <cassert>
// the input buf should be readable up to buf + SIMDJSON_PADDING
#define SIMDJSON_PADDING sizeof(__m256i)
#ifndef _MSC_VER
// Implemented using Labels as Values which works in GCC and CLANG (and maybe
// also in Intel's compiler), but won't work in MSVC.
#define SIMDJSON_USE_COMPUTED_GOTO
#endif
// Align to N-byte boundary (n is expected to be a power of two: the macros
// work by masking with n-1)
#define ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1))
#define ROUNDDOWN_N(a, n) ((a) & ~((n)-1))
#define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0)
#ifdef _MSC_VER
// MSVC: no function attributes and no branch-prediction hints
#define really_inline inline
#define never_inline inline
#define UNUSED
#define WARN_UNUSED
#ifndef likely
// the argument is parenthesized so that expressions such as !likely(a == b)
// keep their meaning after expansion
#define likely(x) (x)
#endif
#ifndef unlikely
#define unlikely(x) (x)
#endif
#else
#define really_inline inline __attribute__((always_inline, unused))
#define never_inline inline __attribute__((noinline, unused))
#define UNUSED __attribute__((unused))
#define WARN_UNUSED __attribute__((warn_unused_result))
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#endif
#ifndef unlikely
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
#endif // MSC_VER
#endif // COMMON_DEFS_H
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/common_defs.h */
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/jsoncharutils.h */
#ifndef SIMDJSON_JSONCHARUTILS_H
@ -263,7 +268,7 @@ const char digittoval[256] = {
}*/
// returns a value with the highest bit set if it is not valid
uint32_t hex_to_u32_nocheck(const uint8_t *src) {
static inline uint32_t hex_to_u32_nocheck(const uint8_t *src) {// strictly speaking, static inline is a C-ism
uint8_t v1 = src[0];
uint8_t v2 = src[1];
uint8_t v3 = src[2];
@ -35777,269 +35782,31 @@ public:
// create a ParsedJson container with zero capacity, call allocateCapacity to
// allocate memory
ParsedJson()
: bytecapacity(0), depthcapacity(0), tapecapacity(0), stringcapacity(0),
current_loc(0), n_structural_indexes(0),
structural_indexes(NULL), tape(NULL), containing_scope_offset(NULL),
ret_address(NULL), string_buf(NULL), current_string_buf_loc(NULL), isvalid(false) {}
ParsedJson();
~ParsedJson();
ParsedJson(ParsedJson && p);
// if needed, allocate memory so that the object is able to process JSON
// documents having up to len bytes and maxdepth "depth"
WARN_UNUSED
inline bool allocateCapacity(size_t len, size_t maxdepth = DEFAULTMAXDEPTH) {
if ((maxdepth == 0) || (len == 0)) {
std::cerr << "capacities must be non-zero " << std::endl;
return false;
}
if (len > 0) {
if ((len <= bytecapacity) && (depthcapacity < maxdepth))
return true;
deallocate();
}
isvalid = false;
bytecapacity = 0; // will only set it to len after allocations are a success
n_structural_indexes = 0;
uint32_t max_structures = ROUNDUP_N(len, 64) + 2 + 7;
structural_indexes = new uint32_t[max_structures];
size_t localtapecapacity = ROUNDUP_N(len, 64);
size_t localstringcapacity = ROUNDUP_N(len, 64);
string_buf = new uint8_t[localstringcapacity];
tape = new uint64_t[localtapecapacity];
containing_scope_offset = new uint32_t[maxdepth];
#ifdef SIMDJSON_USE_COMPUTED_GOTO
ret_address = new void *[maxdepth];
#else
ret_address = new char[maxdepth];
#endif
if ((string_buf == NULL) || (tape == NULL) ||
(containing_scope_offset == NULL) || (ret_address == NULL) || (structural_indexes == NULL)) {
std::cerr << "Could not allocate memory" << std::endl;
if(ret_address != NULL) delete[] ret_address;
if(containing_scope_offset != NULL) delete[] containing_scope_offset;
if(tape != NULL) delete[] tape;
if(string_buf != NULL) delete[] string_buf;
if(structural_indexes != NULL) delete[] structural_indexes;
return false;
}
bool allocateCapacity(size_t len, size_t maxdepth = DEFAULTMAXDEPTH);
bytecapacity = len;
depthcapacity = maxdepth;
tapecapacity = localtapecapacity;
stringcapacity = localstringcapacity;
return true;
}
bool isValid() const {
return isvalid;
}
bool isValid() const;
// deallocate memory and set capacity to zero, called automatically by the
// destructor
void deallocate() {
bytecapacity = 0;
depthcapacity = 0;
tapecapacity = 0;
stringcapacity = 0;
if(ret_address != NULL) delete[] ret_address;
if(containing_scope_offset != NULL) delete[] containing_scope_offset;
if(tape != NULL) delete[] tape;
if(string_buf != NULL) delete[] string_buf;
if(structural_indexes != NULL) delete[] structural_indexes;
isvalid = false;
}
~ParsedJson() { deallocate(); }
void deallocate();
// this should be called when parsing (right before writing the tapes)
void init() {
current_string_buf_loc = string_buf;
current_loc = 0;
isvalid = false;
}
void init();
// print the json to stdout (should be valid)
// return false if the tape is likely wrong (e.g., you did not parse a valid
// JSON).
WARN_UNUSED
bool printjson(std::ostream &os) {
    // Walks the tape from the word following the leading root node up to (but
    // not including) the index stored in that root node's payload, writing
    // the JSON text to `os`.
    //
    // Returns false when the document is not flagged valid, when the tape
    // does not begin with a root node ('r'), when the root payload exceeds
    // the tape capacity, or when a malformed tape is detected during the walk
    // (a number node without its payload word, a nested root, an unknown tag).
    // Output already written to `os` before a failure is not rolled back.
    if(!isvalid) return false;
    size_t tapeidx = 0;
    uint64_t tape_val = tape[tapeidx];
    uint8_t type = (tape_val >> 56);
    size_t howmany = 0;
    if (type == 'r') {
        howmany = tape_val & JSONVALUEMASK;
    } else {
        fprintf(stderr, "Error: no starting root node?");
        return false;
    }
    if (howmany > tapecapacity) {
        fprintf(stderr,
            "We may be exceeding the tape capacity. Is this a valid document?\n");
        return false;
    }
    tapeidx++;
    // Per-depth bookkeeping: whether the scope at that depth is an object,
    // and how many nodes have been emitted in it so far (used to decide when
    // to print ',' and ':').
    bool *inobject = new bool[depthcapacity];
    size_t *inobjectidx = new size_t[depthcapacity];
    int depth = 1; // only root at level 0
    inobjectidx[depth] = 0;
    inobject[depth] = false;
    for (; tapeidx < howmany; tapeidx++) {
        tape_val = tape[tapeidx];
        uint64_t payload = tape_val & JSONVALUEMASK;
        type = (tape_val >> 56);
        if (!inobject[depth]) {
            // array (or top level): separate consecutive values with commas
            if ((inobjectidx[depth] > 0) && (type != ']'))
                os << ",";
            inobjectidx[depth]++;
        } else { // if (inobject) {
            // object: even positions are keys, odd positions are values
            if ((inobjectidx[depth] > 0) && ((inobjectidx[depth] & 1) == 0) &&
                (type != '}'))
                os << ",";
            if (((inobjectidx[depth] & 1) == 1))
                os << ":";
            inobjectidx[depth]++;
        }
        switch (type) {
        case '"': // we have a string
            os << '"';
            // NOTE(review): no stream is passed here, unlike the call in
            // iterator::print; confirm the string really ends up in `os`.
            print_with_escapes((const unsigned char *)(string_buf + payload));
            os << '"';
            break;
        case 'l': // we have a long int
            if (tapeidx + 1 >= howmany) {
                // the payload word is missing: release the scratch arrays
                // before bailing out (they used to leak on this path)
                delete[] inobject;
                delete[] inobjectidx;
                return false;
            }
            os << (int64_t)tape[++tapeidx];
            break;
        case 'd': // we have a double
            if (tapeidx + 1 >= howmany) {
                // same as above: do not leak the scratch arrays
                delete[] inobject;
                delete[] inobjectidx;
                return false;
            }
            double answer;
            memcpy(&answer, &tape[++tapeidx], sizeof(answer));
            os << answer;
            break;
        case 'n': // we have a null
            os << "null";
            break;
        case 't': // we have a true
            os << "true";
            break;
        case 'f': // we have a false
            os << "false";
            break;
        case '{': // we have an object
            os << '{';
            depth++;
            inobject[depth] = true;
            inobjectidx[depth] = 0;
            break;
        case '}': // we end an object
            depth--;
            os << '}';
            break;
        case '[': // we start an array
            os << '[';
            depth++;
            inobject[depth] = false;
            inobjectidx[depth] = 0;
            break;
        case ']': // we end an array
            depth--;
            os << ']';
            break;
        case 'r': // we start and end with the root node
            fprintf(stderr, "should we be hitting the root node?\n");
            delete[] inobject;
            delete[] inobjectidx;
            return false;
        default:
            fprintf(stderr, "bug %c\n", type);
            delete[] inobject;
            delete[] inobjectidx;
            return false;
        }
    }
    delete[] inobject;
    delete[] inobjectidx;
    return true;
}
bool printjson(std::ostream &os);
WARN_UNUSED
bool dump_raw_tape(std::ostream &os) {
// Debugging aid: writes one line per tape node to `os`, in tape order, showing
// the node's index, its kind and (for scope nodes) the tape index stored in
// its payload. Returns false if the document is not flagged valid, if the tape
// does not start with a root node, if a number node lacks its payload word, or
// if a root node or an unknown tag is met inside the walk. Text written before
// a failure is left in `os`.
if(!isvalid) return false;
size_t tapeidx = 0;
uint64_t tape_val = tape[tapeidx];
// the tag lives in the top byte of each 64-bit tape word
uint8_t type = (tape_val >> 56);
os << tapeidx << " : " << type;
tapeidx++;
size_t howmany = 0;
if (type == 'r') {
// the root's payload is the index where the walk must stop
howmany = tape_val & JSONVALUEMASK;
} else {
fprintf(stderr, "Error: no starting root node?");
return false;
}
os << "\t// pointing to " << howmany <<" (right after last node)\n";
uint64_t payload;
for (; tapeidx < howmany; tapeidx++) {
os << tapeidx << " : ";
tape_val = tape[tapeidx];
payload = tape_val & JSONVALUEMASK;
type = (tape_val >> 56);
switch (type) {
case '"': // we have a string
os << "string \"";
// NOTE(review): no stream is passed to print_with_escapes here; confirm
// the string is written to `os` and not to some default stream.
print_with_escapes((const unsigned char *)(string_buf + payload));
os << '"';
os << '\n';
break;
case 'l': // we have a long int
// the value is stored in the following tape word
if (tapeidx + 1 >= howmany)
return false;
os << "integer " << (int64_t)tape[++tapeidx] << "\n";
break;
case 'd': // we have a double
os << "float ";
// the value is stored in the following tape word
if (tapeidx + 1 >= howmany)
return false;
double answer;
memcpy(&answer, &tape[++tapeidx], sizeof(answer));
os << answer << '\n';
break;
case 'n': // we have a null
os << "null\n";
break;
case 't': // we have a true
os << "true\n";
break;
case 'f': // we have a false
os << "false\n";
break;
case '{': // we have an object
os << "{\t// pointing to next tape location " << payload << " (first node after the scope) \n";
break;
case '}': // we end an object
os << "}\t// pointing to previous tape location " << payload << " (start of the scope) \n";
break;
case '[': // we start an array
os << "[\t// pointing to next tape location " << payload << " (first node after the scope) \n";
break;
case ']': // we end an array
os << "]\t// pointing to previous tape location " << payload << " (start of the scope) \n";
break;
case 'r': // we start and end with the root node
// a root node inside the walk is treated as an error
printf("end of root\n");
return false;
default:
return false;
}
}
// the word at index `howmany` is printed last, labelled as the closing root
tape_val = tape[tapeidx];
payload = tape_val & JSONVALUEMASK;
type = (tape_val >> 56);
os << tapeidx << " : "<< type <<"\t// pointing to " << payload <<" (start root)\n";
return true;
}
bool dump_raw_tape(std::ostream &os);
// all nodes are stored on the tape using a 64-bit word.
@ -36058,12 +35825,12 @@ public:
// this should be considered a private function
// Appends one word to the tape: `val` OR-ed with the one-byte tag `c` placed
// in bits 56..63, stored at tape[current_loc]; current_loc is then advanced.
// NOTE(review): the store appears twice in this view, presumably the removed
// and the re-added line of a whitespace-only diff hunk; taken literally it
// would append the word twice -- confirm against the real header.
really_inline void write_tape(uint64_t val, uint8_t c) {
tape[current_loc++] = val | (((uint64_t)c) << 56);
tape[current_loc++] = val | (((uint64_t)c) << 56);
}
// Appends a signed 64-bit integer: first a tag word ('l', zero payload), then
// the raw bits of `i` in the following tape word.
// NOTE(review): both statements appear twice in this view, presumably the
// removed and re-added lines of a whitespace-only diff hunk -- confirm.
// NOTE(review): the value is reinterpreted through a uint64_t pointer cast;
// get_double() uses memcpy for the equivalent conversion.
really_inline void write_tape_s64(int64_t i) {
write_tape(0, 'l');
tape[current_loc++] = *((uint64_t *)&i);
write_tape(0, 'l');
tape[current_loc++] = *((uint64_t *)&i);
}
really_inline void write_tape_double(double d) {
@ -36076,185 +35843,70 @@ public:
// Returns the tape index at which the next write_tape() call will store.
really_inline uint32_t get_current_loc() { return current_loc; }
// ORs `val` into the tape word already stored at index `saved_loc`.
// NOTE(review): the statement appears twice in this view, presumably the
// removed and re-added line of a whitespace-only diff hunk (OR-ing twice is
// harmless, but confirm against the real header).
really_inline void annotate_previousloc(uint32_t saved_loc, uint64_t val) {
tape[saved_loc] |= val;
tape[saved_loc] |= val;
}
struct iterator {
explicit iterator(ParsedJson &pj_);
~iterator();
explicit iterator(ParsedJson &pj_)
    : pj(pj_), depth(0), location(0), tape_length(0),
      current_type(0), current_val(0), depthindex(NULL) {
    // Builds an iterator over `pj_`. When the document is flagged valid and
    // starts with a root node ('r'), tape_length is taken from the root's
    // payload and, if the tape has any content, the iterator is positioned on
    // the first node after the root at depth 1.
    //
    // current_type and current_val are zero-initialized so that an iterator
    // built on an invalid document never exposes indeterminate values through
    // get_type() or the copy constructor; isOk() is false in that case because
    // tape_length stays 0.
    if(pj.isValid()) {
        depthindex = new scopeindex_t[pj.depthcapacity];
        if(depthindex == NULL) return;
        depthindex[0].start_of_scope = location;
        current_val = pj.tape[location++];
        current_type = (current_val >> 56);
        depthindex[0].scope_type = current_type;
        if (current_type == 'r') {
            tape_length = current_val & JSONVALUEMASK;
            if(location < tape_length) {
                // step onto the first real node and open the depth-1 scope
                current_val = pj.tape[location];
                current_type = (current_val >> 56);
                depth++;
                depthindex[depth].start_of_scope = location;
                depthindex[depth].scope_type = current_type;
            }
        }
    }
}
// Releases the per-depth scope array (delete[] on a null pointer is a no-op).
~iterator() {
delete[] depthindex;
}
iterator(const iterator &o);
iterator(const iterator &o):
pj(o.pj), depth(o.depth), location(o.location),
tape_length(o.tape_length), current_type(o.current_type),
current_val(o.current_val), depthindex(NULL) {
depthindex = new scopeindex_t[pj.depthcapacity];
if(depthindex != NULL) {
memcpy(o.depthindex, depthindex, pj.depthcapacity * sizeof(depthindex[0]));
} else {
tape_length = 0;
}
}
iterator(iterator &&o);
// Move constructor: takes over the position and the scope array of `o`.
// `o.depthindex` is nulled afterwards so that o's destructor does not free
// the array now owned by this iterator.
iterator(iterator &&o):
pj(o.pj), depth(std::move(o.depth)), location(std::move(o.location)),
tape_length(std::move(o.tape_length)), current_type(std::move(o.current_type)),
current_val(std::move(o.current_val)), depthindex(std::move(o.depthindex)) {
o.depthindex = NULL;// we take ownership
}
WARN_UNUSED
// True while the current location is inside the tape (location < tape_length).
bool isOk() const {
return location < tape_length;
}
bool isOk() const;
// useful for debugging purposes
// Returns the tape index of the node the iterator currently points at.
size_t get_tape_location() const {
return location;
}
size_t get_tape_location() const;
// useful for debugging purposes
// Returns the tape length recorded by the constructor (0 if the iterator
// could not be initialized).
size_t get_tape_length() const {
return tape_length;
}
size_t get_tape_length() const;
// returns the current depth (start at 1 with 0 reserved for the fictitious root node)
size_t get_depth() const {
return depth;
}
size_t get_depth() const;
// A scope is a series of nodes at the same depth, typically it is either an object ({) or an array ([).
// The root node has type 'r'.
// Returns the scope type recorded for the current depth.
uint8_t get_scope_type() const {
return depthindex[depth].scope_type;
}
uint8_t get_scope_type() const;
// move forward in document order
bool move_forward() {
    // Advances to the next node in document order, entering scopes when the
    // current node opens one and climbing out past any closing brackets.
    // Returns false when there is no further node to move to.
    if(location + 1 >= tape_length) {
        return false; // we are at the end!
    }
    // Integer ('l') and double ('d') nodes occupy two tape words: the tag
    // word and the raw value. The raw value must be skipped, exactly as
    // next() does, otherwise it would be misread as a node.
    size_t step = 1;
    if ((current_type == 'd') || (current_type == 'l')) {
        step = 2;
        if(location + step >= tape_length) {
            return false; // the number was the last node
        }
    }
    // we are entering a new scope
    if ((current_type == '[') || (current_type == '{')){
        depth++;
        depthindex[depth].start_of_scope = location;
        depthindex[depth].scope_type = current_type;
    }
    location = location + step;
    current_val = pj.tape[location];
    current_type = (current_val >> 56);
    // if we encounter a scope closure, we need to move up
    while ((current_type == ']') || (current_type == '}')) {
        if(location + 1 >= tape_length) {
            return false; // we are at the end!
        }
        depth--;
        if(depth == 0) {
            return false; // should not be necessary
        }
        location = location + 1;
        current_val = pj.tape[location];
        current_type = (current_val >> 56);
    }
    return true;
}
bool move_forward();
// retrieve the character code of what we're looking at:
// [{"sltfn are the possibilities
// Returns the tag byte of the current node.
really_inline uint8_t get_type() const {
return current_type;
}
uint8_t get_type() const;
// get the int64_t value at this node; valid only if we're at "l"
// The value is read from the tape word that follows the current one; 0 is
// returned when that word would lie at or past tape_length.
really_inline int64_t get_integer() const {
if(location + 1 >= tape_length) return 0;// default value in case of error
return (int64_t) pj.tape[location + 1];
}
int64_t get_integer() const;
// get the string value at this node (NULL ended); valid only if we're at "
// note that tabs, and line endings are escaped in the returned value (see print_with_escapes)
// return value is valid UTF-8
const char * get_string() const;
// get the double value at this node; valid only if
// we're at "d"
// The bits are copied out of the tape word that follows the current one; NAN
// is returned when that word would lie at or past tape_length.
really_inline double get_double() const {
if(location + 1 >= tape_length) return NAN;// default value in case of error
double answer;
memcpy(&answer, & pj.tape[location + 1], sizeof(answer));
return answer;
}
double get_double() const;
// Type predicates: each one compares the current node's tag byte with the
// tag character for that kind of node.
// True if the current node opens an object ('{') or an array ('[').
bool is_object_or_array() const {
return is_object_or_array(get_type());
}
bool is_object_or_array() const;
// True if the current node is '{'.
bool is_object() const {
return get_type() == '{';
}
bool is_object() const;
// True if the current node is '['.
bool is_array() const {
return get_type() == '[';
}
bool is_array() const;
// True if the current node is a string ('"').
bool is_string() const {
return get_type() == '"';
}
bool is_string() const;
// True if the current node is an integer ('l').
bool is_integer() const {
return get_type() == 'l';
}
bool is_integer() const;
// True if the current node is a double ('d').
bool is_double() const {
return get_type() == 'd';
}
bool is_double() const;
// Static form: true if the given tag byte is '[' or '{'.
static bool is_object_or_array(uint8_t type) {
return (type == '[' || (type == '{'));
}
static bool is_object_or_array(uint8_t type);
// when at {, go one level deep, looking for a given key
// if successful, we are left pointing at the value,
// if not, we are still pointing at the object ({)
// (in case of repeated keys, this only finds the first one)
bool move_to_key(const char * key) {
    // Enters the current scope with down() and scans its key/value pairs,
    // comparing each key with `key` via strcmp. On a match the iterator is
    // left on the matching value and true is returned. Otherwise the iterator
    // climbs back to the enclosing node and false is returned. If down()
    // fails, the iterator is not moved.
    if(down()) {
        do {
            assert(is_string());
            bool rightkey = (strcmp(get_string(),key)==0);
            next(); // step from the key to its value
            if(rightkey) return true;
        } while(next());
        // not found: go back up. The call must not live inside assert(),
        // otherwise it is compiled out when NDEBUG is defined and the
        // iterator would be left inside the object.
        bool moved_up = up();
        assert(moved_up);
        (void)moved_up; // silence the unused-variable warning under NDEBUG
    }
    return false;
}
// get the string value at this node (NULL ended); valid only if we're at "
// note that tabs, and line endings are escaped in the returned value (see print_with_escapes)
// return value is valid UTF-8
// The current node's payload is used as a byte offset into pj.string_buf; the
// returned pointer points into that buffer.
really_inline const char * get_string() const {
return (const char *)(pj.string_buf + (current_val & JSONVALUEMASK)) ;
}
bool move_to_key(const char * key);
// throughout return true if we can do the navigation, false
// otherwise
@ -36264,153 +35916,36 @@ public:
// Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, { and [.
// At the object ({) or at the array ([), you can issue a "down" to visit their content.
// valid if we're not at the end of a scope (returns true).
really_inline bool next() {
    // Work out where the following sibling would be: a scope opener stores
    // that index in its payload, a number occupies two tape words, and every
    // other node occupies one.
    size_t target;
    if ((current_type == '[') || (current_type == '{')) {
        target = (current_val & JSONVALUEMASK);
    } else if ((current_type == 'd') || (current_type == 'l')) {
        target = location + 2;
    } else {
        target = location + 1;
    }
    // past the tape: nothing to move to (for a scope opener this should
    // never happen unless at the root)
    if (target >= tape_length) return false;
    const uint64_t candidate = pj.tape[target];
    const uint8_t candidate_type = (candidate >> 56);
    // a closing bracket means we reached the end of the scope; stay put
    if ((candidate_type == ']') || (candidate_type == '}')) return false;
    location = target;
    current_val = candidate;
    current_type = candidate_type;
    return true;
}
bool next();
// Within a given scope (series of nodes at the same depth within either an
// array or an object), we move backward.
// Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true when starting at the end
// of the scope.
// At the object ({) or at the array ([), you can issue a "down" to visit their content.
// Steps back one tape word within the current scope. If that word is a
// closing bracket, jumps to the index stored in its payload (the start of
// that nested scope). Returns false, without a full move, when stepping back
// would leave the current scope.
// NOTE(review): stepping back by exactly one word assumes the previous word
// is a node tag; next() advances two words past 'd'/'l' nodes, so the word
// before the current node may be a raw number payload -- confirm.
really_inline bool prev() {
if(location - 1 < depthindex[depth].start_of_scope) return false;
location -= 1;
current_val = pj.tape[location];
current_type = (current_val >> 56);
if ((current_type == ']') || (current_type == '}')){
// we need to jump
size_t new_location = ( current_val & JSONVALUEMASK);
if(new_location < depthindex[depth].start_of_scope) {
return false; // should never happen
}
location = new_location;
current_val = pj.tape[location];
current_type = (current_val >> 56);
}
return true;
}
bool prev();
// Moves back to either the containing array or object (type { or [) from
// within a contained scope.
// Valid unless we are at the first level of the document
//
really_inline bool up() {
    if(depth != 1) {
        // rewind to the first node of the current scope ...
        to_start_scope();
        // ... then step one word back, onto the node just before it, and
        // record that we are now one level shallower
        --depth;
        --location;
        current_val = pj.tape[location];
        current_type = (current_val >> 56);
        return true;
    }
    // at depth 1 the only thing above us is the root: refuse to move there
    return false;
}
bool up();
// Valid if we're at a [ or { and it starts a non-empty scope; moves us to start of
// that deeper scope if it is not empty.
// Thus, given [true, null, {"a":1}, [1,2]], if we are at the { node, we would move to the
// "a" node.
really_inline bool down() {
    // nothing after the current node: cannot descend
    if(location + 1 >= tape_length) return false;
    // only '[' and '{' open a scope
    const bool opens_scope = (current_type == '[') || (current_type == '{');
    if(!opens_scope) return false;
    // the opener's payload is the index of the first node after the scope;
    // if that is two words ahead, only the closing bracket lies in between
    const size_t after_scope = (current_val & JSONVALUEMASK);
    if(after_scope == location + 2) {
        return false; // we have an empty scope
    }
    ++depth;
    location = location + 1;
    // remember where this scope starts and what kind of scope it is
    depthindex[depth].start_of_scope = location;
    depthindex[depth].scope_type = current_type;
    current_val = pj.tape[location];
    current_type = (current_val >> 56);
    return true;
}
bool down();
// move us to the start of our current scope,
// a scope is a series of nodes at the same level
void to_start_scope() {
    // jump back to the index recorded when the current scope was entered and
    // reload the cached word and tag for that node
    location = depthindex[depth].start_of_scope;
    const uint64_t word = pj.tape[location];
    current_val = word;
    current_type = (word >> 56);
}
void to_start_scope();
// void to_end_scope(); // move us to
// the start of our current scope; always succeeds
// print the thing we're currently pointing at
bool print(std::ostream &os, bool escape_strings = true) const {
    // Writes the node the iterator currently points at to `os`. Scope
    // delimiters are written as the single bracket character; strings are
    // wrapped in double quotes and, when `escape_strings` is set, passed
    // through print_with_escapes. Returns false if the iterator is not ok or
    // the node tag is not one of the known kinds.
    if(!isOk()) return false;
    switch (current_type) {
    case '{': // we have an object
    case '}': // we end an object
    case '[': // we start an array
    case ']': // we end an array
        os << (char) current_type;
        return true;
    case 'n': // we have a null
        os << "null";
        return true;
    case 't': // we have a true
        os << "true";
        return true;
    case 'f': // we have a false
        os << "false";
        return true;
    case 'l': // we have a long int
        os << get_integer();
        return true;
    case 'd': // we have a double
        os << get_double();
        return true;
    case '"': // we have a string
        os << '"';
        if(!escape_strings) {
            os << get_string();
        } else {
            print_with_escapes(get_string(), os);
        }
        os << '"';
        return true;
    default:
        break;
    }
    return false;
}
bool print(std::ostream &os, bool escape_strings = true) const;
// One entry per depth level: the tape index recorded when the scope at that
// depth was entered, and the tag byte recorded for that scope.
typedef struct {size_t start_of_scope; uint8_t scope_type;} scopeindex_t;
private:
@ -36424,10 +35959,8 @@ private:
uint8_t current_type;
uint64_t current_val;
scopeindex_t *depthindex;
};
size_t bytecapacity; // indicates how many bits are meant to be supported
size_t depthcapacity; // how deep we can go
@ -36450,36 +35983,12 @@ private:
uint8_t *current_string_buf_loc;
bool isvalid;
// Move constructor: takes over every field of `p`, including its buffers.
// The pointer members of `p` are then set to NULL, presumably so that p's
// destructor (which calls deallocate()) does not free the buffers now held
// by this object.
ParsedJson(ParsedJson && p)
: bytecapacity(std::move(p.bytecapacity)),
depthcapacity(std::move(p.depthcapacity)),
tapecapacity(std::move(p.tapecapacity)),
stringcapacity(std::move(p.stringcapacity)),
current_loc(std::move(p.current_loc)),
n_structural_indexes(std::move(p.n_structural_indexes)),
structural_indexes(std::move(p.structural_indexes)),
tape(std::move(p.tape)),
containing_scope_offset(std::move(p.containing_scope_offset)),
ret_address(std::move(p.ret_address)),
string_buf(std::move(p.string_buf)),
current_string_buf_loc(std::move(p.current_string_buf_loc)),
isvalid(std::move(p.isvalid)) {
// leave the moved-from object without any buffer pointers
p.structural_indexes=NULL;
p.tape=NULL;
p.containing_scope_offset=NULL;
p.ret_address=NULL;
p.string_buf=NULL;
p.current_string_buf_loc=NULL;
}
private :
// we don't want the copy constructor to be called
ParsedJson(const ParsedJson & p); // declared private: we don't want the copy constructor to be called
// we don't want the assignment to be called
ParsedJson & operator=(const ParsedJson&o);
};
@ -37223,6 +36732,7 @@ void init_state_machine();
WARN_UNUSED
bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj);
WARN_UNUSED
static inline bool unified_machine(const char *buf, size_t len, ParsedJson &pj) {
return unified_machine((const uint8_t *)buf,len,pj);