Adding dynamic memory allocation.

This commit is contained in:
Daniel Lemire 2018-12-06 21:44:26 -05:00
parent 8589a0588b
commit c2913d5d69
7 changed files with 93 additions and 24 deletions

View File

@ -14,13 +14,29 @@ Goal: Speed up the parsing of JSON per se.
const char * filename = ... //
std::string_view p = get_corpus(filename);
ParsedJson pj;
size_t maxdepth = 1024; // support documents have nesting "depth" up to 1024
pj.allocateCapacity(p.size(), maxdepth); // allocate memory for parsing up to p.size() bytes
pj.allocateCapacity(p.size()); // allocate memory for parsing up to p.size() bytes
bool is_ok = json_parse(p, pj); // do the parsing, return false on error
// parsing is done!
// pj can be reused with other json_parse calls.
```
It is also possible to use a simpler API if you do not mind having the overhead
of memory allocation:
```C
#include "simdjson/jsonparser.h"
// ...
const char * filename = ... //
std::string_view p = get_corpus(filename);
ParsedJson pj = build_parsed_json(p); // do the parsing
if( ! pj.isValid() ) {
// something went wrong
}
```
ParsedJson build_parsed_json(const std::string_view &s)
## Usage

View File

@ -78,7 +78,7 @@ int main(int argc, char *argv[]) {
}
if(verbose) cout << "[verbose] loaded " << filename << " ("<< p.size() << " bytes)" << endl;
ParsedJson pj;
bool allocok = pj.allocateCapacity(p.size(), 1024);
bool allocok = pj.allocateCapacity(p.size());
if(!allocok) {
std::cerr << "failed to allocate memory" << std::endl;
return EXIT_FAILURE;

View File

@ -89,6 +89,7 @@ int main(int argc, char *argv[]) {
int repeat = 10;
int volume = p.size();
BEST_TIME("simdjson", json_parse(p, pj), true, , repeat, volume, true);
BEST_TIME("simdjson (with dyn alloc) ", build_parsed_json(p).isValid(), true, , repeat, volume, true);
rapidjson::Document d;

View File

@ -8,7 +8,8 @@
#include "simdjson/stage34_unified.h"
// Parse a document found in buf, need to preallocate ParsedJson.
// Return false in case of a failure.
// Return false in case of a failure. You can also check validity
// by calling pj.isValid(). The same ParsedJson can be reused.
// The string should be NULL terminated.
WARN_UNUSED
bool json_parse(const u8 *buf, size_t len, ParsedJson &pj);
@ -23,3 +24,20 @@ WARN_UNUSED
static inline bool json_parse(const std::string_view &s, ParsedJson &pj) {
return json_parse(s.data(), s.size(), pj);
}
// Build a ParsedJson object from the len bytes starting at buf. You can check
// validity by calling isValid() on the returned object. This does memory
// allocation on every call.
WARN_UNUSED
ParsedJson build_parsed_json(const u8 *buf, size_t len);
WARN_UNUSED
static inline ParsedJson build_parsed_json(const char * buf, size_t len) {
  // Convenience overload for char buffers: view the same memory as u8 and
  // forward to the u8 version.
  const u8 *bytes = reinterpret_cast<const u8 *>(buf);
  return build_parsed_json(bytes, len);
}
// convenience overload: build a ParsedJson from the characters viewed by s
WARN_UNUSED
static inline ParsedJson build_parsed_json(const std::string_view &s) {
  const char *text = s.data();
  const size_t length = s.size();
  return build_parsed_json(text, length);
}

View File

@ -14,26 +14,12 @@
#include "simdjson/jsonformatutils.h"
#define JSONVALUEMASK 0xFFFFFFFFFFFFFF;
#define JSONVALUEMASK 0xFFFFFFFFFFFFFF
#define DEFAULTMAXDEPTH 1024// a JSON document with a depth exceeding 1024 is probably de facto invalid
struct ParsedJson {
public:
size_t bytecapacity; // indicates how many bits are meant to be supported by
// structurals
size_t depthcapacity; // how deep we can go
size_t tapecapacity;
size_t stringcapacity;
u32 current_loc;
u8 *structurals;
u32 n_structural_indexes;
u32 *structural_indexes;
u64 *tape;
u32 *containing_scope_offset;
void **ret_address;
u8 *string_buf; // should be at least bytecapacity
u8 *current_string_buf_loc;
// create a ParsedJson container with zero capacity, call allocateCapacity to
// allocate memory
@ -41,12 +27,12 @@ public:
: bytecapacity(0), depthcapacity(0), tapecapacity(0), stringcapacity(0),
current_loc(0), structurals(NULL), n_structural_indexes(0),
structural_indexes(NULL), tape(NULL), containing_scope_offset(NULL),
ret_address(NULL), string_buf(NULL), current_string_buf_loc(NULL) {}
ret_address(NULL), string_buf(NULL), current_string_buf_loc(NULL), isvalid(false) {}
// if needed, allocate memory so that the object is able to process JSON
// documents having up to len bytes and maxdepth "depth"
WARN_UNUSED
inline bool allocateCapacity(size_t len, size_t maxdepth) {
inline bool allocateCapacity(size_t len, size_t maxdepth = DEFAULTMAXDEPTH) {
if ((maxdepth == 0) || (len == 0)) {
std::cerr << "capacities must be non-zero " << std::endl;
return false;
@ -56,6 +42,7 @@ public:
return true;
deallocate();
}
isvalid = false;
bytecapacity = 0; // will only set it to len after allocations are a success
if (posix_memalign((void **)&structurals, 8, ROUNDUP_N(len, 64) / 8)) {
std::cerr << "Could not allocate memory for structurals" << std::endl;
@ -97,6 +84,10 @@ public:
return true;
}
bool isValid() const {
return isvalid;
}
// deallocate memory and set capacity to zero, called automatically by the
// destructor
void deallocate() {
@ -110,6 +101,7 @@ public:
delete[] string_buf;
delete[] structural_indexes;
free(structurals);
isvalid = false;
}
~ParsedJson() { deallocate(); }
@ -118,6 +110,7 @@ public:
void init() {
current_string_buf_loc = string_buf;
current_loc = 0;
isvalid = false;
}
// print the json to stdout (should be valid)
@ -125,6 +118,7 @@ public:
// JSON).
WARN_UNUSED
bool printjson() {
if(!isvalid) return false;
size_t tapeidx = 0;
u64 tape_val = tape[tapeidx];
u8 type = (tape_val >> 56);
@ -227,6 +221,7 @@ public:
WARN_UNUSED
bool dump_raw_tape() {
if(!isvalid) return false;
size_t tapeidx = 0;
u64 tape_val = tape[tapeidx++];
u8 type = (tape_val >> 56);
@ -374,6 +369,32 @@ public:
};
#endif
size_t bytecapacity; // indicates how many bits are meant to be supported by
// structurals
size_t depthcapacity; // how deep we can go
size_t tapecapacity;
size_t stringcapacity;
u32 current_loc;
u8 *structurals;
u32 n_structural_indexes;
u32 *structural_indexes;
u64 *tape;
u32 *containing_scope_offset;
void **ret_address;
u8 *string_buf; // should be at least bytecapacity
u8 *current_string_buf_loc;
bool isvalid;
ParsedJson(const ParsedJson && p); // move constructor (takes a const rvalue reference); NOTE(review): only the declaration is visible here — confirm whether a definition exists or copy elision is relied upon
private :
ParsedJson(const ParsedJson & p); // copy constructor, kept private: we don't want ParsedJson objects to be copied
};
#ifdef DEBUG

View File

@ -23,3 +23,15 @@ bool json_parse(const u8 *buf, size_t len, ParsedJson &pj) {
return isok;
}
// Allocate a fresh ParsedJson with capacity for len bytes, parse buf into it
// and return it by value. If the allocation fails, a message is written to
// std::cerr and the object is returned without any parsing attempt.
WARN_UNUSED
ParsedJson build_parsed_json(const u8 *buf, size_t len) {
  ParsedJson result;
  bool success = result.allocateCapacity(len);
  if (!success) {
    std::cerr << "failure during memory allocation " << std::endl;
  } else {
    success = json_parse(buf, len, result);
    // the parser's return value and the validity flag must agree
    assert(success == result.isValid());
  }
  return result;
}

View File

@ -432,8 +432,9 @@ succeed:
#ifdef DEBUG
pj.dump_tapes();
pj.dump_raw_tape();
#endif
pj.isvalid = true;
return true;
fail: