Adding dynamic memory allocation.
This commit is contained in:
parent
8589a0588b
commit
c2913d5d69
20
README.md
20
README.md
|
@ -14,13 +14,29 @@ Goal: Speed up the parsing of JSON per se.
|
|||
const char * filename = ... //
|
||||
std::string_view p = get_corpus(filename);
|
||||
ParsedJson pj;
|
||||
size_t maxdepth = 1024; // support documents have nesting "depth" up to 1024
|
||||
pj.allocateCapacity(p.size(), maxdepth); // allocate memory for parsing up to p.size() bytes
|
||||
pj.allocateCapacity(p.size()); // allocate memory for parsing up to p.size() bytes
|
||||
bool is_ok = json_parse(p, pj); // do the parsing, return false on error
|
||||
// parsing is done!
|
||||
// pj can be reused with other json_parse calls.
|
||||
```
|
||||
|
||||
It is also possible to use a simpler API if you do not mind having the overhead
|
||||
of memory allocation:
|
||||
|
||||
```C
|
||||
#include "simdjson/jsonparser.h"
|
||||
|
||||
// ...
|
||||
|
||||
const char * filename = ... //
|
||||
std::string_view p = get_corpus(filename);
|
||||
ParsedJson pj = build_parsed_json(p); // do the parsing
|
||||
if( ! pj.isValid() ) {
|
||||
// something went wrong
|
||||
}
|
||||
```
|
||||
|
||||
ParsedJson build_parsed_json(const std::string_view &s)
|
||||
|
||||
|
||||
## Usage
|
||||
|
|
|
@ -78,7 +78,7 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
if(verbose) cout << "[verbose] loaded " << filename << " ("<< p.size() << " bytes)" << endl;
|
||||
ParsedJson pj;
|
||||
bool allocok = pj.allocateCapacity(p.size(), 1024);
|
||||
bool allocok = pj.allocateCapacity(p.size());
|
||||
if(!allocok) {
|
||||
std::cerr << "failed to allocate memory" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
|
|
|
@ -89,6 +89,7 @@ int main(int argc, char *argv[]) {
|
|||
int repeat = 10;
|
||||
int volume = p.size();
|
||||
BEST_TIME("simdjson", json_parse(p, pj), true, , repeat, volume, true);
|
||||
BEST_TIME("simdjson (with dyn alloc) ", build_parsed_json(p).isValid(), true, , repeat, volume, true);
|
||||
|
||||
rapidjson::Document d;
|
||||
|
||||
|
|
|
@ -8,7 +8,8 @@
|
|||
#include "simdjson/stage34_unified.h"
|
||||
|
||||
// Parse a document found in buf, need to preallocate ParsedJson.
|
||||
// Return false in case of a failure.
|
||||
// Return false in case of a failure. You can also check validity
|
||||
// by calling pj.isValid(). The same ParsedJson can be reused.
|
||||
// The string should be NULL terminated.
|
||||
WARN_UNUSED
|
||||
bool json_parse(const u8 *buf, size_t len, ParsedJson &pj);
|
||||
|
@ -23,3 +24,20 @@ WARN_UNUSED
|
|||
static inline bool json_parse(const std::string_view &s, ParsedJson &pj) {
|
||||
return json_parse(s.data(), s.size(), pj);
|
||||
}
|
||||
|
||||
|
||||
// Build a ParsedJson object. You can check validity
|
||||
// by calling pj.isValid(). This does memory allocation.
|
||||
WARN_UNUSED
|
||||
ParsedJson build_parsed_json(const u8 *buf, size_t len);
|
||||
|
||||
WARN_UNUSED
|
||||
static inline ParsedJson build_parsed_json(const char * buf, size_t len) {
|
||||
return build_parsed_json((const u8 *) buf, len);
|
||||
}
|
||||
|
||||
// convenience function
|
||||
WARN_UNUSED
|
||||
static inline ParsedJson build_parsed_json(const std::string_view &s) {
|
||||
return build_parsed_json(s.data(), s.size());
|
||||
}
|
|
@ -14,26 +14,12 @@
|
|||
|
||||
#include "simdjson/jsonformatutils.h"
|
||||
|
||||
#define JSONVALUEMASK 0xFFFFFFFFFFFFFF;
|
||||
#define JSONVALUEMASK 0xFFFFFFFFFFFFFF
|
||||
|
||||
#define DEFAULTMAXDEPTH 1024// a JSON document with a depth exceeding 1024 is probably de facto invalid
|
||||
|
||||
struct ParsedJson {
|
||||
public:
|
||||
size_t bytecapacity; // indicates how many bits are meant to be supported by
|
||||
// structurals
|
||||
size_t depthcapacity; // how deep we can go
|
||||
size_t tapecapacity;
|
||||
size_t stringcapacity;
|
||||
u32 current_loc;
|
||||
u8 *structurals;
|
||||
u32 n_structural_indexes;
|
||||
u32 *structural_indexes;
|
||||
|
||||
u64 *tape;
|
||||
u32 *containing_scope_offset;
|
||||
void **ret_address;
|
||||
|
||||
u8 *string_buf; // should be at least bytecapacity
|
||||
u8 *current_string_buf_loc;
|
||||
|
||||
// create a ParsedJson container with zero capacity, call allocateCapacity to
|
||||
// allocate memory
|
||||
|
@ -41,12 +27,12 @@ public:
|
|||
: bytecapacity(0), depthcapacity(0), tapecapacity(0), stringcapacity(0),
|
||||
current_loc(0), structurals(NULL), n_structural_indexes(0),
|
||||
structural_indexes(NULL), tape(NULL), containing_scope_offset(NULL),
|
||||
ret_address(NULL), string_buf(NULL), current_string_buf_loc(NULL) {}
|
||||
ret_address(NULL), string_buf(NULL), current_string_buf_loc(NULL), isvalid(false) {}
|
||||
|
||||
// if needed, allocate memory so that the object is able to process JSON
|
||||
// documents having up to len bytes and maxdepth "depth"
|
||||
WARN_UNUSED
|
||||
inline bool allocateCapacity(size_t len, size_t maxdepth) {
|
||||
inline bool allocateCapacity(size_t len, size_t maxdepth = DEFAULTMAXDEPTH) {
|
||||
if ((maxdepth == 0) || (len == 0)) {
|
||||
std::cerr << "capacities must be non-zero " << std::endl;
|
||||
return false;
|
||||
|
@ -56,6 +42,7 @@ public:
|
|||
return true;
|
||||
deallocate();
|
||||
}
|
||||
isvalid = false;
|
||||
bytecapacity = 0; // will only set it to len after allocations are a success
|
||||
if (posix_memalign((void **)&structurals, 8, ROUNDUP_N(len, 64) / 8)) {
|
||||
std::cerr << "Could not allocate memory for structurals" << std::endl;
|
||||
|
@ -97,6 +84,10 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
bool isValid() const {
|
||||
return isvalid;
|
||||
}
|
||||
|
||||
// deallocate memory and set capacity to zero, called automatically by the
|
||||
// destructor
|
||||
void deallocate() {
|
||||
|
@ -110,6 +101,7 @@ public:
|
|||
delete[] string_buf;
|
||||
delete[] structural_indexes;
|
||||
free(structurals);
|
||||
isvalid = false;
|
||||
}
|
||||
|
||||
~ParsedJson() { deallocate(); }
|
||||
|
@ -118,6 +110,7 @@ public:
|
|||
void init() {
|
||||
current_string_buf_loc = string_buf;
|
||||
current_loc = 0;
|
||||
isvalid = false;
|
||||
}
|
||||
|
||||
// print the json to stdout (should be valid)
|
||||
|
@ -125,6 +118,7 @@ public:
|
|||
// JSON).
|
||||
WARN_UNUSED
|
||||
bool printjson() {
|
||||
if(!isvalid) return false;
|
||||
size_t tapeidx = 0;
|
||||
u64 tape_val = tape[tapeidx];
|
||||
u8 type = (tape_val >> 56);
|
||||
|
@ -227,6 +221,7 @@ public:
|
|||
|
||||
WARN_UNUSED
|
||||
bool dump_raw_tape() {
|
||||
if(!isvalid) return false;
|
||||
size_t tapeidx = 0;
|
||||
u64 tape_val = tape[tapeidx++];
|
||||
u8 type = (tape_val >> 56);
|
||||
|
@ -374,6 +369,32 @@ public:
|
|||
};
|
||||
|
||||
#endif
|
||||
|
||||
size_t bytecapacity; // indicates how many bits are meant to be supported by
|
||||
// structurals
|
||||
|
||||
size_t depthcapacity; // how deep we can go
|
||||
size_t tapecapacity;
|
||||
size_t stringcapacity;
|
||||
u32 current_loc;
|
||||
u8 *structurals;
|
||||
u32 n_structural_indexes;
|
||||
|
||||
u32 *structural_indexes;
|
||||
|
||||
u64 *tape;
|
||||
u32 *containing_scope_offset;
|
||||
void **ret_address;
|
||||
|
||||
u8 *string_buf; // should be at least bytecapacity
|
||||
u8 *current_string_buf_loc;
|
||||
bool isvalid;
|
||||
ParsedJson(const ParsedJson && p); // move constructor, declared explicitly: we don't want the implicitly generated one to be called
|
||||
|
||||
private :
|
||||
ParsedJson(const ParsedJson & p); // copy constructor, kept private: we don't want the implicitly generated one to be called
|
||||
|
||||
|
||||
};
|
||||
|
||||
#ifdef DEBUG
|
||||
|
|
|
@ -23,3 +23,15 @@ bool json_parse(const u8 *buf, size_t len, ParsedJson &pj) {
|
|||
return isok;
|
||||
}
|
||||
|
||||
WARN_UNUSED
|
||||
ParsedJson build_parsed_json(const u8 *buf, size_t len) {
|
||||
ParsedJson pj;
|
||||
bool ok = pj.allocateCapacity(len);
|
||||
if(ok) {
|
||||
ok = json_parse(buf, len, pj);
|
||||
assert(ok == pj.isValid());
|
||||
} else {
|
||||
std::cerr << "failure during memory allocation " << std::endl;
|
||||
}
|
||||
return pj;
|
||||
}
|
||||
|
|
|
@ -432,8 +432,9 @@ succeed:
|
|||
|
||||
|
||||
#ifdef DEBUG
|
||||
pj.dump_tapes();
|
||||
pj.dump_raw_tape();
|
||||
#endif
|
||||
pj.isvalid = true;
|
||||
return true;
|
||||
|
||||
fail:
|
||||
|
|
Loading…
Reference in New Issue