Make valstat-ish parse APIs
This commit is contained in:
parent
bc8bc7d1a8
commit
1f76737510
8
Makefile
8
Makefile
|
@ -70,10 +70,10 @@ LIBHEADERS_HASWELL= src/haswell/bitmanipulation.h src/haswell/bitmask.h src/h
|
|||
LIBHEADERS_WESTMERE=src/westmere/bitmanipulation.h src/westmere/bitmask.h src/westmere/intrinsics.h src/westmere/numberparsing.h src/westmere/simd.h src/westmere/stage1_find_marks.h src/westmere/stage2_build_tape.h src/westmere/stringparsing.h
|
||||
LIBHEADERS=src/jsoncharutils.h src/simdprune_tables.h $(LIBHEADERS_GENERIC) $(LIBHEADERS_ARM64) $(LIBHEADERS_HASWELL) $(LIBHEADERS_WESTMERE)
|
||||
|
||||
PUBHEADERS=include/simdjson/common_defs.h include/simdjson/isadetection.h include/simdjson/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/padded_string.h include/simdjson/document.h include/simdjson/document/iterator.h include/simdjson/document/parser.h include/simdjson/parsedjson.h include/simdjson/jsonstream.h include/simdjson/portability.h include/simdjson/architecture.h include/simdjson/error.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_build_tape.h
|
||||
PUBHEADERS=include/simdjson/common_defs.h include/simdjson/isadetection.h include/simdjson/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/padded_string.h include/simdjson/document.h include/simdjson/document_iterator.h include/simdjson/document_parser.h include/simdjson/parsedjson.h include/simdjson/jsonstream.h include/simdjson/portability.h include/simdjson/architecture.h include/simdjson/error.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_build_tape.h
|
||||
HEADERS=$(PUBHEADERS) $(LIBHEADERS)
|
||||
|
||||
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/document.cpp src/document/parser.cpp
|
||||
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/document.cpp src/document_parser.cpp
|
||||
MINIFIERHEADERS=include/simdjson/jsonminifier.h
|
||||
MINIFIERLIBFILES=src/jsonminifier.cpp
|
||||
|
||||
|
@ -205,7 +205,7 @@ basictests:tests/basictests.cpp $(HEADERS) $(LIBFILES)
|
|||
|
||||
|
||||
numberparsingcheck:tests/numberparsingcheck.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o numberparsingcheck src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/document.cpp src/document/parser.cpp tests/numberparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_NUMBERS
|
||||
$(CXX) $(CXXFLAGS) -o numberparsingcheck src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/document.cpp src/document_parser.cpp tests/numberparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_NUMBERS
|
||||
|
||||
integer_tests:tests/integer_tests.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o integer_tests $(LIBFILES) tests/integer_tests.cpp -I. $(LIBFLAGS)
|
||||
|
@ -213,7 +213,7 @@ integer_tests:tests/integer_tests.cpp $(HEADERS) $(LIBFILES)
|
|||
|
||||
|
||||
stringparsingcheck:tests/stringparsingcheck.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o stringparsingcheck src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/document.cpp src/document/parser.cpp tests/stringparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_STRINGS
|
||||
$(CXX) $(CXXFLAGS) -o stringparsingcheck src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/document.cpp src/document_parser.cpp tests/stringparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_STRINGS
|
||||
|
||||
pointercheck:tests/pointercheck.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o pointercheck $(LIBFILES) tests/pointercheck.cpp -I. $(LIBFLAGS)
|
||||
|
|
32
README.md
32
README.md
|
@ -121,35 +121,29 @@ Another strategy is to reuse pre-allocated buffers. That is, you avoid reallocat
|
|||
|
||||
The main API involves populating a `ParsedJson` object which hosts a fully navigable document-object-model (DOM) view of the JSON document. The DOM can be accessed using [JSON Pointer](https://tools.ietf.org/html/rfc6901) paths, for example. The main function is `json_parse` which takes a string containing the JSON document as well as a reference to a pre-allocated `ParsedJson` object (which can be reused multiple times). Once you have populated the `ParsedJson` object you can navigate through the DOM with an iterator (e.g., created by `ParsedJson::Iterator pjh(pj)`, see 'Navigating the parsed document').
|
||||
|
||||
```C
|
||||
#include "simdjson/jsonparser.h"
|
||||
using namespace simdjson;
|
||||
// Samples:
|
||||
// Load a document from a file
|
||||
// Read a particular key / value from the document
|
||||
// Iterate over an array of things
|
||||
|
||||
/...
|
||||
|
||||
const char * filename = ... //
|
||||
|
||||
// use whatever means you want to get a string (UTF-8) of your JSON document
|
||||
padded_string p = get_corpus(filename);
|
||||
ParsedJson pj;
|
||||
pj.allocate_capacity(p.size()); // allocate memory for parsing up to p.size() bytes
|
||||
const int res = json_parse(p, pj); // do the parsing, return 0 on success
|
||||
// parsing is done!
|
||||
if (res != 0) {
|
||||
// You can use the "simdjson/simdjson.h" header to access the error message
|
||||
std::cout << "Error parsing:" << simdjson::error_message(res) << std::endl;
|
||||
```c++
|
||||
#include "simdjson.h"
|
||||
auto doc = simdjson::document::load("myfile.json");
|
||||
cout << doc;
|
||||
for (auto i=doc.begin(); i<doc.end(); i++) {
|
||||
cout << doc[i];
|
||||
}
|
||||
// the ParsedJson document can be used here
|
||||
// pj can be reused with other json_parse calls.
|
||||
```
|
||||
|
||||
It is also possible to use a simpler API if you do not mind having the overhead
|
||||
A slightly simpler API is available if you don't mind having the overhead
|
||||
of memory allocation with each new JSON document:
|
||||
|
||||
```C
|
||||
#include "simdjson/jsonparser.h"
|
||||
using namespace simdjson;
|
||||
|
||||
document doc = document::parse("myfile.json");
|
||||
cout << doc;
|
||||
/...
|
||||
|
||||
const char * filename = ... //
|
||||
|
|
|
@ -23,7 +23,7 @@ jsonparser.cpp
|
|||
stage1_find_marks.cpp
|
||||
stage2_build_tape.cpp
|
||||
document.cpp
|
||||
document/parser.cpp
|
||||
document_parser.cpp
|
||||
"
|
||||
|
||||
# order matters
|
||||
|
@ -38,8 +38,8 @@ simdjson/padded_string.h
|
|||
simdjson/jsonioutil.h
|
||||
simdjson/jsonminifier.h
|
||||
simdjson/document.h
|
||||
simdjson/document/iterator.h
|
||||
simdjson/document/parser.h
|
||||
simdjson/document_iterator.h
|
||||
simdjson/document_parser.h
|
||||
simdjson/parsedjson.h
|
||||
simdjson/stage1_find_marks.h
|
||||
simdjson/stage2_build_tape.h
|
||||
|
@ -151,11 +151,11 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
const char * filename = argv[1];
|
||||
simdjson::padded_string p = simdjson::get_corpus(filename);
|
||||
simdjson::document doc;
|
||||
if (!simdjson::document::try_parse(p, doc)) { // do the parsing
|
||||
std::cout << "document::try_parse not valid" << std::endl;
|
||||
auto [doc, error] = simdjson::document::parse(p); // do the parsing
|
||||
if (error) {
|
||||
std::cout << "document::parse not valid" << std::endl;
|
||||
} else {
|
||||
std::cout << "document::try_parse valid" << std::endl;
|
||||
std::cout << "document::parse valid" << std::endl;
|
||||
}
|
||||
if(argc == 2) {
|
||||
return EXIT_SUCCESS;
|
||||
|
|
|
@ -68,8 +68,7 @@ simdjson_compute_stats(const simdjson::padded_string &p) {
|
|||
|
||||
__attribute__((noinline)) bool
|
||||
simdjson_just_parse(const simdjson::padded_string &p) {
|
||||
simdjson::document doc;
|
||||
return simdjson::document::try_parse(p, doc) == simdjson::SUCCESS;
|
||||
return simdjson::document::parse(p).error != simdjson::SUCCESS;
|
||||
}
|
||||
|
||||
void sajson_traverse(std::vector<int64_t> &answer, const sajson::value &node) {
|
||||
|
|
|
@ -2,8 +2,8 @@ set(SIMDJSON_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include)
|
|||
set(SIMDJSON_INCLUDE
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/common_defs.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/document.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/document/iterator.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/document/parser.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/document_iterator.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/document_parser.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/isadetection.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonformatutils.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonioutil.h
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
namespace simdjson {
|
||||
|
||||
template<size_t max_depth> class document_iterator;
|
||||
class document_parser;
|
||||
|
||||
class document {
|
||||
public:
|
||||
|
@ -27,6 +28,9 @@ public:
|
|||
document &operator=(const document &o) = delete;
|
||||
|
||||
using iterator = document_iterator<DEFAULT_MAX_DEPTH>;
|
||||
class parser;
|
||||
class doc_result;
|
||||
class doc_ref_result;
|
||||
|
||||
//
|
||||
// Tell whether this document has been parsed, or is just empty.
|
||||
|
@ -43,8 +47,6 @@ public:
|
|||
WARN_UNUSED
|
||||
bool dump_raw_tape(std::ostream &os) const;
|
||||
|
||||
class parser;
|
||||
|
||||
//
|
||||
// Parse a JSON document.
|
||||
//
|
||||
|
@ -53,35 +55,10 @@ public:
|
|||
//
|
||||
// Throws invalid_json if the JSON is invalid.
|
||||
//
|
||||
static document parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true);
|
||||
static document parse(const char *buf, size_t len, bool realloc_if_needed = true) {
|
||||
return parse((const uint8_t *)buf, len, realloc_if_needed);
|
||||
}
|
||||
static document parse(const std::string &s, bool realloc_if_needed = true) {
|
||||
return parse(s.data(), s.length(), realloc_if_needed);
|
||||
}
|
||||
static document parse(const padded_string &s) {
|
||||
return parse(s.data(), s.length(), false);
|
||||
}
|
||||
|
||||
//
|
||||
// Parse a JSON document.
|
||||
//
|
||||
// If you will be parsing more than one JSON document, it's recommended to create a
|
||||
// document::parser object instead, keeping internal buffers around for efficiency reasons.
|
||||
//
|
||||
// Returns != SUCCESS if the JSON is invalid.
|
||||
//
|
||||
static WARN_UNUSED error_code try_parse(const uint8_t *buf, size_t len, document &dst, bool realloc_if_needed = true) noexcept;
|
||||
static WARN_UNUSED error_code try_parse(const char *buf, size_t len, document &dst, bool realloc_if_needed = true) {
|
||||
return try_parse((const uint8_t *)buf, len, dst, realloc_if_needed);
|
||||
}
|
||||
static WARN_UNUSED error_code try_parse(const std::string &s, document &dst, bool realloc_if_needed = true) {
|
||||
return try_parse(s.data(), s.length(), dst, realloc_if_needed);
|
||||
}
|
||||
static WARN_UNUSED error_code try_parse(const padded_string &s, document &dst) {
|
||||
return try_parse(s.data(), s.length(), dst, false);
|
||||
}
|
||||
static doc_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true);
|
||||
static doc_result parse(const char *buf, size_t len, bool realloc_if_needed = true);
|
||||
static doc_result parse(const std::string &s, bool realloc_if_needed = true);
|
||||
static doc_result parse(const padded_string &s);
|
||||
|
||||
std::unique_ptr<uint64_t[]> tape;
|
||||
std::unique_ptr<uint8_t[]> string_buf;// should be at least byte_capacity
|
||||
|
@ -90,30 +67,46 @@ private:
|
|||
bool set_capacity(size_t len);
|
||||
};
|
||||
|
||||
//
// Result of a parse that yields an owned document: the document together with
// an error code.
//
// - Converts to bool: true when error == SUCCESS.
// - Converts to document: throws invalid_json(error) when error != SUCCESS,
//   otherwise moves the stored document out.
//
// Both members are public, so callers can also read `doc` and `error` directly.
// Constructors are private; only `document` (a friend) creates instances.
//
class document::doc_result {
private:
  // Document plus an explicit error code.
  doc_result(document &&_doc, error_code _error) : doc(std::move(_doc)), error(_error) { }
  // Successful result: takes the document and records SUCCESS.
  doc_result(document &&_doc) : doc(std::move(_doc)), error(SUCCESS) { }
  // Failed result: default-constructed document plus the error code.
  doc_result(error_code _error) : doc(), error(_error) { }
  friend class document;
public:
  ~doc_result()=default;

  // True when error == SUCCESS. Declared const so a const result can be tested.
  operator bool() const noexcept { return error == SUCCESS; }
  // Throws invalid_json(error) on failure. On success the stored document is
  // moved out, so `doc` is left in a moved-from state afterwards.
  operator document() {
    if (!*this) {
      throw invalid_json(error);
    }
    return std::move(doc);
  }
  document doc;
  error_code error;
};
|
||||
|
||||
//
// Result of a parse that yields a reference to a document stored elsewhere:
// the reference together with an error code.
//
// - Converts to bool: true when error == SUCCESS.
// - Converts to document&: throws invalid_json(error) when error != SUCCESS,
//   otherwise returns the referenced document.
//
// This object does not own the document; it only holds a reference to it.
//
class document::doc_ref_result {
public:
  doc_ref_result(document &_doc, error_code _error) : doc(_doc), error(_error) { }
  ~doc_ref_result()=default;

  // True when error == SUCCESS. Declared const so a const result can be tested.
  operator bool() const noexcept { return error == SUCCESS; }
  // Throws invalid_json(error) on failure; otherwise returns the referenced document.
  operator document&() {
    if (!*this) {
      throw invalid_json(error);
    }
    return doc;
  }
  document& doc;
  error_code error;
};
|
||||
|
||||
|
||||
} // namespace simdjson
|
||||
|
||||
#include "simdjson/document/parser.h"
|
||||
#include "simdjson/document/iterator.h"
|
||||
|
||||
// Implementations
|
||||
namespace simdjson {
|
||||
|
||||
inline WARN_UNUSED document document::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) {
|
||||
document::parser parser;
|
||||
if (!parser.allocate_capacity(len)) {
|
||||
throw invalid_json(parser.error = MEMALLOC);
|
||||
}
|
||||
return parser.parse_new(buf, len, realloc_if_needed);
|
||||
}
|
||||
|
||||
inline WARN_UNUSED error_code document::try_parse(const uint8_t *buf, size_t len, document &dst, bool realloc_if_needed) noexcept {
|
||||
document::parser parser;
|
||||
if (!parser.allocate_capacity(len)) {
|
||||
return parser.error = MEMALLOC;
|
||||
}
|
||||
return parser.try_parse_into(buf, len, dst, realloc_if_needed);
|
||||
}
|
||||
|
||||
} // namespace simdjson
|
||||
#include "simdjson/document_parser.h"
|
||||
#include "simdjson/document_iterator.h"
|
||||
|
||||
#endif // SIMDJSON_DOCUMENT_H
|
|
@ -1,349 +0,0 @@
|
|||
#ifndef SIMDJSON_DOCUMENT_PARSER_H
|
||||
#define SIMDJSON_DOCUMENT_PARSER_H
|
||||
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/simdjson.h"
|
||||
#include "simdjson/document.h"
|
||||
#include "simdjson/padded_string.h"
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
class document::parser {
|
||||
public:
|
||||
//
|
||||
// Create a JSON parser with zero capacity. Call allocate_capacity() to initialize it.
|
||||
//
|
||||
parser()=default;
|
||||
~parser()=default;
|
||||
|
||||
// this is a move only class
|
||||
parser(parser &&p) = default;
|
||||
parser(const parser &p) = delete;
|
||||
parser &operator=(parser &&o) = default;
|
||||
parser &operator=(const parser &o) = delete;
|
||||
|
||||
//
|
||||
// Parse a JSON document and return a reference to it.
|
||||
//
|
||||
// The JSON document still lives in the parser: this is the most efficient way to parse JSON
|
||||
// documents because it reuses the same buffers, but you *must* use the document before you
|
||||
// destroy the parser or call parse() again.
|
||||
//
|
||||
// Throws invalid_json if the JSON is invalid.
|
||||
//
|
||||
const document &parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true);
|
||||
const document &parse(const char *buf, size_t len, bool realloc_if_needed = true) {
|
||||
return parse((const uint8_t *)buf, len, realloc_if_needed);
|
||||
}
|
||||
const document &parse(const std::string &s, bool realloc_if_needed = true) {
|
||||
return parse(s.data(), s.length(), realloc_if_needed);
|
||||
}
|
||||
const document &parse(const padded_string &s) {
|
||||
return parse(s.data(), s.length(), false);
|
||||
}
|
||||
|
||||
//
|
||||
// Parse a JSON document and take the result.
|
||||
//
|
||||
// The document can be used even after the parser is deallocated or parse() is called again.
|
||||
//
|
||||
// Throws invalid_json if the JSON is invalid.
|
||||
//
|
||||
document parse_new(const uint8_t *buf, size_t len, bool realloc_if_needed = true);
|
||||
document parse_new(const char *buf, size_t len, bool realloc_if_needed = true) {
|
||||
return parse_new((const uint8_t *)buf, len, realloc_if_needed);
|
||||
}
|
||||
document parse_new(const std::string &s, bool realloc_if_needed = true) {
|
||||
return parse_new(s.data(), s.length(), realloc_if_needed);
|
||||
}
|
||||
document parse_new(const padded_string &s) {
|
||||
return parse_new(s.data(), s.length(), false);
|
||||
}
|
||||
|
||||
//
|
||||
// Parse a JSON document and set doc to a pointer to it.
|
||||
//
|
||||
// The JSON document still lives in the parser: this is the most efficient way to parse JSON
|
||||
// documents because it reuses the same buffers, but you *must* use the document before you
|
||||
// destroy the parser or call parse() again.
|
||||
//
|
||||
// Returns != SUCCESS if the JSON is invalid.
|
||||
//
|
||||
WARN_UNUSED error_code try_parse(const uint8_t *buf, size_t len, const document *& dst, bool realloc_if_needed = true) noexcept;
|
||||
WARN_UNUSED error_code try_parse(const char *buf, size_t len, const document *& dst, bool realloc_if_needed = true) noexcept {
|
||||
return try_parse((const uint8_t *)buf, len, dst, realloc_if_needed);
|
||||
}
|
||||
WARN_UNUSED error_code try_parse(const std::string &s, const document *&dst, bool realloc_if_needed = true) noexcept {
|
||||
return try_parse(s.data(), s.length(), dst, realloc_if_needed);
|
||||
}
|
||||
WARN_UNUSED error_code try_parse(const padded_string &s, const document *&dst) noexcept {
|
||||
return try_parse(s.data(), s.length(), dst, false);
|
||||
}
|
||||
|
||||
//
|
||||
// Parse a JSON document and fill in dst.
|
||||
//
|
||||
// The document can be used even after the parser is deallocated or parse() is called again.
|
||||
//
|
||||
// Returns != SUCCESS if the JSON is invalid.
|
||||
//
|
||||
WARN_UNUSED error_code try_parse_into(const uint8_t *buf, size_t len, document &dst, bool realloc_if_needed = true) noexcept;
|
||||
WARN_UNUSED error_code try_parse_into(const char *buf, size_t len, document &dst, bool realloc_if_needed = true) noexcept {
|
||||
return try_parse_into((const uint8_t *)buf, len, dst, realloc_if_needed);
|
||||
}
|
||||
WARN_UNUSED error_code try_parse_into(const std::string &s, document &dst, bool realloc_if_needed = true) noexcept {
|
||||
return try_parse_into(s.data(), s.length(), dst, realloc_if_needed);
|
||||
}
|
||||
WARN_UNUSED error_code try_parse_into(const padded_string &s, document &dst) noexcept {
|
||||
return try_parse_into(s.data(), s.length(), dst, false);
|
||||
}
|
||||
|
||||
//
|
||||
// Current capacity: the largest document this parser can support without reallocating.
|
||||
//
|
||||
size_t capacity() {
|
||||
return _capacity;
|
||||
}
|
||||
|
||||
//
|
||||
// The maximum level of nested objects and arrays supported by this parser.
|
||||
//
|
||||
size_t max_depth() {
|
||||
return _max_depth;
|
||||
}
|
||||
|
||||
// if needed, allocate memory so that the object is able to process JSON
|
||||
// documents having up to capacity bytes and max_depth "depth"
|
||||
WARN_UNUSED bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) {
|
||||
return set_capacity(capacity) && set_max_depth(max_depth);
|
||||
}
|
||||
|
||||
// type aliases for backcompat
|
||||
using Iterator = document::iterator;
|
||||
using InvalidJSON = invalid_json;
|
||||
|
||||
// Next location to write to in the tape
|
||||
uint32_t current_loc{0};
|
||||
|
||||
// structural indices passed from stage 1 to stage 2
|
||||
uint32_t n_structural_indexes{0};
|
||||
std::unique_ptr<uint32_t[]> structural_indexes;
|
||||
|
||||
// location and return address of each open { or [
|
||||
std::unique_ptr<uint32_t[]> containing_scope_offset;
|
||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
||||
std::unique_ptr<void*[]> ret_address;
|
||||
#else
|
||||
std::unique_ptr<char[]> ret_address;
|
||||
#endif
|
||||
|
||||
// Next place to write a string
|
||||
uint8_t *current_string_buf_loc;
|
||||
|
||||
bool valid{false};
|
||||
error_code error{simdjson::UNINITIALIZED};
|
||||
|
||||
// Document we're writing to
|
||||
document doc;
|
||||
|
||||
// returns true if the document parsed was valid
|
||||
bool is_valid() const;
|
||||
|
||||
// return an error code corresponding to the last parsing attempt, see
|
||||
// simdjson.h. Returns simdjson::UNINITIALIZED if no parsing was attempted.
|
||||
int get_error_code() const;
|
||||
|
||||
// return the string equivalent of "get_error_code"
|
||||
std::string get_error_message() const;
|
||||
|
||||
//
|
||||
// for backcompat with ParsedJson
|
||||
//
|
||||
|
||||
// print the json to std::ostream (should be valid)
|
||||
// return false if the tape is likely wrong (e.g., you did not parse a valid
|
||||
// JSON).
|
||||
WARN_UNUSED
|
||||
bool print_json(std::ostream &os) const;
|
||||
WARN_UNUSED
|
||||
bool dump_raw_tape(std::ostream &os) const;
|
||||
|
||||
// this should be called when parsing (right before writing the tapes)
|
||||
void init_stage2();
|
||||
|
||||
really_inline error_code on_error(error_code new_error_code) {
|
||||
error = new_error_code;
|
||||
return new_error_code;
|
||||
}
|
||||
really_inline error_code on_success(error_code success_code) {
|
||||
error = success_code;
|
||||
valid = true;
|
||||
return success_code;
|
||||
}
|
||||
really_inline bool on_start_document(uint32_t depth) {
|
||||
containing_scope_offset[depth] = current_loc;
|
||||
write_tape(0, 'r');
|
||||
return true;
|
||||
}
|
||||
really_inline bool on_start_object(uint32_t depth) {
|
||||
containing_scope_offset[depth] = current_loc;
|
||||
write_tape(0, '{');
|
||||
return true;
|
||||
}
|
||||
really_inline bool on_start_array(uint32_t depth) {
|
||||
containing_scope_offset[depth] = current_loc;
|
||||
write_tape(0, '[');
|
||||
return true;
|
||||
}
|
||||
// TODO we're not checking this bool
|
||||
really_inline bool on_end_document(uint32_t depth) {
|
||||
// write our doc.tape location to the header scope
|
||||
// The root scope gets written *at* the previous location.
|
||||
annotate_previous_loc(containing_scope_offset[depth], current_loc);
|
||||
write_tape(containing_scope_offset[depth], 'r');
|
||||
return true;
|
||||
}
|
||||
really_inline bool on_end_object(uint32_t depth) {
|
||||
// write our doc.tape location to the header scope
|
||||
write_tape(containing_scope_offset[depth], '}');
|
||||
annotate_previous_loc(containing_scope_offset[depth], current_loc);
|
||||
return true;
|
||||
}
|
||||
really_inline bool on_end_array(uint32_t depth) {
|
||||
// write our doc.tape location to the header scope
|
||||
write_tape(containing_scope_offset[depth], ']');
|
||||
annotate_previous_loc(containing_scope_offset[depth], current_loc);
|
||||
return true;
|
||||
}
|
||||
|
||||
really_inline bool on_true_atom() {
|
||||
write_tape(0, 't');
|
||||
return true;
|
||||
}
|
||||
really_inline bool on_false_atom() {
|
||||
write_tape(0, 'f');
|
||||
return true;
|
||||
}
|
||||
really_inline bool on_null_atom() {
|
||||
write_tape(0, 'n');
|
||||
return true;
|
||||
}
|
||||
|
||||
really_inline uint8_t *on_start_string() {
|
||||
/* we advance the point, accounting for the fact that we have a NULL
|
||||
* termination */
|
||||
write_tape(current_string_buf_loc - doc.string_buf.get(), '"');
|
||||
return current_string_buf_loc + sizeof(uint32_t);
|
||||
}
|
||||
|
||||
really_inline bool on_end_string(uint8_t *dst) {
|
||||
uint32_t str_length = dst - (current_string_buf_loc + sizeof(uint32_t));
|
||||
// TODO check for overflow in case someone has a crazy string (>=4GB?)
|
||||
// But only add the overflow check when the document itself exceeds 4GB
|
||||
// Currently unneeded because we refuse to parse docs larger or equal to 4GB.
|
||||
memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t));
|
||||
// NULL termination is still handy if you expect all your strings to
|
||||
// be NULL terminated? It comes at a small cost
|
||||
*dst = 0;
|
||||
current_string_buf_loc = dst + 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
really_inline bool on_number_s64(int64_t value) {
|
||||
write_tape(0, 'l');
|
||||
std::memcpy(&doc.tape[current_loc], &value, sizeof(value));
|
||||
++current_loc;
|
||||
return true;
|
||||
}
|
||||
really_inline bool on_number_u64(uint64_t value) {
|
||||
write_tape(0, 'u');
|
||||
doc.tape[current_loc++] = value;
|
||||
return true;
|
||||
}
|
||||
really_inline bool on_number_double(double value) {
|
||||
write_tape(0, 'd');
|
||||
static_assert(sizeof(value) == sizeof(doc.tape[current_loc]), "mismatch size");
|
||||
memcpy(&doc.tape[current_loc++], &value, sizeof(double));
|
||||
// doc.tape[doc.current_loc++] = *((uint64_t *)&d);
|
||||
return true;
|
||||
}
|
||||
|
||||
//
|
||||
// Called before a parse is initiated.
|
||||
//
|
||||
// - Returns CAPACITY if the document is too large
|
||||
// - Returns MEMALLOC if we needed to allocate memory and could not
|
||||
//
|
||||
WARN_UNUSED error_code init_parse(size_t len);
|
||||
|
||||
const document &get_document() const {
|
||||
if (!is_valid()) {
|
||||
throw invalid_json(error);
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
private:
|
||||
//
|
||||
// The maximum document length this parser supports.
|
||||
//
|
||||
// Buffers are large enough to handle any document up to this length.
|
||||
//
|
||||
size_t _capacity{0};
|
||||
|
||||
//
|
||||
// The maximum depth (number of nested objects and arrays) supported by this parser.
|
||||
//
|
||||
// Defaults to DEFAULT_MAX_DEPTH.
|
||||
//
|
||||
size_t _max_depth{0};
|
||||
|
||||
// all nodes are stored on the doc.tape using a 64-bit word.
|
||||
//
|
||||
// strings, double and ints are stored as
|
||||
// a 64-bit word with a pointer to the actual value
|
||||
//
|
||||
//
|
||||
//
|
||||
// for objects or arrays, store [ or { at the beginning and } and ] at the
|
||||
// end. For the openings ([ or {), we annotate them with a reference to the
|
||||
// location on the doc.tape of the end, and for the closings (} and ]), we
|
||||
// annotate them with a reference to the location of the opening
|
||||
//
|
||||
//
|
||||
|
||||
// this should be considered a private function
|
||||
really_inline void write_tape(uint64_t val, uint8_t c) {
|
||||
doc.tape[current_loc++] = val | ((static_cast<uint64_t>(c)) << 56);
|
||||
}
|
||||
|
||||
really_inline void annotate_previous_loc(uint32_t saved_loc, uint64_t val) {
|
||||
doc.tape[saved_loc] |= val;
|
||||
}
|
||||
|
||||
WARN_UNUSED error_code try_parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept;
|
||||
|
||||
//
|
||||
// Set the current capacity: the largest document this parser can support without reallocating.
|
||||
//
|
||||
// This will allocate *or deallocate* as necessary.
|
||||
//
|
||||
// Returns false if allocation fails.
|
||||
//
|
||||
WARN_UNUSED bool set_capacity(size_t capacity);
|
||||
|
||||
//
|
||||
// Set the maximum level of nested object and arrays supported by this parser.
|
||||
//
|
||||
// This will allocate *or deallocate* as necessary.
|
||||
//
|
||||
// Returns false if allocation fails.
|
||||
//
|
||||
WARN_UNUSED bool set_max_depth(size_t max_depth);
|
||||
};
|
||||
|
||||
} // namespace simdjson
|
||||
|
||||
#endif // SIMDJSON_DOCUMENT_PARSER_H
|
|
@ -0,0 +1,597 @@
|
|||
#ifndef SIMDJSON_DOCUMENT_PARSER_H
|
||||
#define SIMDJSON_DOCUMENT_PARSER_H
|
||||
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/simdjson.h"
|
||||
#include "simdjson/document.h"
|
||||
#include "simdjson/padded_string.h"
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
class document::parser {
|
||||
public:
|
||||
//
|
||||
// Create a JSON parser with zero capacity. Call allocate_capacity() to initialize it.
|
||||
//
|
||||
parser()=default;
|
||||
~parser()=default;
|
||||
|
||||
// this is a move only class
|
||||
parser(document::parser &&p) = default;
|
||||
parser(const document::parser &p) = delete;
|
||||
parser &operator=(document::parser &&o) = default;
|
||||
parser &operator=(const document::parser &o) = delete;
|
||||
|
||||
//
|
||||
// Parse a JSON document and return a reference to it.
|
||||
//
|
||||
// The JSON document still lives in the parser: this is the most efficient way to parse JSON
|
||||
// documents because it reuses the same buffers, but you *must* use the document before you
|
||||
// destroy the parser or call parse() again.
|
||||
//
|
||||
// Returns a doc_ref_result; converting it to document& throws invalid_json if parsing did not succeed.
|
||||
//
|
||||
inline doc_ref_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true);
|
||||
inline doc_ref_result parse(const char *buf, size_t len, bool realloc_if_needed = true);
|
||||
inline doc_ref_result parse(const std::string &s, bool realloc_if_needed = true);
|
||||
inline doc_ref_result parse(const padded_string &s);
|
||||
|
||||
//
|
||||
// Current capacity: the largest document this parser can support without reallocating.
|
||||
//
|
||||
size_t capacity() {
|
||||
return _capacity;
|
||||
}
|
||||
|
||||
//
|
||||
// The maximum level of nested objects and arrays supported by this parser.
|
||||
//
|
||||
size_t max_depth() {
|
||||
return _max_depth;
|
||||
}
|
||||
|
||||
// if needed, allocate memory so that the object is able to process JSON
|
||||
// documents having up to capacity bytes and max_depth "depth"
|
||||
WARN_UNUSED bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) {
|
||||
return set_capacity(capacity) && set_max_depth(max_depth);
|
||||
}
|
||||
|
||||
// type aliases for backcompat
|
||||
using Iterator = document::iterator;
|
||||
using InvalidJSON = invalid_json;
|
||||
class doc_result;
|
||||
|
||||
// Next location to write to in the tape
|
||||
uint32_t current_loc{0};
|
||||
|
||||
// structural indices passed from stage 1 to stage 2
|
||||
uint32_t n_structural_indexes{0};
|
||||
std::unique_ptr<uint32_t[]> structural_indexes;
|
||||
|
||||
// location and return address of each open { or [
|
||||
std::unique_ptr<uint32_t[]> containing_scope_offset;
|
||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
||||
std::unique_ptr<void*[]> ret_address;
|
||||
#else
|
||||
std::unique_ptr<char[]> ret_address;
|
||||
#endif
|
||||
|
||||
// Next place to write a string
|
||||
uint8_t *current_string_buf_loc;
|
||||
|
||||
bool valid{false};
|
||||
error_code error{simdjson::UNINITIALIZED};
|
||||
|
||||
// Document we're writing to
|
||||
document doc;
|
||||
|
||||
// returns true if the document parsed was valid
|
||||
bool is_valid() const;
|
||||
|
||||
// return an error code corresponding to the last parsing attempt, see
|
||||
// simdjson.h. Returns simdjson::UNINITIALIZED if no parsing was attempted.
|
||||
int get_error_code() const;
|
||||
|
||||
// return the string equivalent of "get_error_code"
|
||||
std::string get_error_message() const;
|
||||
|
||||
//
|
||||
// for backcompat with ParsedJson
|
||||
//
|
||||
|
||||
// print the json to std::ostream (should be valid)
|
||||
// return false if the tape is likely wrong (e.g., you did not parse a valid
|
||||
// JSON).
|
||||
WARN_UNUSED
|
||||
bool print_json(std::ostream &os) const;
|
||||
WARN_UNUSED
|
||||
bool dump_raw_tape(std::ostream &os) const;
|
||||
|
||||
// this should be called when parsing (right before writing the tapes)
|
||||
void init_stage2();
|
||||
|
||||
really_inline error_code on_error(error_code new_error_code) {
|
||||
error = new_error_code;
|
||||
return new_error_code;
|
||||
}
|
||||
really_inline error_code on_success(error_code success_code) {
|
||||
error = success_code;
|
||||
valid = true;
|
||||
return success_code;
|
||||
}
|
||||
really_inline bool on_start_document(uint32_t depth) {
|
||||
containing_scope_offset[depth] = current_loc;
|
||||
write_tape(0, 'r');
|
||||
return true;
|
||||
}
|
||||
really_inline bool on_start_object(uint32_t depth) {
|
||||
containing_scope_offset[depth] = current_loc;
|
||||
write_tape(0, '{');
|
||||
return true;
|
||||
}
|
||||
really_inline bool on_start_array(uint32_t depth) {
|
||||
containing_scope_offset[depth] = current_loc;
|
||||
write_tape(0, '[');
|
||||
return true;
|
||||
}
|
||||
// TODO we're not checking this bool
|
||||
// Closes the root scope. Always returns true.
really_inline bool on_end_document(uint32_t depth) {
  const auto root_open_loc = containing_scope_offset[depth];
  // The opening word is patched *before* the closing word is appended, so the
  // value OR-ed into it is the position at which the closing 'r' gets written.
  annotate_previous_loc(root_open_loc, current_loc);
  // The closing 'r' word carries the position of the opening word.
  write_tape(root_open_loc, 'r');
  return true;
}
|
||||
// Closes an object scope. Always returns true.
really_inline bool on_end_object(uint32_t depth) {
  const auto open_loc = containing_scope_offset[depth];
  // Append the closing '}' word; its payload is the position of the opening word.
  write_tape(open_loc, '}');
  // Then OR the tape position that follows the closing word into the opening word.
  annotate_previous_loc(open_loc, current_loc);
  return true;
}
|
||||
// Closes an array scope. Always returns true.
really_inline bool on_end_array(uint32_t depth) {
  const auto open_loc = containing_scope_offset[depth];
  // Append the closing ']' word; its payload is the position of the opening word.
  write_tape(open_loc, ']');
  // Then OR the tape position that follows the closing word into the opening word.
  annotate_previous_loc(open_loc, current_loc);
  return true;
}
|
||||
|
||||
// Appends a 't' tape word with a zero payload. Always returns true.
really_inline bool on_true_atom() {
  constexpr uint8_t true_tag = 't';
  write_tape(0, true_tag);
  return true;
}
|
||||
// Appends an 'f' tape word with a zero payload. Always returns true.
really_inline bool on_false_atom() {
  constexpr uint8_t false_tag = 'f';
  write_tape(0, false_tag);
  return true;
}
|
||||
// Appends an 'n' tape word with a zero payload. Always returns true.
really_inline bool on_null_atom() {
  constexpr uint8_t null_tag = 'n';
  write_tape(0, null_tag);
  return true;
}
|
||||
|
||||
// Begins a string record and returns the address where its characters should
// be written.
really_inline uint8_t *on_start_string() {
  // The tape payload is the byte offset of this record inside doc.string_buf.
  const auto record_offset = current_string_buf_loc - doc.string_buf.get();
  write_tape(record_offset, '"');
  // Leave a uint32_t-sized slot at the head of the record; on_end_string()
  // stores the string length there.
  return current_string_buf_loc + sizeof(uint32_t);
}
|
||||
|
||||
// Finishes the string record started at current_string_buf_loc.
//
// dst points one past the last character written for the string. The length
// is stored in the uint32_t slot at the head of the record, a terminating 0 is
// written at dst, and current_string_buf_loc moves to the byte after it.
// Always returns true.
really_inline bool on_end_string(uint8_t *dst) {
  uint8_t *const chars_begin = current_string_buf_loc + sizeof(uint32_t);
  // TODO check for overflow in case someone has a crazy string (>=4GB?)
  // But only add the overflow check when the document itself exceeds 4GB
  // Currently unneeded because we refuse to parse docs larger or equal to 4GB.
  const uint32_t str_length = static_cast<uint32_t>(dst - chars_begin);
  // std::memcpy, as in on_number_s64: the slot has no alignment guarantee.
  std::memcpy(current_string_buf_loc, &str_length, sizeof(str_length));
  // NULL termination is still handy if you expect all your strings to
  // be NULL terminated. It comes at a small cost.
  *dst = 0;
  current_string_buf_loc = dst + 1;
  return true;
}
|
||||
|
||||
// Appends an 'l' tape word followed by a second tape word holding the raw
// bytes of the signed 64-bit value. Always returns true.
really_inline bool on_number_s64(int64_t value) {
  write_tape(0, 'l');
  auto *payload_slot = &doc.tape[current_loc++];
  std::memcpy(payload_slot, &value, sizeof(int64_t));
  return true;
}
|
||||
// Appends a 'u' tape word followed by a second tape word holding the unsigned
// 64-bit value itself. Always returns true.
really_inline bool on_number_u64(uint64_t value) {
  write_tape(0, 'u');
  doc.tape[current_loc] = value;
  ++current_loc;
  return true;
}
|
||||
// Appends a 'd' tape word followed by a second tape word holding the raw bytes
// of the double. Always returns true.
really_inline bool on_number_double(double value) {
  write_tape(0, 'd');
  static_assert(sizeof(value) == sizeof(doc.tape[current_loc]), "mismatch size");
  // Copy the bit pattern with std::memcpy (as on_number_s64 does) instead of
  // type-punning through a pointer cast.
  std::memcpy(&doc.tape[current_loc++], &value, sizeof(value));
  return true;
}
|
||||
|
||||
//
|
||||
// Called before a parse is initiated.
|
||||
//
|
||||
// - Returns CAPACITY if the document is too large
|
||||
// - Returns MEMALLOC if we needed to allocate memory and could not
|
||||
//
|
||||
WARN_UNUSED error_code init_parse(size_t len);
|
||||
|
||||
// Returns the parser's document when is_valid() reports true; otherwise
// throws invalid_json carrying the stored error code.
const document &get_document() const {
  if (is_valid()) {
    return doc;
  }
  throw invalid_json(error);
}
|
||||
|
||||
private:
|
||||
//
|
||||
// The maximum document length this parser supports.
|
||||
//
|
||||
// Buffers are large enough to handle any document up to this length.
|
||||
//
|
||||
size_t _capacity{0};
|
||||
|
||||
//
|
||||
// The maximum depth (number of nested objects and arrays) supported by this parser.
|
||||
//
|
||||
// Defaults to DEFAULT_MAX_DEPTH.
|
||||
//
|
||||
size_t _max_depth{0};
|
||||
|
||||
// all nodes are stored on the doc.tape using a 64-bit word.
|
||||
//
|
||||
// strings, double and ints are stored as
|
||||
// a 64-bit word with a pointer to the actual value
|
||||
//
|
||||
//
|
||||
//
|
||||
// for objects or arrays, store [ or { at the beginning and } and ] at the
|
||||
// end. For the openings ([ or {), we annotate them with a reference to the
|
||||
// location on the doc.tape of the end, and for the closings (} and ]), we
|
||||
// annotate them with a reference to the location of the opening
|
||||
//
|
||||
//
|
||||
|
||||
// this should be considered a private function
|
||||
// Appends one 64-bit tape word: the tag character c occupies the top 8 bits
// (bits 56..63) and val is OR-ed in below it. Advances current_loc by one.
really_inline void write_tape(uint64_t val, uint8_t c) {
  const uint64_t tag_bits = static_cast<uint64_t>(c) << 56;
  doc.tape[current_loc] = val | tag_bits;
  ++current_loc;
}
|
||||
|
||||
// ORs val into the tape word already stored at index saved_loc, leaving the
// bits that are already set (such as the tag) untouched.
really_inline void annotate_previous_loc(uint32_t saved_loc, uint64_t val) {
  auto &tape_word = doc.tape[saved_loc];
  tape_word = tape_word | val;
}
|
||||
|
||||
//
|
||||
// Set the current capacity: the largest document this parser can support without reallocating.
|
||||
//
|
||||
// This will allocate *or deallocate* as necessary.
|
||||
//
|
||||
// Returns false if allocation fails.
|
||||
//
|
||||
WARN_UNUSED bool set_capacity(size_t capacity);
|
||||
|
||||
//
|
||||
// Set the maximum level of nested object and arrays supported by this parser.
|
||||
//
|
||||
// This will allocate *or deallocate* as necessary.
|
||||
//
|
||||
// Returns false if allocation fails.
|
||||
//
|
||||
WARN_UNUSED bool set_max_depth(size_t max_depth);
|
||||
};
|
||||
|
||||
//
|
||||
// C API (json_parse and build_parsed_json) declarations
|
||||
//
|
||||
|
||||
// Parse a document found in buf.
|
||||
//
|
||||
// The content should be a valid JSON document encoded as UTF-8. If there is a
|
||||
// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
|
||||
// discouraged.
|
||||
//
|
||||
// You need to preallocate document::parser with a capacity of len (e.g.,
|
||||
// parser.allocate_capacity(len)).
|
||||
//
|
||||
// The function returns simdjson::SUCCESS (an integer = 0) in case of a success
|
||||
// or an error code from simdjson/simdjson.h in case of failure such as
|
||||
// simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth;
|
||||
// the simdjson::error_message function converts these error codes into a
|
||||
// string).
|
||||
//
|
||||
// You can also check validity by calling parser.is_valid(). The same document::parser can
|
||||
// be reused for other documents.
|
||||
//
|
||||
// If realloc_if_needed is true (default) then a temporary buffer is created
|
||||
// when needed during processing (a copy of the input string is made). The input
|
||||
// buf should be readable up to buf + len + SIMDJSON_PADDING if
|
||||
// realloc_if_needed is false, all bytes at and after buf + len are ignored
|
||||
// (can be garbage). The document::parser object can be reused.
|
||||
|
||||
int json_parse(const uint8_t *buf, size_t len, document::parser &parser,
|
||||
bool realloc_if_needed = true);
|
||||
|
||||
// Parse a document found in buf.
|
||||
//
|
||||
// The content should be a valid JSON document encoded as UTF-8. If there is a
|
||||
// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
|
||||
// discouraged.
|
||||
//
|
||||
// You need to preallocate document::parser with a capacity of len (e.g.,
|
||||
// parser.allocate_capacity(len)).
|
||||
//
|
||||
// The function returns simdjson::SUCCESS (an integer = 0) in case of a success
|
||||
// or an error code from simdjson/simdjson.h in case of failure such as
|
||||
// simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth;
|
||||
// the simdjson::error_message function converts these error codes into a
|
||||
// string).
|
||||
//
|
||||
// You can also check validity
|
||||
// by calling parser.is_valid(). The same document::parser can be reused for other
|
||||
// documents.
|
||||
//
|
||||
// If realloc_if_needed is true (default) then a temporary buffer is created
|
||||
// when needed during processing (a copy of the input string is made). The input
|
||||
// buf should be readable up to buf + len + SIMDJSON_PADDING if
|
||||
// realloc_if_needed is false, all bytes at and after buf + len are ignored
|
||||
// (can be garbage). The document::parser object can be reused.
|
||||
int json_parse(const char *buf, size_t len, document::parser &parser,
|
||||
bool realloc_if_needed = true);
|
||||
|
||||
// We do not want to allow implicit conversion from C string to std::string.
|
||||
int json_parse(const char *buf, document::parser &parser) = delete;
|
||||
|
||||
// Parse a document found in string s.
|
||||
// You need to preallocate document::parser with a capacity of len (e.g.,
|
||||
// parser.allocate_capacity(len)).
|
||||
//
|
||||
// The function returns simdjson::SUCCESS (an integer = 0) in case of a success
|
||||
// or an error code from simdjson/simdjson.h in case of failure such as
|
||||
// simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth;
|
||||
// the simdjson::error_message function converts these error codes into a
|
||||
// string).
|
||||
//
|
||||
// A temporary buffer is created when needed during processing
|
||||
// (a copy of the input string is made).
|
||||
inline int json_parse(const std::string &s, document::parser &parser) {
|
||||
return json_parse(s.data(), s.length(), parser, true);
|
||||
}
|
||||
|
||||
// Parse a document found in string s.
|
||||
//
|
||||
// The content should be a valid JSON document encoded as UTF-8. If there is a
|
||||
// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
|
||||
// discouraged.
|
||||
//
|
||||
// You need to preallocate document::parser with a capacity of len (e.g.,
|
||||
// parser.allocate_capacity(len)).
|
||||
//
|
||||
// The function returns simdjson::SUCCESS (an integer = 0) in case of a success
|
||||
// or an error code from simdjson/simdjson.h in case of failure such as
|
||||
// simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth;
|
||||
// the simdjson::error_message function converts these error codes into a
|
||||
// string).
|
||||
//
|
||||
// You can also check validity
|
||||
// by calling parser.is_valid(). The same document::parser can be reused for other
|
||||
// documents.
|
||||
inline int json_parse(const padded_string &s, document::parser &parser) {
|
||||
return json_parse(s.data(), s.length(), parser, false);
|
||||
}
|
||||
|
||||
// Build a document::parser object. You can check validity
|
||||
// by calling parser.is_valid(). This does the memory allocation needed for
|
||||
// document::parser. If realloc_if_needed is true (default) then a temporary buffer is
|
||||
// created when needed during processing (a copy of the input string is made).
|
||||
//
|
||||
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if
|
||||
// realloc_if_needed is false, all bytes at and after buf + len are ignored
|
||||
// (can be garbage).
|
||||
//
|
||||
// The content should be a valid JSON document encoded as UTF-8. If there is a
|
||||
// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
|
||||
// discouraged.
|
||||
//
|
||||
// This is a convenience function which calls json_parse.
|
||||
WARN_UNUSED
|
||||
document::parser build_parsed_json(const uint8_t *buf, size_t len,
|
||||
bool realloc_if_needed = true);
|
||||
|
||||
WARN_UNUSED
|
||||
// Build a document::parser object. You can check validity
|
||||
// by calling parser.is_valid(). This does the memory allocation needed for
|
||||
// document::parser. If realloc_if_needed is true (default) then a temporary buffer is
|
||||
// created when needed during processing (a copy of the input string is made).
|
||||
//
|
||||
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if
|
||||
// realloc_if_needed is false, all bytes at and after buf + len are ignored
|
||||
// (can be garbage).
|
||||
//
|
||||
//
|
||||
// The content should be a valid JSON document encoded as UTF-8. If there is a
|
||||
// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
|
||||
// discouraged.
|
||||
//
|
||||
// This is a convenience function which calls json_parse.
|
||||
inline document::parser build_parsed_json(const char *buf, size_t len,
                                          bool realloc_if_needed = true) {
  // Reinterpret the characters as bytes and defer to the uint8_t overload.
  const auto *bytes = reinterpret_cast<const uint8_t *>(buf);
  return build_parsed_json(bytes, len, realloc_if_needed);
}
|
||||
|
||||
// We do not want to allow implicit conversion from C string to std::string.
|
||||
document::parser build_parsed_json(const char *buf) = delete;
|
||||
|
||||
// Parse a document found in string s.
|
||||
// You need to preallocate document::parser with a capacity of len (e.g.,
|
||||
// parser.allocate_capacity(len)). Return SUCCESS (an integer = 0) in case of a
|
||||
// success. You can also check validity by calling parser.is_valid(). The same
|
||||
// document::parser can be reused for other documents.
|
||||
//
|
||||
// A temporary buffer is created when needed during processing
|
||||
// (a copy of the input string is made).
|
||||
//
|
||||
// The content should be a valid JSON document encoded as UTF-8. If there is a
|
||||
// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
|
||||
// discouraged.
|
||||
//
|
||||
// This is a convenience function which calls json_parse.
|
||||
WARN_UNUSED
inline document::parser build_parsed_json(const std::string &s) {
  // Always request a temporary copy of the input (realloc_if_needed = true).
  constexpr bool realloc_if_needed = true;
  return build_parsed_json(s.data(), s.length(), realloc_if_needed);
}
|
||||
|
||||
// Parse a document found in string s.
|
||||
// You need to preallocate document::parser with a capacity of len (e.g.,
|
||||
// parser.allocate_capacity(len)). Return SUCCESS (an integer = 0) in case of a
|
||||
// success. You can also check validity by calling parser.is_valid(). The same
|
||||
// document::parser can be reused for other documents.
|
||||
//
|
||||
// The content should be a valid JSON document encoded as UTF-8. If there is a
|
||||
// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
|
||||
// discouraged.
|
||||
//
|
||||
// This is a convenience function which calls json_parse.
|
||||
WARN_UNUSED
inline document::parser build_parsed_json(const padded_string &s) {
  // Forward the padded_string's bytes with realloc_if_needed = false, so no
  // temporary copy is requested.
  constexpr bool realloc_if_needed = false;
  return build_parsed_json(s.data(), s.length(), realloc_if_needed);
}
|
||||
|
||||
|
||||
//
|
||||
// Stage 1 implementation declarations
|
||||
//
|
||||
|
||||
// Setting the streaming parameter to true allows the find_structural_bits to tolerate unclosed strings.
|
||||
// The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
|
||||
// you may want to call on a function like trimmed_length_safe_utf8.
|
||||
// A function like find_last_json_buf_idx may also prove useful.
|
||||
template <architecture T = architecture::NATIVE>
|
||||
int find_structural_bits(const uint8_t *buf, size_t len, document::parser &parser, bool streaming);
|
||||
|
||||
// Setting the streaming parameter to true allows the find_structural_bits to tolerate unclosed strings.
|
||||
// The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
|
||||
// you may want to call on a function like trimmed_length_safe_utf8.
|
||||
// A function like find_last_json_buf_idx may also prove useful.
|
||||
template <architecture T = architecture::NATIVE>
|
||||
int find_structural_bits(const char *buf, size_t len, document::parser &parser, bool streaming) {
|
||||
return find_structural_bits<T>((const uint8_t *)buf, len, parser, streaming);
|
||||
}
|
||||
|
||||
template <architecture T = architecture::NATIVE>
|
||||
int find_structural_bits(const uint8_t *buf, size_t len, document::parser &parser) {
|
||||
return find_structural_bits<T>(buf, len, parser, false);
|
||||
}
|
||||
|
||||
template <architecture T = architecture::NATIVE>
|
||||
int find_structural_bits(const char *buf, size_t len, document::parser &parser) {
|
||||
return find_structural_bits<T>((const uint8_t *)buf, len, parser);
|
||||
}
|
||||
|
||||
//
|
||||
// Stage 2 implementation declarations
|
||||
//
|
||||
|
||||
template <architecture T = architecture::NATIVE>
|
||||
WARN_UNUSED int
|
||||
unified_machine(const uint8_t *buf, size_t len, document::parser &parser);
|
||||
|
||||
template <architecture T = architecture::NATIVE>
|
||||
WARN_UNUSED int
|
||||
unified_machine(const char *buf, size_t len, document::parser &parser) {
|
||||
return unified_machine<T>(reinterpret_cast<const uint8_t *>(buf), len, parser);
|
||||
}
|
||||
|
||||
|
||||
// Streaming
|
||||
template <architecture T = architecture::NATIVE>
|
||||
WARN_UNUSED int
|
||||
unified_machine(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json);
|
||||
|
||||
template <architecture T = architecture::NATIVE>
|
||||
int unified_machine(const char *buf, size_t len, document::parser &parser, size_t &next_json) {
|
||||
return unified_machine<T>(reinterpret_cast<const uint8_t *>(buf), len, parser, next_json);
|
||||
}
|
||||
|
||||
} // namespace simdjson
|
||||
|
||||
//
|
||||
// Inline implementation
|
||||
//
|
||||
|
||||
#include "simdjson/document_parser.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
#include "simdjson/stage2_build_tape.h"
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
inline document::doc_ref_result document::parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) {
  // Run the parse through json_parse and keep its result code.
  const auto code = static_cast<error_code>(json_parse(buf, len, *this, realloc_if_needed));
  // After the call, the parser's own status members are set back to
  // "not valid / UNINITIALIZED"; the outcome travels only in the returned
  // result (a reference to this parser's document plus the code).
  error = UNINITIALIZED;
  valid = false;
  return document::doc_ref_result(doc, code);
}
|
||||
really_inline document::doc_ref_result document::parser::parse(const char *buf, size_t len, bool realloc_if_needed) {
  // View the characters as bytes and defer to the uint8_t overload.
  const auto *bytes = reinterpret_cast<const uint8_t *>(buf);
  return parse(bytes, len, realloc_if_needed);
}
|
||||
really_inline document::doc_ref_result document::parser::parse(const std::string &s, bool realloc_if_needed) {
  // Hand the string's buffer and length to the pointer/length overload.
  const char *text = s.data();
  const size_t text_len = s.length();
  return parse(text, text_len, realloc_if_needed);
}
|
||||
really_inline document::doc_ref_result document::parser::parse(const padded_string &s) {
  // Forward the padded_string's bytes with realloc_if_needed = false, so no
  // temporary copy is requested.
  constexpr bool realloc_if_needed = false;
  return parse(s.data(), s.length(), realloc_if_needed);
}
|
||||
|
||||
inline document::doc_result document::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) {
  // One-shot parse: build a throwaway parser sized for this input.
  document::parser parser;
  const bool allocated = parser.allocate_capacity(len);
  if (!allocated) {
    return MEMALLOC;
  }
  // Parse, then move the document out of the temporary parser into the
  // result, together with the error code.
  auto [doc, error] = parser.parse(buf, len, realloc_if_needed);
  return document::doc_result(static_cast<document &&>(doc), error);
}
|
||||
inline document::doc_result document::parse(const char *buf, size_t len, bool realloc_if_needed) {
  // View the characters as bytes and defer to the uint8_t overload.
  const auto *bytes = reinterpret_cast<const uint8_t *>(buf);
  return parse(bytes, len, realloc_if_needed);
}
|
||||
inline document::doc_result document::parse(const std::string &s, bool realloc_if_needed) {
  // Hand the string's buffer and length to the pointer/length overload.
  const char *text = s.data();
  const size_t text_len = s.length();
  return parse(text, text_len, realloc_if_needed);
}
|
||||
inline document::doc_result document::parse(const padded_string &s) {
  // Forward the padded_string's bytes with realloc_if_needed = false, so no
  // temporary copy is requested.
  constexpr bool realloc_if_needed = false;
  return parse(s.data(), s.length(), realloc_if_needed);
}
|
||||
|
||||
// json_parse_implementation is the generic function, it is specialized for
|
||||
// various architectures, e.g., as
|
||||
// json_parse_implementation<architecture::HASWELL> or
|
||||
// json_parse_implementation<architecture::ARM64>
|
||||
template <architecture T>
|
||||
int json_parse_implementation(const uint8_t *buf, size_t len, document::parser &parser,
|
||||
bool realloc_if_needed = true) {
|
||||
int result = parser.init_parse(len);
|
||||
if (result != SUCCESS) { return result; }
|
||||
bool reallocated = false;
|
||||
if (realloc_if_needed) {
|
||||
const uint8_t *tmp_buf = buf;
|
||||
buf = (uint8_t *)allocate_padded_buffer(len);
|
||||
if (buf == NULL)
|
||||
return simdjson::MEMALLOC;
|
||||
memcpy((void *)buf, tmp_buf, len);
|
||||
reallocated = true;
|
||||
}
|
||||
int stage1_err = simdjson::find_structural_bits<T>(buf, len, parser);
|
||||
if (stage1_err != simdjson::SUCCESS) {
|
||||
if (reallocated) { // must free before we exit
|
||||
aligned_free((void *)buf);
|
||||
}
|
||||
return stage1_err;
|
||||
}
|
||||
int res = unified_machine<T>(buf, len, parser);
|
||||
if (reallocated) {
|
||||
aligned_free((void *)buf);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
} // namespace simdjson
|
||||
|
||||
#endif // SIMDJSON_DOCUMENT_PARSER_H
|
|
@ -1,219 +1,8 @@
|
|||
#ifndef SIMDJSON_JSONPARSER_H
|
||||
#define SIMDJSON_JSONPARSER_H
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/jsonioutil.h"
|
||||
#include "simdjson/padded_string.h"
|
||||
|
||||
#include "simdjson/document.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/simdjson.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
#include "simdjson/stage2_build_tape.h"
|
||||
#include <string>
|
||||
#include "simdjson/jsonioutil.h"
|
||||
|
||||
namespace simdjson {
|
||||
// json_parse_implementation is the generic function, it is specialized for
|
||||
// various architectures, e.g., as
|
||||
// json_parse_implementation<architecture::HASWELL> or
|
||||
// json_parse_implementation<architecture::ARM64>
|
||||
template <architecture T>
|
||||
int json_parse_implementation(const uint8_t *buf, size_t len, document::parser &parser,
|
||||
bool realloc_if_needed = true) {
|
||||
int result = parser.init_parse(len);
|
||||
if (result != SUCCESS) { return result; }
|
||||
bool reallocated = false;
|
||||
if (realloc_if_needed) {
|
||||
const uint8_t *tmp_buf = buf;
|
||||
buf = (uint8_t *)allocate_padded_buffer(len);
|
||||
if (buf == NULL)
|
||||
return simdjson::MEMALLOC;
|
||||
memcpy((void *)buf, tmp_buf, len);
|
||||
reallocated = true;
|
||||
}
|
||||
int stage1_err = simdjson::find_structural_bits<T>(buf, len, parser);
|
||||
if (stage1_err != simdjson::SUCCESS) {
|
||||
if (reallocated) { // must free before we exit
|
||||
aligned_free((void *)buf);
|
||||
}
|
||||
return stage1_err;
|
||||
}
|
||||
int res = unified_machine<T>(buf, len, parser);
|
||||
if (reallocated) {
|
||||
aligned_free((void *)buf);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
// Parse a document found in buf.
|
||||
//
|
||||
// The content should be a valid JSON document encoded as UTF-8. If there is a
|
||||
// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
|
||||
// discouraged.
|
||||
//
|
||||
// You need to preallocate document::parser with a capacity of len (e.g.,
|
||||
// parser.allocate_capacity(len)).
|
||||
//
|
||||
// The function returns simdjson::SUCCESS (an integer = 0) in case of a success
|
||||
// or an error code from simdjson/simdjson.h in case of failure such as
|
||||
// simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth;
|
||||
// the simdjson::error_message function converts these error codes into a
|
||||
// string).
|
||||
//
|
||||
// You can also check validity by calling parser.is_valid(). The same document::parser can
|
||||
// be reused for other documents.
|
||||
//
|
||||
// If realloc_if_needed is true (default) then a temporary buffer is created
|
||||
// when needed during processing (a copy of the input string is made). The input
|
||||
// buf should be readable up to buf + len + SIMDJSON_PADDING if
|
||||
// realloc_if_needed is false, all bytes at and after buf + len are ignored
|
||||
// (can be garbage). The document::parser object can be reused.
|
||||
|
||||
int json_parse(const uint8_t *buf, size_t len, document::parser &parser,
|
||||
bool realloc_if_needed = true);
|
||||
|
||||
// Parse a document found in buf.
|
||||
//
|
||||
// The content should be a valid JSON document encoded as UTF-8. If there is a
|
||||
// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
|
||||
// discouraged.
|
||||
//
|
||||
// You need to preallocate document::parser with a capacity of len (e.g.,
|
||||
// parser.allocate_capacity(len)).
|
||||
//
|
||||
// The function returns simdjson::SUCCESS (an integer = 0) in case of a success
|
||||
// or an error code from simdjson/simdjson.h in case of failure such as
|
||||
// simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth;
|
||||
// the simdjson::error_message function converts these error codes into a
|
||||
// string).
|
||||
//
|
||||
// You can also check validity
|
||||
// by calling parser.is_valid(). The same document::parser can be reused for other
|
||||
// documents.
|
||||
//
|
||||
// If realloc_if_needed is true (default) then a temporary buffer is created
|
||||
// when needed during processing (a copy of the input string is made). The input
|
||||
// buf should be readable up to buf + len + SIMDJSON_PADDING if
|
||||
// realloc_if_needed is false, all bytes at and after buf + len are ignored
|
||||
// (can be garbage). The document::parser object can be reused.
|
||||
int json_parse(const char *buf, size_t len, document::parser &parser,
|
||||
bool realloc_if_needed = true);
|
||||
|
||||
// We do not want to allow implicit conversion from C string to std::string.
|
||||
int json_parse(const char *buf, document::parser &parser) = delete;
|
||||
|
||||
// Parse a document found in string s.
|
||||
// You need to preallocate document::parser with a capacity of len (e.g.,
|
||||
// parser.allocate_capacity(len)).
|
||||
//
|
||||
// The function returns simdjson::SUCCESS (an integer = 0) in case of a success
|
||||
// or an error code from simdjson/simdjson.h in case of failure such as
|
||||
// simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth;
|
||||
// the simdjson::error_message function converts these error codes into a
|
||||
// string).
|
||||
//
|
||||
// A temporary buffer is created when needed during processing
|
||||
// (a copy of the input string is made).
|
||||
inline int json_parse(const std::string &s, document::parser &parser) {
|
||||
return json_parse(s.data(), s.length(), parser, true);
|
||||
}
|
||||
|
||||
// Parse a document found in string s.
|
||||
//
|
||||
// The content should be a valid JSON document encoded as UTF-8. If there is a
|
||||
// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
|
||||
// discouraged.
|
||||
//
|
||||
// You need to preallocate document::parser with a capacity of len (e.g.,
|
||||
// parser.allocate_capacity(len)).
|
||||
//
|
||||
// The function returns simdjson::SUCCESS (an integer = 0) in case of a success
|
||||
// or an error code from simdjson/simdjson.h in case of failure such as
|
||||
// simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth;
|
||||
// the simdjson::error_message function converts these error codes into a
|
||||
// string).
|
||||
//
|
||||
// You can also check validity
|
||||
// by calling parser.is_valid(). The same document::parser can be reused for other
|
||||
// documents.
|
||||
inline int json_parse(const padded_string &s, document::parser &parser) {
|
||||
return json_parse(s.data(), s.length(), parser, false);
|
||||
}
|
||||
|
||||
// Build a document::parser object. You can check validity
|
||||
// by calling parser.is_valid(). This does the memory allocation needed for
|
||||
// document::parser. If realloc_if_needed is true (default) then a temporary buffer is
|
||||
// created when needed during processing (a copy of the input string is made).
|
||||
//
|
||||
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if
|
||||
// realloc_if_needed is false, all bytes at and after buf + len are ignored
|
||||
// (can be garbage).
|
||||
//
|
||||
// The content should be a valid JSON document encoded as UTF-8. If there is a
|
||||
// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
|
||||
// discouraged.
|
||||
//
|
||||
// This is a convenience function which calls json_parse.
|
||||
WARN_UNUSED
|
||||
document::parser build_parsed_json(const uint8_t *buf, size_t len,
|
||||
bool realloc_if_needed = true);
|
||||
|
||||
WARN_UNUSED
|
||||
// Build a document::parser object. You can check validity
|
||||
// by calling parser.is_valid(). This does the memory allocation needed for
|
||||
// document::parser. If realloc_if_needed is true (default) then a temporary buffer is
|
||||
// created when needed during processing (a copy of the input string is made).
|
||||
//
|
||||
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if
|
||||
// realloc_if_needed is false, all bytes at and after buf + len are ignored
|
||||
// (can be garbage).
|
||||
//
|
||||
//
|
||||
// The content should be a valid JSON document encoded as UTF-8. If there is a
|
||||
// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
|
||||
// discouraged.
|
||||
//
|
||||
// This is a convenience function which calls json_parse.
|
||||
inline document::parser build_parsed_json(const char *buf, size_t len,
                                          bool realloc_if_needed = true) {
  // Reinterpret the characters as bytes and defer to the uint8_t overload.
  const auto *bytes = reinterpret_cast<const uint8_t *>(buf);
  return build_parsed_json(bytes, len, realloc_if_needed);
}
|
||||
|
||||
// We do not want to allow implicit conversion from C string to std::string.
|
||||
document::parser build_parsed_json(const char *buf) = delete;
|
||||
|
||||
// Parse a document found in string s.
|
||||
// You need to preallocate document::parser with a capacity of len (e.g.,
|
||||
// parser.allocate_capacity(len)). Return SUCCESS (an integer = 0) in case of a
|
||||
// success. You can also check validity by calling parser.is_valid(). The same
|
||||
// document::parser can be reused for other documents.
|
||||
//
|
||||
// A temporary buffer is created when needed during processing
|
||||
// (a copy of the input string is made).
|
||||
//
|
||||
// The content should be a valid JSON document encoded as UTF-8. If there is a
|
||||
// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
|
||||
// discouraged.
|
||||
//
|
||||
// This is a convenience function which calls json_parse.
|
||||
WARN_UNUSED
inline document::parser build_parsed_json(const std::string &s) {
  // Always request a temporary copy of the input (realloc_if_needed = true).
  constexpr bool realloc_if_needed = true;
  return build_parsed_json(s.data(), s.length(), realloc_if_needed);
}
|
||||
|
||||
// Parse a document found in string s.
|
||||
// You need to preallocate document::parser with a capacity of len (e.g.,
|
||||
// parser.allocate_capacity(len)). Return SUCCESS (an integer = 0) in case of a
|
||||
// success. You can also check validity by calling parser.is_valid(). The same
|
||||
// document::parser can be reused for other documents.
|
||||
//
|
||||
// The content should be a valid JSON document encoded as UTF-8. If there is a
|
||||
// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
|
||||
// discouraged.
|
||||
//
|
||||
// This is a convenience function which calls json_parse.
|
||||
WARN_UNUSED
inline document::parser build_parsed_json(const padded_string &s) {
  // Forward the padded_string's bytes with realloc_if_needed = false, so no
  // temporary copy is requested.
  constexpr bool realloc_if_needed = false;
  return build_parsed_json(s.data(), s.length(), realloc_if_needed);
}
|
||||
|
||||
} // namespace simdjson
|
||||
#endif
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#ifndef SIMDJSON_PARSEDJSONITERATOR_H
|
||||
#define SIMDJSON_PARSEDJSONITERATOR_H
|
||||
|
||||
#include "document/iterator.h"
|
||||
#include "document_iterator.h"
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,39 +1,6 @@
|
|||
#ifndef SIMDJSON_STAGE1_FIND_MARKS_H
|
||||
#define SIMDJSON_STAGE1_FIND_MARKS_H
|
||||
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/simdjson.h"
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
// Setting the streaming parameter to true allows the find_structural_bits to tolerate unclosed strings.
|
||||
// The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
|
||||
// you may want to call on a function like trimmed_length_safe_utf8.
|
||||
// A function like find_last_json_buf_idx may also prove useful.
|
||||
template <architecture T = architecture::NATIVE>
|
||||
int find_structural_bits(const uint8_t *buf, size_t len, document::parser &parser, bool streaming);
|
||||
|
||||
// Setting the streaming parameter to true allows the find_structural_bits to tolerate unclosed strings.
|
||||
// The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
|
||||
// you may want to call on a function like trimmed_length_safe_utf8.
|
||||
// A function like find_last_json_buf_idx may also prove useful.
|
||||
template <architecture T = architecture::NATIVE>
|
||||
int find_structural_bits(const char *buf, size_t len, document::parser &parser, bool streaming) {
|
||||
return find_structural_bits<T>((const uint8_t *)buf, len, parser, streaming);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template <architecture T = architecture::NATIVE>
|
||||
int find_structural_bits(const uint8_t *buf, size_t len, document::parser &parser) {
|
||||
return find_structural_bits<T>(buf, len, parser, false);
|
||||
}
|
||||
|
||||
template <architecture T = architecture::NATIVE>
|
||||
int find_structural_bits(const char *buf, size_t len, document::parser &parser) {
|
||||
return find_structural_bits<T>((const uint8_t *)buf, len, parser);
|
||||
}
|
||||
|
||||
} // namespace simdjson
|
||||
#include "simdjson/document.h"
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,35 +1,6 @@
|
|||
#ifndef SIMDJSON_STAGE2_BUILD_TAPE_H
|
||||
#define SIMDJSON_STAGE2_BUILD_TAPE_H
|
||||
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/simdjson.h"
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
// Declaration only: the byte-buffer (uint8_t) overload of unified_machine,
// templated on the target architecture (defaults to architecture::NATIVE).
// The result is marked WARN_UNUSED. No definition is visible in this header
// section.
// NOTE(review): presumably the stage 2 (tape building) entry point, judging by
// the header guard name - confirm against the implementation.
template <architecture T = architecture::NATIVE>
|
||||
WARN_UNUSED int
|
||||
unified_machine(const uint8_t *buf, size_t len, document::parser &parser);
|
||||
|
||||
template <architecture T = architecture::NATIVE>
|
||||
WARN_UNUSED int
|
||||
unified_machine(const char *buf, size_t len, document::parser &parser) {
|
||||
return unified_machine<T>(reinterpret_cast<const uint8_t *>(buf), len, parser);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Streaming
|
||||
// Declaration only: the byte-buffer (uint8_t) overload of unified_machine that
// additionally takes `next_json` by non-const reference. The result is marked
// WARN_UNUSED. No definition is visible in this header section.
// NOTE(review): `next_json` is presumably updated to the position of the next
// document in the buffer - confirm against the implementation.
template <architecture T = architecture::NATIVE>
|
||||
WARN_UNUSED int
|
||||
unified_machine(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json);
|
||||
|
||||
template <architecture T = architecture::NATIVE>
|
||||
int unified_machine(const char *buf, size_t len, document::parser &parser, size_t &next_json) {
|
||||
return unified_machine<T>(reinterpret_cast<const uint8_t *>(buf), len, parser, next_json);
|
||||
}
|
||||
|
||||
|
||||
} // namespace simdjson
|
||||
#include "simdjson/document.h"
|
||||
|
||||
#endif
|
||||
|
|
|
@ -28,7 +28,7 @@ set(SIMDJSON_SRC
|
|||
stage1_find_marks.cpp
|
||||
stage2_build_tape.cpp
|
||||
document.cpp
|
||||
document/parser.cpp
|
||||
document_parser.cpp
|
||||
error.cpp
|
||||
)
|
||||
|
||||
|
|
|
@ -3,47 +3,6 @@
|
|||
|
||||
namespace simdjson {
|
||||
|
||||
// This is the internal one all others end up calling
|
||||
// Core non-throwing parse entry point (the original comment on this function
// notes that all the other parse variants end up calling it).
//
//   buf               - input bytes, forwarded to json_parse
//   len               - forwarded to json_parse
//   realloc_if_needed - forwarded to json_parse as its last argument
//
// Runs json_parse on this parser and returns its result converted to
// error_code.
error_code document::parser::try_parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept {
  // Named cast rather than a C-style cast; the conversion itself is unchanged.
  return static_cast<error_code>(json_parse(buf, len, *this, realloc_if_needed));
}
|
||||
|
||||
// Non-throwing parse that also reports where the parsed document lives.
// On SUCCESS `dst` is pointed at the parser's own `doc` member; on any other
// result `dst` is set to nullptr. The error code from the core try_parse is
// returned either way.
error_code document::parser::try_parse(const uint8_t *buf, size_t len, const document *& dst, bool realloc_if_needed) noexcept {
  const error_code status = try_parse(buf, len, realloc_if_needed);
  if (status == SUCCESS) {
    dst = &doc;
  } else {
    dst = nullptr;
  }
  return status;
}
|
||||
|
||||
// Non-throwing parse that hands the parsed document over to the caller.
//
//   buf               - input bytes, forwarded to try_parse
//   len               - forwarded to try_parse
//   dst               - receives the parser's document on success; left
//                       untouched on failure
//   realloc_if_needed - forwarded to try_parse
//
// Returns the error code from try_parse. On SUCCESS the parser's `doc` is
// assigned to `dst` as an rvalue, and the parser is then marked as holding no
// valid document (valid = false, error = UNINITIALIZED).
error_code document::parser::try_parse_into(const uint8_t *buf, size_t len, document & dst, bool realloc_if_needed) noexcept {
  const error_code status = try_parse(buf, len, realloc_if_needed);
  if (status != SUCCESS) {
    return status;
  }
  // Take the document. The named cast replaces the C-style (document&&) cast;
  // the expression is still an rvalue reference to `doc`.
  dst = static_cast<document &&>(doc);
  // Document has been taken; there is no valid document anymore.
  valid = false;
  error = UNINITIALIZED;
  return status;
}
|
||||
|
||||
// Throwing parse: runs the pointer-returning try_parse and, if it reports a
// non-zero error code, throws invalid_json carrying that code. Otherwise
// returns a reference to the document try_parse pointed `parsed` at.
const document &document::parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) {
  const document *parsed;
  const error_code status = try_parse(buf, len, parsed, realloc_if_needed);
  if (status) {
    throw invalid_json(status);
  }
  return *parsed;
}
|
||||
|
||||
// Throwing parse that returns the document by value: parses into a local
// document via try_parse_into, throws invalid_json with the error code if that
// code is non-zero, and otherwise returns the local document.
document document::parser::parse_new(const uint8_t *buf, size_t len, bool realloc_if_needed) {
  document fresh_doc;
  const error_code status = try_parse_into(buf, len, fresh_doc, realloc_if_needed);
  if (status) {
    throw invalid_json(status);
  }
  return fresh_doc;
}
|
||||
|
||||
WARN_UNUSED
|
||||
error_code document::parser::init_parse(size_t len) {
|
||||
if (len > capacity()) {
|
|
@ -11,17 +11,17 @@ namespace simdjson {
|
|||
// instruction sets.
|
||||
|
||||
// function pointer type for json_parse
|
||||
using json_parse_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj, bool realloc);
|
||||
using json_parse_functype = int(const uint8_t *buf, size_t len, document::parser &pj, bool realloc);
|
||||
|
||||
// Pointer that holds the json_parse implementation corresponding to the
|
||||
// available SIMD instruction set
|
||||
extern std::atomic<json_parse_functype *> json_parse_ptr;
|
||||
|
||||
int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool realloc) {
|
||||
int json_parse(const uint8_t *buf, size_t len, document::parser &pj, bool realloc) {
|
||||
return json_parse_ptr.load(std::memory_order_relaxed)(buf, len, pj, realloc);
|
||||
}
|
||||
|
||||
int json_parse(const char *buf, size_t len, ParsedJson &pj, bool realloc) {
|
||||
int json_parse(const char *buf, size_t len, document::parser &pj, bool realloc) {
|
||||
return json_parse_ptr.load(std::memory_order_relaxed)(reinterpret_cast<const uint8_t *>(buf), len, pj,
|
||||
realloc);
|
||||
}
|
||||
|
@ -53,7 +53,7 @@ architecture parse_architecture(char *arch) {
|
|||
}
|
||||
|
||||
// Responsible for selecting the best json_parse implementation
|
||||
int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj, bool realloc) {
|
||||
int json_parse_dispatch(const uint8_t *buf, size_t len, document::parser &pj, bool realloc) {
|
||||
architecture best_implementation = find_best_supported_architecture();
|
||||
// Selecting the best implementation
|
||||
switch (best_implementation) {
|
||||
|
@ -81,12 +81,12 @@ int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj, bool rea
|
|||
std::atomic<json_parse_functype *> json_parse_ptr{&json_parse_dispatch};
|
||||
|
||||
WARN_UNUSED
|
||||
ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool realloc) {
|
||||
ParsedJson pj;
|
||||
bool ok = pj.allocate_capacity(len);
|
||||
// Builds a parser for the given buffer: creates a document::parser, asks it to
// allocate capacity for `len`, runs json_parse only if that allocation
// reported success, and returns the parser by value.
// NOTE(review): this span mixes two variants of the same body; the statements
// that name `pj` appear to be the pre-change side of the diff and the ones
// that name `parser` the post-change side - confirm before editing.
document::parser build_parsed_json(const uint8_t *buf, size_t len, bool realloc) {
|
||||
document::parser parser;
|
||||
bool ok = parser.allocate_capacity(len);
|
||||
// Parsing is skipped entirely when allocate_capacity reports failure.
if (ok) {
|
||||
// The int returned by json_parse is discarded in this function.
json_parse(buf, len, pj, realloc);
|
||||
json_parse(buf, len, parser, realloc);
|
||||
}
|
||||
return pj;
|
||||
return parser;
|
||||
}
|
||||
} // namespace simdjson
|
||||
|
|
Loading…
Reference in New Issue