Merge pull request #647 from simdjson/jkeiser/doxygen-update
Deprecate more, show less private things in doxygen
This commit is contained in:
commit
56bc8a778d
6
Doxyfile
6
Doxyfile
|
@ -582,7 +582,7 @@ HIDE_COMPOUND_REFERENCE= NO
|
|||
# the files that are included by a file in the documentation of that file.
|
||||
# The default value is: YES.
|
||||
|
||||
SHOW_INCLUDE_FILES = YES
|
||||
SHOW_INCLUDE_FILES = NO
|
||||
|
||||
# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
|
||||
# grouped member an include statement to the documentation, telling the reader
|
||||
|
@ -1045,7 +1045,7 @@ SOURCE_BROWSER = YES
|
|||
# classes and enums directly into the documentation.
|
||||
# The default value is: NO.
|
||||
|
||||
INLINE_SOURCES = YES
|
||||
INLINE_SOURCES = NO
|
||||
|
||||
# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any
|
||||
# special comment blocks from generated source code fragments. Normal C, C++ and
|
||||
|
@ -2199,7 +2199,7 @@ INCLUDE_FILE_PATTERNS =
|
|||
# recursively expanded use the := operator instead of the = operator.
|
||||
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
|
||||
|
||||
PREDEFINED =
|
||||
PREDEFINED = SIMDJSON_EXCEPTIONS=1
|
||||
|
||||
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
|
||||
# tag can be used to specify a list of macro names that should be expanded. The
|
||||
|
|
4
Makefile
4
Makefile
|
@ -297,5 +297,5 @@ clean:
|
|||
cleandist:
|
||||
rm -f submodules $(EXTRAOBJECTS) $(MAINEXECUTABLES) $(EXTRA_EXECUTABLES) $(TESTEXECUTABLES) $(COMPARISONEXECUTABLES) $(SUPPLEMENTARYEXECUTABLES)
|
||||
|
||||
doc/api: $(HEADERS)
|
||||
doxygen
|
||||
doc/api: Doxyfile $(HEADERS)
|
||||
doxygen
|
||||
|
|
|
@ -256,7 +256,6 @@ static void print_json(State& state) noexcept {
|
|||
// Prints the number of results in twitter.json
|
||||
padded_string json = get_corpus(JSON_TEST_PATH);
|
||||
dom::parser parser;
|
||||
if (!parser.allocate_capacity(json.length())) { cerr << "allocation failed" << endl; return; }
|
||||
if (int error = json_parse(json, parser); error != SUCCESS) { cerr << error_message(error) << endl; return; }
|
||||
for (auto _ : state) {
|
||||
std::stringstream s;
|
||||
|
|
|
@ -9,10 +9,10 @@ const padded_string EMPTY_ARRAY("[]", 2);
|
|||
SIMDJSON_PUSH_DISABLE_WARNINGS
|
||||
SIMDJSON_DISABLE_DEPRECATED_WARNING
|
||||
static void json_parse(State& state) {
|
||||
dom::parser parser;
|
||||
if (parser.set_capacity(EMPTY_ARRAY.length())) { return; }
|
||||
ParsedJson pj;
|
||||
if (!pj.allocate_capacity(EMPTY_ARRAY.length())) { return; }
|
||||
for (auto _ : state) {
|
||||
auto error = simdjson::json_parse(EMPTY_ARRAY, parser);
|
||||
auto error = json_parse(EMPTY_ARRAY, pj);
|
||||
if (error) { return; }
|
||||
}
|
||||
}
|
||||
|
@ -20,7 +20,7 @@ SIMDJSON_POP_DISABLE_WARNINGS
|
|||
BENCHMARK(json_parse);
|
||||
static void parser_parse_error_code(State& state) {
|
||||
dom::parser parser;
|
||||
if (parser.set_capacity(EMPTY_ARRAY.length())) { return; }
|
||||
if (parser.allocate(EMPTY_ARRAY.length())) { return; }
|
||||
for (auto _ : state) {
|
||||
auto [doc, error] = parser.parse(EMPTY_ARRAY);
|
||||
if (error) { return; }
|
||||
|
@ -29,7 +29,7 @@ static void parser_parse_error_code(State& state) {
|
|||
BENCHMARK(parser_parse_error_code);
|
||||
static void parser_parse_exception(State& state) {
|
||||
dom::parser parser;
|
||||
if (parser.set_capacity(EMPTY_ARRAY.length())) { return; }
|
||||
if (parser.allocate(EMPTY_ARRAY.length())) { return; }
|
||||
for (auto _ : state) {
|
||||
try {
|
||||
UNUSED dom::element doc = parser.parse(EMPTY_ARRAY);
|
||||
|
|
|
@ -294,7 +294,10 @@ struct benchmarker {
|
|||
// Allocate dom::parser
|
||||
collector.start();
|
||||
dom::parser parser;
|
||||
bool alloc_ok = parser.allocate_capacity(json.size());
|
||||
error_code error = parser.allocate(json.size());
|
||||
if (error) {
|
||||
exit_error(string("Unable to allocate_stage ") + to_string(json.size()) + " bytes for the JSON result: " + error_message(error));
|
||||
}
|
||||
event_count allocate_count = collector.end();
|
||||
allocate_stage << allocate_count;
|
||||
// Run it once to get hot buffers
|
||||
|
@ -305,14 +308,11 @@ struct benchmarker {
|
|||
}
|
||||
}
|
||||
|
||||
if (!alloc_ok) {
|
||||
exit_error(string("Unable to allocate_stage ") + to_string(json.size()) + " bytes for the JSON result.");
|
||||
}
|
||||
verbose() << "[verbose] allocated memory for parsed JSON " << endl;
|
||||
|
||||
// Stage 1 (find structurals)
|
||||
collector.start();
|
||||
error_code error = active_implementation->stage1((const uint8_t *)json.data(), json.size(), parser, false);
|
||||
error = active_implementation->stage1((const uint8_t *)json.data(), json.size(), parser, false);
|
||||
event_count stage1_count = collector.end();
|
||||
stage1 << stage1_count;
|
||||
if (error) {
|
||||
|
|
|
@ -56,25 +56,29 @@ public:
|
|||
temp_result_vec.resize(num_events * 2 + 1);
|
||||
}
|
||||
|
||||
~LinuxEvents() { close(fd); }
|
||||
~LinuxEvents() { if (fd != -1) { close(fd); } }
|
||||
|
||||
inline void start() {
|
||||
if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
|
||||
report_error("ioctl(PERF_EVENT_IOC_RESET)");
|
||||
}
|
||||
if (fd != -1) {
|
||||
if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
|
||||
report_error("ioctl(PERF_EVENT_IOC_RESET)");
|
||||
}
|
||||
|
||||
if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
|
||||
report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
|
||||
if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
|
||||
report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void end(std::vector<unsigned long long> &results) {
|
||||
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
|
||||
report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
|
||||
}
|
||||
if (fd != -1) {
|
||||
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
|
||||
report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
|
||||
}
|
||||
|
||||
if (read(fd, temp_result_vec.data(), temp_result_vec.size() * 8) == -1) {
|
||||
report_error("read");
|
||||
if (read(fd, temp_result_vec.data(), temp_result_vec.size() * 8) == -1) {
|
||||
report_error("read");
|
||||
}
|
||||
}
|
||||
// our actual results are in slots 1,3,5, ... of this structure
|
||||
// we really should be checking our ids obtained earlier to be safe
|
||||
|
|
|
@ -36,7 +36,7 @@ int main (int argc, char *argv[]){
|
|||
for (auto i = 0; i < 3; i++) {
|
||||
//Actual test
|
||||
simdjson::dom::parser parser;
|
||||
simdjson::error_code alloc_error = parser.set_capacity(p.size());
|
||||
simdjson::error_code alloc_error = parser.allocate(p.size());
|
||||
if (alloc_error) {
|
||||
std::cerr << alloc_error << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
|
|
|
@ -161,9 +161,9 @@ int main(int argc, char *argv[]) {
|
|||
#ifdef __linux__
|
||||
simdjson::dom::parser parser;
|
||||
const simdjson::implementation &stage_parser = *simdjson::active_implementation;
|
||||
bool allocok = parser.allocate_capacity(p.size());
|
||||
if (!allocok) {
|
||||
std::cerr << "failed to allocate memory" << std::endl;
|
||||
simdjson::error_code alloc_error = parser.allocate(p.size());
|
||||
if (alloc_error) {
|
||||
std::cerr << alloc_error << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
const uint32_t iterations = p.size() < 1 * 1000 * 1000 ? 1000 : 50;
|
||||
|
|
|
@ -77,7 +77,7 @@ without bound:
|
|||
|
||||
```c++
|
||||
dom::parser parser(0); // This parser will refuse to automatically grow capacity
|
||||
simdjson::error_code allocate_error = parser.set_capacity(1024*1024); // This allocates enough capacity to handle documents <= 1MB
|
||||
simdjson::error_code allocate_error = parser.allocate(1024*1024); // This allocates enough capacity to handle documents <= 1MB
|
||||
if (allocate_error) { cerr << allocate_error << endl; exit(1); }
|
||||
|
||||
for (web_request request : listen()) {
|
||||
|
|
|
@ -86,7 +86,7 @@ class tape_ref;
|
|||
namespace simdjson::dom {
|
||||
|
||||
/**
|
||||
* Represents a JSON array.
|
||||
* JSON array.
|
||||
*/
|
||||
class array : protected internal::tape_ref {
|
||||
public:
|
||||
|
@ -162,7 +162,7 @@ private:
|
|||
};
|
||||
|
||||
/**
|
||||
* Represents a JSON object.
|
||||
* JSON object.
|
||||
*/
|
||||
class object : protected internal::tape_ref {
|
||||
public:
|
||||
|
@ -340,7 +340,9 @@ public:
|
|||
*/
|
||||
bool dump_raw_tape(std::ostream &os) const noexcept;
|
||||
|
||||
/** @private Structural values. */
|
||||
std::unique_ptr<uint64_t[]> tape;
|
||||
|
||||
/** @private String values.
|
||||
*
|
||||
* Should be at least byte_capacity.
|
||||
|
@ -348,7 +350,7 @@ public:
|
|||
std::unique_ptr<uint8_t[]> string_buf;
|
||||
|
||||
private:
|
||||
inline error_code set_capacity(size_t len) noexcept;
|
||||
inline error_code allocate(size_t len) noexcept;
|
||||
template<typename T>
|
||||
friend class simdjson::minify;
|
||||
friend class parser;
|
||||
|
@ -607,15 +609,10 @@ public:
|
|||
* @param max_capacity The maximum document length the parser can automatically handle. The parser
|
||||
* will allocate more capacity on an as-needed basis (when it sees documents too big to handle)
|
||||
* up to this amount. The parser still starts with zero capacity no matter what this number is:
|
||||
* to allocate an initial capacity, call set_capacity() after constructing the parser. Defaults
|
||||
* to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process).
|
||||
* @param max_depth The maximum depth--number of nested objects and arrays--this parser can handle.
|
||||
* This will not be allocated until parse() is called for the first time. Defaults to
|
||||
* DEFAULT_MAX_DEPTH.
|
||||
* to allocate an initial capacity, call allocate() after constructing the parser.
|
||||
* Defaults to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process).
|
||||
*/
|
||||
really_inline parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept;
|
||||
/** Deallocate the JSON parser. */
|
||||
~parser()=default;
|
||||
really_inline parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept;
|
||||
|
||||
/**
|
||||
* Take another parser's buffers and state.
|
||||
|
@ -632,6 +629,9 @@ public:
|
|||
parser &operator=(parser &&other) = default;
|
||||
parser &operator=(const parser &) = delete; ///< @private Disallow copying
|
||||
|
||||
/** Deallocate the JSON parser. */
|
||||
~parser()=default;
|
||||
|
||||
/**
|
||||
* Load a JSON document from a file and return a reference to it.
|
||||
*
|
||||
|
@ -658,11 +658,57 @@ public:
|
|||
*/
|
||||
inline simdjson_result<element> load(const std::string &path) noexcept;
|
||||
|
||||
/**
|
||||
* Parse a JSON document and return a temporary reference to it.
|
||||
*
|
||||
* dom::parser parser;
|
||||
* element doc = parser.parse(buf, len);
|
||||
*
|
||||
* ### IMPORTANT: Document Lifetime
|
||||
*
|
||||
* The JSON document still lives in the parser: this is the most efficient way to parse JSON
|
||||
* documents because it reuses the same buffers, but you *must* use the document before you
|
||||
* destroy the parser or call parse() again.
|
||||
*
|
||||
* ### REQUIRED: Buffer Padding
|
||||
*
|
||||
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
||||
* those bytes are initialized to, as long as they are allocated.
|
||||
*
|
||||
* If realloc_if_needed is true, it is assumed that the buffer does *not* have enough padding,
|
||||
* and it is copied into an enlarged temporary buffer before parsing.
|
||||
*
|
||||
* ### Parser Capacity
|
||||
*
|
||||
* If the parser's current capacity is less than len, it will allocate enough capacity
|
||||
* to handle it (up to max_capacity).
|
||||
*
|
||||
* @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless
|
||||
* realloc_if_needed is true.
|
||||
* @param len The length of the JSON.
|
||||
* @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding.
|
||||
* @return The document, or an error:
|
||||
* - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity,
|
||||
* and memory allocation fails.
|
||||
* - CAPACITY if the parser does not have enough capacity and len > max_capacity.
|
||||
* - other json errors if parsing fails.
|
||||
*/
|
||||
inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept;
|
||||
/** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
|
||||
really_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) noexcept;
|
||||
/** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
|
||||
really_inline simdjson_result<element> parse(const std::string &s) noexcept;
|
||||
/** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
|
||||
really_inline simdjson_result<element> parse(const padded_string &s) noexcept;
|
||||
|
||||
/** @private We do not want to allow implicit conversion from C string to std::string. */
|
||||
really_inline simdjson_result<element> parse(const char *buf) noexcept = delete;
|
||||
|
||||
/**
|
||||
* Load a file containing many JSON documents.
|
||||
*
|
||||
* dom::parser parser;
|
||||
* for (const element doc : parser.parse_many(path)) {
|
||||
* for (const element doc : parser.load_many(path)) {
|
||||
* cout << std::string(doc["title"]) << endl;
|
||||
* }
|
||||
*
|
||||
|
@ -715,143 +761,6 @@ public:
|
|||
*/
|
||||
inline document_stream load_many(const std::string &path, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
|
||||
|
||||
/**
|
||||
* Parse a JSON document and return a temporary reference to it.
|
||||
*
|
||||
* dom::parser parser;
|
||||
* element doc = parser.parse(buf, len);
|
||||
*
|
||||
* ### IMPORTANT: Document Lifetime
|
||||
*
|
||||
* The JSON document still lives in the parser: this is the most efficient way to parse JSON
|
||||
* documents because it reuses the same buffers, but you *must* use the document before you
|
||||
* destroy the parser or call parse() again.
|
||||
*
|
||||
* ### REQUIRED: Buffer Padding
|
||||
*
|
||||
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
||||
* those bytes are initialized to, as long as they are allocated.
|
||||
*
|
||||
* If realloc_if_needed is true, it is assumed that the buffer does *not* have enough padding,
|
||||
* and it is copied into an enlarged temporary buffer before parsing.
|
||||
*
|
||||
* ### Parser Capacity
|
||||
*
|
||||
* If the parser's current capacity is less than len, it will allocate enough capacity
|
||||
* to handle it (up to max_capacity).
|
||||
*
|
||||
* @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless
|
||||
* realloc_if_needed is true.
|
||||
* @param len The length of the JSON.
|
||||
* @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding.
|
||||
* @return The document, or an error:
|
||||
* - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity,
|
||||
* and memory allocation fails.
|
||||
* - CAPACITY if the parser does not have enough capacity and len > max_capacity.
|
||||
* - other json errors if parsing fails.
|
||||
*/
|
||||
inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept;
|
||||
|
||||
/**
|
||||
* Parse a JSON document and return a temporary reference to it.
|
||||
*
|
||||
* dom::parser parser;
|
||||
* const element doc = parser.parse(buf, len);
|
||||
*
|
||||
* ### IMPORTANT: Document Lifetime
|
||||
*
|
||||
* The JSON document still lives in the parser: this is the most efficient way to parse JSON
|
||||
* documents because it reuses the same buffers, but you *must* use the document before you
|
||||
* destroy the parser or call parse() again.
|
||||
*
|
||||
* ### REQUIRED: Buffer Padding
|
||||
*
|
||||
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
||||
* those bytes are initialized to, as long as they are allocated.
|
||||
*
|
||||
* If realloc_if_needed is true, it is assumed that the buffer does *not* have enough padding,
|
||||
* and it is copied into an enlarged temporary buffer before parsing.
|
||||
*
|
||||
* ### Parser Capacity
|
||||
*
|
||||
* If the parser's current capacity is less than len, it will allocate enough capacity
|
||||
* to handle it (up to max_capacity).
|
||||
*
|
||||
* @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless
|
||||
* realloc_if_needed is true.
|
||||
* @param len The length of the JSON.
|
||||
* @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding.
|
||||
* @return The document, or an error:
|
||||
* - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity,
|
||||
* and memory allocation fails.
|
||||
* - CAPACITY if the parser does not have enough capacity and len > max_capacity.
|
||||
* - other json errors if parsing fails.
|
||||
*/
|
||||
really_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) noexcept;
|
||||
|
||||
/**
|
||||
* Parse a JSON document and return a temporary reference to it.
|
||||
*
|
||||
* dom::parser parser;
|
||||
* const element doc = parser.parse(s);
|
||||
*
|
||||
* ### IMPORTANT: Document Lifetime
|
||||
*
|
||||
* The JSON document still lives in the parser: this is the most efficient way to parse JSON
|
||||
* documents because it reuses the same buffers, but you *must* use the document before you
|
||||
* destroy the parser or call parse() again.
|
||||
*
|
||||
* ### REQUIRED: Buffer Padding
|
||||
*
|
||||
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
||||
* those bytes are initialized to, as long as they are allocated.
|
||||
*
|
||||
* If s.capacity() is less than SIMDJSON_PADDING, the string will be copied into an enlarged
|
||||
* temporary buffer before parsing.
|
||||
*
|
||||
* ### Parser Capacity
|
||||
*
|
||||
* If the parser's current capacity is less than len, it will allocate enough capacity
|
||||
* to handle it (up to max_capacity).
|
||||
*
|
||||
* @param s The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, or
|
||||
* a new string will be created with the extra padding.
|
||||
* @return The document, or an error:
|
||||
* - MEMALLOC if the string does not have enough padding or the parser does not have
|
||||
* enough capacity, and memory allocation fails.
|
||||
* - CAPACITY if the parser does not have enough capacity and len > max_capacity.
|
||||
* - other json errors if parsing fails.
|
||||
*/
|
||||
really_inline simdjson_result<element> parse(const std::string &s) noexcept;
|
||||
|
||||
/**
|
||||
* Parse a JSON document and return a temporary reference to it.
|
||||
*
|
||||
* dom::parser parser;
|
||||
* const element doc = parser.parse(s);
|
||||
*
|
||||
* ### IMPORTANT: Document Lifetime
|
||||
*
|
||||
* The JSON document still lives in the parser: this is the most efficient way to parse JSON
|
||||
* documents because it reuses the same buffers, but you *must* use the document before you
|
||||
* destroy the parser or call parse() again.
|
||||
*
|
||||
* ### Parser Capacity
|
||||
*
|
||||
* If the parser's current capacity is less than the length of s, it will allocate enough capacity
|
||||
* to handle it (up to max_capacity).
|
||||
*
|
||||
* @param s The JSON to parse.
|
||||
* @return The document, or an error:
|
||||
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails.
|
||||
* - CAPACITY if the parser does not have enough capacity and len > max_capacity.
|
||||
* - other json errors if parsing fails.
|
||||
*/
|
||||
really_inline simdjson_result<element> parse(const padded_string &s) noexcept;
|
||||
|
||||
// We do not want to allow implicit conversion from C string to std::string.
|
||||
really_inline simdjson_result<element> parse(const char *buf) noexcept = delete;
|
||||
|
||||
/**
|
||||
* Parse a buffer containing many JSON documents.
|
||||
*
|
||||
|
@ -913,189 +822,47 @@ public:
|
|||
* - other json errors if parsing fails.
|
||||
*/
|
||||
inline document_stream parse_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
|
||||
|
||||
/**
|
||||
* Parse a buffer containing many JSON documents.
|
||||
*
|
||||
* dom::parser parser;
|
||||
* for (const element doc : parser.parse_many(buf, len)) {
|
||||
* cout << std::string(doc["title"]) << endl;
|
||||
* }
|
||||
*
|
||||
* ### Format
|
||||
*
|
||||
* The buffer must contain a series of one or more JSON documents, concatenated into a single
|
||||
* buffer, separated by whitespace. It effectively parses until it has a fully valid document,
|
||||
* then starts parsing the next document at that point. (It does this with more parallelism and
|
||||
* lookahead than you might think, though.)
|
||||
*
|
||||
* Documents that consist of an object or array may omit the whitespace between them, concatenating
|
||||
* with no separator. Documents that consist of a single primitive (i.e. documents that are not
|
||||
* arrays or objects) MUST be separated with whitespace.
|
||||
*
|
||||
* ### Error Handling
|
||||
*
|
||||
* All errors are returned during iteration: if there is a global error such as memory allocation,
|
||||
* it will be yielded as the first result. Iteration always stops after the first error.
|
||||
*
|
||||
* As with all other simdjson methods, non-exception error handling is readily available through
|
||||
* the same interface, requiring you to check the error before using the document:
|
||||
*
|
||||
* dom::parser parser;
|
||||
* for (auto [doc, error] : parser.parse_many(buf, len)) {
|
||||
* if (error) { cerr << error << endl; exit(1); }
|
||||
* cout << std::string(doc["title"]) << endl;
|
||||
* }
|
||||
*
|
||||
* ### REQUIRED: Buffer Padding
|
||||
*
|
||||
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
||||
* those bytes are initialized to, as long as they are allocated.
|
||||
*
|
||||
* ### Threads
|
||||
*
|
||||
* When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
|
||||
* hood to do some lookahead.
|
||||
*
|
||||
* ### Parser Capacity
|
||||
*
|
||||
* If the parser's current capacity is less than batch_size, it will allocate enough capacity
|
||||
* to handle it (up to max_capacity).
|
||||
*
|
||||
* @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes.
|
||||
* @param len The length of the concatenated JSON.
|
||||
* @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
|
||||
* spot is cache-related: small enough to fit in cache, yet big enough to
|
||||
* parse as many documents as possible in one tight loop.
|
||||
* Defaults to 10MB, which has been a reasonable sweet spot in our tests.
|
||||
* @return The stream. If there is an error, it will be returned during iteration. An empty input
|
||||
* will yield 0 documents rather than an EMPTY error. Errors:
|
||||
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails
|
||||
* - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
|
||||
* - other json errors if parsing fails
|
||||
*/
|
||||
/** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */
|
||||
inline document_stream parse_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
|
||||
|
||||
/**
|
||||
* Parse a buffer containing many JSON documents.
|
||||
*
|
||||
* dom::parser parser;
|
||||
* for (const element doc : parser.parse_many(buf, len)) {
|
||||
* cout << std::string(doc["title"]) << endl;
|
||||
* }
|
||||
*
|
||||
* ### Format
|
||||
*
|
||||
* The buffer must contain a series of one or more JSON documents, concatenated into a single
|
||||
* buffer, separated by whitespace. It effectively parses until it has a fully valid document,
|
||||
* then starts parsing the next document at that point. (It does this with more parallelism and
|
||||
* lookahead than you might think, though.)
|
||||
*
|
||||
* documents that consist of an object or array may omit the whitespace between them, concatenating
|
||||
* with no separator. documents that consist of a single primitive (i.e. documents that are not
|
||||
* arrays or objects) MUST be separated with whitespace.
|
||||
*
|
||||
* ### Error Handling
|
||||
*
|
||||
* All errors are returned during iteration: if there is a global error such as memory allocation,
|
||||
* it will be yielded as the first result. Iteration always stops after the first error.
|
||||
*
|
||||
* As with all other simdjson methods, non-exception error handling is readily available through
|
||||
* the same interface, requiring you to check the error before using the document:
|
||||
*
|
||||
* dom::parser parser;
|
||||
* for (auto [doc, error] : parser.parse_many(buf, len)) {
|
||||
* if (error) { cerr << error << endl; exit(1); }
|
||||
* cout << std::string(doc["title"]) << endl;
|
||||
* }
|
||||
*
|
||||
* ### REQUIRED: Buffer Padding
|
||||
*
|
||||
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
||||
* those bytes are initialized to, as long as they are allocated.
|
||||
*
|
||||
* ### Threads
|
||||
*
|
||||
* When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
|
||||
* hood to do some lookahead.
|
||||
*
|
||||
* ### Parser Capacity
|
||||
*
|
||||
* If the parser's current capacity is less than batch_size, it will allocate enough capacity
|
||||
* to handle it (up to max_capacity).
|
||||
*
|
||||
* @param s The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes.
|
||||
* @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
|
||||
* spot is cache-related: small enough to fit in cache, yet big enough to
|
||||
* parse as many documents as possible in one tight loop.
|
||||
* Defaults to 10MB, which has been a reasonable sweet spot in our tests.
|
||||
* @return The stream. If there is an error, it will be returned during iteration. An empty input
|
||||
* will yield 0 documents rather than an EMPTY error. Errors:
|
||||
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails
|
||||
* - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
|
||||
* - other json errors if parsing fails
|
||||
*/
|
||||
/** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */
|
||||
inline document_stream parse_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
|
||||
|
||||
/**
|
||||
* Parse a buffer containing many JSON documents.
|
||||
*
|
||||
* dom::parser parser;
|
||||
* for (const element doc : parser.parse_many(buf, len)) {
|
||||
* cout << std::string(doc["title"]) << endl;
|
||||
* }
|
||||
*
|
||||
* ### Format
|
||||
*
|
||||
* The buffer must contain a series of one or more JSON documents, concatenated into a single
|
||||
* buffer, separated by whitespace. It effectively parses until it has a fully valid document,
|
||||
* then starts parsing the next document at that point. (It does this with more parallelism and
|
||||
* lookahead than you might think, though.)
|
||||
*
|
||||
* documents that consist of an object or array may omit the whitespace between them, concatenating
|
||||
* with no separator. documents that consist of a single primitive (i.e. documents that are not
|
||||
* arrays or objects) MUST be separated with whitespace.
|
||||
*
|
||||
* ### Error Handling
|
||||
*
|
||||
* All errors are returned during iteration: if there is a global error such as memory allocation,
|
||||
* it will be yielded as the first result. Iteration always stops after the first error.
|
||||
*
|
||||
* As with all other simdjson methods, non-exception error handling is readily available through
|
||||
* the same interface, requiring you to check the error before using the document:
|
||||
*
|
||||
* dom::parser parser;
|
||||
* for (auto [doc, error] : parser.parse_many(buf, len)) {
|
||||
* if (error) { cerr << error << endl; exit(1); }
|
||||
* cout << std::string(doc["title"]) << endl;
|
||||
* }
|
||||
*
|
||||
* ### Threads
|
||||
*
|
||||
* When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
|
||||
* hood to do some lookahead.
|
||||
*
|
||||
* ### Parser Capacity
|
||||
*
|
||||
* If the parser's current capacity is less than batch_size, it will allocate enough capacity
|
||||
* to handle it (up to max_capacity).
|
||||
*
|
||||
* @param s The concatenated JSON to parse.
|
||||
* @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
|
||||
* spot is cache-related: small enough to fit in cache, yet big enough to
|
||||
* parse as many documents as possible in one tight loop.
|
||||
* Defaults to 10MB, which has been a reasonable sweet spot in our tests.
|
||||
* @return The stream. If there is an error, it will be returned during iteration. An empty input
|
||||
* will yield 0 documents rather than an EMPTY error. Errors:
|
||||
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails
|
||||
* - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
|
||||
* - other json errors if parsing fails
|
||||
*/
|
||||
/** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */
|
||||
inline document_stream parse_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
|
||||
|
||||
// We do not want to allow implicit conversion from C string to std::string.
|
||||
/** @private We do not want to allow implicit conversion from C string to std::string. */
|
||||
really_inline simdjson_result<element> parse_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete;
|
||||
|
||||
/**
|
||||
* Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
|
||||
* and `max_depth` depth.
|
||||
*
|
||||
* @param capacity The new capacity.
|
||||
* @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH.
|
||||
* @return The error, if there is one.
|
||||
*/
|
||||
WARN_UNUSED inline error_code allocate(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept;
|
||||
|
||||
/**
|
||||
* @private deprecated because it returns bool instead of error_code, which is our standard for
|
||||
* failures. Use allocate() instead.
|
||||
*
|
||||
* Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
|
||||
* and `max_depth` depth.
|
||||
*
|
||||
* @param capacity The new capacity.
|
||||
* @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH.
|
||||
* @return true if successful, false if allocation failed.
|
||||
*/
|
||||
[[deprecated("Use allocate() instead.")]]
|
||||
WARN_UNUSED inline bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept;
|
||||
|
||||
/**
|
||||
* The largest document this parser can support without reallocating.
|
||||
*
|
||||
* @return Current capacity, in bytes.
|
||||
*/
|
||||
really_inline size_t capacity() const noexcept;
|
||||
|
||||
/**
|
||||
* The largest document this parser can automatically support.
|
||||
*
|
||||
|
@ -1105,13 +872,6 @@ public:
|
|||
*/
|
||||
really_inline size_t max_capacity() const noexcept;
|
||||
|
||||
/**
|
||||
* The largest document this parser can support without reallocating.
|
||||
*
|
||||
* @return Current capacity, in bytes.
|
||||
*/
|
||||
really_inline size_t capacity() const noexcept;
|
||||
|
||||
/**
|
||||
* The maximum level of nested objects and arrays supported by this parser.
|
||||
*
|
||||
|
@ -1130,45 +890,10 @@ public:
|
|||
*/
|
||||
really_inline void set_max_capacity(size_t max_capacity) noexcept;
|
||||
|
||||
/**
|
||||
* Set capacity. This is the largest document this parser can support without reallocating.
|
||||
*
|
||||
* This will allocate or deallocate as necessary.
|
||||
*
|
||||
* @param capacity The new capacity, in bytes.
|
||||
*
|
||||
* @return MEMALLOC if unsuccessful, SUCCESS otherwise.
|
||||
*/
|
||||
WARN_UNUSED inline error_code set_capacity(size_t capacity) noexcept;
|
||||
|
||||
/**
|
||||
* Set the maximum level of nested objects and arrays supported by this parser.
|
||||
*
|
||||
* This will allocate or deallocate as necessary.
|
||||
*
|
||||
* @param max_depth The new maximum depth (number of nested objects and arrays).
|
||||
*
|
||||
* @return MEMALLOC if unsuccessful, SUCCESS otherwise.
|
||||
*/
|
||||
WARN_UNUSED inline error_code set_max_depth(size_t max_depth) noexcept;
|
||||
|
||||
/**
|
||||
* Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
|
||||
* and `max_depth` depth.
|
||||
*
|
||||
* Equivalent to calling set_capacity() and set_max_depth().
|
||||
*
|
||||
* @param capacity The new capacity.
|
||||
* @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH.
|
||||
* @return true if successful, false if allocation failed.
|
||||
*/
|
||||
WARN_UNUSED inline bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept;
|
||||
|
||||
// type aliases for backcompat
|
||||
/** @deprecated Use the new DOM API instead */
|
||||
/** @private Use the new DOM API instead */
|
||||
class Iterator;
|
||||
/** @deprecated Use simdjson_error instead */
|
||||
using InvalidJSON = simdjson_error;
|
||||
/** @private Use simdjson_error instead */
|
||||
using InvalidJSON [[deprecated("Use simdjson_error instead")]] = simdjson_error;
|
||||
|
||||
/** @private Next location to write to in the tape */
|
||||
uint32_t current_loc{0};
|
||||
|
@ -1191,37 +916,39 @@ public:
|
|||
/** @private Next write location in the string buf for stage 2 parsing */
|
||||
uint8_t *current_string_buf_loc;
|
||||
|
||||
/** @deprecated Use `if (parser.parse(...).error)` instead */
|
||||
/** @private Use `if (parser.parse(...).error())` instead */
|
||||
bool valid{false};
|
||||
/** @deprecated Use `parser.parse(...).error` instead */
|
||||
/** @private Use `parser.parse(...).error()` instead */
|
||||
error_code error{UNINITIALIZED};
|
||||
|
||||
/** @deprecated Use `parser.parse(...).doc` instead */
|
||||
/** @private Use `parser.parse(...).value()` instead */
|
||||
document doc;
|
||||
|
||||
// returns true if the document parsed was valid
|
||||
/** @private returns true if the document parsed was valid */
|
||||
[[deprecated("Use the result of parser.parse() instead")]]
|
||||
inline bool is_valid() const noexcept;
|
||||
|
||||
// return an error code corresponding to the last parsing attempt, see
|
||||
// simdjson.h. Returns UNINITIALIZED if no parsing was attempted.
|
||||
/**
|
||||
* @private return an error code corresponding to the last parsing attempt, see
|
||||
* simdjson.h. Returns UNINITIALIZED if no parsing was attempted.
|
||||
*/
|
||||
[[deprecated("Use the result of parser.parse() instead")]]
|
||||
inline int get_error_code() const noexcept;
|
||||
|
||||
// return the string equivalent of "get_error_code"
|
||||
/** @private return the string equivalent of "get_error_code" */
|
||||
[[deprecated("Use error_message() on the result of parser.parse() instead, or cout << error")]]
|
||||
inline std::string get_error_message() const noexcept;
|
||||
|
||||
/** @private */
|
||||
[[deprecated("Use cout << on the result of parser.parse() instead")]]
|
||||
inline bool print_json(std::ostream &os) const noexcept;
|
||||
|
||||
/** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()` instead */
|
||||
inline bool dump_raw_tape(std::ostream &os) const noexcept;
|
||||
|
||||
//
|
||||
// Parser callbacks: these are internal!
|
||||
//
|
||||
// TODO find a way to do this without exposing the interface or crippling performance
|
||||
//
|
||||
|
||||
/** @private this should be called when parsing (right before writing the tapes) */
|
||||
inline void init_stage2() noexcept;
|
||||
|
@ -1244,13 +971,6 @@ public:
|
|||
really_inline bool on_number_double(double value) noexcept; ///< @private
|
||||
|
||||
private:
|
||||
/**
|
||||
* The maximum document length this parser supports.
|
||||
*
|
||||
* Buffers are large enough to handle any document up to this length.
|
||||
*/
|
||||
size_t _capacity{0};
|
||||
|
||||
/**
|
||||
* The maximum document length this parser will automatically support.
|
||||
*
|
||||
|
@ -1258,12 +978,19 @@ private:
|
|||
*/
|
||||
size_t _max_capacity;
|
||||
|
||||
/**
|
||||
* The maximum document length this parser supports.
|
||||
*
|
||||
* Buffers are large enough to handle any document up to this length.
|
||||
*/
|
||||
size_t _capacity{0};
|
||||
|
||||
/**
|
||||
* The maximum depth (number of nested objects and arrays) supported by this parser.
|
||||
*
|
||||
* Defaults to DEFAULT_MAX_DEPTH.
|
||||
*/
|
||||
size_t _max_depth;
|
||||
size_t _max_depth{0};
|
||||
|
||||
/**
|
||||
* The loaded buffer (reused each time load() is called)
|
||||
|
@ -1441,9 +1168,9 @@ inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::ob
|
|||
template<>
|
||||
struct simdjson_result<dom::element> : public internal::simdjson_result_base<dom::element> {
|
||||
public:
|
||||
really_inline simdjson_result() noexcept;
|
||||
really_inline simdjson_result(dom::element &&value) noexcept;
|
||||
really_inline simdjson_result(error_code error) noexcept;
|
||||
really_inline simdjson_result() noexcept; ///< @private
|
||||
really_inline simdjson_result(dom::element &&value) noexcept; ///< @private
|
||||
really_inline simdjson_result(error_code error) noexcept; ///< @private
|
||||
|
||||
inline simdjson_result<bool> is_null() const noexcept;
|
||||
template<typename T>
|
||||
|
@ -1477,9 +1204,9 @@ public:
|
|||
template<>
|
||||
struct simdjson_result<dom::array> : public internal::simdjson_result_base<dom::array> {
|
||||
public:
|
||||
really_inline simdjson_result() noexcept;
|
||||
really_inline simdjson_result(dom::array value) noexcept;
|
||||
really_inline simdjson_result(error_code error) noexcept;
|
||||
really_inline simdjson_result() noexcept; ///< @private
|
||||
really_inline simdjson_result(dom::array value) noexcept; ///< @private
|
||||
really_inline simdjson_result(error_code error) noexcept; ///< @private
|
||||
|
||||
inline simdjson_result<dom::element> at(const std::string_view &json_pointer) const noexcept;
|
||||
inline simdjson_result<dom::element> at(size_t index) const noexcept;
|
||||
|
@ -1494,9 +1221,9 @@ public:
|
|||
template<>
|
||||
struct simdjson_result<dom::object> : public internal::simdjson_result_base<dom::object> {
|
||||
public:
|
||||
really_inline simdjson_result() noexcept;
|
||||
really_inline simdjson_result(dom::object value) noexcept;
|
||||
really_inline simdjson_result(error_code error) noexcept;
|
||||
really_inline simdjson_result() noexcept; ///< @private
|
||||
really_inline simdjson_result(dom::object value) noexcept; ///< @private
|
||||
really_inline simdjson_result(error_code error) noexcept; ///< @private
|
||||
|
||||
inline simdjson_result<dom::element> operator[](const std::string_view &key) const noexcept;
|
||||
inline simdjson_result<dom::element> operator[](const char *key) const noexcept;
|
||||
|
|
|
@ -12,7 +12,7 @@ namespace simdjson {
|
|||
*/
|
||||
enum error_code {
|
||||
SUCCESS = 0, ///< No error
|
||||
SUCCESS_AND_HAS_MORE, ///< No error and buffer still has more data
|
||||
SUCCESS_AND_HAS_MORE, ///< @private No error and buffer still has more data
|
||||
CAPACITY, ///< This parser can't support a document that big
|
||||
MEMALLOC, ///< Error allocating memory, most likely out of memory
|
||||
TAPE_ERROR, ///< Something went wrong while writing to the tape (stage 2), this is a generic error
|
||||
|
@ -165,19 +165,19 @@ struct simdjson_result_base : public std::pair<T, error_code> {
|
|||
template<typename T>
|
||||
struct simdjson_result : public internal::simdjson_result_base<T> {
|
||||
/**
|
||||
* Create a new empty result with error = UNINITIALIZED.
|
||||
* @private Create a new empty result with error = UNINITIALIZED.
|
||||
*/
|
||||
really_inline simdjson_result() noexcept;
|
||||
/**
|
||||
* Create a new successful result.
|
||||
* @private Create a new successful result.
|
||||
*/
|
||||
really_inline simdjson_result(T &&value) noexcept;
|
||||
/**
|
||||
* Create a new error result.
|
||||
* @private Create a new error result.
|
||||
*/
|
||||
really_inline simdjson_result(error_code error_code) noexcept;
|
||||
/**
|
||||
* Create a new result with both things (use if you don't want to branch when creating the result).
|
||||
* @private Create a new result with both things (use if you don't want to branch when creating the result).
|
||||
*/
|
||||
really_inline simdjson_result(T &&value, error_code error) noexcept;
|
||||
|
||||
|
@ -220,11 +220,12 @@ struct simdjson_result : public internal::simdjson_result_base<T> {
|
|||
/**
|
||||
* @deprecated This is an alias and will be removed, use error_code instead
|
||||
*/
|
||||
using ErrorValues = error_code;
|
||||
using ErrorValues [[deprecated("This is an alias and will be removed, use error_code instead")]] = error_code;
|
||||
|
||||
/**
|
||||
* @deprecated Error codes should be stored and returned as `error_code`, use `error_message()` instead.
|
||||
*/
|
||||
[[deprecated("Error codes should be stored and returned as `error_code`, use `error_message()` instead.")]]
|
||||
inline const std::string &error_message(int error) noexcept;
|
||||
|
||||
} // namespace simdjson
|
||||
|
|
|
@ -38,6 +38,8 @@ public:
|
|||
virtual const std::string &description() const { return _description; }
|
||||
|
||||
/**
|
||||
* @private For internal implementation use
|
||||
*
|
||||
* The instruction sets this implementation is compiled against.
|
||||
*
|
||||
* @return a mask of all required `instruction_set` values
|
||||
|
@ -45,6 +47,8 @@ public:
|
|||
virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; };
|
||||
|
||||
/**
|
||||
* @private For internal implementation use
|
||||
*
|
||||
* Run a full document parse (ensure_capacity, stage1 and stage2).
|
||||
*
|
||||
* Overridden by each implementation.
|
||||
|
@ -57,6 +61,8 @@ public:
|
|||
WARN_UNUSED virtual error_code parse(const uint8_t *buf, size_t len, dom::parser &parser) const noexcept = 0;
|
||||
|
||||
/**
|
||||
* @private For internal implementation use
|
||||
*
|
||||
* Run a full document parse (ensure_capacity, stage1 and stage2).
|
||||
*
|
||||
* Overridden by each implementation.
|
||||
|
@ -70,6 +76,8 @@ public:
|
|||
WARN_UNUSED virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0;
|
||||
|
||||
/**
|
||||
* @private For internal implementation use
|
||||
*
|
||||
* Stage 1 of the document parser.
|
||||
*
|
||||
* Overridden by each implementation.
|
||||
|
@ -83,6 +91,8 @@ public:
|
|||
WARN_UNUSED virtual error_code stage1(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) const noexcept = 0;
|
||||
|
||||
/**
|
||||
* @private For internal implementation use
|
||||
*
|
||||
* Stage 2 of the document parser.
|
||||
*
|
||||
* Overridden by each implementation.
|
||||
|
@ -95,6 +105,8 @@ public:
|
|||
WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser) const noexcept = 0;
|
||||
|
||||
/**
|
||||
* @private For internal implementation use
|
||||
*
|
||||
* Stage 2 of the document parser for parser::parse_many.
|
||||
*
|
||||
* Overridden by each implementation.
|
||||
|
@ -108,9 +120,10 @@ public:
|
|||
WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser, size_t &next_json) const noexcept = 0;
|
||||
|
||||
protected:
|
||||
/** @private Construct an implementation with the given name and description. For subclasses. */
|
||||
really_inline implementation(
|
||||
const std::string &name,
|
||||
const std::string &description,
|
||||
std::string_view name,
|
||||
std::string_view description,
|
||||
uint32_t required_instruction_sets
|
||||
) :
|
||||
_name(name),
|
||||
|
@ -192,8 +205,8 @@ public:
|
|||
*/
|
||||
class detect_best_supported_implementation_on_first_use final : public implementation {
|
||||
public:
|
||||
const std::string& name() const noexcept final { return set_best()->name(); }
|
||||
const std::string& description() const noexcept final { return set_best()->description(); }
|
||||
const std::string &name() const noexcept final { return set_best()->name(); }
|
||||
const std::string &description() const noexcept final { return set_best()->description(); }
|
||||
uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); }
|
||||
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::parser &parser) const noexcept final {
|
||||
return set_best()->parse(buf, len, parser);
|
||||
|
|
|
@ -193,7 +193,7 @@ inline element document::root() const noexcept {
|
|||
#define RETURN_ERROR(CODE, MESSAGE) return REPORT_ERROR((CODE), (MESSAGE));
|
||||
|
||||
WARN_UNUSED
|
||||
inline error_code document::set_capacity(size_t capacity) noexcept {
|
||||
inline error_code document::allocate(size_t capacity) noexcept {
|
||||
if (capacity == 0) {
|
||||
string_buf.reset();
|
||||
tape.reset();
|
||||
|
@ -310,11 +310,11 @@ inline bool document::dump_raw_tape(std::ostream &os) const noexcept {
|
|||
//
|
||||
// parser inline implementation
|
||||
//
|
||||
really_inline parser::parser(size_t max_capacity, size_t max_depth) noexcept
|
||||
: _max_capacity{max_capacity}, _max_depth{max_depth}, loaded_bytes(nullptr, &aligned_free_char) {}
|
||||
really_inline parser::parser(size_t max_capacity) noexcept
|
||||
: _max_capacity{max_capacity}, loaded_bytes(nullptr, &aligned_free_char) {}
|
||||
// Returns the parser's `valid` member as-is.
inline bool parser::is_valid() const noexcept { return valid; }
|
||||
// Returns the parser's `error` member, implicitly converted to int.
inline int parser::get_error_code() const noexcept { return error; }
|
||||
inline std::string parser::get_error_message() const noexcept { return error_message(int(error)); }
|
||||
inline std::string parser::get_error_message() const noexcept { return error_message(error); }
|
||||
inline bool parser::print_json(std::ostream &os) const noexcept {
|
||||
if (!valid) { return false; }
|
||||
os << doc.root();
|
||||
|
@ -431,98 +431,97 @@ really_inline size_t parser::max_depth() const noexcept {
|
|||
}
|
||||
|
||||
WARN_UNUSED
|
||||
inline error_code parser::set_capacity(size_t capacity) noexcept {
|
||||
if (_capacity == capacity) {
|
||||
return SUCCESS;
|
||||
}
|
||||
inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept {
|
||||
//
|
||||
// If capacity has changed, reallocate capacity-based buffers
|
||||
//
|
||||
if (_capacity != capacity) {
|
||||
// Set capacity to 0 until we finish, in case there's an error
|
||||
_capacity = 0;
|
||||
|
||||
// Set capacity to 0 until we finish, in case there's an error
|
||||
_capacity = 0;
|
||||
//
|
||||
// Reallocate the document
|
||||
//
|
||||
error_code err = doc.allocate(capacity);
|
||||
if (err) { return err; }
|
||||
|
||||
//
|
||||
// Don't allocate 0 bytes, just return.
|
||||
//
|
||||
if (capacity == 0) {
|
||||
structural_indexes.reset();
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
//
|
||||
// Initialize stage 1 output
|
||||
//
|
||||
uint32_t max_structures = ROUNDUP_N(capacity, 64) + 2 + 7;
|
||||
structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); // TODO realloc
|
||||
if (!structural_indexes) {
|
||||
return MEMALLOC;
|
||||
}
|
||||
|
||||
_capacity = capacity;
|
||||
|
||||
//
|
||||
// Reallocate the document
|
||||
// If capacity hasn't changed, but the document was taken, allocate a new document.
|
||||
//
|
||||
error_code err = doc.set_capacity(capacity);
|
||||
if (err) { return err; }
|
||||
|
||||
//
|
||||
// Don't allocate 0 bytes, just return.
|
||||
//
|
||||
if (capacity == 0) {
|
||||
structural_indexes.reset();
|
||||
return SUCCESS;
|
||||
} else if (!doc.tape) {
|
||||
error_code err = doc.allocate(capacity);
|
||||
if (err) { return err; }
|
||||
}
|
||||
|
||||
//
|
||||
// Initialize stage 1 output
|
||||
// If max_depth has changed, reallocate those buffers
|
||||
//
|
||||
uint32_t max_structures = ROUNDUP_N(capacity, 64) + 2 + 7;
|
||||
structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); // TODO realloc
|
||||
if (!structural_indexes) {
|
||||
return MEMALLOC;
|
||||
}
|
||||
if (max_depth != _max_depth) {
|
||||
_max_depth = 0;
|
||||
|
||||
_capacity = capacity;
|
||||
if (max_depth == 0) {
|
||||
ret_address.reset();
|
||||
containing_scope_offset.reset();
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
//
|
||||
// Initialize stage 2 state
|
||||
//
|
||||
containing_scope_offset.reset(new (std::nothrow) uint32_t[max_depth]); // TODO realloc
|
||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
||||
ret_address.reset(new (std::nothrow) void *[max_depth]);
|
||||
#else
|
||||
ret_address.reset(new (std::nothrow) char[max_depth]);
|
||||
#endif
|
||||
|
||||
if (!ret_address || !containing_scope_offset) {
|
||||
// Could not allocate memory
|
||||
return MEMALLOC;
|
||||
}
|
||||
|
||||
_max_depth = max_depth;
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
// Bool-returning counterpart of allocate(): forwards both arguments to allocate()
// and returns true only when it reports no error (a zero error_code).
WARN_UNUSED
inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept {
  if (allocate(capacity, max_depth)) {
    // Non-zero error_code: the allocation did not succeed.
    return false;
  }
  return true;
}
|
||||
|
||||
// Stores the new limit in _max_capacity. Only the member is updated here;
// no buffer is allocated or released by this call.
really_inline void parser::set_max_capacity(size_t max_capacity) noexcept {
|
||||
_max_capacity = max_capacity;
|
||||
}
|
||||
|
||||
// Resizes the depth-dependent buffers (containing_scope_offset and ret_address).
//
// - Returns SUCCESS without touching anything when max_depth already equals
//   _max_depth and ret_address is allocated.
// - A max_depth of 0 releases both buffers and leaves _max_depth at 0.
// - Otherwise both buffers are reallocated with max_depth entries each. If either
//   allocation fails, MEMALLOC is returned and _max_depth stays 0.
WARN_UNUSED inline error_code parser::set_max_depth(size_t max_depth) noexcept {
  const bool already_sized = ret_address && (max_depth == _max_depth);
  if (already_sized) {
    return SUCCESS;
  }

  // Report a depth of 0 until the new buffers are in place, in case of an error.
  _max_depth = 0;

  if (max_depth != 0) {
    //
    // Initialize stage 2 state
    //
    containing_scope_offset.reset(new (std::nothrow) uint32_t[max_depth]); // TODO realloc
#ifdef SIMDJSON_USE_COMPUTED_GOTO
    ret_address.reset(new (std::nothrow) void *[max_depth]);
#else
    ret_address.reset(new (std::nothrow) char[max_depth]);
#endif

    if (!(ret_address && containing_scope_offset)) {
      // Could not allocate memory
      return MEMALLOC;
    }

    _max_depth = max_depth;
    return SUCCESS;
  }

  // Depth 0 requested: nothing to allocate, just drop the existing buffers.
  ret_address.reset();
  containing_scope_offset.reset();
  return SUCCESS;
}
|
||||
|
||||
// Calls set_capacity() and then set_max_depth(); returns true only when both
// report no error. set_max_depth() is not attempted if set_capacity() fails.
WARN_UNUSED inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept {
  if (set_capacity(capacity)) {
    // Non-zero error_code from the capacity step: stop here.
    return false;
  }
  return !set_max_depth(max_depth);
}
|
||||
|
||||
inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept {
|
||||
// If we don't have enough capacity, (try to) automatically bump it.
|
||||
if (unlikely(desired_capacity > capacity())) {
|
||||
// If the document was taken, reallocate that too.
|
||||
// Both in one if statement to minimize unlikely branching.
|
||||
if (unlikely(desired_capacity > capacity() || !doc.tape)) {
|
||||
if (desired_capacity > max_capacity()) {
|
||||
return error = CAPACITY;
|
||||
}
|
||||
|
||||
error = set_capacity(desired_capacity);
|
||||
if (error) { return error; }
|
||||
}
|
||||
|
||||
// Allocate depth-based buffers if they aren't already.
|
||||
error = set_max_depth(max_depth());
|
||||
if (error) { return error; }
|
||||
|
||||
// If the last doc was taken, we need to allocate a new one
|
||||
if (!doc.tape) {
|
||||
error = doc.set_capacity(desired_capacity);
|
||||
if (error) { return error; }
|
||||
return allocate(desired_capacity, _max_depth > 0 ? _max_depth : DEFAULT_MAX_DEPTH);
|
||||
}
|
||||
|
||||
return SUCCESS;
|
||||
|
|
|
@ -7,33 +7,33 @@ namespace simdjson {
|
|||
|
||||
// Because of template weirdness, the actual class definition is inline in the document class
|
||||
|
||||
WARN_UNUSED bool ParsedJson::Iterator::is_ok() const {
|
||||
WARN_UNUSED bool dom::parser::Iterator::is_ok() const {
|
||||
return location < tape_length;
|
||||
}
|
||||
|
||||
// useful for debugging purposes
|
||||
size_t ParsedJson::Iterator::get_tape_location() const {
|
||||
size_t dom::parser::Iterator::get_tape_location() const {
|
||||
return location;
|
||||
}
|
||||
|
||||
// useful for debugging purposes
|
||||
size_t ParsedJson::Iterator::get_tape_length() const {
|
||||
size_t dom::parser::Iterator::get_tape_length() const {
|
||||
return tape_length;
|
||||
}
|
||||
|
||||
// returns the current depth (start at 1 with 0 reserved for the fictitious root
|
||||
// node)
|
||||
size_t ParsedJson::Iterator::get_depth() const {
|
||||
size_t dom::parser::Iterator::get_depth() const {
|
||||
return depth;
|
||||
}
|
||||
|
||||
// A scope is a series of nodes at the same depth, typically it is either an
|
||||
// object ({) or an array ([). The root node has type 'r'.
|
||||
uint8_t ParsedJson::Iterator::get_scope_type() const {
|
||||
uint8_t dom::parser::Iterator::get_scope_type() const {
|
||||
return depth_index[depth].scope_type;
|
||||
}
|
||||
|
||||
bool ParsedJson::Iterator::move_forward() {
|
||||
bool dom::parser::Iterator::move_forward() {
|
||||
if (location + 1 >= tape_length) {
|
||||
return false; // we are at the end!
|
||||
}
|
||||
|
@ -58,14 +58,14 @@ bool ParsedJson::Iterator::move_forward() {
|
|||
return true;
|
||||
}
|
||||
|
||||
void ParsedJson::Iterator::move_to_value() {
|
||||
void dom::parser::Iterator::move_to_value() {
|
||||
// assume that we are on a key, so move by 1.
|
||||
location += 1;
|
||||
current_val = doc.tape[location];
|
||||
current_type = (current_val >> 56);
|
||||
}
|
||||
|
||||
bool ParsedJson::Iterator::move_to_key(const char *key) {
|
||||
bool dom::parser::Iterator::move_to_key(const char *key) {
|
||||
if (down()) {
|
||||
do {
|
||||
const bool right_key = (strcmp(get_string(), key) == 0);
|
||||
|
@ -79,7 +79,7 @@ bool ParsedJson::Iterator::move_to_key(const char *key) {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool ParsedJson::Iterator::move_to_key_insensitive(
|
||||
bool dom::parser::Iterator::move_to_key_insensitive(
|
||||
const char *key) {
|
||||
if (down()) {
|
||||
do {
|
||||
|
@ -94,7 +94,7 @@ bool ParsedJson::Iterator::move_to_key_insensitive(
|
|||
return false;
|
||||
}
|
||||
|
||||
bool ParsedJson::Iterator::move_to_key(const char *key,
|
||||
bool dom::parser::Iterator::move_to_key(const char *key,
|
||||
uint32_t length) {
|
||||
if (down()) {
|
||||
do {
|
||||
|
@ -110,7 +110,7 @@ bool ParsedJson::Iterator::move_to_key(const char *key,
|
|||
return false;
|
||||
}
|
||||
|
||||
bool ParsedJson::Iterator::move_to_index(uint32_t index) {
|
||||
bool dom::parser::Iterator::move_to_index(uint32_t index) {
|
||||
if (down()) {
|
||||
uint32_t i = 0;
|
||||
for (; i < index; i++) {
|
||||
|
@ -126,7 +126,7 @@ bool ParsedJson::Iterator::move_to_index(uint32_t index) {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool ParsedJson::Iterator::prev() {
|
||||
bool dom::parser::Iterator::prev() {
|
||||
size_t target_location = location;
|
||||
to_start_scope();
|
||||
size_t npos = location;
|
||||
|
@ -150,7 +150,7 @@ bool ParsedJson::Iterator::prev() {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool ParsedJson::Iterator::up() {
|
||||
bool dom::parser::Iterator::up() {
|
||||
if (depth == 1) {
|
||||
return false; // don't allow moving back to root
|
||||
}
|
||||
|
@ -163,7 +163,7 @@ bool ParsedJson::Iterator::up() {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool ParsedJson::Iterator::down() {
|
||||
bool dom::parser::Iterator::down() {
|
||||
if (location + 1 >= tape_length) {
|
||||
return false;
|
||||
}
|
||||
|
@ -184,13 +184,13 @@ bool ParsedJson::Iterator::down() {
|
|||
return false;
|
||||
}
|
||||
|
||||
void ParsedJson::Iterator::to_start_scope() {
|
||||
void dom::parser::Iterator::to_start_scope() {
|
||||
location = depth_index[depth].start_of_scope;
|
||||
current_val = doc.tape[location];
|
||||
current_type = (current_val >> 56);
|
||||
}
|
||||
|
||||
bool ParsedJson::Iterator::next() {
|
||||
bool dom::parser::Iterator::next() {
|
||||
size_t npos;
|
||||
if ((current_type == '[') || (current_type == '{')) {
|
||||
// we need to jump
|
||||
|
@ -209,7 +209,7 @@ bool ParsedJson::Iterator::next() {
|
|||
return true;
|
||||
}
|
||||
|
||||
ParsedJson::Iterator::Iterator(const ParsedJson &pj) noexcept(false)
|
||||
dom::parser::Iterator::Iterator(const dom::parser &pj) noexcept(false)
|
||||
: doc(pj.doc), depth(0), location(0), tape_length(0) {
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
if (!pj.valid) { throw simdjson_error(pj.error); }
|
||||
|
@ -236,8 +236,8 @@ ParsedJson::Iterator::Iterator(const ParsedJson &pj) noexcept(false)
|
|||
}
|
||||
}
|
||||
|
||||
ParsedJson::Iterator::Iterator(
|
||||
const ParsedJson::Iterator &o) noexcept
|
||||
dom::parser::Iterator::Iterator(
|
||||
const dom::parser::Iterator &o) noexcept
|
||||
: doc(o.doc), max_depth(o.depth), depth(o.depth), location(o.location),
|
||||
tape_length(o.tape_length), current_type(o.current_type),
|
||||
current_val(o.current_val) {
|
||||
|
@ -245,11 +245,11 @@ ParsedJson::Iterator::Iterator(
|
|||
memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0]));
|
||||
}
|
||||
|
||||
ParsedJson::Iterator::~Iterator() noexcept {
|
||||
dom::parser::Iterator::~Iterator() noexcept {
|
||||
if (depth_index) { delete[] depth_index; }
|
||||
}
|
||||
|
||||
bool ParsedJson::Iterator::print(std::ostream &os, bool escape_strings) const {
|
||||
bool dom::parser::Iterator::print(std::ostream &os, bool escape_strings) const {
|
||||
if (!is_ok()) {
|
||||
return false;
|
||||
}
|
||||
|
@ -295,7 +295,7 @@ bool ParsedJson::Iterator::print(std::ostream &os, bool escape_strings) const {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool ParsedJson::Iterator::move_to(const char *pointer,
|
||||
bool dom::parser::Iterator::move_to(const char *pointer,
|
||||
uint32_t length) {
|
||||
char *new_pointer = nullptr;
|
||||
if (pointer[0] == '#') {
|
||||
|
@ -354,7 +354,7 @@ bool ParsedJson::Iterator::move_to(const char *pointer,
|
|||
return found;
|
||||
}
|
||||
|
||||
bool ParsedJson::Iterator::relative_move_to(const char *pointer,
|
||||
bool dom::parser::Iterator::relative_move_to(const char *pointer,
|
||||
uint32_t length) {
|
||||
if (length == 0) {
|
||||
// returns the whole document
|
||||
|
|
|
@ -15,6 +15,7 @@ namespace simdjson {
|
|||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
// Thin wrapper kept alongside padded_string::load(): forwards `path` to
// padded_string::load() and returns the outcome as a padded_string.
// NOTE(review): how a load failure surfaces depends on the conversion from
// load()'s return type, which is not visible here — confirm before relying on it.
[[deprecated("Use padded_string::load() instead")]]
|
||||
inline padded_string get_corpus(const char *path) {
|
||||
return padded_string::load(path);
|
||||
}
|
||||
|
|
|
@ -107,8 +107,9 @@ WARN_UNUSED inline dom::parser build_parsed_json(const padded_string &s) noexcep
|
|||
return parser;
|
||||
}
|
||||
|
||||
// We do not want to allow implicit conversion from C string to std::string.
|
||||
/** @private We do not want to allow implicit conversion from C string to std::string. */
|
||||
int json_parse(const char *buf, dom::parser &parser) noexcept = delete;
|
||||
/** @private We do not want to allow implicit conversion from C string to std::string. */
|
||||
dom::parser build_parsed_json(const char *buf) noexcept = delete;
|
||||
|
||||
} // namespace simdjson
|
||||
|
|
|
@ -8,9 +8,9 @@
|
|||
namespace simdjson {
|
||||
|
||||
/**
|
||||
* @deprecated Use `document::parser` instead.
|
||||
* @deprecated Use `dom::parser` instead.
|
||||
*/
|
||||
using ParsedJson = dom::parser;
|
||||
using ParsedJson [[deprecated("Use dom::parser instead")]] = dom::parser;
|
||||
|
||||
} // namespace simdjson
|
||||
#endif
|
||||
|
|
|
@ -16,9 +16,9 @@
|
|||
|
||||
namespace simdjson {
|
||||
|
||||
class [[deprecated("Use the new DOM navigation API instead (see doc/usage.md)")]] ParsedJson::Iterator {
|
||||
class [[deprecated("Use the new DOM navigation API instead (see doc/usage.md)")]] dom::parser::Iterator {
|
||||
public:
|
||||
inline Iterator(const ParsedJson &parser) noexcept(false);
|
||||
inline Iterator(const dom::parser &parser) noexcept(false);
|
||||
inline Iterator(const Iterator &o) noexcept;
|
||||
inline ~Iterator() noexcept;
|
||||
|
||||
|
|
|
@ -100,7 +100,7 @@
|
|||
#endif
|
||||
|
||||
namespace simdjson {
|
||||
// portable version of posix_memalign
|
||||
/** @private portable version of posix_memalign */
|
||||
static inline void *aligned_malloc(size_t alignment, size_t size) {
|
||||
void *p;
|
||||
#ifdef _MSC_VER
|
||||
|
@ -117,10 +117,12 @@ static inline void *aligned_malloc(size_t alignment, size_t size) {
|
|||
return p;
|
||||
}
|
||||
|
||||
/** @private */
|
||||
static inline char *aligned_malloc_char(size_t alignment, size_t size) {
|
||||
return (char *)aligned_malloc(alignment, size);
|
||||
}
|
||||
|
||||
/** @private */
|
||||
static inline void aligned_free(void *mem_block) {
|
||||
if (mem_block == nullptr) {
|
||||
return;
|
||||
|
@ -134,6 +136,7 @@ static inline void aligned_free(void *mem_block) {
|
|||
#endif
|
||||
}
|
||||
|
||||
/** @private */
|
||||
static inline void aligned_free_char(char *mem_block) {
|
||||
aligned_free((void *)mem_block);
|
||||
}
|
||||
|
|
|
@ -1195,7 +1195,6 @@ namespace format_tests {
|
|||
bool print_parser_parse_exception() {
|
||||
std::cout << "Running " << __func__ << std::endl;
|
||||
dom::parser parser;
|
||||
if (!parser.allocate_capacity(DOCUMENT.length())) { cerr << "Couldn't allocate!" << endl; return false; }
|
||||
ostringstream s;
|
||||
s << parser.parse(DOCUMENT);
|
||||
return assert_minified(s);
|
||||
|
@ -1203,7 +1202,6 @@ namespace format_tests {
|
|||
bool print_minify_parser_parse_exception() {
|
||||
std::cout << "Running " << __func__ << std::endl;
|
||||
dom::parser parser;
|
||||
if (!parser.allocate_capacity(DOCUMENT.length())) { cerr << "Couldn't allocate!" << endl; return false; }
|
||||
ostringstream s;
|
||||
s << minify(parser.parse(DOCUMENT));
|
||||
return assert_minified(s);
|
||||
|
|
|
@ -125,7 +125,7 @@ void performance_2() {
|
|||
// The web_request part of this is aspirational, so we compile as much as we can here
|
||||
void performance_3() {
|
||||
dom::parser parser(0); // This parser will refuse to automatically grow capacity
|
||||
simdjson::error_code allocate_error = parser.set_capacity(1024*1024); // This allocates enough capacity to handle documents <= 1MB
|
||||
simdjson::error_code allocate_error = parser.allocate(1024*1024); // This allocates enough capacity to handle documents <= 1MB
|
||||
if (allocate_error) { cerr << allocate_error << endl; exit(1); }
|
||||
|
||||
// for (web_request request : listen()) {
|
||||
|
|
Loading…
Reference in New Issue