Merge pull request #883 from simdjson/jkeiser/move-include-files

Make include files more fine-grained
This commit is contained in:
John Keiser 2020-05-19 15:48:06 -07:00 committed by GitHub
commit 561813eb2a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
33 changed files with 4901 additions and 4486 deletions

View File

@ -1,5 +1,6 @@
#include "simdjson.h"
#include <algorithm>
#include <cstring>
#include <unistd.h>
#include <vector>

View File

@ -488,3 +488,10 @@ The parsed results (`dom::document`, `dom::element`, `array`, `object`) depend o
The CPU detection, which runs the first time parsing is attempted and switches to the fastest
parser for your CPU, is transparent and thread-safe.
Backwards Compatibility
-----------------------
The only header file supported by simdjson is simdjson.h. Older versions of simdjson published a
number of other include files such as document.h or ParsedJson.h alongside simdjson.h; these headers
may be moved or removed in future versions.

View File

@ -18,20 +18,29 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS
#include "simdjson/error.h"
#include "simdjson/padded_string.h"
#include "simdjson/implementation.h"
#include "simdjson/document.h"
#include "simdjson/document_stream.h"
#include "simdjson/dom/array.h"
#include "simdjson/dom/document_stream.h"
#include "simdjson/dom/document.h"
#include "simdjson/dom/element.h"
#include "simdjson/dom/object.h"
#include "simdjson/dom/parser.h"
// // Deprecated API
#include "simdjson/jsonparser.h"
#include "simdjson/parsedjson.h"
#include "simdjson/parsedjson_iterator.h"
// Deprecated API
#include "simdjson/dom/jsonparser.h"
#include "simdjson/dom/parsedjson.h"
#include "simdjson/dom/parsedjson_iterator.h"
// // Inline functions
#include "simdjson/inline/document.h"
// Inline functions
#include "simdjson/inline/array.h"
#include "simdjson/inline/document_stream.h"
#include "simdjson/inline/document.h"
#include "simdjson/inline/element.h"
#include "simdjson/inline/error.h"
#include "simdjson/inline/object.h"
#include "simdjson/inline/padded_string.h"
#include "simdjson/inline/parsedjson_iterator.h"
#include "simdjson/inline/parser.h"
#include "simdjson/inline/tape_ref.h"
SIMDJSON_POP_DISABLE_WARNINGS

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,146 @@
#ifndef SIMDJSON_DOM_ARRAY_H
#define SIMDJSON_DOM_ARRAY_H
#include "simdjson/common_defs.h"
#include "simdjson/error.h"
#include "simdjson/internal/tape_ref.h"
#include "simdjson/minify.h"
#include <ostream>
namespace simdjson {
namespace dom {
class document;
class element;
/**
* JSON array.
*/
class array : protected internal::tape_ref {
public:
/** Create a new, invalid array */
really_inline array() noexcept;
class iterator : protected internal::tape_ref {
public:
/**
* Get the actual value
*/
inline element operator*() const noexcept;
/**
* Get the next value.
*
* Part of the std::iterator interface.
*
*/
inline iterator& operator++() noexcept;
/**
* Check if these values come from the same place in the JSON.
*
* Part of the std::iterator interface.
*/
inline bool operator!=(const iterator& other) const noexcept;
private:
really_inline iterator(const document *doc, size_t json_index) noexcept;
friend class array;
};
/**
* Return the first array element.
*
* Part of the std::iterable interface.
*/
inline iterator begin() const noexcept;
/**
* One past the last array element.
*
* Part of the std::iterable interface.
*/
inline iterator end() const noexcept;
/**
* Get the size of the array (number of immediate children).
* It is a saturated value with a maximum of 0xFFFFFF: if the value
* is 0xFFFFFF then the size is 0xFFFFFF or greater.
*/
inline size_t size() const noexcept;
/**
* Get the value associated with the given JSON pointer.
*
* dom::parser parser;
* array a = parser.parse(R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])");
* a.at("0/foo/a/1") == 20
* a.at("0")["foo"]["a"].at(1) == 20
*
* @return The value associated with the given JSON pointer, or:
* - NO_SUCH_FIELD if a field does not exist in an object
* - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length
* - INCORRECT_TYPE if a non-integer is used to access an array
* - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed
*/
inline simdjson_result<element> at(const std::string_view &json_pointer) const noexcept;
/**
* Get the value at the given index.
*
* @return The value at the given index, or:
* - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length
*/
inline simdjson_result<element> at(size_t index) const noexcept;
private:
really_inline array(const document *doc, size_t json_index) noexcept;
friend class element;
friend struct simdjson_result<element>;
template<typename T>
friend class simdjson::minify;
};
/**
* Print JSON to an output stream.
*
* By default, the value will be printed minified.
*
* @param out The output stream.
* @param value The value to print.
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
*/
inline std::ostream& operator<<(std::ostream& out, const array &value);
} // namespace dom
/** The result of a JSON conversion that may fail. */
template<>
struct simdjson_result<dom::array> : public internal::simdjson_result_base<dom::array> {
public:
really_inline simdjson_result() noexcept; ///< @private
really_inline simdjson_result(dom::array value) noexcept; ///< @private
really_inline simdjson_result(error_code error) noexcept; ///< @private
inline simdjson_result<dom::element> at(const std::string_view &json_pointer) const noexcept;
inline simdjson_result<dom::element> at(size_t index) const noexcept;
#if SIMDJSON_EXCEPTIONS
inline dom::array::iterator begin() const noexcept(false);
inline dom::array::iterator end() const noexcept(false);
inline size_t size() const noexcept(false);
#endif // SIMDJSON_EXCEPTIONS
};
#if SIMDJSON_EXCEPTIONS
/**
* Print JSON to an output stream.
*
* By default, the value will be printed minified.
*
* @param out The output stream.
* @param value The value to print.
* @throw simdjson_error if the result being printed has an error. If there is an error with the
* underlying output stream, that error will be propagated (simdjson_error will not be
* thrown).
*/
inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::array> &value) noexcept(false);
#endif
} // namespace simdjson
#endif // SIMDJSON_DOM_ARRAY_H

View File

@ -0,0 +1,78 @@
#ifndef SIMDJSON_DOM_DOCUMENT_H
#define SIMDJSON_DOM_DOCUMENT_H
#include "simdjson/common_defs.h"
#include "simdjson/minify.h"
#include <memory>
#include <ostream>
namespace simdjson {
namespace dom {
class element;
/**
* A parsed JSON document.
*
* This class cannot be copied, only moved, to avoid unintended allocations.
*/
class document {
public:
/**
* Create a document container with zero capacity.
*
* The parser will allocate capacity as needed.
*/
document() noexcept = default;
~document() noexcept = default;
/**
* Take another document's buffers.
*
* @param other The document to take. Its capacity is zeroed and it is invalidated.
*/
document(document &&other) noexcept = default;
/** @private */
document(const document &) = delete; // Disallow copying
/**
* Take another document's buffers.
*
* @param other The document to take. Its capacity is zeroed.
*/
document &operator=(document &&other) noexcept = default;
/** @private */
document &operator=(const document &) = delete; // Disallow copying
/**
* Get the root element of this document as a JSON array.
*/
element root() const noexcept;
/**
* @private Dump the raw tape for debugging.
*
* @param os the stream to output to.
* @return false if the tape is likely wrong (e.g., you did not parse a valid JSON).
*/
bool dump_raw_tape(std::ostream &os) const noexcept;
/** @private Structural values. */
std::unique_ptr<uint64_t[]> tape{};
/** @private String values.
*
* Should be at least byte_capacity.
*/
std::unique_ptr<uint8_t[]> string_buf{};
private:
inline error_code allocate(size_t len) noexcept;
template<typename T>
friend class simdjson::minify;
friend class parser;
}; // class document
} // namespace dom
} // namespace simdjson
#endif // SIMDJSON_DOM_DOCUMENT_H

View File

@ -1,8 +1,12 @@
#ifndef SIMDJSON_DOCUMENT_STREAM_H
#define SIMDJSON_DOCUMENT_STREAM_H
#include "simdjson/common_defs.h"
#include "simdjson/dom/parser.h"
#include "simdjson/error.h"
#ifdef SIMDJSON_THREADS_ENABLED
#include <thread>
#include "simdjson/document.h"
#endif
namespace simdjson {
namespace dom {

View File

@ -0,0 +1,337 @@
#ifndef SIMDJSON_DOM_ELEMENT_H
#define SIMDJSON_DOM_ELEMENT_H
#include "simdjson/common_defs.h"
#include "simdjson/error.h"
#include "simdjson/internal/tape_ref.h"
#include "simdjson/minify.h"
#include <ostream>
namespace simdjson {
namespace dom {
class array;
class document;
class object;
/**
* The actual concrete type of a JSON element
* This is the type it is most easily cast to with get<>.
*/
enum class element_type {
ARRAY = '[', ///< dom::array
OBJECT = '{', ///< dom::object
INT64 = 'l', ///< int64_t
UINT64 = 'u', ///< uint64_t: any integer that fits in uint64_t but *not* int64_t
DOUBLE = 'd', ///< double: Any number with a "." or "e" that fits in double.
STRING = '"', ///< std::string_view
BOOL = 't', ///< bool
NULL_VALUE = 'n' ///< null
};
/**
* A JSON element.
*
* References an element in a JSON document, representing a JSON null, boolean, string, number,
* array or object.
*/
class element : protected internal::tape_ref {
public:
/** Create a new, invalid element. */
really_inline element() noexcept;
/** The type of this element. */
really_inline element_type type() const noexcept;
/** Whether this element is a json `null`. */
really_inline bool is_null() const noexcept;
/**
* Tell whether the value can be cast to provided type (T).
*
* Supported types:
* - Boolean: bool
* - Number: double, uint64_t, int64_t
* - String: std::string_view, const char *
* - Array: dom::array
* - Object: dom::object
*
* @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object
*/
template<typename T>
really_inline bool is() const noexcept;
/**
* Get the value as the provided type (T).
*
* Supported types:
* - Boolean: bool
* - Number: double, uint64_t, int64_t
* - String: std::string_view, const char *
* - Array: dom::array
* - Object: dom::object
*
* @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object
*
* @returns The value cast to the given type, or:
* INCORRECT_TYPE if the value cannot be cast to the given type.
*/
template<typename T>
really_inline simdjson_result<T> get() const noexcept;
#if SIMDJSON_EXCEPTIONS
/**
* Read this element as a boolean.
*
* @return The boolean value
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a boolean.
*/
inline operator bool() const noexcept(false);
/**
* Read this element as a null-terminated string.
*
* Does *not* convert other types to a string; requires that the JSON type of the element was
* an actual string.
*
* @return The string value.
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string.
*/
inline explicit operator const char*() const noexcept(false);
/**
* Read this element as a null-terminated string.
*
* Does *not* convert other types to a string; requires that the JSON type of the element was
* an actual string.
*
* @return The string value.
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string.
*/
inline operator std::string_view() const noexcept(false);
/**
* Read this element as an unsigned integer.
*
* @return The integer value.
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer
* @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative
*/
inline operator uint64_t() const noexcept(false);
/**
* Read this element as an signed integer.
*
* @return The integer value.
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer
* @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits
*/
inline operator int64_t() const noexcept(false);
/**
* Read this element as an double.
*
* @return The double value.
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a number
* @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative
*/
inline operator double() const noexcept(false);
/**
* Read this element as a JSON array.
*
* @return The JSON array.
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array
*/
inline operator array() const noexcept(false);
/**
* Read this element as a JSON object (key/value pairs).
*
* @return The JSON object.
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an object
*/
inline operator object() const noexcept(false);
/**
* Iterate over each element in this array.
*
* @return The beginning of the iteration.
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array
*/
inline dom::array::iterator begin() const noexcept(false);
/**
* Iterate over each element in this array.
*
* @return The end of the iteration.
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array
*/
inline dom::array::iterator end() const noexcept(false);
#endif // SIMDJSON_EXCEPTIONS
/**
* Get the value associated with the given key.
*
* The key will be matched against **unescaped** JSON:
*
* dom::parser parser;
* parser.parse(R"({ "a\n": 1 })")["a\n"].get<uint64_t>().value == 1
* parser.parse(R"({ "a\n": 1 })")["a\\n"].get<uint64_t>().error == NO_SUCH_FIELD
*
* @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object
* - INCORRECT_TYPE if this is not an object
*/
inline simdjson_result<element> operator[](const std::string_view &key) const noexcept;
/**
* Get the value associated with the given key.
*
* The key will be matched against **unescaped** JSON:
*
* dom::parser parser;
* parser.parse(R"({ "a\n": 1 })")["a\n"].get<uint64_t>().value == 1
* parser.parse(R"({ "a\n": 1 })")["a\\n"].get<uint64_t>().error == NO_SUCH_FIELD
*
* @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object
* - INCORRECT_TYPE if this is not an object
*/
inline simdjson_result<element> operator[](const char *key) const noexcept;
/**
* Get the value associated with the given JSON pointer.
*
* dom::parser parser;
* element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})");
* doc.at("/foo/a/1") == 20
* doc.at("/")["foo"]["a"].at(1) == 20
* doc.at("")["foo"]["a"].at(1) == 20
*
* @return The value associated with the given JSON pointer, or:
* - NO_SUCH_FIELD if a field does not exist in an object
* - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length
* - INCORRECT_TYPE if a non-integer is used to access an array
* - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed
*/
inline simdjson_result<element> at(const std::string_view &json_pointer) const noexcept;
/**
* Get the value at the given index.
*
* @return The value at the given index, or:
* - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length
*/
inline simdjson_result<element> at(size_t index) const noexcept;
/**
* Get the value associated with the given key.
*
* The key will be matched against **unescaped** JSON:
*
* dom::parser parser;
* parser.parse(R"({ "a\n": 1 })")["a\n"].get<uint64_t>().value == 1
* parser.parse(R"({ "a\n": 1 })")["a\\n"].get<uint64_t>().error == NO_SUCH_FIELD
*
* @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object
*/
inline simdjson_result<element> at_key(const std::string_view &key) const noexcept;
/**
* Get the value associated with the given key in a case-insensitive manner.
*
* Note: The key will be matched against **unescaped** JSON.
*
* @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object
*/
inline simdjson_result<element> at_key_case_insensitive(const std::string_view &key) const noexcept;
/** @private for debugging. Prints out the root element. */
inline bool dump_raw_tape(std::ostream &out) const noexcept;
private:
really_inline element(const document *doc, size_t json_index) noexcept;
friend class document;
friend class object;
friend class array;
friend struct simdjson_result<element>;
template<typename T>
friend class simdjson::minify;
};
/**
* Print JSON to an output stream.
*
* By default, the value will be printed minified.
*
* @param out The output stream.
* @param value The value to print.
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
*/
inline std::ostream& operator<<(std::ostream& out, const element &value);
/**
* Print element type to an output stream.
*
* @param out The output stream.
* @param value The value to print.
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
*/
inline std::ostream& operator<<(std::ostream& out, element_type type);
} // namespace dom
/** The result of a JSON navigation that may fail. */
template<>
struct simdjson_result<dom::element> : public internal::simdjson_result_base<dom::element> {
public:
really_inline simdjson_result() noexcept; ///< @private
really_inline simdjson_result(dom::element &&value) noexcept; ///< @private
really_inline simdjson_result(error_code error) noexcept; ///< @private
inline simdjson_result<dom::element_type> type() const noexcept;
inline simdjson_result<bool> is_null() const noexcept;
template<typename T>
inline simdjson_result<bool> is() const noexcept;
template<typename T>
inline simdjson_result<T> get() const noexcept;
inline simdjson_result<dom::element> operator[](const std::string_view &key) const noexcept;
inline simdjson_result<dom::element> operator[](const char *key) const noexcept;
inline simdjson_result<dom::element> at(const std::string_view &json_pointer) const noexcept;
inline simdjson_result<dom::element> at(size_t index) const noexcept;
inline simdjson_result<dom::element> at_key(const std::string_view &key) const noexcept;
inline simdjson_result<dom::element> at_key_case_insensitive(const std::string_view &key) const noexcept;
#if SIMDJSON_EXCEPTIONS
inline operator bool() const noexcept(false);
inline explicit operator const char*() const noexcept(false);
inline operator std::string_view() const noexcept(false);
inline operator uint64_t() const noexcept(false);
inline operator int64_t() const noexcept(false);
inline operator double() const noexcept(false);
inline operator dom::array() const noexcept(false);
inline operator dom::object() const noexcept(false);
inline dom::array::iterator begin() const noexcept(false);
inline dom::array::iterator end() const noexcept(false);
#endif // SIMDJSON_EXCEPTIONS
};
#if SIMDJSON_EXCEPTIONS
/**
* Print JSON to an output stream.
*
* By default, the value will be printed minified.
*
* @param out The output stream.
* @param value The value to print.
* @throw simdjson_error if the result being printed has an error. If there is an error with the
* underlying output stream, that error will be propagated (simdjson_error will not be
* thrown).
*/
inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::element> &value) noexcept(false);
#endif
} // namespace simdjson
#endif // SIMDJSON_DOM_DOCUMENT_H

View File

@ -1,10 +1,10 @@
// TODO Remove this -- deprecated API and files
#ifndef SIMDJSON_JSONPARSER_H
#define SIMDJSON_JSONPARSER_H
#ifndef SIMDJSON_DOM_JSONPARSER_H
#define SIMDJSON_DOM_JSONPARSER_H
#include "simdjson/document.h"
#include "simdjson/parsedjson.h"
#include "simdjson/dom/document.h"
#include "simdjson/dom/parsedjson.h"
#include "simdjson/jsonioutil.h"
namespace simdjson {
@ -114,4 +114,4 @@ dom::parser build_parsed_json(const char *buf) noexcept = delete;
} // namespace simdjson
#endif
#endif // SIMDJSON_DOM_JSONPARSER_H

View File

@ -0,0 +1,248 @@
#ifndef SIMDJSON_DOM_OBJECT_H
#define SIMDJSON_DOM_OBJECT_H
#include "simdjson/common_defs.h"
#include "simdjson/error.h"
#include "simdjson/internal/tape_ref.h"
#include "simdjson/minify.h"
#include <ostream>
namespace simdjson {
namespace dom {
class document;
class element;
class key_value_pair;
/**
* JSON object.
*/
class object : protected internal::tape_ref {
public:
/** Create a new, invalid object */
really_inline object() noexcept;
class iterator : protected internal::tape_ref {
public:
/**
* Get the actual key/value pair
*/
inline const key_value_pair operator*() const noexcept;
/**
* Get the next key/value pair.
*
* Part of the std::iterator interface.
*
*/
inline iterator& operator++() noexcept;
/**
* Check if these key value pairs come from the same place in the JSON.
*
* Part of the std::iterator interface.
*/
inline bool operator!=(const iterator& other) const noexcept;
/**
* Get the key of this key/value pair.
*/
inline std::string_view key() const noexcept;
/**
* Get the length (in bytes) of the key in this key/value pair.
* You should expect this function to be faster than key().size().
*/
inline uint32_t key_length() const noexcept;
/**
* Returns true if the key in this key/value pair is equal
* to the provided string_view.
*/
inline bool key_equals(const std::string_view & o) const noexcept;
/**
* Returns true if the key in this key/value pair is equal
* to the provided string_view in a case-insensitive manner.
* Case comparisons may only be handled correctly for ASCII strings.
*/
inline bool key_equals_case_insensitive(const std::string_view & o) const noexcept;
/**
* Get the key of this key/value pair.
*/
inline const char *key_c_str() const noexcept;
/**
* Get the value of this key/value pair.
*/
inline element value() const noexcept;
private:
really_inline iterator(const document *doc, size_t json_index) noexcept;
friend class object;
};
/**
* Return the first key/value pair.
*
* Part of the std::iterable interface.
*/
inline iterator begin() const noexcept;
/**
* One past the last key/value pair.
*
* Part of the std::iterable interface.
*/
inline iterator end() const noexcept;
/**
* Get the size of the object (number of keys).
* It is a saturated value with a maximum of 0xFFFFFF: if the value
* is 0xFFFFFF then the size is 0xFFFFFF or greater.
*/
inline size_t size() const noexcept;
/**
* Get the value associated with the given key.
*
* The key will be matched against **unescaped** JSON:
*
* dom::parser parser;
* parser.parse(R"({ "a\n": 1 })")["a\n"].get<uint64_t>().value == 1
* parser.parse(R"({ "a\n": 1 })")["a\\n"].get<uint64_t>().error == NO_SUCH_FIELD
*
* @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object
* - INCORRECT_TYPE if this is not an object
*/
inline simdjson_result<element> operator[](const std::string_view &key) const noexcept;
/**
* Get the value associated with the given key.
*
* The key will be matched against **unescaped** JSON:
*
* dom::parser parser;
* parser.parse(R"({ "a\n": 1 })")["a\n"].get<uint64_t>().value == 1
* parser.parse(R"({ "a\n": 1 })")["a\\n"].get<uint64_t>().error == NO_SUCH_FIELD
*
* @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object
* - INCORRECT_TYPE if this is not an object
*/
inline simdjson_result<element> operator[](const char *key) const noexcept;
/**
* Get the value associated with the given JSON pointer.
*
* dom::parser parser;
* object obj = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})");
* obj.at("foo/a/1") == 20
* obj.at("foo")["a"].at(1) == 20
*
* @return The value associated with the given JSON pointer, or:
* - NO_SUCH_FIELD if a field does not exist in an object
* - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length
* - INCORRECT_TYPE if a non-integer is used to access an array
* - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed
*/
inline simdjson_result<element> at(const std::string_view &json_pointer) const noexcept;
/**
* Get the value associated with the given key.
*
* The key will be matched against **unescaped** JSON:
*
* dom::parser parser;
* parser.parse(R"({ "a\n": 1 })")["a\n"].get<uint64_t>().value == 1
* parser.parse(R"({ "a\n": 1 })")["a\\n"].get<uint64_t>().error == NO_SUCH_FIELD
*
* @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object
*/
inline simdjson_result<element> at_key(const std::string_view &key) const noexcept;
/**
* Get the value associated with the given key in a case-insensitive manner.
* It is only guaranteed to work over ASCII inputs.
*
* Note: The key will be matched against **unescaped** JSON.
*
* @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object
*/
inline simdjson_result<element> at_key_case_insensitive(const std::string_view &key) const noexcept;
private:
really_inline object(const document *doc, size_t json_index) noexcept;
friend class element;
friend struct simdjson_result<element>;
template<typename T>
friend class simdjson::minify;
};
/**
* Key/value pair in an object.
*/
class key_value_pair {
public:
std::string_view key;
element value;
private:
really_inline key_value_pair(const std::string_view &_key, element _value) noexcept;
friend class object;
};
/**
* Print JSON to an output stream.
*
* By default, the value will be printed minified.
*
* @param out The output stream.
* @param value The value to print.
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
*/
inline std::ostream& operator<<(std::ostream& out, const object &value);
/**
* Print JSON to an output stream.
*
* By default, the value will be printed minified.
*
* @param out The output stream.
* @param value The value to print.
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
*/
inline std::ostream& operator<<(std::ostream& out, const key_value_pair &value);
} // namespace dom
/** The result of a JSON conversion that may fail. */
template<>
struct simdjson_result<dom::object> : public internal::simdjson_result_base<dom::object> {
public:
really_inline simdjson_result() noexcept; ///< @private
really_inline simdjson_result(dom::object value) noexcept; ///< @private
really_inline simdjson_result(error_code error) noexcept; ///< @private
inline simdjson_result<dom::element> operator[](const std::string_view &key) const noexcept;
inline simdjson_result<dom::element> operator[](const char *key) const noexcept;
inline simdjson_result<dom::element> at(const std::string_view &json_pointer) const noexcept;
inline simdjson_result<dom::element> at_key(const std::string_view &key) const noexcept;
inline simdjson_result<dom::element> at_key_case_insensitive(const std::string_view &key) const noexcept;
#if SIMDJSON_EXCEPTIONS
inline dom::object::iterator begin() const noexcept(false);
inline dom::object::iterator end() const noexcept(false);
inline size_t size() const noexcept(false);
#endif // SIMDJSON_EXCEPTIONS
};
#if SIMDJSON_EXCEPTIONS
/**
* Print JSON to an output stream.
*
* By default, the value will be printed minified.
*
* @param out The output stream.
* @param value The value to print.
* @throw simdjson_error if the result being printed has an error. If there is an error with the
* underlying output stream, that error will be propagated (simdjson_error will not be
* thrown).
*/
inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::object> &value) noexcept(false);
#endif // SIMDJSON_EXCEPTIONS
} // namespace simdjson
#endif // SIMDJSON_DOM_OBJECT_H

View File

@ -1,9 +1,9 @@
// TODO Remove this -- deprecated API and files
#ifndef SIMDJSON_PARSEDJSON_H
#define SIMDJSON_PARSEDJSON_H
#ifndef SIMDJSON_DOM_PARSEDJSON_H
#define SIMDJSON_DOM_PARSEDJSON_H
#include "simdjson/document.h"
#include "simdjson/dom/document.h"
namespace simdjson {
@ -13,4 +13,5 @@ namespace simdjson {
using ParsedJson [[deprecated("Use dom::parser instead")]] = dom::parser;
} // namespace simdjson
#endif
#endif // SIMDJSON_DOM_PARSEDJSON_H

View File

@ -1,7 +1,7 @@
// TODO Remove this -- deprecated API and files
#ifndef SIMDJSON_PARSEDJSON_ITERATOR_H
#define SIMDJSON_PARSEDJSON_ITERATOR_H
#ifndef SIMDJSON_DOM_PARSEDJSON_ITERATOR_H
#define SIMDJSON_DOM_PARSEDJSON_ITERATOR_H
#include <cstring>
#include <string>
@ -10,8 +10,8 @@
#include <limits>
#include <stdexcept>
#include "simdjson/document.h"
#include "simdjson/parsedjson.h"
#include "simdjson/dom/document.h"
#include "simdjson/dom/parsedjson.h"
#include "simdjson/internal/jsonformatutils.h"
namespace simdjson {
@ -266,4 +266,4 @@ public:
} // namespace simdjson
#endif
#endif // SIMDJSON_DOM_PARSEDJSON_ITERATOR_H

View File

@ -0,0 +1,454 @@
#ifndef SIMDJSON_DOM_PARSER_H
#define SIMDJSON_DOM_PARSER_H
#include "simdjson/common_defs.h"
#include "simdjson/dom/document.h"
#include "simdjson/error.h"
#include "simdjson/internal/tape_ref.h"
#include "simdjson/minify.h"
#include "simdjson/padded_string.h"
#include "simdjson/portability.h"
#include <memory>
#include <ostream>
#include <string>
namespace simdjson {
namespace internal {
// expectation: sizeof(scope_descriptor) = 64/8.
struct scope_descriptor {
uint32_t tape_index; // where, on the tape, does the scope ([,{) begins
uint32_t count; // how many elements in the scope
}; // struct scope_descriptor
#ifdef SIMDJSON_USE_COMPUTED_GOTO
typedef void* ret_address;
#else
typedef char ret_address;
#endif
} // namespace internal
namespace dom {
class document_stream;
class element;
/** The default batch size for parser.parse_many() and parser.load_many() */
static constexpr size_t DEFAULT_BATCH_SIZE = 1000000;
/**
* A persistent document parser.
*
* The parser is designed to be reused, holding the internal buffers necessary to do parsing,
* as well as memory for a single document. The parsed document is overwritten on each parse.
*
* This class cannot be copied, only moved, to avoid unintended allocations.
*
* @note This is not thread safe: one parser cannot produce two documents at the same time!
*/
class parser {
public:
/**
* Create a JSON parser.
*
* The new parser will have zero capacity.
*
* @param max_capacity The maximum document length the parser can automatically handle. The parser
* will allocate more capacity on an as needed basis (when it sees documents too big to handle)
* up to this amount. The parser still starts with zero capacity no matter what this number is:
* to allocate an initial capacity, call allocate() after constructing the parser.
* Defaults to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process).
*/
really_inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept;
/**
* Take another parser's buffers and state.
*
* @param other The parser to take. Its capacity is zeroed.
*/
parser(parser &&other) = default;
parser(const parser &) = delete; ///< @private Disallow copying
/**
* Take another parser's buffers and state.
*
* @param other The parser to take. Its capacity is zeroed.
*/
parser &operator=(parser &&other) = default;
parser &operator=(const parser &) = delete; ///< @private Disallow copying
/** Deallocate the JSON parser. */
~parser()=default;
/**
* Load a JSON document from a file and return a reference to it.
*
* dom::parser parser;
* const element doc = parser.load("jsonexamples/twitter.json");
*
* ### IMPORTANT: Document Lifetime
*
* The JSON document still lives in the parser: this is the most efficient way to parse JSON
* documents because it reuses the same buffers, but you *must* use the document before you
* destroy the parser or call parse() again.
*
* ### Parser Capacity
*
* If the parser's current capacity is less than the file length, it will allocate enough capacity
* to handle it (up to max_capacity).
*
* @param path The path to load.
* @return The document, or an error:
* - IO_ERROR if there was an error opening or reading the file.
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails.
* - CAPACITY if the parser does not have enough capacity and len > max_capacity.
* - other json errors if parsing fails.
*/
inline simdjson_result<element> load(const std::string &path) & noexcept;
inline simdjson_result<element> load(const std::string &path) && = delete ;
/**
* Parse a JSON document and return a temporary reference to it.
*
* dom::parser parser;
* element doc = parser.parse(buf, len);
*
* ### IMPORTANT: Document Lifetime
*
* The JSON document still lives in the parser: this is the most efficient way to parse JSON
* documents because it reuses the same buffers, but you *must* use the document before you
* destroy the parser or call parse() again.
*
* ### REQUIRED: Buffer Padding
*
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
* those bytes are initialized to, as long as they are allocated.
*
* If realloc_if_needed is true, it is assumed that the buffer does *not* have enough padding,
* and it is copied into an enlarged temporary buffer before parsing.
*
* ### Parser Capacity
*
* If the parser's current capacity is less than len, it will allocate enough capacity
* to handle it (up to max_capacity).
*
* @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless
* realloc_if_needed is true.
* @param len The length of the JSON.
* @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding.
* @return The document, or an error:
* - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity,
* and memory allocation fails.
* - CAPACITY if the parser does not have enough capacity and len > max_capacity.
* - other json errors if parsing fails.
*/
inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept;
inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete;
/** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
really_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) & noexcept;
really_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) && =delete;
/** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
really_inline simdjson_result<element> parse(const std::string &s) & noexcept;
really_inline simdjson_result<element> parse(const std::string &s) && =delete;
/** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
really_inline simdjson_result<element> parse(const padded_string &s) & noexcept;
really_inline simdjson_result<element> parse(const padded_string &s) && =delete;
/** @private We do not want to allow implicit conversion from C string to std::string. */
really_inline simdjson_result<element> parse(const char *buf) noexcept = delete;
/**
* Load a file containing many JSON documents.
*
* dom::parser parser;
* for (const element doc : parser.load_many(path)) {
* cout << std::string(doc["title"]) << endl;
* }
*
* ### Format
*
* The file must contain a series of one or more JSON documents, concatenated into a single
* buffer, separated by whitespace. It effectively parses until it has a fully valid document,
* then starts parsing the next document at that point. (It does this with more parallelism and
* lookahead than you might think, though.)
*
* documents that consist of an object or array may omit the whitespace between them, concatenating
* with no separator. documents that consist of a single primitive (i.e. documents that are not
* arrays or objects) MUST be separated with whitespace.
*
* ### Error Handling
*
* All errors are returned during iteration: if there is a global error such as memory allocation,
* it will be yielded as the first result. Iteration always stops after the first error.
*
* As with all other simdjson methods, non-exception error handling is readily available through
* the same interface, requiring you to check the error before using the document:
*
* dom::parser parser;
* for (auto [doc, error] : parser.load_many(path)) {
* if (error) { cerr << error << endl; exit(1); }
* cout << std::string(doc["title"]) << endl;
* }
*
* ### Threads
*
* When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
* hood to do some lookahead.
*
* ### Parser Capacity
*
* If the parser's current capacity is less than batch_size, it will allocate enough capacity
* to handle it (up to max_capacity).
*
* @param s The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes.
* @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
* spot is cache-related: small enough to fit in cache, yet big enough to
* parse as many documents as possible in one tight loop.
* Defaults to 10MB, which has been a reasonable sweet spot in our tests.
* @return The stream. If there is an error, it will be returned during iteration. An empty input
* will yield 0 documents rather than an EMPTY error. Errors:
* - IO_ERROR if there was an error opening or reading the file.
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails.
* - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
* - other json errors if parsing fails.
*/
inline document_stream load_many(const std::string &path, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
/**
* Parse a buffer containing many JSON documents.
*
* dom::parser parser;
* for (const element doc : parser.parse_many(buf, len)) {
* cout << std::string(doc["title"]) << endl;
* }
*
* ### Format
*
* The buffer must contain a series of one or more JSON documents, concatenated into a single
* buffer, separated by whitespace. It effectively parses until it has a fully valid document,
* then starts parsing the next document at that point. (It does this with more parallelism and
* lookahead than you might think, though.)
*
* documents that consist of an object or array may omit the whitespace between them, concatenating
* with no separator. documents that consist of a single primitive (i.e. documents that are not
* arrays or objects) MUST be separated with whitespace.
*
* ### Error Handling
*
* All errors are returned during iteration: if there is a global error such as memory allocation,
* it will be yielded as the first result. Iteration always stops after the first error.
*
* As with all other simdjson methods, non-exception error handling is readily available through
* the same interface, requiring you to check the error before using the document:
*
* dom::parser parser;
* for (auto [doc, error] : parser.parse_many(buf, len)) {
* if (error) { cerr << error << endl; exit(1); }
* cout << std::string(doc["title"]) << endl;
* }
*
* ### REQUIRED: Buffer Padding
*
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
* those bytes are initialized to, as long as they are allocated.
*
* ### Threads
*
* When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
* hood to do some lookahead.
*
* ### Parser Capacity
*
* If the parser's current capacity is less than batch_size, it will allocate enough capacity
* to handle it (up to max_capacity).
*
* @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes.
* @param len The length of the concatenated JSON.
* @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
* spot is cache-related: small enough to fit in cache, yet big enough to
* parse as many documents as possible in one tight loop.
* Defaults to 10MB, which has been a reasonable sweet spot in our tests.
* @return The stream. If there is an error, it will be returned during iteration. An empty input
* will yield 0 documents rather than an EMPTY error. Errors:
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails
* - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
* - other json errors if parsing fails.
*/
inline document_stream parse_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
/** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */
inline document_stream parse_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
/** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */
inline document_stream parse_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
/** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */
inline document_stream parse_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
/** @private We do not want to allow implicit conversion from C string to std::string. */
really_inline simdjson_result<element> parse_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete;
/**
* Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
* and `max_depth` depth.
*
* @param capacity The new capacity.
* @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH.
* @return The error, if there is one.
*/
WARN_UNUSED inline error_code allocate(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept;
/**
* @private deprecated because it returns bool instead of error_code, which is our standard for
* failures. Use allocate() instead.
*
* Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
* and `max_depth` depth.
*
* @param capacity The new capacity.
* @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH.
* @return true if successful, false if allocation failed.
*/
[[deprecated("Use allocate() instead.")]]
WARN_UNUSED inline bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept;
/**
* The largest document this parser can support without reallocating.
*
* @return Current capacity, in bytes.
*/
really_inline size_t capacity() const noexcept;
/**
* The largest document this parser can automatically support.
*
* The parser may reallocate internal buffers as needed up to this amount.
*
* @return Maximum capacity, in bytes.
*/
really_inline size_t max_capacity() const noexcept;
/**
* The maximum level of nested object and arrays supported by this parser.
*
* @return Maximum depth, in bytes.
*/
really_inline size_t max_depth() const noexcept;
/**
* Set max_capacity. This is the largest document this parser can automatically support.
*
* The parser may reallocate internal buffers as needed up to this amount.
*
* This call will not allocate or deallocate, even if capacity is currently above max_capacity.
*
* @param max_capacity The new maximum capacity, in bytes.
*/
really_inline void set_max_capacity(size_t max_capacity) noexcept;
/** @private Use the new DOM API instead */
class Iterator;
/** @private Use simdjson_error instead */
using InvalidJSON [[deprecated("Use simdjson_error instead")]] = simdjson_error;
/** @private Next location to write to in the tape */
uint32_t current_loc{0};
/** @private Number of structural indices passed from stage 1 to stage 2 */
uint32_t n_structural_indexes{0};
/** @private Structural indices passed from stage 1 to stage 2 */
std::unique_ptr<uint32_t[]> structural_indexes{};
/** @private Tape location of each open { or [ */
std::unique_ptr<internal::scope_descriptor[]> containing_scope{};
/** @private Return address of each open { or [ */
std::unique_ptr<internal::ret_address[]> ret_address{};
/** @private Use `if (parser.parse(...).error())` instead */
bool valid{false};
/** @private Use `parser.parse(...).error()` instead */
error_code error{UNINITIALIZED};
/** @private Use `parser.parse(...).value()` instead */
document doc{};
/** @private returns true if the document parsed was valid */
[[deprecated("Use the result of parser.parse() instead")]]
inline bool is_valid() const noexcept;
/**
* @private return an error code corresponding to the last parsing attempt, see
* simdjson.h will return UNITIALIZED if no parsing was attempted
*/
[[deprecated("Use the result of parser.parse() instead")]]
inline int get_error_code() const noexcept;
/** @private return the string equivalent of "get_error_code" */
[[deprecated("Use error_message() on the result of parser.parse() instead, or cout << error")]]
inline std::string get_error_message() const noexcept;
/** @private */
[[deprecated("Use cout << on the result of parser.parse() instead")]]
inline bool print_json(std::ostream &os) const noexcept;
/** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()` instead */
inline bool dump_raw_tape(std::ostream &os) const noexcept;
private:
/**
* The maximum document length this parser will automatically support.
*
* The parser will not be automatically allocated above this amount.
*/
size_t _max_capacity;
/**
* The maximum document length this parser supports.
*
* Buffers are large enough to handle any document up to this length.
*/
size_t _capacity{0};
/**
* The maximum depth (number of nested objects and arrays) supported by this parser.
*
* Defaults to DEFAULT_MAX_DEPTH.
*/
size_t _max_depth{0};
/**
* The loaded buffer (reused each time load() is called)
*/
std::unique_ptr<char[], decltype(&aligned_free_char)> loaded_bytes;
/** Capacity of loaded_bytes buffer. */
size_t _loaded_bytes_capacity{0};
// all nodes are stored on the doc.tape using a 64-bit word.
//
// strings, double and ints are stored as
// a 64-bit word with a pointer to the actual value
//
//
//
// for objects or arrays, store [ or { at the beginning and } and ] at the
// end. For the openings ([ or {), we annotate them with a reference to the
// location on the doc.tape of the end, and for then closings (} and ]), we
// annotate them with a reference to the location of the opening
//
//
/**
* Ensure we have enough capacity to handle at least desired_capacity bytes,
* and auto-allocate if not.
*/
inline error_code ensure_capacity(size_t desired_capacity) noexcept;
/** Read the file into loaded_bytes */
inline simdjson_result<size_t> read_file(const std::string &path) noexcept;
friend class parser::Iterator;
friend class document_stream;
}; // class parser
} // namespace dom
} // namespace simdjson
#endif // SIMDJSON_DOM_PARSER_H

View File

@ -3,7 +3,6 @@
#include "simdjson/common_defs.h"
#include <string>
#include <utility>
namespace simdjson {

View File

@ -1,14 +1,18 @@
#ifndef SIMDJSON_IMPLEMENTATION_H
#define SIMDJSON_IMPLEMENTATION_H
#include "simdjson/common_defs.h"
#include <optional>
#include <string>
#include <atomic>
#include <vector>
#include "simdjson/document.h"
namespace simdjson {
namespace dom {
class parser;
}
/**
* An implementation of simdjson for a particular CPU architecture.
*

View File

@ -0,0 +1,153 @@
#ifndef SIMDJSON_INLINE_ARRAY_H
#define SIMDJSON_INLINE_ARRAY_H
// Inline implementations go in here.
#include "simdjson/dom/array.h"
#include "simdjson/dom/element.h"
#include <utility>
namespace simdjson {
//
// simdjson_result<dom::array> inline implementation
//
really_inline simdjson_result<dom::array>::simdjson_result() noexcept
: internal::simdjson_result_base<dom::array>() {}
really_inline simdjson_result<dom::array>::simdjson_result(dom::array value) noexcept
: internal::simdjson_result_base<dom::array>(std::forward<dom::array>(value)) {}
really_inline simdjson_result<dom::array>::simdjson_result(error_code error) noexcept
: internal::simdjson_result_base<dom::array>(error) {}
#if SIMDJSON_EXCEPTIONS
inline dom::array::iterator simdjson_result<dom::array>::begin() const noexcept(false) {
if (error()) { throw simdjson_error(error()); }
return first.begin();
}
inline dom::array::iterator simdjson_result<dom::array>::end() const noexcept(false) {
if (error()) { throw simdjson_error(error()); }
return first.end();
}
inline size_t simdjson_result<dom::array>::size() const noexcept(false) {
if (error()) { throw simdjson_error(error()); }
return first.size();
}
#endif // SIMDJSON_EXCEPTIONS
inline simdjson_result<dom::element> simdjson_result<dom::array>::at(const std::string_view &json_pointer) const noexcept {
if (error()) { return error(); }
return first.at(json_pointer);
}
inline simdjson_result<dom::element> simdjson_result<dom::array>::at(size_t index) const noexcept {
if (error()) { return error(); }
return first.at(index);
}
namespace dom {
//
// array inline implementation
//
really_inline array::array() noexcept : internal::tape_ref() {}
really_inline array::array(const document *_doc, size_t _json_index) noexcept : internal::tape_ref(_doc, _json_index) {}
inline array::iterator array::begin() const noexcept {
return iterator(doc, json_index + 1);
}
inline array::iterator array::end() const noexcept {
return iterator(doc, after_element() - 1);
}
inline size_t array::size() const noexcept {
return scope_count();
}
inline simdjson_result<element> array::at(const std::string_view &json_pointer) const noexcept {
// - means "the append position" or "the element after the end of the array"
// We don't support this, because we're returning a real element, not a position.
if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; }
// Read the array index
size_t array_index = 0;
size_t i;
for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) {
uint8_t digit = uint8_t(json_pointer[i] - '0');
// Check for non-digit in array index. If it's there, we're trying to get a field in an object
if (digit > 9) { return INCORRECT_TYPE; }
array_index = array_index*10 + digit;
}
// 0 followed by other digits is invalid
if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0"
// Empty string is invalid; so is a "/" with no digits before it
if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index"
// Get the child
auto child = array(doc, json_index).at(array_index);
// If there is a /, we're not done yet, call recursively.
if (i < json_pointer.length()) {
child = child.at(json_pointer.substr(i+1));
}
return child;
}
inline simdjson_result<element> array::at(size_t index) const noexcept {
size_t i=0;
for (auto element : *this) {
if (i == index) { return element; }
i++;
}
return INDEX_OUT_OF_BOUNDS;
}
//
// array::iterator inline implementation
//
really_inline array::iterator::iterator(const document *_doc, size_t _json_index) noexcept : internal::tape_ref(_doc, _json_index) { }
inline element array::iterator::operator*() const noexcept {
return element(doc, json_index);
}
inline bool array::iterator::operator!=(const array::iterator& other) const noexcept {
return json_index != other.json_index;
}
inline array::iterator& array::iterator::operator++() noexcept {
json_index = after_element();
return *this;
}
inline std::ostream& operator<<(std::ostream& out, const array &value) {
return out << minify<array>(value);
}
} // namespace dom
template<>
inline std::ostream& minify<dom::array>::print(std::ostream& out) {
out << '[';
auto iter = value.begin();
auto end = value.end();
if (iter != end) {
out << minify<dom::element>(*iter);
for (++iter; iter != end; ++iter) {
out << "," << minify<dom::element>(*iter);
}
}
return out << ']';
}
#if SIMDJSON_EXCEPTIONS
template<>
inline std::ostream& minify<simdjson_result<dom::array>>::print(std::ostream& out) {
if (value.error()) { throw simdjson_error(value.error()); }
return out << minify<dom::array>(value.first);
}
inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::array> &value) noexcept(false) {
return out << minify<simdjson_result<dom::array>>(value);
}
#endif
} // namespace simdjson
#endif // SIMDJSON_INLINE_ARRAY_H

File diff suppressed because it is too large Load Diff

View File

@ -1,11 +1,10 @@
#ifndef SIMDJSON_INLINE_DOCUMENT_STREAM_H
#define SIMDJSON_INLINE_DOCUMENT_STREAM_H
#include "simdjson/document_stream.h"
#include "simdjson/dom/document_stream.h"
#include <algorithm>
#include <limits>
#include <stdexcept>
#include <thread>
namespace simdjson {
namespace internal {

View File

@ -0,0 +1,441 @@
#ifndef SIMDJSON_INLINE_ELEMENT_H
#define SIMDJSON_INLINE_ELEMENT_H
#include "simdjson/dom/array.h"
#include "simdjson/dom/element.h"
#include "simdjson/dom/object.h"
#include <cstring>
#include <utility>
namespace simdjson {
//
// simdjson_result<dom::element> inline implementation
//
really_inline simdjson_result<dom::element>::simdjson_result() noexcept
: internal::simdjson_result_base<dom::element>() {}
really_inline simdjson_result<dom::element>::simdjson_result(dom::element &&value) noexcept
: internal::simdjson_result_base<dom::element>(std::forward<dom::element>(value)) {}
really_inline simdjson_result<dom::element>::simdjson_result(error_code error) noexcept
: internal::simdjson_result_base<dom::element>(error) {}
inline simdjson_result<dom::element_type> simdjson_result<dom::element>::type() const noexcept {
if (error()) { return error(); }
return first.type();
}
inline simdjson_result<bool> simdjson_result<dom::element>::is_null() const noexcept {
if (error()) { return error(); }
return first.is_null();
}
template<typename T>
inline simdjson_result<bool> simdjson_result<dom::element>::is() const noexcept {
if (error()) { return error(); }
return first.is<T>();
}
template<typename T>
inline simdjson_result<T> simdjson_result<dom::element>::get() const noexcept {
if (error()) { return error(); }
return first.get<T>();
}
inline simdjson_result<dom::element> simdjson_result<dom::element>::operator[](const std::string_view &key) const noexcept {
if (error()) { return error(); }
return first[key];
}
inline simdjson_result<dom::element> simdjson_result<dom::element>::operator[](const char *key) const noexcept {
if (error()) { return error(); }
return first[key];
}
inline simdjson_result<dom::element> simdjson_result<dom::element>::at(const std::string_view &json_pointer) const noexcept {
if (error()) { return error(); }
return first.at(json_pointer);
}
inline simdjson_result<dom::element> simdjson_result<dom::element>::at(size_t index) const noexcept {
if (error()) { return error(); }
return first.at(index);
}
inline simdjson_result<dom::element> simdjson_result<dom::element>::at_key(const std::string_view &key) const noexcept {
if (error()) { return error(); }
return first.at_key(key);
}
inline simdjson_result<dom::element> simdjson_result<dom::element>::at_key_case_insensitive(const std::string_view &key) const noexcept {
if (error()) { return error(); }
return first.at_key_case_insensitive(key);
}
#if SIMDJSON_EXCEPTIONS
inline simdjson_result<dom::element>::operator bool() const noexcept(false) {
return get<bool>();
}
inline simdjson_result<dom::element>::operator const char *() const noexcept(false) {
return get<const char *>();
}
inline simdjson_result<dom::element>::operator std::string_view() const noexcept(false) {
return get<std::string_view>();
}
inline simdjson_result<dom::element>::operator uint64_t() const noexcept(false) {
return get<uint64_t>();
}
inline simdjson_result<dom::element>::operator int64_t() const noexcept(false) {
return get<int64_t>();
}
inline simdjson_result<dom::element>::operator double() const noexcept(false) {
return get<double>();
}
inline simdjson_result<dom::element>::operator dom::array() const noexcept(false) {
return get<dom::array>();
}
inline simdjson_result<dom::element>::operator dom::object() const noexcept(false) {
return get<dom::object>();
}
inline dom::array::iterator simdjson_result<dom::element>::begin() const noexcept(false) {
if (error()) { throw simdjson_error(error()); }
return first.begin();
}
inline dom::array::iterator simdjson_result<dom::element>::end() const noexcept(false) {
if (error()) { throw simdjson_error(error()); }
return first.end();
}
#endif
namespace dom {
//
// element inline implementation
//
really_inline element::element() noexcept : internal::tape_ref() {}
really_inline element::element(const document *_doc, size_t _json_index) noexcept : internal::tape_ref(_doc, _json_index) { }
inline element_type element::type() const noexcept {
auto tape_type = tape_ref_type();
return tape_type == internal::tape_type::FALSE_VALUE ? element_type::BOOL : static_cast<element_type>(tape_type);
}
really_inline bool element::is_null() const noexcept {
return is_null_on_tape();
}
template<>
inline simdjson_result<bool> element::get<bool>() const noexcept {
if(is_true()) {
return true;
} else if(is_false()) {
return false;
}
return INCORRECT_TYPE;
}
template<>
inline simdjson_result<const char *> element::get<const char *>() const noexcept {
switch (tape_ref_type()) {
case internal::tape_type::STRING: {
return get_c_str();
}
default:
return INCORRECT_TYPE;
}
}
template<>
inline simdjson_result<std::string_view> element::get<std::string_view>() const noexcept {
switch (tape_ref_type()) {
case internal::tape_type::STRING:
return get_string_view();
default:
return INCORRECT_TYPE;
}
}
template<>
inline simdjson_result<uint64_t> element::get<uint64_t>() const noexcept {
if(unlikely(!is_uint64())) { // branch rarely taken
if(is_int64()) {
int64_t result = next_tape_value<int64_t>();
if (result < 0) {
return NUMBER_OUT_OF_RANGE;
}
return uint64_t(result);
}
return INCORRECT_TYPE;
}
return next_tape_value<int64_t>();
}
template<>
inline simdjson_result<int64_t> element::get<int64_t>() const noexcept {
if(unlikely(!is_int64())) { // branch rarely taken
if(is_uint64()) {
uint64_t result = next_tape_value<uint64_t>();
// Wrapping max in parens to handle Windows issue: https://stackoverflow.com/questions/11544073/how-do-i-deal-with-the-max-macro-in-windows-h-colliding-with-max-in-std
if (result > uint64_t((std::numeric_limits<int64_t>::max)())) {
return NUMBER_OUT_OF_RANGE;
}
return static_cast<int64_t>(result);
}
return INCORRECT_TYPE;
}
return next_tape_value<int64_t>();
}
template<>
inline simdjson_result<double> element::get<double>() const noexcept {
// Performance considerations:
// 1. Querying tape_ref_type() implies doing a shift, it is fast to just do a straight
// comparison.
// 2. Using a switch-case relies on the compiler guessing what kind of code generation
// we want... But the compiler cannot know that we expect the type to be "double"
// most of the time.
// We can expect get<double> to refer to a double type almost all the time.
// It is important to craft the code accordingly so that the compiler can use this
// information. (This could also be solved with profile-guided optimization.)
if(unlikely(!is_double())) { // branch rarely taken
if(is_uint64()) {
return double(next_tape_value<uint64_t>());
} else if(is_int64()) {
return double(next_tape_value<int64_t>());
}
return INCORRECT_TYPE;
}
// this is common:
return next_tape_value<double>();
}
template<>
inline simdjson_result<array> element::get<array>() const noexcept {
switch (tape_ref_type()) {
case internal::tape_type::START_ARRAY:
return array(doc, json_index);
default:
return INCORRECT_TYPE;
}
}
template<>
inline simdjson_result<object> element::get<object>() const noexcept {
switch (tape_ref_type()) {
case internal::tape_type::START_OBJECT:
return object(doc, json_index);
default:
return INCORRECT_TYPE;
}
}
template<typename T>
really_inline bool element::is() const noexcept {
auto result = get<T>();
return !result.error();
}
#if SIMDJSON_EXCEPTIONS
inline element::operator bool() const noexcept(false) { return get<bool>(); }
inline element::operator const char*() const noexcept(false) { return get<const char *>(); }
inline element::operator std::string_view() const noexcept(false) { return get<std::string_view>(); }
inline element::operator uint64_t() const noexcept(false) { return get<uint64_t>(); }
inline element::operator int64_t() const noexcept(false) { return get<int64_t>(); }
inline element::operator double() const noexcept(false) { return get<double>(); }
inline element::operator array() const noexcept(false) { return get<array>(); }
inline element::operator object() const noexcept(false) { return get<object>(); }
inline array::iterator element::begin() const noexcept(false) {
return get<array>().begin();
}
inline array::iterator element::end() const noexcept(false) {
return get<array>().end();
}
#endif // SIMDJSON_EXCEPTIONS
inline simdjson_result<element> element::operator[](const std::string_view &key) const noexcept {
return at_key(key);
}
inline simdjson_result<element> element::operator[](const char *key) const noexcept {
return at_key(key);
}
inline simdjson_result<element> element::at(const std::string_view &json_pointer) const noexcept {
switch (tape_ref_type()) {
case internal::tape_type::START_OBJECT:
return object(doc, json_index).at(json_pointer);
case internal::tape_type::START_ARRAY:
return array(doc, json_index).at(json_pointer);
default:
return INCORRECT_TYPE;
}
}
inline simdjson_result<element> element::at(size_t index) const noexcept {
return get<array>().at(index);
}
inline simdjson_result<element> element::at_key(const std::string_view &key) const noexcept {
return get<object>().at_key(key);
}
inline simdjson_result<element> element::at_key_case_insensitive(const std::string_view &key) const noexcept {
return get<object>().at_key_case_insensitive(key);
}
inline bool element::dump_raw_tape(std::ostream &out) const noexcept {
return doc->dump_raw_tape(out);
}
inline std::ostream& operator<<(std::ostream& out, const element &value) {
return out << minify<element>(value);
}
inline std::ostream& operator<<(std::ostream& out, element_type type) {
switch (type) {
case element_type::ARRAY:
return out << "array";
case element_type::OBJECT:
return out << "object";
case element_type::INT64:
return out << "int64_t";
case element_type::UINT64:
return out << "uint64_t";
case element_type::DOUBLE:
return out << "double";
case element_type::STRING:
return out << "string";
case element_type::BOOL:
return out << "bool";
case element_type::NULL_VALUE:
return out << "null";
default:
abort();
}
}
} // namespace dom
template<>
inline std::ostream& minify<dom::element>::print(std::ostream& out) {
using tape_type=internal::tape_type;
size_t depth = 0;
constexpr size_t MAX_DEPTH = 16;
bool is_object[MAX_DEPTH];
is_object[0] = false;
bool after_value = false;
internal::tape_ref iter(value);
do {
// print commas after each value
if (after_value) {
out << ",";
}
// If we are in an object, print the next key and :, and skip to the next value.
if (is_object[depth]) {
out << '"' << internal::escape_json_string(iter.get_string_view()) << "\":";
iter.json_index++;
}
switch (iter.tape_ref_type()) {
// Arrays
case tape_type::START_ARRAY: {
// If we're too deep, we need to recurse to go deeper.
depth++;
if (unlikely(depth >= MAX_DEPTH)) {
out << minify<dom::array>(dom::array(iter.doc, iter.json_index));
iter.json_index = iter.matching_brace_index() - 1; // Jump to the ]
depth--;
break;
}
// Output start [
out << '[';
iter.json_index++;
// Handle empty [] (we don't want to come back around and print commas)
if (iter.tape_ref_type() == tape_type::END_ARRAY) {
out << ']';
depth--;
break;
}
is_object[depth] = false;
after_value = false;
continue;
}
// Objects
case tape_type::START_OBJECT: {
// If we're too deep, we need to recurse to go deeper.
depth++;
if (unlikely(depth >= MAX_DEPTH)) {
out << minify<dom::object>(dom::object(iter.doc, iter.json_index));
iter.json_index = iter.matching_brace_index() - 1; // Jump to the }
depth--;
break;
}
// Output start {
out << '{';
iter.json_index++;
// Handle empty {} (we don't want to come back around and print commas)
if (iter.tape_ref_type() == tape_type::END_OBJECT) {
out << '}';
depth--;
break;
}
is_object[depth] = true;
after_value = false;
continue;
}
// Scalars
case tape_type::STRING:
out << '"' << internal::escape_json_string(iter.get_string_view()) << '"';
break;
case tape_type::INT64:
out << iter.next_tape_value<int64_t>();
iter.json_index++; // numbers take up 2 spots, so we need to increment extra
break;
case tape_type::UINT64:
out << iter.next_tape_value<uint64_t>();
iter.json_index++; // numbers take up 2 spots, so we need to increment extra
break;
case tape_type::DOUBLE:
out << iter.next_tape_value<double>();
iter.json_index++; // numbers take up 2 spots, so we need to increment extra
break;
case tape_type::TRUE_VALUE:
out << "true";
break;
case tape_type::FALSE_VALUE:
out << "false";
break;
case tape_type::NULL_VALUE:
out << "null";
break;
// These are impossible
case tape_type::END_ARRAY:
case tape_type::END_OBJECT:
case tape_type::ROOT:
abort();
}
iter.json_index++;
after_value = true;
// Handle multiple ends in a row
while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY || iter.tape_ref_type() == tape_type::END_OBJECT)) {
out << char(iter.tape_ref_type());
depth--;
iter.json_index++;
}
// Stop when we're at depth 0
} while (depth != 0);
return out;
}
#if SIMDJSON_EXCEPTIONS
template<>
inline std::ostream& minify<simdjson_result<dom::element>>::print(std::ostream& out) {
if (value.error()) { throw simdjson_error(value.error()); }
return out << minify<dom::element>(value.first);
}
inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::element> &value) noexcept(false) {
return out << minify<simdjson_result<dom::element>>(value);
}
#endif
} // namespace simdjson
#endif // SIMDJSON_INLINE_ELEMENT_H

View File

@ -3,6 +3,7 @@
#include <cstring>
#include <string>
#include <utility>
#include "simdjson/error.h"
namespace simdjson {

View File

@ -0,0 +1,255 @@
#ifndef SIMDJSON_INLINE_OBJECT_H
#define SIMDJSON_INLINE_OBJECT_H
#include "simdjson/dom/element.h"
#include "simdjson/dom/object.h"
#include "simdjson/portability.h"
#include <cstring>
#include <string>
namespace simdjson {
//
// simdjson_result<dom::object> inline implementation
//
really_inline simdjson_result<dom::object>::simdjson_result() noexcept
: internal::simdjson_result_base<dom::object>() {}
really_inline simdjson_result<dom::object>::simdjson_result(dom::object value) noexcept
: internal::simdjson_result_base<dom::object>(std::forward<dom::object>(value)) {}
really_inline simdjson_result<dom::object>::simdjson_result(error_code error) noexcept
: internal::simdjson_result_base<dom::object>(error) {}
inline simdjson_result<dom::element> simdjson_result<dom::object>::operator[](const std::string_view &key) const noexcept {
if (error()) { return error(); }
return first[key];
}
inline simdjson_result<dom::element> simdjson_result<dom::object>::operator[](const char *key) const noexcept {
if (error()) { return error(); }
return first[key];
}
inline simdjson_result<dom::element> simdjson_result<dom::object>::at(const std::string_view &json_pointer) const noexcept {
if (error()) { return error(); }
return first.at(json_pointer);
}
inline simdjson_result<dom::element> simdjson_result<dom::object>::at_key(const std::string_view &key) const noexcept {
if (error()) { return error(); }
return first.at_key(key);
}
inline simdjson_result<dom::element> simdjson_result<dom::object>::at_key_case_insensitive(const std::string_view &key) const noexcept {
if (error()) { return error(); }
return first.at_key_case_insensitive(key);
}
#if SIMDJSON_EXCEPTIONS
inline dom::object::iterator simdjson_result<dom::object>::begin() const noexcept(false) {
if (error()) { throw simdjson_error(error()); }
return first.begin();
}
inline dom::object::iterator simdjson_result<dom::object>::end() const noexcept(false) {
if (error()) { throw simdjson_error(error()); }
return first.end();
}
inline size_t simdjson_result<dom::object>::size() const noexcept(false) {
if (error()) { throw simdjson_error(error()); }
return first.size();
}
#endif // SIMDJSON_EXCEPTIONS
namespace dom {
//
// object inline implementation
//
really_inline object::object() noexcept : internal::tape_ref() {}
really_inline object::object(const document *_doc, size_t _json_index) noexcept : internal::tape_ref(_doc, _json_index) { }
inline object::iterator object::begin() const noexcept {
return iterator(doc, json_index + 1);
}
inline object::iterator object::end() const noexcept {
return iterator(doc, after_element() - 1);
}
inline size_t object::size() const noexcept {
return scope_count();
}
inline simdjson_result<element> object::operator[](const std::string_view &key) const noexcept {
return at_key(key);
}
inline simdjson_result<element> object::operator[](const char *key) const noexcept {
return at_key(key);
}
inline simdjson_result<element> object::at(const std::string_view &json_pointer) const noexcept {
size_t slash = json_pointer.find('/');
std::string_view key = json_pointer.substr(0, slash);
// Grab the child with the given key
simdjson_result<element> child;
// If there is an escape character in the key, unescape it and then get the child.
size_t escape = key.find('~');
if (escape != std::string_view::npos) {
// Unescape the key
std::string unescaped(key);
do {
switch (unescaped[escape+1]) {
case '0':
unescaped.replace(escape, 2, "~");
break;
case '1':
unescaped.replace(escape, 2, "/");
break;
default:
return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer");
}
escape = unescaped.find('~', escape+1);
} while (escape != std::string::npos);
child = at_key(unescaped);
} else {
child = at_key(key);
}
// If there is a /, we have to recurse and look up more of the path
if (slash != std::string_view::npos) {
child = child.at(json_pointer.substr(slash+1));
}
return child;
}
inline simdjson_result<element> object::at_key(const std::string_view &key) const noexcept {
iterator end_field = end();
for (iterator field = begin(); field != end_field; ++field) {
if (field.key_equals(key)) {
return field.value();
}
}
return NO_SUCH_FIELD;
}
// In case you wonder why we need this, please see
// https://github.com/simdjson/simdjson/issues/323
// People do seek keys in a case-insensitive manner.
inline simdjson_result<element> object::at_key_case_insensitive(const std::string_view &key) const noexcept {
iterator end_field = end();
for (iterator field = begin(); field != end_field; ++field) {
if (field.key_equals_case_insensitive(key)) {
return field.value();
}
}
return NO_SUCH_FIELD;
}
//
// object::iterator inline implementation
//
really_inline object::iterator::iterator(const document *_doc, size_t _json_index) noexcept : internal::tape_ref(_doc, _json_index) { }
inline const key_value_pair object::iterator::operator*() const noexcept {
return key_value_pair(key(), value());
}
inline bool object::iterator::operator!=(const object::iterator& other) const noexcept {
return json_index != other.json_index;
}
inline object::iterator& object::iterator::operator++() noexcept {
json_index++;
json_index = after_element();
return *this;
}
inline std::string_view object::iterator::key() const noexcept {
return get_string_view();
}
inline uint32_t object::iterator::key_length() const noexcept {
return get_string_length();
}
inline const char* object::iterator::key_c_str() const noexcept {
return reinterpret_cast<const char *>(&doc->string_buf[size_t(tape_value()) + sizeof(uint32_t)]);
}
inline element object::iterator::value() const noexcept {
return element(doc, json_index + 1);
}
/**
* Design notes:
* Instead of constructing a string_view and then comparing it with a
* user-provided strings, it is probably more performant to have dedicated
* functions taking as a parameter the string we want to compare against
* and return true when they are equal. That avoids the creation of a temporary
* std::string_view. Though it is possible for the compiler to avoid entirely
* any overhead due to string_view, relying too much on compiler magic is
* problematic: compiler magic sometimes fail, and then what do you do?
* Also, enticing users to rely on high-performance function is probably better
* on the long run.
*/
inline bool object::iterator::key_equals(const std::string_view & o) const noexcept {
// We use the fact that the key length can be computed quickly
// without access to the string buffer.
const uint32_t len = key_length();
if(o.size() == len) {
// We avoid construction of a temporary string_view instance.
return (memcmp(o.data(), key_c_str(), len) == 0);
}
return false;
}
inline bool object::iterator::key_equals_case_insensitive(const std::string_view & o) const noexcept {
// We use the fact that the key length can be computed quickly
// without access to the string buffer.
const uint32_t len = key_length();
if(o.size() == len) {
// See For case-insensitive string comparisons, avoid char-by-char functions
// https://lemire.me/blog/2020/04/30/for-case-insensitive-string-comparisons-avoid-char-by-char-functions/
// Note that it might be worth rolling our own strncasecmp function, with vectorization.
return (simdjson_strncasecmp(o.data(), key_c_str(), len) == 0);
}
return false;
}
//
// key_value_pair inline implementation
//
inline key_value_pair::key_value_pair(const std::string_view &_key, element _value) noexcept :
key(_key), value(_value) {}
inline std::ostream& operator<<(std::ostream& out, const object &value) {
return out << minify<object>(value);
}
inline std::ostream& operator<<(std::ostream& out, const key_value_pair &value) {
return out << minify<key_value_pair>(value);
}
} // namespace dom
template<>
inline std::ostream& minify<dom::object>::print(std::ostream& out) {
out << '{';
auto pair = value.begin();
auto end = value.end();
if (pair != end) {
out << minify<dom::key_value_pair>(*pair);
for (++pair; pair != end; ++pair) {
out << "," << minify<dom::key_value_pair>(*pair);
}
}
return out << '}';
}
template<>
inline std::ostream& minify<dom::key_value_pair>::print(std::ostream& out) {
return out << '"' << internal::escape_json_string(value.key) << "\":" << value.value;
}
#if SIMDJSON_EXCEPTIONS
template<>
inline std::ostream& minify<simdjson_result<dom::object>>::print(std::ostream& out) {
if (value.error()) { throw simdjson_error(value.error()); }
return out << minify<dom::object>(value.first);
}
inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::object> &value) noexcept(false) {
return out << minify<simdjson_result<dom::object>>(value);
}
#endif // SIMDJSON_EXCEPTIONS
} // namespace simdjson
#endif // SIMDJSON_INLINE_OBJECT_H

View File

@ -1,7 +1,9 @@
#ifndef SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H
#define SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H
#include "simdjson/parsedjson_iterator.h"
#include "simdjson/dom/parsedjson_iterator.h"
#include "simdjson/portability.h"
#include <cstring>
namespace simdjson {

View File

@ -0,0 +1,244 @@
#ifndef SIMDJSON_INLINE_PARSER_H
#define SIMDJSON_INLINE_PARSER_H
#include "simdjson/dom/document_stream.h"
#include "simdjson/dom/parser.h"
#include "simdjson/implementation.h"
#include "simdjson/internal/jsonformatutils.h"
#include "simdjson/portability.h"
#include <cstdio>
#include <climits>
namespace simdjson {
namespace dom {
//
// parser inline implementation
//
really_inline parser::parser(size_t max_capacity) noexcept
: _max_capacity{max_capacity},
loaded_bytes(nullptr, &aligned_free_char)
{}
inline bool parser::is_valid() const noexcept { return valid; }
inline int parser::get_error_code() const noexcept { return error; }
inline std::string parser::get_error_message() const noexcept { return error_message(error); }
inline bool parser::print_json(std::ostream &os) const noexcept {
if (!valid) { return false; }
os << doc.root();
return true;
}
inline bool parser::dump_raw_tape(std::ostream &os) const noexcept {
return valid ? doc.dump_raw_tape(os) : false;
}
inline simdjson_result<size_t> parser::read_file(const std::string &path) noexcept {
// Open the file
SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe
std::FILE *fp = std::fopen(path.c_str(), "rb");
SIMDJSON_POP_DISABLE_WARNINGS
if (fp == nullptr) {
return IO_ERROR;
}
// Get the file size
if(std::fseek(fp, 0, SEEK_END) < 0) {
std::fclose(fp);
return IO_ERROR;
}
long len = std::ftell(fp);
if((len < 0) || (len == LONG_MAX)) {
std::fclose(fp);
return IO_ERROR;
}
// Make sure we have enough capacity to load the file
if (_loaded_bytes_capacity < size_t(len)) {
loaded_bytes.reset( internal::allocate_padded_buffer(len) );
if (!loaded_bytes) {
std::fclose(fp);
return MEMALLOC;
}
_loaded_bytes_capacity = len;
}
// Read the string
std::rewind(fp);
size_t bytes_read = std::fread(loaded_bytes.get(), 1, len, fp);
if (std::fclose(fp) != 0 || bytes_read != size_t(len)) {
return IO_ERROR;
}
return bytes_read;
}
inline simdjson_result<element> parser::load(const std::string &path) & noexcept {
size_t len;
error_code code;
read_file(path).tie(len, code);
if (code) { return code; }
return parse(loaded_bytes.get(), len, false);
}
inline document_stream parser::load_many(const std::string &path, size_t batch_size) noexcept {
size_t len;
error_code code;
read_file(path).tie(len, code);
return document_stream(*this, (const uint8_t*)loaded_bytes.get(), len, batch_size, code);
}
inline simdjson_result<element> parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept {
error_code code = ensure_capacity(len);
if (code) { return code; }
if (realloc_if_needed) {
const uint8_t *tmp_buf = buf;
buf = (uint8_t *)internal::allocate_padded_buffer(len);
if (buf == nullptr)
return MEMALLOC;
memcpy((void *)buf, tmp_buf, len);
}
code = simdjson::active_implementation->parse(buf, len, *this);
if (realloc_if_needed) {
aligned_free((void *)buf); // must free before we exit
}
if (code) { return code; }
// We're indicating validity via the simdjson_result<element>, so set the parse state back to invalid
valid = false;
error = UNINITIALIZED;
return doc.root();
}
really_inline simdjson_result<element> parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept {
return parse((const uint8_t *)buf, len, realloc_if_needed);
}
really_inline simdjson_result<element> parser::parse(const std::string &s) & noexcept {
return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING);
}
really_inline simdjson_result<element> parser::parse(const padded_string &s) & noexcept {
return parse(s.data(), s.length(), false);
}
inline document_stream parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept {
return document_stream(*this, buf, len, batch_size);
}
inline document_stream parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept {
return parse_many((const uint8_t *)buf, len, batch_size);
}
inline document_stream parser::parse_many(const std::string &s, size_t batch_size) noexcept {
return parse_many(s.data(), s.length(), batch_size);
}
inline document_stream parser::parse_many(const padded_string &s, size_t batch_size) noexcept {
return parse_many(s.data(), s.length(), batch_size);
}
really_inline size_t parser::capacity() const noexcept {
return _capacity;
}
really_inline size_t parser::max_capacity() const noexcept {
return _max_capacity;
}
really_inline size_t parser::max_depth() const noexcept {
return _max_depth;
}
WARN_UNUSED
inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept {
//
// If capacity has changed, reallocate capacity-based buffers
//
if (_capacity != capacity) {
// Set capacity to 0 until we finish, in case there's an error
_capacity = 0;
//
// Reallocate the document
//
error_code err = doc.allocate(capacity);
if (err) { return err; }
//
// Don't allocate 0 bytes, just return.
//
if (capacity == 0) {
structural_indexes.reset();
return SUCCESS;
}
//
// Initialize stage 1 output
//
size_t max_structures = ROUNDUP_N(capacity, 64) + 2 + 7;
structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); // TODO realloc
if (!structural_indexes) {
return MEMALLOC;
}
_capacity = capacity;
//
// If capacity hasn't changed, but the document was taken, allocate a new document.
//
} else if (!doc.tape) {
error_code err = doc.allocate(capacity);
if (err) { return err; }
}
//
// If max_depth has changed, reallocate those buffers
//
if (max_depth != _max_depth) {
_max_depth = 0;
if (max_depth == 0) {
ret_address.reset();
containing_scope.reset();
return SUCCESS;
}
//
// Initialize stage 2 state
//
containing_scope.reset(new (std::nothrow) internal::scope_descriptor[max_depth]); // TODO realloc
ret_address.reset(new (std::nothrow) internal::ret_address[max_depth]);
if (!ret_address || !containing_scope) {
// Could not allocate memory
return MEMALLOC;
}
_max_depth = max_depth;
}
return SUCCESS;
}
WARN_UNUSED
inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept {
return !allocate(capacity, max_depth);
}
really_inline void parser::set_max_capacity(size_t max_capacity) noexcept {
_max_capacity = max_capacity;
}
inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept {
// If we don't have enough capacity, (try to) automatically bump it.
// If the document was taken, reallocate that too.
// Both in one if statement to minimize unlikely branching.
if (unlikely(desired_capacity > capacity() || !doc.tape)) {
if (desired_capacity > max_capacity()) {
return error = CAPACITY;
}
return allocate(desired_capacity, _max_depth > 0 ? _max_depth : DEFAULT_MAX_DEPTH);
}
return SUCCESS;
}
} // namespace dom
} // namespace simdjson
#endif // SIMDJSON_INLINE_PARSER_H

View File

@ -0,0 +1,105 @@
#ifndef SIMDJSON_INLINE_TAPE_REF_H
#define SIMDJSON_INLINE_TAPE_REF_H
#include "simdjson/internal/tape_ref.h"
#include <cstring>
namespace simdjson {
namespace internal {
//
// tape_ref inline implementation
//
really_inline tape_ref::tape_ref() noexcept : doc{nullptr}, json_index{0} {}
really_inline tape_ref::tape_ref(const dom::document *_doc, size_t _json_index) noexcept : doc{_doc}, json_index{_json_index} {}
// Some value types have a specific on-tape word value. It can be faster
// to check the type by doing a word-to-word comparison instead of extracting the
// most significant 8 bits.
really_inline bool tape_ref::is_double() const noexcept {
constexpr uint64_t tape_double = uint64_t(tape_type::DOUBLE)<<56;
return doc->tape[json_index] == tape_double;
}
really_inline bool tape_ref::is_int64() const noexcept {
constexpr uint64_t tape_int64 = uint64_t(tape_type::INT64)<<56;
return doc->tape[json_index] == tape_int64;
}
really_inline bool tape_ref::is_uint64() const noexcept {
constexpr uint64_t tape_uint64 = uint64_t(tape_type::UINT64)<<56;
return doc->tape[json_index] == tape_uint64;
}
really_inline bool tape_ref::is_false() const noexcept {
constexpr uint64_t tape_false = uint64_t(tape_type::FALSE_VALUE)<<56;
return doc->tape[json_index] == tape_false;
}
really_inline bool tape_ref::is_true() const noexcept {
constexpr uint64_t tape_true = uint64_t(tape_type::TRUE_VALUE)<<56;
return doc->tape[json_index] == tape_true;
}
really_inline bool tape_ref::is_null_on_tape() const noexcept {
constexpr uint64_t tape_null = uint64_t(tape_type::NULL_VALUE)<<56;
return doc->tape[json_index] == tape_null;
}
inline size_t tape_ref::after_element() const noexcept {
switch (tape_ref_type()) {
case tape_type::START_ARRAY:
case tape_type::START_OBJECT:
return matching_brace_index();
case tape_type::UINT64:
case tape_type::INT64:
case tape_type::DOUBLE:
return json_index + 2;
default:
return json_index + 1;
}
}
really_inline tape_type tape_ref::tape_ref_type() const noexcept {
return static_cast<tape_type>(doc->tape[json_index] >> 56);
}
really_inline uint64_t internal::tape_ref::tape_value() const noexcept {
return doc->tape[json_index] & internal::JSON_VALUE_MASK;
}
really_inline uint32_t internal::tape_ref::matching_brace_index() const noexcept {
return uint32_t(doc->tape[json_index]);
}
really_inline uint32_t internal::tape_ref::scope_count() const noexcept {
return uint32_t((doc->tape[json_index] >> 32) & internal::JSON_COUNT_MASK);
}
template<typename T>
really_inline T tape_ref::next_tape_value() const noexcept {
static_assert(sizeof(T) == sizeof(uint64_t), "next_tape_value() template parameter must be 64-bit");
// Though the following is tempting...
// return *reinterpret_cast<const T*>(&doc->tape[json_index + 1]);
// It is not generally safe. It is safer, and often faster to rely
// on memcpy. Yes, it is uglier, but it is also encapsulated.
T x;
memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t));
return x;
}
really_inline uint32_t internal::tape_ref::get_string_length() const noexcept {
uint64_t string_buf_index = size_t(tape_value());
uint32_t len;
memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len));
return len;
}
really_inline const char * internal::tape_ref::get_c_str() const noexcept {
uint64_t string_buf_index = size_t(tape_value());
return reinterpret_cast<const char *>(&doc->string_buf[string_buf_index + sizeof(uint32_t)]);
}
inline std::string_view internal::tape_ref::get_string_view() const noexcept {
return std::string_view(
get_c_str(),
get_string_length()
);
}
} // namespace internal
} // namespace simdjson
#endif // SIMDJSON_INLINE_TAPE_REF_H

View File

@ -0,0 +1,51 @@
#ifndef SIMDJSON_INTERNAL_TAPE_REF_H
#define SIMDJSON_INTERNAL_TAPE_REF_H
#include "simdjson/internal/tape_type.h"
namespace simdjson {
namespace dom {
class document;
}
namespace internal {
constexpr const uint64_t JSON_VALUE_MASK = 0x00FFFFFFFFFFFFFF;
constexpr const uint32_t JSON_COUNT_MASK = 0xFFFFFF;
/**
* A reference to an element on the tape. Internal only.
*/
class tape_ref {
public:
really_inline tape_ref() noexcept;
really_inline tape_ref(const dom::document *doc, size_t json_index) noexcept;
inline size_t after_element() const noexcept;
really_inline tape_type tape_ref_type() const noexcept;
really_inline uint64_t tape_value() const noexcept;
really_inline bool is_double() const noexcept;
really_inline bool is_int64() const noexcept;
really_inline bool is_uint64() const noexcept;
really_inline bool is_false() const noexcept;
really_inline bool is_true() const noexcept;
really_inline bool is_null_on_tape() const noexcept;// different name to avoid clash with is_null.
really_inline uint32_t matching_brace_index() const noexcept;
really_inline uint32_t scope_count() const noexcept;
template<typename T>
really_inline T next_tape_value() const noexcept;
really_inline uint32_t get_string_length() const noexcept;
really_inline const char * get_c_str() const noexcept;
inline std::string_view get_string_view() const noexcept;
/** The document this element references. */
const dom::document *doc;
/** The index of this element on `doc.tape[]` */
size_t json_index;
};
} // namespace internal
} // namespace simdjson
#endif // SIMDJSON_INTERNAL_TAPE_REF_H

View File

@ -0,0 +1,28 @@
#ifndef SIMDJSON_INTERNAL_TAPE_TYPE_H
#define SIMDJSON_INTERNAL_TAPE_TYPE_H
namespace simdjson {
namespace internal {
/**
* The possible types in the tape.
*/
enum class tape_type {
ROOT = 'r',
START_ARRAY = '[',
START_OBJECT = '{',
END_ARRAY = ']',
END_OBJECT = '}',
STRING = '"',
INT64 = 'l',
UINT64 = 'u',
DOUBLE = 'd',
TRUE_VALUE = 't',
FALSE_VALUE = 'f',
NULL_VALUE = 'n'
}; // enum class tape_type
} // namespace internal
} // namespace simdjson
#endif // SIMDJSON_INTERNAL_TAPE_TYPE_H

View File

@ -1,13 +1,6 @@
#ifndef SIMDJSON_JSONIOUTIL_H
#define SIMDJSON_JSONIOUTIL_H
#include <exception>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include "simdjson/common_defs.h"
#include "simdjson/padded_string.h"

55
include/simdjson/minify.h Normal file
View File

@ -0,0 +1,55 @@
#ifndef SIMDJSON_MINIFY_H
#define SIMDJSON_MINIFY_H
#include "simdjson/common_defs.h"
#include "simdjson/padded_string.h"
#include <string>
#include <ostream>
#include <sstream>
namespace simdjson {
/**
* Minifies a JSON element or document, printing the smallest possible valid JSON.
*
* dom::parser parser;
* element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded);
* cout << minify(doc) << endl; // prints [1,2,3]
*
*/
template<typename T>
class minify {
public:
/**
* Create a new minifier.
*
* @param _value The document or element to minify.
*/
inline minify(const T &_value) noexcept : value{_value} {}
/**
* Minify JSON to a string.
*/
inline operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); }
/**
* Minify JSON to an output stream.
*/
inline std::ostream& print(std::ostream& out);
private:
const T &value;
};
/**
* Minify JSON to an output stream.
*
* @param out The output stream.
* @param formatter The minifier.
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
*/
template<typename T>
inline std::ostream& operator<<(std::ostream& out, minify<T> formatter) { return formatter.print(out); }
} // namespace simdjson
#endif // SIMDJSON_MINIFY_H

View File

@ -1,5 +1,6 @@
#ifndef SIMDJSON_PADDED_STRING_H
#define SIMDJSON_PADDED_STRING_H
#include "simdjson/portability.h"
#include "simdjson/common_defs.h" // for SIMDJSON_PADDING
#include "simdjson/error.h"
@ -120,6 +121,19 @@ private:
*/
inline std::ostream& operator<<(std::ostream& out, const padded_string& s) { return out << s.data(); }
#if SIMDJSON_EXCEPTIONS
/**
* Send padded_string instance to an output stream.
*
* @param out The output stream.
* @param s The padded_string instance.
* @throw simdjson_error if the result being printed has an error. If there is an error with the
* underlying output stream, that error will be propagated (simdjson_error will not be
* thrown).
*/
inline std::ostream& operator<<(std::ostream& out, simdjson_result<padded_string> &s) noexcept(false) { return out << s.value(); }
#endif
} // namespace simdjson
// This is deliberately outside of simdjson so that people get it without having to use the namespace

View File

@ -1,4 +1,4 @@
/* auto-generated on Tue May 19 13:32:53 PDT 2020. Do not edit! */
/* auto-generated on Tue May 19 14:39:19 PDT 2020. Do not edit! */
#include <iostream>
#include "simdjson.h"

View File

@ -1,4 +1,4 @@
/* auto-generated on Tue May 19 13:32:53 PDT 2020. Do not edit! */
/* auto-generated on Tue May 19 14:39:19 PDT 2020. Do not edit! */
/* begin file src/simdjson.cpp */
#include "simdjson.h"
@ -532,19 +532,19 @@ const std::initializer_list<const implementation *> available_implementation_poi
// So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no support
class unsupported_implementation final : public implementation {
public:
WARN_UNUSED error_code parse(const uint8_t *, size_t, parser &) const noexcept final {
WARN_UNUSED error_code parse(const uint8_t *, size_t, dom::parser &) const noexcept final {
return UNSUPPORTED_ARCHITECTURE;
}
WARN_UNUSED error_code minify(const uint8_t *, size_t, uint8_t *, size_t &) const noexcept final {
return UNSUPPORTED_ARCHITECTURE;
}
WARN_UNUSED error_code stage1(const uint8_t *, size_t, parser &, bool) const noexcept final {
WARN_UNUSED error_code stage1(const uint8_t *, size_t, dom::parser &, bool) const noexcept final {
return UNSUPPORTED_ARCHITECTURE;
}
WARN_UNUSED error_code stage2(const uint8_t *, size_t, parser &) const noexcept final {
WARN_UNUSED error_code stage2(const uint8_t *, size_t, dom::parser &) const noexcept final {
return UNSUPPORTED_ARCHITECTURE;
}
WARN_UNUSED error_code stage2(const uint8_t *, size_t, parser &, size_t &) const noexcept final {
WARN_UNUSED error_code stage2(const uint8_t *, size_t, dom::parser &, size_t &) const noexcept final {
return UNSUPPORTED_ARCHITECTURE;
}

File diff suppressed because it is too large Load Diff

View File

@ -81,19 +81,19 @@ const std::initializer_list<const implementation *> available_implementation_poi
// So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no support
class unsupported_implementation final : public implementation {
public:
WARN_UNUSED error_code parse(const uint8_t *, size_t, parser &) const noexcept final {
WARN_UNUSED error_code parse(const uint8_t *, size_t, dom::parser &) const noexcept final {
return UNSUPPORTED_ARCHITECTURE;
}
WARN_UNUSED error_code minify(const uint8_t *, size_t, uint8_t *, size_t &) const noexcept final {
return UNSUPPORTED_ARCHITECTURE;
}
WARN_UNUSED error_code stage1(const uint8_t *, size_t, parser &, bool) const noexcept final {
WARN_UNUSED error_code stage1(const uint8_t *, size_t, dom::parser &, bool) const noexcept final {
return UNSUPPORTED_ARCHITECTURE;
}
WARN_UNUSED error_code stage2(const uint8_t *, size_t, parser &) const noexcept final {
WARN_UNUSED error_code stage2(const uint8_t *, size_t, dom::parser &) const noexcept final {
return UNSUPPORTED_ARCHITECTURE;
}
WARN_UNUSED error_code stage2(const uint8_t *, size_t, parser &, size_t &) const noexcept final {
WARN_UNUSED error_code stage2(const uint8_t *, size_t, dom::parser &, size_t &) const noexcept final {
return UNSUPPORTED_ARCHITECTURE;
}