/* auto-generated on Fri Mar 20 11:47:31 PDT 2020. Do not edit! */
/* begin file include/simdjson.h */
#ifndef SIMDJSON_H
#define SIMDJSON_H

/* begin file include/simdjson/compiler_check.h */
#ifndef SIMDJSON_COMPILER_CHECK_H
#define SIMDJSON_COMPILER_CHECK_H

#ifndef __cplusplus
#error simdjson requires a C++ compiler
#endif

// SIMDJSON_CPLUSPLUS is the language level used for the check below. It can be
// predefined by the build. Otherwise, with MSVC (but not clang) it is taken
// from _MSVC_LANG, except that _MSC_VER 1900 is reported as 201103L; every
// other compiler uses __cplusplus.
#ifndef SIMDJSON_CPLUSPLUS
#if defined(_MSVC_LANG) && !defined(__clang__)
#define SIMDJSON_CPLUSPLUS (_MSC_VER == 1900 ? 201103L : _MSVC_LANG)
#else
#define SIMDJSON_CPLUSPLUS __cplusplus
#endif
#endif

// Anything older than C++17 is rejected at preprocessing time.
#if (SIMDJSON_CPLUSPLUS < 201703L)
#error simdjson requires a compiler compliant with the C++17 standard
#endif

#endif // SIMDJSON_COMPILER_CHECK_H
/* end file include/simdjson/compiler_check.h */

// Public API
/* begin file include/simdjson/simdjson_version.h */
// /include/simdjson/simdjson_version.h automatically generated by release.py,
// do not change by hand
#ifndef SIMDJSON_SIMDJSON_VERSION_H
#define SIMDJSON_SIMDJSON_VERSION_H

/**
 * The version of simdjson being used (major.minor.revision).
 *
 * Note that this expands to the bare tokens 0.2.1, not to a string literal.
 */
#define SIMDJSON_VERSION 0.2.1

namespace simdjson {
enum {
  /**
   * The major version (MAJOR.minor.revision) of simdjson being used.
   */
  SIMDJSON_VERSION_MAJOR = 0,
  /**
   * The minor version (major.MINOR.revision) of simdjson being used.
   */
  SIMDJSON_VERSION_MINOR = 2,
  /**
   * The revision (major.minor.REVISION) of simdjson being used.
   */
  SIMDJSON_VERSION_REVISION = 1
};
} // namespace simdjson

#endif // SIMDJSON_SIMDJSON_VERSION_H
/* end file include/simdjson/simdjson_version.h */
/* begin file include/simdjson/error.h */
#ifndef SIMDJSON_ERROR_H
#define SIMDJSON_ERROR_H

/* begin file include/simdjson/common_defs.h */
#ifndef SIMDJSON_COMMON_DEFS_H
#define SIMDJSON_COMMON_DEFS_H

// NOTE(review): every #include in this section has no header name in this
// copy; the names must be restored from the generator's input before this
// file can be compiled.
#include
/* begin file include/simdjson/portability.h */
#ifndef SIMDJSON_PORTABILITY_H
#define SIMDJSON_PORTABILITY_H

#include
#include
#include
#ifdef _MSC_VER
#include
#endif

// Architecture detection: each macro is defined to 1 on a matching target and
// left undefined otherwise.
#if defined(__x86_64__) || defined(_M_AMD64)
#define IS_X86_64 1
#endif
#if defined(__aarch64__) || defined(_M_ARM64)
#define IS_ARM64 1
#endif

// this is almost standard?
// Two-level stringification, so that a macro argument is expanded before it
// is turned into a string literal.
#undef STRINGIFY_IMPLEMENTATION_
#undef STRINGIFY
#define STRINGIFY_IMPLEMENTATION_(a) #a
#define STRINGIFY(a) STRINGIFY_IMPLEMENTATION_(a)

// Implementation selection. Each SIMDJSON_IMPLEMENTATION_* macro can be
// predefined by the build; the defaults below enable the fallback everywhere
// and the implementations that match the detected architecture.
#ifndef SIMDJSON_IMPLEMENTATION_FALLBACK
#define SIMDJSON_IMPLEMENTATION_FALLBACK 1
#endif

#if IS_ARM64
#ifndef SIMDJSON_IMPLEMENTATION_ARM64
#define SIMDJSON_IMPLEMENTATION_ARM64 1
#endif
// The x86 implementations are forced off on ARM64.
#define SIMDJSON_IMPLEMENTATION_HASWELL 0
#define SIMDJSON_IMPLEMENTATION_WESTMERE 0
#endif // IS_ARM64

#if IS_X86_64
#ifndef SIMDJSON_IMPLEMENTATION_HASWELL
#define SIMDJSON_IMPLEMENTATION_HASWELL 1
#endif
#ifndef SIMDJSON_IMPLEMENTATION_WESTMERE
#define SIMDJSON_IMPLEMENTATION_WESTMERE 1
#endif
// The ARM64 implementation is forced off on x86-64.
#define SIMDJSON_IMPLEMENTATION_ARM64 0
#endif // IS_X86_64

// we are going to use runtime dispatch
#ifdef IS_X86_64
#ifdef __clang__
// clang does not have GCC push pop
// warning: clang attribute push can't be used within a namespace in clang up
// til 8.0 so TARGET_REGION and UNTARGET_REGION must be *outside* of a
// namespace.
#define TARGET_REGION(T) \ _Pragma(STRINGIFY( \ clang attribute push(__attribute__((target(T))), apply_to = function))) #define UNTARGET_REGION _Pragma("clang attribute pop") #elif defined(__GNUC__) // GCC is easier #define TARGET_REGION(T) \ _Pragma("GCC push_options") _Pragma(STRINGIFY(GCC target(T))) #define UNTARGET_REGION _Pragma("GCC pop_options") #endif // clang then gcc #endif // x86 // Default target region macros don't do anything. #ifndef TARGET_REGION #define TARGET_REGION(T) #define UNTARGET_REGION #endif // under GCC and CLANG, we use these two macros #define TARGET_HASWELL TARGET_REGION("avx2,bmi,pclmul,lzcnt") #define TARGET_WESTMERE TARGET_REGION("sse4.2,pclmul") #define TARGET_ARM64 // Threading is disabled #undef SIMDJSON_THREADS_ENABLED // Is threading enabled? #if defined(BOOST_HAS_THREADS) || defined(_REENTRANT) || defined(_MT) #define SIMDJSON_THREADS_ENABLED #endif #if defined(__clang__) #define NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined"))) #elif defined(__GNUC__) #define NO_SANITIZE_UNDEFINED __attribute__((no_sanitize_undefined)) #else #define NO_SANITIZE_UNDEFINED #endif #ifdef _MSC_VER #include // visual studio #endif #ifdef _MSC_VER #define simdjson_strcasecmp _stricmp #else #define simdjson_strcasecmp strcasecmp #endif namespace simdjson { // portable version of posix_memalign static inline void *aligned_malloc(size_t alignment, size_t size) { void *p; #ifdef _MSC_VER p = _aligned_malloc(size, alignment); #elif defined(__MINGW32__) || defined(__MINGW64__) p = __mingw_aligned_malloc(size, alignment); #else // somehow, if this is used before including "x86intrin.h", it creates an // implicit defined warning. 
if (posix_memalign(&p, alignment, size) != 0) { return nullptr; } #endif return p; } static inline char *aligned_malloc_char(size_t alignment, size_t size) { return (char *)aligned_malloc(alignment, size); } static inline void aligned_free(void *mem_block) { if (mem_block == nullptr) { return; } #ifdef _MSC_VER _aligned_free(mem_block); #elif defined(__MINGW32__) || defined(__MINGW64__) __mingw_aligned_free(mem_block); #else free(mem_block); #endif } static inline void aligned_free_char(char *mem_block) { aligned_free((void *)mem_block); } } // namespace simdjson #endif // SIMDJSON_PORTABILITY_H /* end file include/simdjson/portability.h */ namespace simdjson { #ifndef SIMDJSON_EXCEPTIONS #if __cpp_exceptions #define SIMDJSON_EXCEPTIONS 1 #else #define SIMDJSON_EXCEPTIONS 0 #endif #endif /** The maximum document size supported by simdjson. */ constexpr size_t SIMDJSON_MAXSIZE_BYTES = 0xFFFFFFFF; /** * The amount of padding needed in a buffer to parse JSON. * * the input buf should be readable up to buf + SIMDJSON_PADDING * this is a stopgap; there should be a better description of the * main loop and its behavior that abstracts over this * See https://github.com/lemire/simdjson/issues/174 */ constexpr size_t SIMDJSON_PADDING = 32; /** * By default, simdjson supports this many nested objects and arrays. * * This is the default for document::parser::max_depth(). */ constexpr size_t DEFAULT_MAX_DEPTH = 1024; } // namespace simdjson #if defined(__GNUC__) // Marks a block with a name so that MCA analysis can see it. 
#define BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name); #define END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name); #define DEBUG_BLOCK(name, block) BEGIN_DEBUG_BLOCK(name); block; END_DEBUG_BLOCK(name); #else #define BEGIN_DEBUG_BLOCK(name) #define END_DEBUG_BLOCK(name) #define DEBUG_BLOCK(name, block) #endif #if !defined(_MSC_VER) && !defined(SIMDJSON_NO_COMPUTED_GOTO) // Implemented using Labels as Values which works in GCC and CLANG (and maybe // also in Intel's compiler), but won't work in MSVC. #define SIMDJSON_USE_COMPUTED_GOTO #endif // Align to N-byte boundary #define ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1)) #define ROUNDDOWN_N(a, n) ((a) & ~((n)-1)) #define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0) #ifdef _MSC_VER #define really_inline __forceinline #define never_inline __declspec(noinline) #define UNUSED #define WARN_UNUSED #ifndef likely #define likely(x) x #endif #ifndef unlikely #define unlikely(x) x #endif #else #define really_inline inline __attribute__((always_inline, unused)) #define never_inline inline __attribute__((noinline, unused)) #define UNUSED __attribute__((unused)) #define WARN_UNUSED __attribute__((warn_unused_result)) #ifndef likely #define likely(x) __builtin_expect(!!(x), 1) #endif #ifndef unlikely #define unlikely(x) __builtin_expect(!!(x), 0) #endif #endif // MSC_VER #endif // SIMDJSON_COMMON_DEFS_H /* end file include/simdjson/portability.h */ #include #include namespace simdjson { /** * All possible errors returned by simdjson. 
*/ enum error_code { SUCCESS = 0, ///< No error SUCCESS_AND_HAS_MORE, ///< No error and buffer still has more data CAPACITY, ///< This parser can't support a document that big MEMALLOC, ///< Error allocating memory, most likely out of memory TAPE_ERROR, ///< Something went wrong while writing to the tape (stage 2), this is a generic error DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation STRING_ERROR, ///< Problem while parsing a string T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' F_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'f' N_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'n' NUMBER_ERROR, ///< Problem while parsing a number UTF8_ERROR, ///< the input is not valid UTF-8 UNINITIALIZED, ///< unknown error, or uninitialized document EMPTY, ///< no structural element found UNESCAPED_CHARS, ///< found unescaped characters in a string. UNCLOSED_STRING, ///< missing quote at the end UNSUPPORTED_ARCHITECTURE, ///< unsupported architecture INCORRECT_TYPE, ///< JSON element has a different type than user expected NUMBER_OUT_OF_RANGE, ///< JSON number does not fit in 64 bits NO_SUCH_FIELD, ///< JSON field not found in object IO_ERROR, ///< Error reading a file UNEXPECTED_ERROR, ///< indicative of a bug in simdjson /** @private Number of error codes */ NUM_ERROR_CODES }; /** * Get the error message for the given error code. * * auto [doc, error] = document::parse("foo"); * if (error) { printf("Error: %s\n", error_message(error)); } * * @return The error message. */ inline const char *error_message(error_code error) noexcept; /** * Write the error message to the output stream */ inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept; /** * Exception thrown when an exception-supporting simdjson method is called */ struct simdjson_error : public std::exception { /** * Create an exception from a simdjson error code. 
* @param error The error code */ simdjson_error(error_code error) noexcept : _error{error} { } /** The error message */ const char *what() const noexcept { return error_message(error()); } /** The error code */ error_code error() const noexcept { return _error; } private: /** The error code that was used */ error_code _error; }; /** * The result of a simd operation that could fail. * * Gives the option of reading error codes, or throwing an exception by casting to the desired result. */ template struct simdjson_result : public std::pair { /** * The error. */ error_code error() const { return this->second; } #if SIMDJSON_EXCEPTIONS /** * The value of the function. * * @throw simdjson_error if there was an error. */ T get() noexcept(false) { if (error()) { throw simdjson_error(error()); } return this->first; }; /** * Cast to the value (will throw on error). * * @throw simdjson_error if there was an error. */ operator T() noexcept(false) { return get(); } #endif // SIMDJSON_EXCEPTIONS /** * Create a new error result. */ simdjson_result(error_code _error) noexcept : std::pair({}, _error) {} /** * Create a new successful result. */ simdjson_result(T _value) noexcept : std::pair(_value, SUCCESS) {} /** * Create a new result with both things (use if you don't want to branch when creating the result). */ simdjson_result(T value, error_code error) noexcept : std::pair(value, error) {} }; /** * The result of a simd operation that could fail. * * This class is for values that must be *moved*, like padded_string and document. * * Gives the option of reading error codes, or throwing an exception by casting to the desired result. */ template struct simdjson_move_result : std::pair { /** * Move the value and the error to the provided variables. 
*/ void tie(T& t, error_code & e) { // on the clang compiler that comes with current macOS (Apple clang version 11.0.0), // std::tie(this->json, error) = padded_string::load(filename); // fails with "benchmark/benchmarker.h:266:33: error: no viable overloaded '='"" t = std::move(this->first); e = std::move(this->second); } /** * The error. */ error_code error() const { return this->second; } #if SIMDJSON_EXCEPTIONS /** * The value of the function. * * @throw simdjson_error if there was an error. */ T move() noexcept(false) { if (error()) { throw simdjson_error(error()); } return std::move(this->first); }; /** * Cast to the value (will throw on error). * * @throw simdjson_error if there was an error. */ operator T() noexcept(false) { return move(); } #endif /** * Create a new error result. */ simdjson_move_result(error_code error) noexcept : std::pair(T(), error) {} /** * Create a new successful result. */ simdjson_move_result(T value) noexcept : std::pair(std::move(value), SUCCESS) {} /** * Create a new result with both things (use if you don't want to branch when creating the result). */ simdjson_move_result(T value, error_code error) noexcept : std::pair(std::move(value), error) {} }; /** * @deprecated This is an alias and will be removed, use error_code instead */ using ErrorValues = error_code; /** * @deprecated Error codes should be stored and returned as `error_code`, use `error_message()` instead. */ inline const std::string &error_message(int error) noexcept; } // namespace simdjson #endif // SIMDJSON_ERROR_H /* end file include/simdjson/portability.h */ /* begin file include/simdjson/padded_string.h */ #ifndef SIMDJSON_PADDED_STRING_H #define SIMDJSON_PADDED_STRING_H #include #include #include namespace simdjson { /** * String with extra allocation for ease of use with document::parser::parse() * * This is a move-only class, it cannot be copied. */ struct padded_string final { /** * Create a new, empty padded string. 
*/ explicit inline padded_string() noexcept; /** * Create a new padded string buffer. * * @param length the size of the string. */ explicit inline padded_string(size_t length) noexcept; /** * Create a new padded string by copying the given input. * * @param data the buffer to copy * @param length the number of bytes to copy */ explicit inline padded_string(const char *data, size_t length) noexcept; /** * Create a new padded string by copying the given input. * * @param str_ the string to copy */ inline padded_string(const std::string & str_ ) noexcept; /** * Create a new padded string by copying the given input. * * @param str_ the string to copy */ inline padded_string(std::string_view sv_) noexcept; /** * Move one padded string into another. * * The original padded string will be reduced to zero capacity. * * @param o the string to move. */ inline padded_string(padded_string &&o) noexcept; /** * Move one padded string into another. * * The original padded string will be reduced to zero capacity. * * @param o the string to move. */ inline padded_string &operator=(padded_string &&o) noexcept; inline void swap(padded_string &o) noexcept; ~padded_string() noexcept; /** * The length of the string. * * Does not include padding. */ size_t size() const noexcept; /** * The length of the string. * * Does not include padding. */ size_t length() const noexcept; /** * The string data. **/ const char *data() const noexcept; /** * The string data. **/ char *data() noexcept; /** * Load this padded string from a file. * * @param path the path to the file. **/ inline static simdjson_move_result load(const std::string &path) noexcept; private: padded_string &operator=(const padded_string &o) = delete; padded_string(const padded_string &o) = delete; size_t viable_size; char *data_ptr{nullptr}; }; // padded_string } // namespace simdjson namespace simdjson::internal { // low-level function to allocate memory with padding so we can read past the // "length" bytes safely. 
// if you must provide a pointer to some data, create it
// with this function: length is the max. size in bytes of the string. The
// caller is responsible for freeing the memory (free(...)).
inline char *allocate_padded_buffer(size_t length) noexcept;

} // namespace simdjson::internal;

#endif // SIMDJSON_PADDED_STRING_H
/* end file include/simdjson/padded_string.h */
/* begin file include/simdjson/implementation.h */
#ifndef SIMDJSON_IMPLEMENTATION_H
#define SIMDJSON_IMPLEMENTATION_H

// NOTE(review): in this copy the text between angle brackets is missing
// throughout this section: the #include directives have no header name,
// `template` has no parameter list, and std::unique_ptr / simdjson_result /
// simdjson_move_result / document_iterator have no template arguments.
// Restore them from the generator's input before compiling.
#include
#include
#include
#include
/* begin file include/simdjson/document.h */
#ifndef SIMDJSON_DOCUMENT_H
#define SIMDJSON_DOCUMENT_H

#include
#include
#include
#include
#include
/* begin file include/simdjson/simdjson.h */
/**
 * @file
 * @deprecated We'll be removing this file so it isn't confused with the top level simdjson.h
 */
#ifndef SIMDJSON_SIMDJSON_H
#define SIMDJSON_SIMDJSON_H

#endif // SIMDJSON_SIMDJSON_H
/* end file include/simdjson/simdjson.h */

namespace simdjson::internal {
// Bit mask with the low 56 bits set (the top byte is cleared).
constexpr const uint64_t JSON_VALUE_MASK = 0x00FFFFFFFFFFFFFF;
// Forward declarations; both are defined further down in this file.
enum class tape_type;
class tape_ref;
} // namespace simdjson::internal

namespace simdjson {

template class document_iterator;

/**
 * A parsed JSON document.
 *
 * This class cannot be copied, only moved, to avoid unintended allocations.
 */
class document {
public:
  /**
   * Create a document container with zero capacity.
   *
   * The parser will allocate capacity as needed.
   */
  document() noexcept=default;
  ~document() noexcept=default;

  /**
   * Take another document's buffers.
   *
   * @param other The document to take. Its capacity is zeroed and it is invalidated.
   */
  document(document &&other) noexcept = default;
  document(const document &) = delete; // Disallow copying
  /**
   * Take another document's buffers.
   *
   * @param other The document to take. Its capacity is zeroed.
   */
  document &operator=(document &&other) noexcept = default;
  document &operator=(const document &) = delete; // Disallow copying

  /** The default batch size for parse_many and load_many */
  static constexpr size_t DEFAULT_BATCH_SIZE = 1000000;

  // Nested classes
  class element;
  class array;
  class object;
  class key_value_pair;
  class parser;
  class stream;

  // Result types: a value of the matching nested class, or an error.
  class doc_move_result;
  class doc_result;
  class element_result;
  class array_result;
  class object_result;
  class stream_result;

  // Nested classes. See definitions later in file.
  using iterator = document_iterator;

  /**
   * Get the root element of this document.
   */
  element root() const noexcept;
  /**
   * Get the root element of this document as a JSON array.
   */
  array_result as_array() const noexcept;
  /**
   * Get the root element of this document as a JSON object.
   */
  object_result as_object() const noexcept;
  /**
   * Get the root element of this document.
   */
  operator element() const noexcept;

#if SIMDJSON_EXCEPTIONS
  /**
   * Read the root element of this document as a JSON array.
   *
   * @return The JSON array.
   * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array
   */
  operator array() const noexcept(false);
  /**
   * Read the root element of this document as a JSON object (key/value pairs).
   *
   * @return The JSON object.
   * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an object
   */
  operator object() const noexcept(false);
#endif // SIMDJSON_EXCEPTIONS

  /**
   * Get the value associated with the given key.
   *
   * The key will be matched against **unescaped** JSON:
   *
   *   document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1
   *   document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD
   *
   * @return The value associated with the given key, or:
   *         - NO_SUCH_FIELD if the field does not exist in the object
   *         - INCORRECT_TYPE if the document is not an object
   */
  element_result operator[](const std::string_view &s) const noexcept;
  /**
   * Get the value associated with the given key.
   *
   * The key will be matched against **unescaped** JSON:
   *
   *   document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1
   *   document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD
   *
   * @return The value associated with this field, or:
   *         - NO_SUCH_FIELD if the field does not exist in the object
   *         - INCORRECT_TYPE if the document is not an object
   */
  element_result operator[](const char *s) const noexcept;

  /**
   * Dump the raw tape for debugging.
   *
   * @param os the stream to output to.
   * @return false if the tape is likely wrong (e.g., you did not parse a valid JSON).
   */
  bool dump_raw_tape(std::ostream &os) const noexcept;

  /**
   * Load a JSON document from a file and return it.
   *
   *   document doc = document::load("jsonexamples/twitter.json");
   *
   * ### Parser Capacity
   *
   * If the parser's current capacity is less than the file length, it will allocate enough capacity
   * to handle it (up to max_capacity).
   *
   * @param path The path to load.
   * @return The document, or an error:
   *         - IO_ERROR if there was an error opening or reading the file.
   *         - MEMALLOC if the parser does not have enough capacity and memory allocation fails.
   *         - CAPACITY if the parser does not have enough capacity and len > max_capacity.
   *         - other json errors if parsing fails.
   */
  inline static doc_move_result load(const std::string& path) noexcept;

  /**
   * Parse a JSON document and return it.
   *
   * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
   * those bytes are initialized to, as long as they are allocated. If realloc_if_needed is true,
   * it is assumed that the buffer does *not* have enough padding, and it is reallocated, enlarged
   * and copied before parsing.
   *
   * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless
   *            realloc_if_needed is true.
   * @param len The length of the JSON.
   * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding.
   * @return the document, or an error if the JSON is invalid.
   */
  inline static doc_move_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept;

  /**
   * Parse a JSON document.
   *
   * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
   * those bytes are initialized to, as long as they are allocated. If realloc_if_needed is true,
   * it is assumed that the buffer does *not* have enough padding, and it is reallocated, enlarged
   * and copied before parsing.
   *
   * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless
   *            realloc_if_needed is true.
   * @param len The length of the JSON.
   * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding.
   * @return the document, or an error if the JSON is invalid.
   */
  really_inline static doc_move_result parse(const char *buf, size_t len, bool realloc_if_needed = true) noexcept;

  /**
   * Parse a JSON document.
   *
   * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
   * those bytes are initialized to, as long as they are allocated. If `str.capacity() - str.size()
   * < SIMDJSON_PADDING`, the string will be copied to a string with larger capacity before parsing.
   *
   * @param s The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, or
   *          a new string will be created with the extra padding.
   * @return the document, or an error if the JSON is invalid.
   */
  really_inline static doc_move_result parse(const std::string &s) noexcept;

  /**
   * Parse a JSON document.
   *
   * @param s The JSON to parse.
   * @return the document, or an error if the JSON is invalid.
   */
  really_inline static doc_move_result parse(const padded_string &s) noexcept;

  // We do not want to allow implicit conversion from C string to std::string.
  // Deleting this overload rejects parse("...") and parse("...", flag) calls
  // made without an explicit length.
  doc_result parse(const char *buf, bool realloc_if_needed = true) noexcept = delete;

  std::unique_ptr tape;
  std::unique_ptr string_buf;// should be at least byte_capacity

private:
  inline error_code set_capacity(size_t len) noexcept;
  template friend class minify;
}; // class document

template class minify;

/**
 * A parsed, *owned* document, or an error if the parse failed.
 *
 *     document doc = document::parse(json);
 *
 * Returns an owned `document`. When the doc_move_result (or the document retrieved from it) goes out of
 * scope, the document's memory is deallocated.
 *
 * ## Error Codes vs. Exceptions
 *
 * This result type allows the user to pick whether to use exceptions or not.
 *
 * Use like this to avoid exceptions:
 *
 *     auto [doc, error] = document::parse(json);
 *     if (error) { exit(1); }
 *
 * Use like this if you'd prefer to use exceptions:
 *
 *     document doc = document::parse(json);
 *
 */
class document::doc_move_result : public simdjson_move_result {
public:

  /**
   * Read this document as a JSON object.
   *
   * @return The object value, or:
   *         - INCORRECT_TYPE if the JSON document is not an object
   */
  inline object_result as_object() const noexcept;

  /**
   * Read this document as a JSON array.
   *
   * @return The array value, or:
   *         - INCORRECT_TYPE if the JSON document is not an array
   */
  inline array_result as_array() const noexcept;

  /**
   * Get the value associated with the given key.
   *
   * The key will be matched against **unescaped** JSON:
   *
   *   document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1
   *   document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD
   *
   * @return The value associated with this field, or:
   *         - NO_SUCH_FIELD if the field does not exist in the object
   *         - INCORRECT_TYPE if the document is not an object
   */
  inline element_result operator[](const std::string_view &key) const noexcept;
  /**
   * Get the value associated with the given key.
   *
   * The key will be matched against **unescaped** JSON:
   *
   *   document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1
   *   document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD
   *
   * @return The value associated with this field, or:
   *         - NO_SUCH_FIELD if the field does not exist in the object
   *         - INCORRECT_TYPE if the document is not an object
   */
  inline element_result operator[](const char *key) const noexcept;

  ~doc_move_result() noexcept=default;
  /** Wrap a document together with an error code. */
  doc_move_result(document &&doc, error_code error) noexcept;
  /** Wrap a document. */
  doc_move_result(document &&doc) noexcept;
  /** Create a result that carries only an error code. */
  doc_move_result(error_code error) noexcept;
  friend class document;
}; // class document::doc_move_result

/**
 * A parsed document reference, or an error if the parse failed.
 *
 *     document &doc = parser.parse(json);
 *
 * ## Document Ownership
 *
 * The `document &` refers to an internal document the parser reuses on each `parse()` call. It will
 * become invalidated on the next `parse()`.
 *
 * This is more efficient for common cases where documents are parsed and used one at a time. If you
 * need to keep the document around longer, you may *take* it from the parser by casting it:
 *
 *     document doc = parser.parse(json); // take ownership
 *
 * If you do this, the parser will automatically allocate a new document on the next `parse()` call.
 *
 * ## Error Codes vs. Exceptions
 *
 * This result type allows the user to pick whether to use exceptions or not.
 *
 * Use like this to avoid exceptions:
 *
 *     auto [doc, error] = parser.parse(json);
 *     if (error) { exit(1); }
 *
 * Use like this if you'd prefer to use exceptions:
 *
 *     document &doc = parser.parse(json);
 *
 */
class document::doc_result : public simdjson_result {
public:
  /**
   * Read this document as a JSON object.
   *
   * @return The object value, or:
   *         - INCORRECT_TYPE if the JSON document is not an object
   */
  inline object_result as_object() const noexcept;

  /**
   * Read this document as a JSON array.
   *
   * @return The array value, or:
   *         - INCORRECT_TYPE if the JSON document is not an array
   */
  inline array_result as_array() const noexcept;

  /**
   * Get the value associated with the given key.
   *
   * The key will be matched against **unescaped** JSON:
   *
   *   document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1
   *   document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD
   *
   * @return The value associated with this field, or:
   *         - NO_SUCH_FIELD if the field does not exist in the object
   *         - INCORRECT_TYPE if the document is not an object
   */
  inline element_result operator[](const std::string_view &key) const noexcept;

  /**
   * Get the value associated with the given key.
   *
   * The key will be matched against **unescaped** JSON:
   *
   *   document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1
   *   document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD
   *
   * @return The value associated with this field, or:
   *         - NO_SUCH_FIELD if the field does not exist in the object
   *         - INCORRECT_TYPE if the document is not an object
   */
  inline element_result operator[](const char *key) const noexcept;

  ~doc_result()=default;
  /** Wrap a reference to a document together with an error code. */
  doc_result(document &doc, error_code error) noexcept;
  friend class document::parser;
  friend class document::stream;
}; // class document::doc_result

namespace internal {
  /**
   * The possible types in the tape. Internal only.
   *
   * Each enumerator's value is an ASCII character.
   */
  enum class tape_type {
    ROOT = 'r',
    START_ARRAY = '[',
    START_OBJECT = '{',
    END_ARRAY = ']',
    END_OBJECT = '}',
    STRING = '"',
    INT64 = 'l',
    UINT64 = 'u',
    DOUBLE = 'd',
    TRUE_VALUE = 't',
    FALSE_VALUE = 'f',
    NULL_VALUE = 'n'
  };

  /**
   * A reference to an element on the tape. Internal only.
   *
   * Every member is protected; the class is used as a base by
   * document::element and document::array.
   */
  class tape_ref {
  protected:
    really_inline tape_ref() noexcept;
    really_inline tape_ref(const document *_doc, size_t _json_index) noexcept;
    inline size_t after_element() const noexcept;
    really_inline tape_type type() const noexcept;
    really_inline uint64_t tape_value() const noexcept;
    template
    really_inline T next_tape_value() const noexcept;
    inline std::string_view get_string_view() const noexcept;

    /** The document this element references. */
    const document *doc;

    /** The index of this element on `doc.tape[]` */
    size_t json_index;

    friend class simdjson::document::key_value_pair;
    template
    friend class simdjson::minify;
  };
} // namespace simdjson::internal

/**
 * A JSON element.
 *
 * References an element in a JSON document, representing a JSON null, boolean, string, number,
 * array or object.
 */
class document::element : protected internal::tape_ref {
public:
  /** Create a new, invalid element. */
  really_inline element() noexcept;

  /** Whether this element is a json `null`. */
  really_inline bool is_null() const noexcept;
  /** Whether this is a JSON `true` or `false` */
  really_inline bool is_bool() const noexcept;
  /** Whether this is a JSON number (e.g. 1, 1.0 or 1e2) */
  really_inline bool is_number() const noexcept;
  /** Whether this is a JSON integer (e.g. 1 or -1, but *not* 1.0 or 1e2) */
  really_inline bool is_integer() const noexcept;
  /** Whether this is a JSON string (e.g. "abc") */
  really_inline bool is_string() const noexcept;
  /** Whether this is a JSON array (e.g. []) */
  really_inline bool is_array() const noexcept;
  /** Whether this is a JSON object (e.g. {}) */
  really_inline bool is_object() const noexcept;

  /**
   * Read this element as a boolean (json `true` or `false`).
   *
   * @return The boolean value, or:
   *         - INCORRECT_TYPE error if the JSON element is not a boolean
   */
  inline simdjson_result as_bool() const noexcept;

  /**
   * Read this element as a null-terminated string.
   *
   * Does *not* convert other types to a string; requires that the JSON type of the element was
   * an actual string.
   *
   * @return The null-terminated string, or:
   *         - INCORRECT_TYPE error if the JSON element is not a string
   */
  inline simdjson_result as_c_str() const noexcept;

  /**
   * Read this element as a C++ string_view (string with length).
   *
   * Does *not* convert other types to a string; requires that the JSON type of the element was
   * an actual string.
   *
   * @return A `string_view` into the string, or:
   *         - INCORRECT_TYPE error if the JSON element is not a string
   */
  inline simdjson_result as_string() const noexcept;

  /**
   * Read this element as an unsigned integer.
   *
   * @return The unsigned integer value, or:
   *         - INCORRECT_TYPE if the JSON element is not an integer
   *         - NUMBER_OUT_OF_RANGE if the integer doesn't fit in 64 bits or is negative
   */
  inline simdjson_result as_uint64_t() const noexcept;

  /**
   * Read this element as a signed integer.
   *
   * @return The integer value, or:
   *         - INCORRECT_TYPE if the JSON element is not an integer
   *         - NUMBER_OUT_OF_RANGE if the integer doesn't fit in 64 bits
   */
  inline simdjson_result as_int64_t() const noexcept;

  /**
   * Read this element as a floating point value.
   *
   * @return The double value, or:
   *         - INCORRECT_TYPE if the JSON element is not a number
   */
  inline simdjson_result as_double() const noexcept;

  /**
   * Read this element as a JSON array.
   *
   * @return The array value, or:
   *         - INCORRECT_TYPE if the JSON element is not an array
   */
  inline array_result as_array() const noexcept;

  /**
   * Read this element as a JSON object (key/value pairs).
   *
   * @return The object value, or:
   *         - INCORRECT_TYPE if the JSON element is not an object
   */
  inline object_result as_object() const noexcept;

#if SIMDJSON_EXCEPTIONS
  /**
   * Read this element as a boolean.
   *
   * @return The boolean value
   * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a boolean.
   */
  inline operator bool() const noexcept(false);

  /**
   * Read this element as a null-terminated string.
   *
   * Does *not* convert other types to a string; requires that the JSON type of the element was
   * an actual string.
   *
   * @return The string value.
   * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string.
   */
  inline explicit operator const char*() const noexcept(false);

  /**
   * Read this element as a C++ string_view (string with length).
   *
   * Does *not* convert other types to a string; requires that the JSON type of the element was
   * an actual string.
   *
   * @return The string value.
   * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string.
   */
  inline operator std::string_view() const noexcept(false);

  /**
   * Read this element as an unsigned integer.
   *
   * @return The integer value.
   * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer
   * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative
   */
  inline operator uint64_t() const noexcept(false);
  /**
   * Read this element as a signed integer.
   *
   * @return The integer value.
   * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer
   * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits
   */
  inline operator int64_t() const noexcept(false);
  /**
   * Read this element as a double.
   *
   * @return The double value.
   * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a number
   * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative
   *
   * NOTE(review): as_double() documents no NUMBER_OUT_OF_RANGE case; confirm whether this
   * conversion can really raise it.
   */
  inline operator double() const noexcept(false);
  /**
   * Read this element as a JSON array.
   *
   * @return The JSON array.
   * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array
   */
  inline operator document::array() const noexcept(false);
  /**
   * Read this element as a JSON object (key/value pairs).
   *
   * @return The JSON object.
   * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an object
   */
  inline operator document::object() const noexcept(false);
#endif // SIMDJSON_EXCEPTIONS

  /**
   * Get the value associated with the given key.
   *
   * The key will be matched against **unescaped** JSON:
   *
   *   document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1
   *   document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD
   *
   * @return The value associated with this field, or:
   *         - NO_SUCH_FIELD if the field does not exist in the object
   *         - INCORRECT_TYPE if this element is not an object
   */
  inline element_result operator[](const std::string_view &s) const noexcept;

  /**
   * Get the value associated with the given key.
   *
   * Note: The key will be matched against **unescaped** JSON:
   *
   *   document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1
   *   document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD
   *
   * @return The value associated with this field, or:
   *         - NO_SUCH_FIELD if the field does not exist in the object
   *         - INCORRECT_TYPE if this element is not an object
   */
  inline element_result operator[](const char *s) const noexcept;

private:
  really_inline element(const document *_doc, size_t _json_index) noexcept;
  friend class document;
  friend class document::element_result;
  template
  friend class minify;
};

/**
 * Represents a JSON array.
 */
class document::array : protected internal::tape_ref {
public:
  /** Create a new, invalid array */
  really_inline array() noexcept;

  class iterator : tape_ref {
  public:
    /**
     * Get the actual value
     */
    inline element operator*() const noexcept;
    /**
     * Get the next value.
     *
     * Part of the std::iterator interface.
     */
    inline void operator++() noexcept;
    /**
     * Check if these values come from the same place in the JSON.
     *
     * Part of the std::iterator interface.
*/ inline bool operator!=(const iterator& other) const noexcept; private: really_inline iterator(const document *_doc, size_t _json_index) noexcept; friend class array; }; /** * Return the first array element. * * Part of the std::iterable interface. */ inline iterator begin() const noexcept; /** * One past the last array element. * * Part of the std::iterable interface. */ inline iterator end() const noexcept; private: really_inline array(const document *_doc, size_t _json_index) noexcept; friend class document::element; friend class document::element_result; template friend class minify; }; /** * Represents a JSON object. */ class document::object : protected internal::tape_ref { public: /** Create a new, invalid object */ really_inline object() noexcept; class iterator : protected internal::tape_ref { public: /** * Get the actual key/value pair */ inline const document::key_value_pair operator*() const noexcept; /** * Get the next key/value pair. * * Part of the std::iterator interface. */ inline void operator++() noexcept; /** * Check if these key value pairs come from the same place in the JSON. * * Part of the std::iterator interface. */ inline bool operator!=(const iterator& other) const noexcept; /** * Get the key of this key/value pair. */ inline std::string_view key() const noexcept; /** * Get the key of this key/value pair. */ inline const char *key_c_str() const noexcept; /** * Get the value of this key/value pair. */ inline element value() const noexcept; private: really_inline iterator(const document *_doc, size_t _json_index) noexcept; friend class document::object; }; /** * Return the first key/value pair. * * Part of the std::iterable interface. */ inline iterator begin() const noexcept; /** * One past the last key/value pair. * * Part of the std::iterable interface. */ inline iterator end() const noexcept; /** * Get the value associated with the given key. 
* * The key will be matched against **unescaped** JSON: * * document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1 * document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD * * @return The value associated with this field, or: * - NO_SUCH_FIELD if the field does not exist in the object */ inline element_result operator[](const std::string_view &s) const noexcept; /** * Get the value associated with the given key. * * Note: The key will be matched against **unescaped** JSON: * * document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1 * document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD * * @return The value associated with this field, or: * - NO_SUCH_FIELD if the field does not exist in the object */ inline element_result operator[](const char *s) const noexcept; private: really_inline object(const document *_doc, size_t _json_index) noexcept; friend class document::element; friend class document::element_result; template friend class minify; }; /** * Key/value pair in an object. */ class document::key_value_pair { public: std::string_view key; document::element value; private: really_inline key_value_pair(std::string_view _key, document::element _value) noexcept; friend class document::object; }; /** The result of a JSON navigation that may fail. 
*/ class document::element_result : public simdjson_result { public: really_inline element_result(element value) noexcept; really_inline element_result(error_code error) noexcept; /** Whether this is a JSON `null` */ inline simdjson_result is_null() const noexcept; inline simdjson_result as_bool() const noexcept; inline simdjson_result as_string() const noexcept; inline simdjson_result as_c_str() const noexcept; inline simdjson_result as_uint64_t() const noexcept; inline simdjson_result as_int64_t() const noexcept; inline simdjson_result as_double() const noexcept; inline array_result as_array() const noexcept; inline object_result as_object() const noexcept; inline element_result operator[](const std::string_view &s) const noexcept; inline element_result operator[](const char *s) const noexcept; #if SIMDJSON_EXCEPTIONS inline operator bool() const noexcept(false); inline explicit operator const char*() const noexcept(false); inline operator std::string_view() const noexcept(false); inline operator uint64_t() const noexcept(false); inline operator int64_t() const noexcept(false); inline operator double() const noexcept(false); inline operator array() const noexcept(false); inline operator object() const noexcept(false); #endif // SIMDJSON_EXCEPTIONS }; /** The result of a JSON conversion to an array that may fail. */ class document::array_result : public simdjson_result { public: really_inline array_result(array value) noexcept; really_inline array_result(error_code error) noexcept; #if SIMDJSON_EXCEPTIONS inline array::iterator begin() const noexcept(false); inline array::iterator end() const noexcept(false); #endif // SIMDJSON_EXCEPTIONS }; /** The result of a JSON conversion to an object that may fail.
*/ class document::object_result : public simdjson_result { public: really_inline object_result(object value) noexcept; really_inline object_result(error_code error) noexcept; inline element_result operator[](const std::string_view &s) const noexcept; inline element_result operator[](const char *s) const noexcept; #if SIMDJSON_EXCEPTIONS inline object::iterator begin() const noexcept(false); inline object::iterator end() const noexcept(false); #endif // SIMDJSON_EXCEPTIONS }; /** * A persistent document parser. * * The parser is designed to be reused, holding the internal buffers necessary to do parsing, * as well as memory for a single document. The parsed document is overwritten on each parse. * * This class cannot be copied, only moved, to avoid unintended allocations. * * @note This is not thread safe: one parser cannot produce two documents at the same time! */ class document::parser { public: /** * Create a JSON parser. * * The new parser will have zero capacity. * * @param max_capacity The maximum document length the parser can automatically handle. The parser * will allocate more capacity on an as needed basis (when it sees documents too big to handle) * up to this amount. The parser still starts with zero capacity no matter what this number is: * to allocate an initial capacity, call set_capacity() after constructing the parser. Defaults * to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process). * @param max_depth The maximum depth--number of nested objects and arrays--this parser can handle. * This will not be allocated until parse() is called for the first time. Defaults to * DEFAULT_MAX_DEPTH. */ really_inline parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; ~parser()=default; /** * Take another parser's buffers and state. * * @param other The parser to take. Its capacity is zeroed.
*/ parser(document::parser &&other) = default; parser(const document::parser &) = delete; // Disallow copying /** * Take another parser's buffers and state. * * @param other The parser to take. Its capacity is zeroed. */ parser &operator=(document::parser &&other) = default; parser &operator=(const document::parser &) = delete; // Disallow copying /** * Load a JSON document from a file and return a reference to it. * * document::parser parser; * const document &doc = parser.load("jsonexamples/twitter.json"); * * ### IMPORTANT: Document Lifetime * * The JSON document still lives in the parser: this is the most efficient way to parse JSON * documents because it reuses the same buffers, but you *must* use the document before you * destroy the parser or call parse() again. * * ### Parser Capacity * * If the parser's current capacity is less than the file length, it will allocate enough capacity * to handle it (up to max_capacity). * * @param path The path to load. * @return The document, or an error: * - IO_ERROR if there was an error opening or reading the file. * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and len > max_capacity. * - other json errors if parsing fails. */ inline doc_result load(const std::string& path) noexcept; /** * Load a file containing many JSON documents. * * document::parser parser; * for (const document &doc : parser.load_many(path)) { * cout << std::string(doc["title"]) << endl; * } * * ### Format * * The file must contain a series of one or more JSON documents, concatenated into a single * buffer, separated by whitespace. It effectively parses until it has a fully valid document, * then starts parsing the next document at that point. (It does this with more parallelism and * lookahead than you might think, though.) * * documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator.
documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with whitespace. * * ### Error Handling * * All errors are returned during iteration: if there is a global error such as memory allocation, * it will be yielded as the first result. Iteration always stops after the first error. * * As with all other simdjson methods, non-exception error handling is readily available through * the same interface, requiring you to check the error before using the document: * * document::parser parser; * for (auto [doc, error] : parser.load_many(path)) { * if (error) { cerr << error << endl; exit(1); } * cout << std::string(doc["title"]) << endl; * } * * ### Threads * * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the * hood to do some lookahead. * * ### Parser Capacity * * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * * @param s The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes. * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to 10MB, which has been a reasonable sweet spot in our tests. * @return The stream. If there is an error, it will be returned during iteration. An empty input * will yield 0 documents rather than an EMPTY error. Errors: * - IO_ERROR if there was an error opening or reading the file. * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other json errors if parsing fails.
*/ inline document::stream load_many(const std::string& path, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; /** * Parse a JSON document and return a temporary reference to it. * * document::parser parser; * const document &doc = parser.parse(buf, len); * * ### IMPORTANT: Document Lifetime * * The JSON document still lives in the parser: this is the most efficient way to parse JSON * documents because it reuses the same buffers, but you *must* use the document before you * destroy the parser or call parse() again. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. * * If realloc_if_needed is true, it is assumed that the buffer does *not* have enough padding, * and it is copied into an enlarged temporary buffer before parsing. * * ### Parser Capacity * * If the parser's current capacity is less than len, it will allocate enough capacity * to handle it (up to max_capacity). * * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless * realloc_if_needed is true. * @param len The length of the JSON. * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding. * @return The document, or an error: * - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity, * and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and len > max_capacity. * - other json errors if parsing fails. */ inline doc_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept; /** * Parse a JSON document and return a temporary reference to it.
* * document::parser parser; * const document &doc = parser.parse(buf, len); * * ### IMPORTANT: Document Lifetime * * The JSON document still lives in the parser: this is the most efficient way to parse JSON * documents because it reuses the same buffers, but you *must* use the document before you * destroy the parser or call parse() again. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. * * If realloc_if_needed is true, it is assumed that the buffer does *not* have enough padding, * and it is copied into an enlarged temporary buffer before parsing. * * ### Parser Capacity * * If the parser's current capacity is less than len, it will allocate enough capacity * to handle it (up to max_capacity). * * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless * realloc_if_needed is true. * @param len The length of the JSON. * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding. * @return The document, or an error: * - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity, * and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and len > max_capacity. * - other json errors if parsing fails. */ really_inline doc_result parse(const char *buf, size_t len, bool realloc_if_needed = true) noexcept; /** * Parse a JSON document and return a temporary reference to it. * * document::parser parser; * const document &doc = parser.parse(s); * * ### IMPORTANT: Document Lifetime * * The JSON document still lives in the parser: this is the most efficient way to parse JSON * documents because it reuses the same buffers, but you *must* use the document before you * destroy the parser or call parse() again.
* * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. * * If s.capacity() is less than SIMDJSON_PADDING, the string will be copied into an enlarged * temporary buffer before parsing. * * ### Parser Capacity * * If the parser's current capacity is less than len, it will allocate enough capacity * to handle it (up to max_capacity). * * @param s The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, or * a new string will be created with the extra padding. * @return The document, or an error: * - MEMALLOC if the string does not have enough padding or the parser does not have * enough capacity, and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and len > max_capacity. * - other json errors if parsing fails. */ really_inline doc_result parse(const std::string &s) noexcept; /** * Parse a JSON document and return a temporary reference to it. * * document::parser parser; * const document &doc = parser.parse(s); * * ### IMPORTANT: Document Lifetime * * The JSON document still lives in the parser: this is the most efficient way to parse JSON * documents because it reuses the same buffers, but you *must* use the document before you * destroy the parser or call parse() again. * * ### Parser Capacity * * If the parser's current capacity is less than the length of s, it will allocate enough capacity * to handle it (up to max_capacity). * * @param s The JSON to parse. * @return The document, or an error: * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and len > max_capacity. * - other json errors if parsing fails. */ really_inline doc_result parse(const padded_string &s) noexcept; // We do not want to allow implicit conversion from C string to std::string.
really_inline doc_result parse(const char *buf) noexcept = delete; /** * Parse a buffer containing many JSON documents. * * document::parser parser; * for (const document &doc : parser.parse_many(buf, len)) { * cout << std::string(doc["title"]) << endl; * } * * ### Format * * The buffer must contain a series of one or more JSON documents, concatenated into a single * buffer, separated by whitespace. It effectively parses until it has a fully valid document, * then starts parsing the next document at that point. (It does this with more parallelism and * lookahead than you might think, though.) * * documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with whitespace. * * ### Error Handling * * All errors are returned during iteration: if there is a global error such as memory allocation, * it will be yielded as the first result. Iteration always stops after the first error. * * As with all other simdjson methods, non-exception error handling is readily available through * the same interface, requiring you to check the error before using the document: * * document::parser parser; * for (auto [doc, error] : parser.parse_many(buf, len)) { * if (error) { cerr << error << endl; exit(1); } * cout << std::string(doc["title"]) << endl; * } * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. * * ### Threads * * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the * hood to do some lookahead. * * ### Parser Capacity * * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * * @param buf The concatenated JSON to parse.
Must have at least len + SIMDJSON_PADDING allocated bytes. * @param len The length of the concatenated JSON. * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to 10MB, which has been a reasonable sweet spot in our tests. * @return The stream. If there is an error, it will be returned during iteration. An empty input * will yield 0 documents rather than an EMPTY error. Errors: * - MEMALLOC if the parser does not have enough capacity and memory allocation fails * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other json errors if parsing fails. */ inline stream parse_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; /** * Parse a buffer containing many JSON documents. * * document::parser parser; * for (const document &doc : parser.parse_many(buf, len)) { * cout << std::string(doc["title"]) << endl; * } * * ### Format * * The buffer must contain a series of one or more JSON documents, concatenated into a single * buffer, separated by whitespace. It effectively parses until it has a fully valid document, * then starts parsing the next document at that point. (It does this with more parallelism and * lookahead than you might think, though.) * * documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with whitespace. * * ### Error Handling * * All errors are returned during iteration: if there is a global error such as memory allocation, * it will be yielded as the first result. Iteration always stops after the first error.
* * As with all other simdjson methods, non-exception error handling is readily available through * the same interface, requiring you to check the error before using the document: * * document::parser parser; * for (auto [doc, error] : parser.parse_many(buf, len)) { * if (error) { cerr << error << endl; exit(1); } * cout << std::string(doc["title"]) << endl; * } * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. * * ### Threads * * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the * hood to do some lookahead. * * ### Parser Capacity * * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * * @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes. * @param len The length of the concatenated JSON. * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to 10MB, which has been a reasonable sweet spot in our tests. * @return The stream. If there is an error, it will be returned during iteration. An empty input * will yield 0 documents rather than an EMPTY error. Errors: * - MEMALLOC if the parser does not have enough capacity and memory allocation fails * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other json errors if parsing fails */ inline stream parse_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; /** * Parse a buffer containing many JSON documents.
* * document::parser parser; * for (const document &doc : parser.parse_many(s)) { * cout << std::string(doc["title"]) << endl; * } * * ### Format * * The buffer must contain a series of one or more JSON documents, concatenated into a single * buffer, separated by whitespace. It effectively parses until it has a fully valid document, * then starts parsing the next document at that point. (It does this with more parallelism and * lookahead than you might think, though.) * * documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with whitespace. * * ### Error Handling * * All errors are returned during iteration: if there is a global error such as memory allocation, * it will be yielded as the first result. Iteration always stops after the first error. * * As with all other simdjson methods, non-exception error handling is readily available through * the same interface, requiring you to check the error before using the document: * * document::parser parser; * for (auto [doc, error] : parser.parse_many(s)) { * if (error) { cerr << error << endl; exit(1); } * cout << std::string(doc["title"]) << endl; * } * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. * * ### Threads * * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the * hood to do some lookahead. * * ### Parser Capacity * * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * * @param s The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes. * @param batch_size The batch size to use. MUST be larger than the largest document.
The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to 10MB, which has been a reasonable sweet spot in our tests. * @return The stream. If there is an error, it will be returned during iteration. An empty input * will yield 0 documents rather than an EMPTY error. Errors: * - MEMALLOC if the parser does not have enough capacity and memory allocation fails * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other json errors if parsing fails */ inline stream parse_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; /** * Parse a buffer containing many JSON documents. * * document::parser parser; * for (const document &doc : parser.parse_many(s)) { * cout << std::string(doc["title"]) << endl; * } * * ### Format * * The buffer must contain a series of one or more JSON documents, concatenated into a single * buffer, separated by whitespace. It effectively parses until it has a fully valid document, * then starts parsing the next document at that point. (It does this with more parallelism and * lookahead than you might think, though.) * * documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with whitespace. * * ### Error Handling * * All errors are returned during iteration: if there is a global error such as memory allocation, * it will be yielded as the first result. Iteration always stops after the first error.
* * As with all other simdjson methods, non-exception error handling is readily available through * the same interface, requiring you to check the error before using the document: * * document::parser parser; * for (auto [doc, error] : parser.parse_many(s)) { * if (error) { cerr << error << endl; exit(1); } * cout << std::string(doc["title"]) << endl; * } * * ### Threads * * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the * hood to do some lookahead. * * ### Parser Capacity * * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * * @param s The concatenated JSON to parse. * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to 10MB, which has been a reasonable sweet spot in our tests. * @return The stream. If there is an error, it will be returned during iteration. An empty input * will yield 0 documents rather than an EMPTY error. Errors: * - MEMALLOC if the parser does not have enough capacity and memory allocation fails * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other json errors if parsing fails */ inline stream parse_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; // We do not want to allow implicit conversion from C string to std::string. really_inline doc_result parse_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** * The largest document this parser can automatically support. * * The parser may reallocate internal buffers as needed up to this amount. * * @return Maximum capacity, in bytes. */ really_inline size_t max_capacity() const noexcept; /** * The largest document this parser can support without reallocating.
* * @return Current capacity, in bytes. */ really_inline size_t capacity() const noexcept; /** * The maximum level of nested object and arrays supported by this parser. * * @return Maximum depth (number of nested objects and arrays). */ really_inline size_t max_depth() const noexcept; /** * Set max_capacity. This is the largest document this parser can automatically support. * * The parser may reallocate internal buffers as needed up to this amount. * * This call will not allocate or deallocate, even if capacity is currently above max_capacity. * * @param max_capacity The new maximum capacity, in bytes. */ really_inline void set_max_capacity(size_t max_capacity) noexcept; /** * Set capacity. This is the largest document this parser can support without reallocating. * * This will allocate or deallocate as necessary. * * @param capacity The new capacity, in bytes. * * @return MEMALLOC if unsuccessful, SUCCESS otherwise. */ WARN_UNUSED inline error_code set_capacity(size_t capacity) noexcept; /** * Set the maximum level of nested object and arrays supported by this parser. * * This will allocate or deallocate as necessary. * * @param max_depth The new maximum depth (number of nested objects and arrays). * * @return MEMALLOC if unsuccessful, SUCCESS otherwise. */ WARN_UNUSED inline error_code set_max_depth(size_t max_depth) noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length * and `max_depth` depth. * * Equivalent to calling set_capacity() and set_max_depth(). * * @param capacity The new capacity. * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. * @return true if successful, false if allocation failed.
*/ WARN_UNUSED inline bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; // type aliases for backcompat using Iterator = document::iterator; using InvalidJSON = simdjson_error; // Next location to write to in the tape uint32_t current_loc{0}; // structural indices passed from stage 1 to stage 2 uint32_t n_structural_indexes{0}; std::unique_ptr structural_indexes; // location and return address of each open { or [ std::unique_ptr containing_scope_offset; #ifdef SIMDJSON_USE_COMPUTED_GOTO std::unique_ptr ret_address; #else std::unique_ptr ret_address; #endif // Next place to write a string uint8_t *current_string_buf_loc; bool valid{false}; error_code error{UNINITIALIZED}; // Document we're writing to document doc; // // TODO these are deprecated; use the results of parse instead. // // returns true if the document parsed was valid inline bool is_valid() const noexcept; // return an error code corresponding to the last parsing attempt, see // simdjson.h will return UNINITIALIZED if no parsing was attempted inline int get_error_code() const noexcept; // return the string equivalent of "get_error_code" inline std::string get_error_message() const noexcept; // print the json to std::ostream (should be valid) // return false if the tape is likely wrong (e.g., you did not parse a valid // JSON). inline bool print_json(std::ostream &os) const noexcept; inline bool dump_raw_tape(std::ostream &os) const noexcept; // // Parser callbacks: these are internal!
// // TODO find a way to do this without exposing the interface or crippling performance // // this should be called when parsing (right before writing the tapes) inline void init_stage2() noexcept; really_inline error_code on_error(error_code new_error_code) noexcept; really_inline error_code on_success(error_code success_code) noexcept; really_inline bool on_start_document(uint32_t depth) noexcept; really_inline bool on_start_object(uint32_t depth) noexcept; really_inline bool on_start_array(uint32_t depth) noexcept; // TODO we're not checking this bool really_inline bool on_end_document(uint32_t depth) noexcept; really_inline bool on_end_object(uint32_t depth) noexcept; really_inline bool on_end_array(uint32_t depth) noexcept; really_inline bool on_true_atom() noexcept; really_inline bool on_false_atom() noexcept; really_inline bool on_null_atom() noexcept; really_inline uint8_t *on_start_string() noexcept; really_inline bool on_end_string(uint8_t *dst) noexcept; really_inline bool on_number_s64(int64_t value) noexcept; really_inline bool on_number_u64(uint64_t value) noexcept; really_inline bool on_number_double(double value) noexcept; private: // // The maximum document length this parser supports. // // Buffers are large enough to handle any document up to this length. // size_t _capacity{0}; // // The maximum document length this parser will automatically support. // // The parser will not be automatically allocated above this amount. // size_t _max_capacity; // // The maximum depth (number of nested objects and arrays) supported by this parser. // // Defaults to DEFAULT_MAX_DEPTH. // size_t _max_depth; // all nodes are stored on the doc.tape using a 64-bit word. // // strings, double and ints are stored as // a 64-bit word with a pointer to the actual value // // // // for objects or arrays, store [ or { at the beginning and } and ] at the // end.
For the openings ([ or {), we annotate them with a reference to the // location on the doc.tape of the end, and for the closings (} and ]), we // annotate them with a reference to the location of the opening // // inline void write_tape(uint64_t val, internal::tape_type t) noexcept; inline void annotate_previous_loc(uint32_t saved_loc, uint64_t val) noexcept; // Ensure we have enough capacity to handle at least desired_capacity bytes, // and auto-allocate if not. inline error_code ensure_capacity(size_t desired_capacity) noexcept; #if SIMDJSON_EXCEPTIONS // Used internally to get the document inline const document &get_document() const noexcept(false); #endif // SIMDJSON_EXCEPTIONS template friend class document_iterator; friend class document::stream; }; // class parser /** * Minifies a JSON element or document, printing the smallest possible valid JSON. * * document doc = document::parse(" [ 1 , 2 , 3 ] "_pad); * cout << minify(doc) << endl; // prints [1,2,3] * */ template class minify { public: /** * Create a new minifier. * * @param _value The document or element to minify. */ inline minify(const T &_value) noexcept : value{_value} {} /** * Minify JSON to a string. */ inline operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); } /** * Minify JSON to an output stream. */ inline std::ostream& print(std::ostream& out); private: const T &value; }; /** * Minify JSON to an output stream. * * @param out The output stream. * @param formatter The minifier. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ template inline std::ostream& operator<<(std::ostream& out, minify formatter) { return formatter.print(out); } /** * Print JSON to an output stream. * * By default, the document will be printed minified. * * @param out The output stream. * @param value The document to print. * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
*/ inline std::ostream& operator<<(std::ostream& out, const document &value) { return out << minify(value); } /** * Print JSON to an output stream. * * By default, the value will be printed minified. * * @param out The output stream. * @param value The value to print. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, const document::element &value) { return out << minify(value); }; /** * Print JSON to an output stream. * * By default, the value will be printed minified. * * @param out The output stream. * @param value The value to print. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, const document::array &value) { return out << minify(value); } /** * Print JSON to an output stream. * * By default, the value will be printed minified. * * @param out The output stream. * @param value The value to print. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, const document::object &value) { return out << minify(value); } /** * Print JSON to an output stream. * * By default, the value will be printed minified. * * @param out The output stream. * @param value The value to print. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, const document::key_value_pair &value) { return out << minify(value); } #if SIMDJSON_EXCEPTIONS /** * Print JSON to an output stream. * * By default, the value will be printed minified. * * @param out The output stream. * @param value The value to print. * @throw simdjson_error if the result being printed has an error. If there is an error with the * underlying output stream, that error will be propagated (simdjson_error will not be * thrown). 
*/ inline std::ostream& operator<<(std::ostream& out, const document::doc_move_result &value) noexcept(false) { return out << minify(value); } /** * Print JSON to an output stream. * * By default, the value will be printed minified. * * @param out The output stream. * @param value The value to print. * @throw simdjson_error if the result being printed has an error. If there is an error with the * underlying output stream, that error will be propagated (simdjson_error will not be * thrown). */ inline std::ostream& operator<<(std::ostream& out, const document::doc_result &value) noexcept(false) { return out << minify(value); } /** * Print JSON to an output stream. * * By default, the value will be printed minified. * * @param out The output stream. * @param value The value to print. * @throw simdjson_error if the result being printed has an error. If there is an error with the * underlying output stream, that error will be propagated (simdjson_error will not be * thrown). */ inline std::ostream& operator<<(std::ostream& out, const document::element_result &value) noexcept(false) { return out << minify(value); } /** * Print JSON to an output stream. * * By default, the value will be printed minified. * * @param out The output stream. * @param value The value to print. * @throw simdjson_error if the result being printed has an error. If there is an error with the * underlying output stream, that error will be propagated (simdjson_error will not be * thrown). */ inline std::ostream& operator<<(std::ostream& out, const document::array_result &value) noexcept(false) { return out << minify(value); } /** * Print JSON to an output stream. * * By default, the value will be printed minified. * * @param out The output stream. * @param value The value to print. * @throw simdjson_error if the result being printed has an error. If there is an error with the * underlying output stream, that error will be propagated (simdjson_error will not be * thrown). 
*/ inline std::ostream& operator<<(std::ostream& out, const document::object_result &value) noexcept(false) { return out << minify(value); } #endif } // namespace simdjson #endif // SIMDJSON_DOCUMENT_H /* end file include/simdjson/simdjson.h */ namespace simdjson { /** * An implementation of simdjson for a particular CPU architecture. * * Also used to maintain the currently active implementation. The active implementation is * automatically initialized on first use to the most advanced implementation supported by the host. */ class implementation { public: /** * The name of this implementation. * * const implementation *impl = simdjson::active_implementation; * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; * * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" */ virtual const std::string &name() const { return _name; } /** * The description of this implementation. * * const implementation *impl = simdjson::active_implementation; * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; * * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" */ virtual const std::string &description() const { return _description; } /** * The instruction sets this implementation is compiled against. * * @return a mask of all required `instruction_set` values */ virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; }; /** * Run a full document parse (ensure_capacity, stage1 and stage2). * * Overridden by each implementation. * * @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. * @param len the length of the json document. * @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity. * @return the error code, or SUCCESS if there was no error. 
*/ WARN_UNUSED virtual error_code parse(const uint8_t *buf, size_t len, document::parser &parser) const noexcept = 0; /** * Stage 1 of the document parser. * * Overridden by each implementation. * * @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. * @param len the length of the json document. * @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity. * @param streaming whether this is being called by document::parser::parse_many. * @return the error code, or SUCCESS if there was no error. */ WARN_UNUSED virtual error_code stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept = 0; /** * Stage 2 of the document parser. * * Overridden by each implementation. * * @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. * @param len the length of the json document. * @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity. * @return the error code, or SUCCESS if there was no error. */ WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, document::parser &parser) const noexcept = 0; /** * Stage 2 of the document parser for document::parser::parse_many. * * Overridden by each implementation. * * @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. * @param len the length of the json document. * @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity. * @param next_json the next structural index. Start this at 0 the first time, and it will be updated to the next value to pass each time. * @return the error code, SUCCESS if there was no error, or SUCCESS_AND_HAS_MORE if there was no error and stage2 can be called again. 
*/ WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json) const noexcept = 0; protected: really_inline implementation( const std::string &name, const std::string &description, uint32_t required_instruction_sets ) : _name(name), _description(description), _required_instruction_sets(required_instruction_sets) { } private: /** * The name of this implementation. */ const std::string _name; /** * The description of this implementation. */ const std::string _description; /** * Instruction sets required for this implementation. */ const uint32_t _required_instruction_sets; }; namespace internal { /** * The list of available implementations compiled into simdjson. */ class available_implementation_list { public: /** Get the list of available implementations compiled into simdjson */ really_inline available_implementation_list() {} /** Number of implementations */ size_t size() const noexcept; /** STL const begin() iterator */ const implementation * const *begin() const noexcept; /** STL const end() iterator */ const implementation * const *end() const noexcept; /** * Get the implementation with the given name. * * Case sensitive. * * const implementation *impl = simdjson::available_implementations["westmere"]; * if (!impl) { exit(1); } * simdjson::active_implementation = impl; * * @param name the implementation to find, e.g. "westmere", "haswell", "arm64" * @return the implementation, or nullptr if the parse failed. */ const implementation * operator[](const std::string& name) const noexcept { for (const implementation * impl : *this) { if (impl->name() == name) { return impl; } } return nullptr; } /** * Detect the most advanced implementation supported by the current host. * * This is used to initialize the implementation on startup. 
* * const implementation *impl = simdjson::available_implementation::detect_best_supported(); * simdjson::active_implementation = impl; * * @return the most advanced supported implementation for the current host, or an * implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported * implementation. Will never return nullptr. */ const implementation *detect_best_supported() const noexcept; }; // Detects best supported implementation on first use, and sets it class detect_best_supported_implementation_on_first_use final : public implementation { public: const std::string& name() const noexcept final { return set_best()->name(); } const std::string& description() const noexcept final { return set_best()->description(); } uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); } WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final { return set_best()->parse(buf, len, parser); } WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept final { return set_best()->stage1(buf, len, parser, streaming); } WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final { return set_best()->stage2(buf, len, parser); } WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json) const noexcept final { return set_best()->stage2(buf, len, parser, next_json); } really_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {} private: const implementation *set_best() const noexcept; }; inline const detect_best_supported_implementation_on_first_use detect_best_supported_implementation_on_first_use_singleton; template class atomic_ptr { public: atomic_ptr(T *_ptr) : ptr{_ptr} {} operator const T*() 
const { return ptr.load(); } const T& operator*() const { return *ptr; } const T* operator->() const { return ptr.load(); } operator T*() { return ptr.load(); } T& operator*() { return *ptr; } T* operator->() { return ptr.load(); } T* operator=(T *_ptr) { return ptr = _ptr; } private: std::atomic ptr; }; } // namespace [simdjson::]internal /** * The list of available implementations compiled into simdjson. */ inline const internal::available_implementation_list available_implementations; /** * The active implementation. * * Automatically initialized on first use to the most advanced implementation supported by this hardware. */ inline internal::atomic_ptr active_implementation = &internal::detect_best_supported_implementation_on_first_use_singleton; } // namespace simdjson #endif // SIMDJSON_IMPLEMENTATION_H /* end file include/simdjson/simdjson.h */ /* begin file include/simdjson/document_stream.h */ #ifndef SIMDJSON_DOCUMENT_STREAM_H #define SIMDJSON_DOCUMENT_STREAM_H #include namespace simdjson { template class JsonStream; /** * A forward-only stream of documents. * * Produced by document::parser::parse_many. * */ class document::stream { public: really_inline ~stream() noexcept; /** * An iterator through a forward-only stream of documents. */ class iterator { public: /** * Get the current document (or error). */ really_inline doc_result operator*() noexcept; /** * Advance to the next document. */ inline iterator& operator++() noexcept; /** * Check if we're at the end yet. * @param other the end iterator to compare to. */ really_inline bool operator!=(const iterator &other) const noexcept; private: iterator(stream& stream, bool finished) noexcept; /** The stream parser we're iterating through. */ stream& _stream; /** Whether we're finished or not. */ bool finished; friend class stream; }; /** * Start iterating the documents in the stream. */ really_inline iterator begin() noexcept; /** * The end of the stream, for iterator comparison purposes. 
*/ really_inline iterator end() noexcept; private: stream &operator=(const document::stream &) = delete; // Disallow copying stream(document::stream &other) = delete; // Disallow copying really_inline stream(document::parser &parser, const uint8_t *buf, size_t len, size_t batch_size, error_code error = SUCCESS) noexcept; /** * Parse the next document found in the buffer previously given to stream. * * The content should be a valid JSON document encoded as UTF-8. If there is a * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are * discouraged. * * You do NOT need to pre-allocate a parser. This function takes care of * pre-allocating a capacity defined by the batch_size defined when creating the * stream object. * * The function returns simdjson::SUCCESS_AND_HAS_MORE (an integer = 1) in case * of success and indicates that the buffer still contains more data to be parsed, * meaning this function can be called again to return the next JSON document * after this one. * * The function returns simdjson::SUCCESS (as integer = 0) in case of success * and indicates that the buffer has successfully been parsed to the end. * Every document it contained has been parsed without error. * * The function returns an error code from simdjson/simdjson.h in case of failure * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; * the simdjson::error_message function converts these error codes into a string). * * You can also check validity by calling parser.is_valid(). The same parser can * and should be reused for the other documents in the buffer. */ inline error_code json_parse() noexcept; /** * Returns the location (index) of where the next document should be in the * buffer. 
* Can be used for debugging, it tells the user the position of the end of the * last * valid JSON document parsed */ inline size_t get_current_buffer_loc() const { return current_buffer_loc; } /** * Returns the total amount of complete documents parsed by the stream, * in the current buffer, at the given time. */ inline size_t get_n_parsed_docs() const { return n_parsed_docs; } /** * Returns the total amount of data (in bytes) parsed by the stream, * in the current buffer, at the given time. */ inline size_t get_n_bytes_parsed() const { return n_bytes_parsed; } inline const uint8_t *buf() const { return _buf + buf_start; } inline void advance(size_t offset) { buf_start += offset; } inline size_t remaining() const { return _len - buf_start; } document::parser &parser; const uint8_t *_buf; const size_t _len; size_t _batch_size; // this is actually variable! size_t buf_start{0}; size_t next_json{0}; bool load_next_batch{true}; size_t current_buffer_loc{0}; #ifdef SIMDJSON_THREADS_ENABLED size_t last_json_buffer_loc{0}; #endif size_t n_parsed_docs{0}; size_t n_bytes_parsed{0}; error_code error{SUCCESS_AND_HAS_MORE}; #ifdef SIMDJSON_THREADS_ENABLED error_code stage1_is_ok_thread{SUCCESS}; std::thread stage_1_thread; document::parser parser_thread; #endif template friend class JsonStream; friend class document::parser; }; // class document::stream } // end of namespace simdjson #endif // SIMDJSON_DOCUMENT_STREAM_H /* end file include/simdjson/document_stream.h */ /* begin file include/simdjson/jsonminifier.h */ #ifndef SIMDJSON_JSONMINIFIER_H #define SIMDJSON_JSONMINIFIER_H #include #include #include namespace simdjson { // Take input from buf and remove useless whitespace, write it to out; buf and // out can be the same pointer. Result is null terminated, // return the string length (minus the null termination). // The accelerated version of this function only runs on AVX2 hardware. 
size_t json_minify(const uint8_t *buf, size_t len, uint8_t *out); static inline size_t json_minify(const char *buf, size_t len, char *out) { return json_minify(reinterpret_cast(buf), len, reinterpret_cast(out)); } static inline size_t json_minify(const std::string_view &p, char *out) { return json_minify(p.data(), p.size(), out); } static inline size_t json_minify(const padded_string &p, char *out) { return json_minify(p.data(), p.size(), out); } } // namespace simdjson #endif // SIMDJSON_JSONMINIFIER_H /* end file include/simdjson/jsonminifier.h */ // Deprecated API /* begin file include/simdjson/parsedjsoniterator.h */ // TODO Remove this -- deprecated API and files #ifndef SIMDJSON_PARSEDJSONITERATOR_H #define SIMDJSON_PARSEDJSONITERATOR_H /* begin file include/simdjson/document_iterator.h */ #ifndef SIMDJSON_DOCUMENT_ITERATOR_H #define SIMDJSON_DOCUMENT_ITERATOR_H #include #include #include #include #include #include /* begin file include/simdjson/internal/jsonformatutils.h */ #ifndef SIMDJSON_INTERNAL_JSONFORMATUTILS_H #define SIMDJSON_INTERNAL_JSONFORMATUTILS_H #include #include #include namespace simdjson::internal { class escape_json_string; inline std::ostream& operator<<(std::ostream& out, const escape_json_string &str); class escape_json_string { public: escape_json_string(std::string_view _str) noexcept : str{_str} {} operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); } private: std::string_view str; friend std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped); }; inline std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped) { for (size_t i=0; i(unescaped.str[i]); out.flags(f); } else { out << unescaped.str[i]; } } } return out; } } // namespace simdjson::internal #endif // SIMDJSON_INTERNAL_JSONFORMATUTILS_H /* end file include/simdjson/internal/jsonformatutils.h */ namespace simdjson { template class document_iterator { public: #if SIMDJSON_EXCEPTIONS 
document_iterator(const document::parser &parser) noexcept(false); #endif document_iterator(const document &doc) noexcept; document_iterator(const document_iterator &o) noexcept; document_iterator &operator=(const document_iterator &o) noexcept; inline bool is_ok() const; // useful for debugging purposes inline size_t get_tape_location() const; // useful for debugging purposes inline size_t get_tape_length() const; // returns the current depth (start at 1 with 0 reserved for the fictitious // root node) inline size_t get_depth() const; // A scope is a series of nodes at the same depth, typically it is either an // object ({) or an array ([). The root node has type 'r'. inline uint8_t get_scope_type() const; // move forward in document order inline bool move_forward(); // retrieve the character code of what we're looking at: // [{"slutfn are the possibilities inline uint8_t get_type() const { return current_type; // short functions should be inlined! } // get the int64_t value at this node; valid only if get_type is "l" inline int64_t get_integer() const { if (location + 1 >= tape_length) { return 0; // default value in case of error } return static_cast(doc.tape[location + 1]); } // get the value as uint64; valid only if if get_type is "u" inline uint64_t get_unsigned_integer() const { if (location + 1 >= tape_length) { return 0; // default value in case of error } return doc.tape[location + 1]; } // get the string value at this node (NULL ended); valid only if get_type is " // note that tabs, and line endings are escaped in the returned value (see // print_with_escapes) return value is valid UTF-8, it may contain NULL chars // within the string: get_string_length determines the true string length. 
inline const char *get_string() const { return reinterpret_cast( doc.string_buf.get() + (current_val & internal::JSON_VALUE_MASK) + sizeof(uint32_t)); } // return the length of the string in bytes inline uint32_t get_string_length() const { uint32_t answer; memcpy(&answer, reinterpret_cast(doc.string_buf.get() + (current_val & internal::JSON_VALUE_MASK)), sizeof(uint32_t)); return answer; } // get the double value at this node; valid only if // get_type() is "d" inline double get_double() const { if (location + 1 >= tape_length) { return std::numeric_limits::quiet_NaN(); // default value in // case of error } double answer; memcpy(&answer, &doc.tape[location + 1], sizeof(answer)); return answer; } inline bool is_object_or_array() const { return is_object() || is_array(); } inline bool is_object() const { return get_type() == '{'; } inline bool is_array() const { return get_type() == '['; } inline bool is_string() const { return get_type() == '"'; } // Returns true if the current type of node is an signed integer. // You can get its value with `get_integer()`. inline bool is_integer() const { return get_type() == 'l'; } // Returns true if the current type of node is an unsigned integer. // You can get its value with `get_unsigned_integer()`. // // NOTE: // Only a large value, which is out of range of a 64-bit signed integer, is // represented internally as an unsigned node. On the other hand, a typical // positive integer, such as 1, 42, or 1000000, is as a signed node. // Be aware this function returns false for a signed node. 
inline bool is_unsigned_integer() const { return get_type() == 'u'; } inline bool is_double() const { return get_type() == 'd'; } inline bool is_number() const { return is_integer() || is_unsigned_integer() || is_double(); } inline bool is_true() const { return get_type() == 't'; } inline bool is_false() const { return get_type() == 'f'; } inline bool is_null() const { return get_type() == 'n'; } static bool is_object_or_array(uint8_t type) { return ((type == '[') || (type == '{')); } // when at {, go one level deep, looking for a given key // if successful, we are left pointing at the value, // if not, we are still pointing at the object ({) // (in case of repeated keys, this only finds the first one). // We seek the key using C's strcmp so if your JSON strings contain // NULL chars, this would trigger a false positive: if you expect that // to be the case, take extra precautions. // Furthermore, we do the comparison character-by-character // without taking into account Unicode equivalence. inline bool move_to_key(const char *key); // as above, but case insensitive lookup (strcmpi instead of strcmp) inline bool move_to_key_insensitive(const char *key); // when at {, go one level deep, looking for a given key // if successful, we are left pointing at the value, // if not, we are still pointing at the object ({) // (in case of repeated keys, this only finds the first one). // The string we search for can contain NULL values. // Furthermore, we do the comparison character-by-character // without taking into account Unicode equivalence. inline bool move_to_key(const char *key, uint32_t length); // when at a key location within an object, this moves to the accompanying // value (located next to it). This is equivalent but much faster than // calling "next()". inline void move_to_value(); // when at [, go one level deep, and advance to the given index. 
// if successful, we are left pointing at the value, // if not, we are still pointing at the array ([) inline bool move_to_index(uint32_t index); // Moves the iterator to the value corresponding to the json pointer. // Always search from the root of the document. // if successful, we are left pointing at the value, // if not, we are still pointing the same value we were pointing before the // call. The json pointer follows the rfc6901 standard's syntax: // https://tools.ietf.org/html/rfc6901 However, the standard says "If a // referenced member name is not unique in an object, the member that is // referenced is undefined, and evaluation fails". Here we just return the // first corresponding value. The length parameter is the length of the // jsonpointer string ('pointer'). bool move_to(const char *pointer, uint32_t length); // Moves the iterator to the value corresponding to the json pointer. // Always search from the root of the document. // if successful, we are left pointing at the value, // if not, we are still pointing the same value we were pointing before the // call. The json pointer implementation follows the rfc6901 standard's // syntax: https://tools.ietf.org/html/rfc6901 However, the standard says // "If a referenced member name is not unique in an object, the member that // is referenced is undefined, and evaluation fails". Here we just return // the first corresponding value. inline bool move_to(const std::string &pointer) { return move_to(pointer.c_str(), pointer.length()); } private: // Almost the same as move_to(), except it searches from the current // position. The pointer's syntax is identical, though that case is not // handled by the rfc6901 standard. The '/' is still required at the // beginning. However, contrary to move_to(), the URI Fragment Identifier // Representation is not supported here. Also, in case of failure, we are // left pointing at the closest value it could reach. For these reasons it // is private. 
It exists because it is used by move_to(). bool relative_move_to(const char *pointer, uint32_t length); public: // throughout return true if we can do the navigation, false // otherwise // Withing a given scope (series of nodes at the same depth within either an // array or an object), we move forward. // Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, { // and [. At the object ({) or at the array ([), you can issue a "down" to // visit their content. valid if we're not at the end of a scope (returns // true). inline bool next(); // Within a given scope (series of nodes at the same depth within either an // array or an object), we move backward. // Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true // when starting at the end of the scope. At the object ({) or at the array // ([), you can issue a "down" to visit their content. // Performance warning: This function is implemented by starting again // from the beginning of the scope and scanning forward. You should expect // it to be relatively slow. inline bool prev(); // Moves back to either the containing array or object (type { or [) from // within a contained scope. // Valid unless we are at the first level of the document inline bool up(); // Valid if we're at a [ or { and it starts a non-empty scope; moves us to // start of that deeper scope if it not empty. Thus, given [true, null, // {"a":1}, [1,2]], if we are at the { node, we would move to the "a" node. 
inline bool down(); // move us to the start of our current scope, // a scope is a series of nodes at the same level inline void to_start_scope(); inline void rewind() { while (up()) ; } // void to_end_scope(); // move us to // the start of our current scope; always succeeds // print the node we are currently pointing at bool print(std::ostream &os, bool escape_strings = true) const; typedef struct { size_t start_of_scope; uint8_t scope_type; } scopeindex_t; private: const document &doc; size_t depth; size_t location; // our current location on a tape size_t tape_length; uint8_t current_type; uint64_t current_val; scopeindex_t depth_index[max_depth]; }; } // namespace simdjson #endif // SIMDJSON_DOCUMENT_ITERATOR_H /* end file include/simdjson/internal/jsonformatutils.h */ #endif /* end file include/simdjson/internal/jsonformatutils.h */ /* begin file include/simdjson/jsonparser.h */ // TODO Remove this -- deprecated API and files #ifndef SIMDJSON_JSONPARSER_H #define SIMDJSON_JSONPARSER_H /* begin file include/simdjson/parsedjson.h */ // TODO Remove this -- deprecated API and files #ifndef SIMDJSON_PARSEDJSON_H #define SIMDJSON_PARSEDJSON_H namespace simdjson { using ParsedJson = document::parser; } // namespace simdjson #endif /* end file include/simdjson/parsedjson.h */ /* begin file include/simdjson/jsonioutil.h */ #ifndef SIMDJSON_JSONIOUTIL_H #define SIMDJSON_JSONIOUTIL_H #include #include #include #include #include #include namespace simdjson { #if SIMDJSON_EXCEPTIONS inline padded_string get_corpus(const std::string &filename) { return padded_string::load(filename); } #endif // SIMDJSON_EXCEPTIONS } // namespace simdjson #endif // SIMDJSON_JSONIOUTIL_H /* end file include/simdjson/jsonioutil.h */ namespace simdjson { // // C API (json_parse and build_parsed_json) declarations // inline int json_parse(const uint8_t *buf, size_t len, document::parser &parser, bool realloc_if_needed = true) noexcept { error_code code = parser.parse(buf, len, 
realloc_if_needed).error(); // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid // bits in the parser instead of heeding the result code. The normal parser unsets those in // anticipation of making the error code ephemeral. // Here we put the code back into the parser, until we've removed this method. parser.valid = code == SUCCESS; parser.error = code; return code; } inline int json_parse(const char *buf, size_t len, document::parser &parser, bool realloc_if_needed = true) noexcept { return json_parse(reinterpret_cast(buf), len, parser, realloc_if_needed); } inline int json_parse(const std::string &s, document::parser &parser, bool realloc_if_needed = true) noexcept { return json_parse(s.data(), s.length(), parser, realloc_if_needed); } inline int json_parse(const padded_string &s, document::parser &parser) noexcept { return json_parse(s.data(), s.length(), parser, false); } WARN_UNUSED static document::parser build_parsed_json(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept { document::parser parser; json_parse(buf, len, parser, realloc_if_needed); return parser; } WARN_UNUSED inline document::parser build_parsed_json(const char *buf, size_t len, bool realloc_if_needed = true) noexcept { return build_parsed_json(reinterpret_cast(buf), len, realloc_if_needed); } WARN_UNUSED inline document::parser build_parsed_json(const std::string &s, bool realloc_if_needed = true) noexcept { return build_parsed_json(s.data(), s.length(), realloc_if_needed); } WARN_UNUSED inline document::parser build_parsed_json(const padded_string &s) noexcept { return build_parsed_json(s.data(), s.length(), false); } // We do not want to allow implicit conversion from C string to std::string. 
int json_parse(const char *buf, document::parser &parser) noexcept = delete; document::parser build_parsed_json(const char *buf) noexcept = delete; } // namespace simdjson #endif /* end file include/simdjson/jsonioutil.h */ /* begin file include/simdjson/jsonstream.h */ // TODO Remove this -- deprecated API and files #ifndef SIMDJSON_JSONSTREAM_H #define SIMDJSON_JSONSTREAM_H namespace simdjson { /** * @deprecated use document::stream instead. * * The main motivation for this piece of software is to achieve maximum speed and offer * good quality of life while parsing files containing multiple JSON documents. * * Since we want to offer flexibility and not restrict ourselves to a specific file * format, we support any file that contains any valid JSON documents separated by one * or more character that is considered a whitespace by the JSON spec. * Namely: space, nothing, linefeed, carriage return, horizontal tab. * Anything that is not whitespace will be parsed as a JSON document and could lead * to failure. * * To offer maximum parsing speed, our implementation processes the data inside the * buffer by batches and their size is defined by the parameter "batch_size". * By loading data in batches, we can optimize the time spent allocating data in the * parser and can also open the possibility of multi-threading. * The batch_size must be at least as large as the biggest document in the file, but * not too large in order to submerge the chached memory. We found that 1MB is * somewhat a sweet spot for now. Eventually, this batch_size could be fully * automated and be optimal at all times. * * The template parameter (string_container) must * support the data() and size() methods, returning a pointer * to a char* and to the number of bytes respectively. * The simdjson parser may read up to SIMDJSON_PADDING bytes beyond the end * of the string, so if you do not use a padded_string container, * you have the responsibility to overallocated. 
If you fail to * do so, your software may crash if you cross a page boundary, * and you should expect memory checkers to object. * Most users should use a simdjson::padded_string. */ template class JsonStream { public: /* Create a JsonStream object that can be used to parse sequentially the valid * JSON documents found in the buffer "buf". * * The batch_size must be at least as large as the biggest document in the * file, but * not too large to submerge the cached memory. We found that 1MB is * somewhat a sweet spot for now. * * The user is expected to call the following json_parse method to parse the * next * valid JSON document found in the buffer. This method can and is expected * to be * called in a loop. * * Various methods are offered to keep track of the status, like * get_current_buffer_loc, * get_n_parsed_docs, get_n_bytes_parsed, etc. * * */ JsonStream(const string_container &s, size_t _batch_size = 1000000) noexcept; ~JsonStream() noexcept; /* Parse the next document found in the buffer previously given to JsonStream. * The content should be a valid JSON document encoded as UTF-8. If there is a * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are * discouraged. * * You do NOT need to pre-allocate a parser. This function takes care of * pre-allocating a capacity defined by the batch_size defined when creating the * JsonStream object. * * The function returns simdjson::SUCCESS_AND_HAS_MORE (an integer = 1) in case * of success and indicates that the buffer still contains more data to be parsed, * meaning this function can be called again to return the next JSON document * after this one. * * The function returns simdjson::SUCCESS (as integer = 0) in case of success * and indicates that the buffer has successfully been parsed to the end. * Every document it contained has been parsed without error. 
* * The function returns an error code from simdjson/simdjson.h in case of failure * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; * the simdjson::error_message function converts these error codes into a * string). * * You can also check validity by calling parser.is_valid(). The same parser can * and should be reused for the other documents in the buffer. */ int json_parse(document::parser &parser) noexcept; /* Returns the location (index) of where the next document should be in the * buffer. * Can be used for debugging, it tells the user the position of the end of the * last * valid JSON document parsed*/ inline size_t get_current_buffer_loc() const noexcept { return stream ? stream->current_buffer_loc : 0; } /* Returns the total amount of complete documents parsed by the JsonStream, * in the current buffer, at the given time.*/ inline size_t get_n_parsed_docs() const noexcept { return stream ? stream->n_parsed_docs : 0; } /* Returns the total amount of data (in bytes) parsed by the JsonStream, * in the current buffer, at the given time.*/ inline size_t get_n_bytes_parsed() const noexcept { return stream ? stream->n_bytes_parsed : 0; } private: const string_container &str; const size_t batch_size; document::stream *stream{nullptr}; }; // end of class JsonStream } // end of namespace simdjson #endif // SIMDJSON_JSONSTREAM_H /* end file include/simdjson/jsonstream.h */ // Inline functions /* begin file include/simdjson/inline/document.h */ #ifndef SIMDJSON_INLINE_DOCUMENT_H #define SIMDJSON_INLINE_DOCUMENT_H // Inline implementations go in here. 
#include namespace simdjson { // // element_result inline implementation // really_inline document::element_result::element_result(element value) noexcept : simdjson_result(value) {} really_inline document::element_result::element_result(error_code error) noexcept : simdjson_result(error) {} inline simdjson_result document::element_result::is_null() const noexcept { if (error()) { return error(); } return first.is_null(); } inline simdjson_result document::element_result::as_bool() const noexcept { if (error()) { return error(); } return first.as_bool(); } inline simdjson_result document::element_result::as_c_str() const noexcept { if (error()) { return error(); } return first.as_c_str(); } inline simdjson_result document::element_result::as_string() const noexcept { if (error()) { return error(); } return first.as_string(); } inline simdjson_result document::element_result::as_uint64_t() const noexcept { if (error()) { return error(); } return first.as_uint64_t(); } inline simdjson_result document::element_result::as_int64_t() const noexcept { if (error()) { return error(); } return first.as_int64_t(); } inline simdjson_result document::element_result::as_double() const noexcept { if (error()) { return error(); } return first.as_double(); } inline document::array_result document::element_result::as_array() const noexcept { if (error()) { return error(); } return first.as_array(); } inline document::object_result document::element_result::as_object() const noexcept { if (error()) { return error(); } return first.as_object(); } inline document::element_result document::element_result::operator[](const std::string_view &key) const noexcept { if (error()) { return *this; } return first[key]; } inline document::element_result document::element_result::operator[](const char *key) const noexcept { if (error()) { return *this; } return first[key]; } #if SIMDJSON_EXCEPTIONS inline document::element_result::operator bool() const noexcept(false) { return as_bool(); } inline 
document::element_result::operator const char *() const noexcept(false) { return as_c_str(); } inline document::element_result::operator std::string_view() const noexcept(false) { return as_string(); } inline document::element_result::operator uint64_t() const noexcept(false) { return as_uint64_t(); } inline document::element_result::operator int64_t() const noexcept(false) { return as_int64_t(); } inline document::element_result::operator double() const noexcept(false) { return as_double(); } inline document::element_result::operator document::array() const noexcept(false) { return as_array(); } inline document::element_result::operator document::object() const noexcept(false) { return as_object(); } #endif // // array_result inline implementation // really_inline document::array_result::array_result(array value) noexcept : simdjson_result(value) {} really_inline document::array_result::array_result(error_code error) noexcept : simdjson_result(error) {} #if SIMDJSON_EXCEPTIONS inline document::array::iterator document::array_result::begin() const noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.begin(); } inline document::array::iterator document::array_result::end() const noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.end(); } #endif // SIMDJSON_EXCEPTIONS // // object_result inline implementation // really_inline document::object_result::object_result(object value) noexcept : simdjson_result