/* auto-generated on Fri Mar 20 11:47:31 PDT 2020. Do not edit! */ /* begin file include/simdjson.h */ #ifndef SIMDJSON_H #define SIMDJSON_H /* begin file include/simdjson/compiler_check.h */ #ifndef SIMDJSON_COMPILER_CHECK_H #define SIMDJSON_COMPILER_CHECK_H #ifndef __cplusplus #error simdjson requires a C++ compiler #endif #ifndef SIMDJSON_CPLUSPLUS #if defined(_MSVC_LANG) && !defined(__clang__) #define SIMDJSON_CPLUSPLUS (_MSC_VER == 1900 ? 201103L : _MSVC_LANG) #else #define SIMDJSON_CPLUSPLUS __cplusplus #endif #endif #if (SIMDJSON_CPLUSPLUS < 201703L) #error simdjson requires a compiler compliant with the C++17 standard #endif #endif // SIMDJSON_COMPILER_CHECK_H /* end file include/simdjson/compiler_check.h */ // Public API /* begin file include/simdjson/simdjson_version.h */ // /include/simdjson/simdjson_version.h automatically generated by release.py, // do not change by hand #ifndef SIMDJSON_SIMDJSON_VERSION_H #define SIMDJSON_SIMDJSON_VERSION_H /** The version of simdjson being used (major.minor.revision) */ #define SIMDJSON_VERSION 0.2.1 namespace simdjson { enum { /** * The major version (MAJOR.minor.revision) of simdjson being used. */ SIMDJSON_VERSION_MAJOR = 0, /** * The minor version (major.MINOR.revision) of simdjson being used. */ SIMDJSON_VERSION_MINOR = 2, /** * The revision (major.minor.REVISION) of simdjson being used. 
*/ SIMDJSON_VERSION_REVISION = 1 }; } // namespace simdjson #endif // SIMDJSON_SIMDJSON_VERSION_H /* end file include/simdjson/simdjson_version.h */ /* begin file include/simdjson/error.h */ #ifndef SIMDJSON_ERROR_H #define SIMDJSON_ERROR_H /* begin file include/simdjson/common_defs.h */ #ifndef SIMDJSON_COMMON_DEFS_H #define SIMDJSON_COMMON_DEFS_H #include /* begin file include/simdjson/portability.h */ #ifndef SIMDJSON_PORTABILITY_H #define SIMDJSON_PORTABILITY_H #include #include #include #ifdef _MSC_VER #include #endif #if defined(__x86_64__) || defined(_M_AMD64) #define IS_X86_64 1 #endif #if defined(__aarch64__) || defined(_M_ARM64) #define IS_ARM64 1 #endif // this is almost standard? #undef STRINGIFY_IMPLEMENTATION_ #undef STRINGIFY #define STRINGIFY_IMPLEMENTATION_(a) #a #define STRINGIFY(a) STRINGIFY_IMPLEMENTATION_(a) #ifndef SIMDJSON_IMPLEMENTATION_FALLBACK #define SIMDJSON_IMPLEMENTATION_FALLBACK 1 #endif #if IS_ARM64 #ifndef SIMDJSON_IMPLEMENTATION_ARM64 #define SIMDJSON_IMPLEMENTATION_ARM64 1 #endif #define SIMDJSON_IMPLEMENTATION_HASWELL 0 #define SIMDJSON_IMPLEMENTATION_WESTMERE 0 #endif // IS_ARM64 #if IS_X86_64 #ifndef SIMDJSON_IMPLEMENTATION_HASWELL #define SIMDJSON_IMPLEMENTATION_HASWELL 1 #endif #ifndef SIMDJSON_IMPLEMENTATION_WESTMERE #define SIMDJSON_IMPLEMENTATION_WESTMERE 1 #endif #define SIMDJSON_IMPLEMENTATION_ARM64 0 #endif // IS_X86_64 // we are going to use runtime dispatch #ifdef IS_X86_64 #ifdef __clang__ // clang does not have GCC push pop // warning: clang attribute push can't be used within a namespace in clang up // til 8.0 so TARGET_REGION and UNTARGET_REGION must be *outside* of a // namespace. 
#define TARGET_REGION(T) \ _Pragma(STRINGIFY( \ clang attribute push(__attribute__((target(T))), apply_to = function))) #define UNTARGET_REGION _Pragma("clang attribute pop") #elif defined(__GNUC__) // GCC is easier #define TARGET_REGION(T) \ _Pragma("GCC push_options") _Pragma(STRINGIFY(GCC target(T))) #define UNTARGET_REGION _Pragma("GCC pop_options") #endif // clang then gcc #endif // x86 // Default target region macros don't do anything. #ifndef TARGET_REGION #define TARGET_REGION(T) #define UNTARGET_REGION #endif // under GCC and CLANG, we use these two macros #define TARGET_HASWELL TARGET_REGION("avx2,bmi,pclmul,lzcnt") #define TARGET_WESTMERE TARGET_REGION("sse4.2,pclmul") #define TARGET_ARM64 // Threading is disabled #undef SIMDJSON_THREADS_ENABLED // Is threading enabled? #if defined(BOOST_HAS_THREADS) || defined(_REENTRANT) || defined(_MT) #define SIMDJSON_THREADS_ENABLED #endif #if defined(__clang__) #define NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined"))) #elif defined(__GNUC__) #define NO_SANITIZE_UNDEFINED __attribute__((no_sanitize_undefined)) #else #define NO_SANITIZE_UNDEFINED #endif #ifdef _MSC_VER #include // visual studio #endif #ifdef _MSC_VER #define simdjson_strcasecmp _stricmp #else #define simdjson_strcasecmp strcasecmp #endif namespace simdjson { // portable version of posix_memalign static inline void *aligned_malloc(size_t alignment, size_t size) { void *p; #ifdef _MSC_VER p = _aligned_malloc(size, alignment); #elif defined(__MINGW32__) || defined(__MINGW64__) p = __mingw_aligned_malloc(size, alignment); #else // somehow, if this is used before including "x86intrin.h", it creates an // implicit defined warning. 
if (posix_memalign(&p, alignment, size) != 0) { return nullptr; } #endif return p; } static inline char *aligned_malloc_char(size_t alignment, size_t size) { return (char *)aligned_malloc(alignment, size); } static inline void aligned_free(void *mem_block) { if (mem_block == nullptr) { return; } #ifdef _MSC_VER _aligned_free(mem_block); #elif defined(__MINGW32__) || defined(__MINGW64__) __mingw_aligned_free(mem_block); #else free(mem_block); #endif } static inline void aligned_free_char(char *mem_block) { aligned_free((void *)mem_block); } } // namespace simdjson #endif // SIMDJSON_PORTABILITY_H /* end file include/simdjson/portability.h */ namespace simdjson { #ifndef SIMDJSON_EXCEPTIONS #if __cpp_exceptions #define SIMDJSON_EXCEPTIONS 1 #else #define SIMDJSON_EXCEPTIONS 0 #endif #endif /** The maximum document size supported by simdjson. */ constexpr size_t SIMDJSON_MAXSIZE_BYTES = 0xFFFFFFFF; /** * The amount of padding needed in a buffer to parse JSON. * * the input buf should be readable up to buf + SIMDJSON_PADDING * this is a stopgap; there should be a better description of the * main loop and its behavior that abstracts over this * See https://github.com/lemire/simdjson/issues/174 */ constexpr size_t SIMDJSON_PADDING = 32; /** * By default, simdjson supports this many nested objects and arrays. * * This is the default for document::parser::max_depth(). */ constexpr size_t DEFAULT_MAX_DEPTH = 1024; } // namespace simdjson #if defined(__GNUC__) // Marks a block with a name so that MCA analysis can see it. 
#define BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name); #define END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name); #define DEBUG_BLOCK(name, block) BEGIN_DEBUG_BLOCK(name); block; END_DEBUG_BLOCK(name); #else #define BEGIN_DEBUG_BLOCK(name) #define END_DEBUG_BLOCK(name) #define DEBUG_BLOCK(name, block) #endif #if !defined(_MSC_VER) && !defined(SIMDJSON_NO_COMPUTED_GOTO) // Implemented using Labels as Values which works in GCC and CLANG (and maybe // also in Intel's compiler), but won't work in MSVC. #define SIMDJSON_USE_COMPUTED_GOTO #endif // Align to N-byte boundary #define ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1)) #define ROUNDDOWN_N(a, n) ((a) & ~((n)-1)) #define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0) #ifdef _MSC_VER #define really_inline __forceinline #define never_inline __declspec(noinline) #define UNUSED #define WARN_UNUSED #ifndef likely #define likely(x) x #endif #ifndef unlikely #define unlikely(x) x #endif #else #define really_inline inline __attribute__((always_inline, unused)) #define never_inline inline __attribute__((noinline, unused)) #define UNUSED __attribute__((unused)) #define WARN_UNUSED __attribute__((warn_unused_result)) #ifndef likely #define likely(x) __builtin_expect(!!(x), 1) #endif #ifndef unlikely #define unlikely(x) __builtin_expect(!!(x), 0) #endif #endif // MSC_VER #endif // SIMDJSON_COMMON_DEFS_H /* end file include/simdjson/portability.h */ #include #include namespace simdjson { /** * All possible errors returned by simdjson. 
*/ enum error_code { SUCCESS = 0, ///< No error SUCCESS_AND_HAS_MORE, ///< No error and buffer still has more data CAPACITY, ///< This parser can't support a document that big MEMALLOC, ///< Error allocating memory, most likely out of memory TAPE_ERROR, ///< Something went wrong while writing to the tape (stage 2), this is a generic error DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation STRING_ERROR, ///< Problem while parsing a string T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' F_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'f' N_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'n' NUMBER_ERROR, ///< Problem while parsing a number UTF8_ERROR, ///< the input is not valid UTF-8 UNINITIALIZED, ///< unknown error, or uninitialized document EMPTY, ///< no structural element found UNESCAPED_CHARS, ///< found unescaped characters in a string. UNCLOSED_STRING, ///< missing quote at the end UNSUPPORTED_ARCHITECTURE, ///< unsupported architecture INCORRECT_TYPE, ///< JSON element has a different type than user expected NUMBER_OUT_OF_RANGE, ///< JSON number does not fit in 64 bits NO_SUCH_FIELD, ///< JSON field not found in object IO_ERROR, ///< Error reading a file UNEXPECTED_ERROR, ///< indicative of a bug in simdjson /** @private Number of error codes */ NUM_ERROR_CODES }; /** * Get the error message for the given error code. * * auto [doc, error] = document::parse("foo"); * if (error) { printf("Error: %s\n", error_message(error)); } * * @return The error message. */ inline const char *error_message(error_code error) noexcept; /** * Write the error message to the output stream */ inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept; /** * Exception thrown when an exception-supporting simdjson method is called */ struct simdjson_error : public std::exception { /** * Create an exception from a simdjson error code. 
* @param error The error code */ simdjson_error(error_code error) noexcept : _error{error} { } /** The error message */ const char *what() const noexcept { return error_message(error()); } /** The error code */ error_code error() const noexcept { return _error; } private: /** The error code that was used */ error_code _error; }; /** * The result of a simd operation that could fail. * * Gives the option of reading error codes, or throwing an exception by casting to the desired result. */ template struct simdjson_result : public std::pair { /** * The error. */ error_code error() const { return this->second; } #if SIMDJSON_EXCEPTIONS /** * The value of the function. * * @throw simdjson_error if there was an error. */ T get() noexcept(false) { if (error()) { throw simdjson_error(error()); } return this->first; }; /** * Cast to the value (will throw on error). * * @throw simdjson_error if there was an error. */ operator T() noexcept(false) { return get(); } #endif // SIMDJSON_EXCEPTIONS /** * Create a new error result. */ simdjson_result(error_code _error) noexcept : std::pair({}, _error) {} /** * Create a new successful result. */ simdjson_result(T _value) noexcept : std::pair(_value, SUCCESS) {} /** * Create a new result with both things (use if you don't want to branch when creating the result). */ simdjson_result(T value, error_code error) noexcept : std::pair(value, error) {} }; /** * The result of a simd operation that could fail. * * This class is for values that must be *moved*, like padded_string and document. * * Gives the option of reading error codes, or throwing an exception by casting to the desired result. */ template struct simdjson_move_result : std::pair { /** * Move the value and the error to the provided variables. 
*/ void tie(T& t, error_code & e) { // on the clang compiler that comes with current macOS (Apple clang version 11.0.0), // std::tie(this->json, error) = padded_string::load(filename); // fails with "benchmark/benchmarker.h:266:33: error: no viable overloaded '='"" t = std::move(this->first); e = std::move(this->second); } /** * The error. */ error_code error() const { return this->second; } #if SIMDJSON_EXCEPTIONS /** * The value of the function. * * @throw simdjson_error if there was an error. */ T move() noexcept(false) { if (error()) { throw simdjson_error(error()); } return std::move(this->first); }; /** * Cast to the value (will throw on error). * * @throw simdjson_error if there was an error. */ operator T() noexcept(false) { return move(); } #endif /** * Create a new error result. */ simdjson_move_result(error_code error) noexcept : std::pair(T(), error) {} /** * Create a new successful result. */ simdjson_move_result(T value) noexcept : std::pair(std::move(value), SUCCESS) {} /** * Create a new result with both things (use if you don't want to branch when creating the result). */ simdjson_move_result(T value, error_code error) noexcept : std::pair(std::move(value), error) {} }; /** * @deprecated This is an alias and will be removed, use error_code instead */ using ErrorValues = error_code; /** * @deprecated Error codes should be stored and returned as `error_code`, use `error_message()` instead. */ inline const std::string &error_message(int error) noexcept; } // namespace simdjson #endif // SIMDJSON_ERROR_H /* end file include/simdjson/portability.h */ /* begin file include/simdjson/padded_string.h */ #ifndef SIMDJSON_PADDED_STRING_H #define SIMDJSON_PADDED_STRING_H #include #include #include namespace simdjson { /** * String with extra allocation for ease of use with document::parser::parse() * * This is a move-only class, it cannot be copied. */ struct padded_string final { /** * Create a new, empty padded string. 
*/ explicit inline padded_string() noexcept; /** * Create a new padded string buffer. * * @param length the size of the string. */ explicit inline padded_string(size_t length) noexcept; /** * Create a new padded string by copying the given input. * * @param data the buffer to copy * @param length the number of bytes to copy */ explicit inline padded_string(const char *data, size_t length) noexcept; /** * Create a new padded string by copying the given input. * * @param str_ the string to copy */ inline padded_string(const std::string & str_ ) noexcept; /** * Create a new padded string by copying the given input. * * @param sv_ the string view to copy */ inline padded_string(std::string_view sv_) noexcept; /** * Move one padded string into another. * * The original padded string will be reduced to zero capacity. * * @param o the string to move. */ inline padded_string(padded_string &&o) noexcept; /** * Move one padded string into another. * * The original padded string will be reduced to zero capacity. * * @param o the string to move. */ inline padded_string &operator=(padded_string &&o) noexcept; inline void swap(padded_string &o) noexcept; ~padded_string() noexcept; /** * The length of the string. * * Does not include padding. */ size_t size() const noexcept; /** * The length of the string. * * Does not include padding. */ size_t length() const noexcept; /** * The string data. **/ const char *data() const noexcept; /** * The string data. **/ char *data() noexcept; /** * Load a padded string from a file. * * @param path the path to the file. **/ inline static simdjson_move_result load(const std::string &path) noexcept; private: padded_string &operator=(const padded_string &o) = delete; padded_string(const padded_string &o) = delete; size_t viable_size; char *data_ptr{nullptr}; }; // padded_string } // namespace simdjson namespace simdjson::internal { // low-level function to allocate memory with padding so we can read past the // "length" bytes safely.
if you must provide a pointer to some data, create it // with this function: length is the max. size in bytes of the string caller is // responsible to free the memory (free(...)) inline char *allocate_padded_buffer(size_t length) noexcept; } // namespace simdjson::internal; #endif // SIMDJSON_PADDED_STRING_H /* end file include/simdjson/padded_string.h */ /* begin file include/simdjson/implementation.h */ #ifndef SIMDJSON_IMPLEMENTATION_H #define SIMDJSON_IMPLEMENTATION_H #include #include #include #include /* begin file include/simdjson/document.h */ #ifndef SIMDJSON_DOCUMENT_H #define SIMDJSON_DOCUMENT_H #include #include #include #include #include /* begin file include/simdjson/simdjson.h */ /** * @file * @deprecated We'll be removing this file so it isn't confused with the top level simdjson.h */ #ifndef SIMDJSON_SIMDJSON_H #define SIMDJSON_SIMDJSON_H #endif // SIMDJSON_H /* end file include/simdjson/simdjson.h */ namespace simdjson::internal { constexpr const uint64_t JSON_VALUE_MASK = 0x00FFFFFFFFFFFFFF; enum class tape_type; class tape_ref; } // namespace simdjson::internal namespace simdjson { template class document_iterator; /** * A parsed JSON document. * * This class cannot be copied, only moved, to avoid unintended allocations. */ class document { public: /** * Create a document container with zero capacity. * * The parser will allocate capacity as needed. */ document() noexcept=default; ~document() noexcept=default; /** * Take another document's buffers. * * @param other The document to take. Its capacity is zeroed and it is invalidated. */ document(document &&other) noexcept = default; document(const document &) = delete; // Disallow copying /** * Take another document's buffers. * * @param other The document to take. Its capacity is zeroed. 
*/ document &operator=(document &&other) noexcept = default; document &operator=(const document &) = delete; // Disallow copying /** The default batch size for parse_many and load_many */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; // Nested classes class element; class array; class object; class key_value_pair; class parser; class stream; class doc_move_result; class doc_result; class element_result; class array_result; class object_result; class stream_result; // Nested classes. See definitions later in file. using iterator = document_iterator; /** * Get the root element of this document. */ element root() const noexcept; /** * Get the root element of this document as a JSON array. */ array_result as_array() const noexcept; /** * Get the root element of this document as a JSON object. */ object_result as_object() const noexcept; /** * Get the root element of this document. */ operator element() const noexcept; #if SIMDJSON_EXCEPTIONS /** * Read the root element of this document as a JSON array. * * @return The JSON array. * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array */ operator array() const noexcept(false); /** * Read the root element of this document as a JSON object (key/value pairs). * * @return The JSON object. * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an object */ operator object() const noexcept(false); #endif // SIMDJSON_EXCEPTIONS /** * Get the value associated with the given key. * * The key will be matched against **unescaped** JSON: * * document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1 * document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD * * @return The value associated with the given key, or: * - NO_SUCH_FIELD if the field does not exist in the object * - INCORRECT_TYPE if the document is not an object */ element_result operator[](const std::string_view &s) const noexcept; /** * Get the value associated with the given key.
* * The key will be matched against **unescaped** JSON: * * document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1 * document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD * * @return The value associated with this field, or: * - NO_SUCH_FIELD if the field does not exist in the object * - UNEXPECTED_TYPE if the document is not an object */ element_result operator[](const char *s) const noexcept; /** * Dump the raw tape for debugging. * * @param os the stream to output to. * @return false if the tape is likely wrong (e.g., you did not parse a valid JSON). */ bool dump_raw_tape(std::ostream &os) const noexcept; /** * Load a JSON document from a file and return it. * * document doc = document::load("jsonexamples/twitter.json"); * * ### Parser Capacity * * If the parser's current capacity is less than the file length, it will allocate enough capacity * to handle it (up to max_capacity). * * @param path The path to load. * @return The document, or an error: * - IO_ERROR if there was an error opening or reading the file. * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and len > max_capacity. * - other json errors if parsing fails. */ inline static doc_move_result load(const std::string& path) noexcept; /** * Parse a JSON document and return a reference to it. * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. If realloc_if_needed is true, * it is assumed that the buffer does *not* have enough padding, and it is reallocated, enlarged * and copied before parsing. * * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless * realloc_if_needed is true. * @param len The length of the JSON. * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding. 
* @return the document, or an error if the JSON is invalid. */ inline static doc_move_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept; /** * Parse a JSON document. * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. If realloc_if_needed is true, * it is assumed that the buffer does *not* have enough padding, and it is reallocated, enlarged * and copied before parsing. * * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless * realloc_if_needed is true. * @param len The length of the JSON. * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding. * @return the document, or an error if the JSON is invalid. */ really_inline static doc_move_result parse(const char *buf, size_t len, bool realloc_if_needed = true) noexcept; /** * Parse a JSON document. * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. If `str.capacity() - str.size() * < SIMDJSON_PADDING`, the string will be copied to a string with larger capacity before parsing. * * @param s The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, or * a new string will be created with the extra padding. * @return the document, or an error if the JSON is invalid. */ really_inline static doc_move_result parse(const std::string &s) noexcept; /** * Parse a JSON document. * * @param s The JSON to parse. * @return the document, or an error if the JSON is invalid. */ really_inline static doc_move_result parse(const padded_string &s) noexcept; // We do not want to allow implicit conversion from C string to std::string. 
doc_result parse(const char *buf, bool realloc_if_needed = true) noexcept = delete; std::unique_ptr tape; std::unique_ptr string_buf;// should be at least byte_capacity private: inline error_code set_capacity(size_t len) noexcept; template friend class minify; }; // class document template class minify; /** * A parsed, *owned* document, or an error if the parse failed. * * document doc = document::parse(json); * * Returns an owned `document`. When the doc_move_result (or the document retrieved from it) goes out of * scope, the document's memory is deallocated. * * ## Error Codes vs. Exceptions * * This result type allows the user to pick whether to use exceptions or not. * * Use like this to avoid exceptions: * * auto [doc, error] = document::parse(json); * if (error) { exit(1); } * * Use like this if you'd prefer to use exceptions: * * document doc = document::parse(json); * */ class document::doc_move_result : public simdjson_move_result { public: /** * Read this document as a JSON object. * * @return The object value, or: * - INCORRECT_TYPE if the JSON document is not an object */ inline object_result as_object() const noexcept; /** * Read this document as a JSON array. * * @return The array value, or: * - INCORRECT_TYPE if the JSON document is not an array */ inline array_result as_array() const noexcept; /** * Get the value associated with the given key. * * The key will be matched against **unescaped** JSON: * * document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1 * document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD * * @return The value associated with this field, or: * - NO_SUCH_FIELD if the field does not exist in the object * - INCORRECT_TYPE if the document is not an object */ inline element_result operator[](const std::string_view &key) const noexcept; /** * Get the value associated with the given key.
* * The key will be matched against **unescaped** JSON: * * document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1 * document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD * * @return The value associated with this field, or: * - NO_SUCH_FIELD if the field does not exist in the object * - UNEXPECTED_TYPE if the document is not an object */ inline element_result operator[](const char *key) const noexcept; ~doc_move_result() noexcept=default; doc_move_result(document &&doc, error_code error) noexcept; doc_move_result(document &&doc) noexcept; doc_move_result(error_code error) noexcept; friend class document; }; // class document::doc_move_result /** * A parsed document reference, or an error if the parse failed. * * document &doc = parser.parse(json); * * ## Document Ownership * * The `document &` refers to an internal document the parser reuses on each `parse()` call. It will * become invalidated on the next `parse()`. * * This is more efficient for common cases where documents are parsed and used one at a time. If you * need to keep the document around longer, you may *take* it from the parser by casting it: * * document doc = parser.parse(); // take ownership * * If you do this, the parser will automatically allocate a new document on the next `parse()` call. * * ## Error Codes vs. Exceptions * * This result type allows the user to pick whether to use exceptions or not. * * Use like this to avoid exceptions: * * auto [doc, error] = parser.parse(json); * if (error) { exit(1); } * * Use like this if you'd prefer to use exceptions: * * document &doc = parser.parse(json); * */ class document::doc_result : public simdjson_result { public: /** * Read this document as a JSON object. * * @return The object value, or: * - INCORRECT_TYPE if the JSON document is not an object */ inline object_result as_object() const noexcept; /** * Read this document as a JSON array.
* * @return The array value, or: * - UNEXPECTED_TYPE if the JSON document is not an array */ inline array_result as_array() const noexcept; /** * Get the value associated with the given key. * * The key will be matched against **unescaped** JSON: * * document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1 * document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD * * @return The value associated with this field, or: * - NO_SUCH_FIELD if the field does not exist in the object * - INCORRECT_TYPE if the document is not an object */ inline element_result operator[](const std::string_view &key) const noexcept; /** * Get the value associated with the given key. * * The key will be matched against **unescaped** JSON: * * document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1 * document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD * * @return The value associated with this field, or: * - NO_SUCH_FIELD if the field does not exist in the object * - INCORRECT_TYPE if the document is not an object */ inline element_result operator[](const char *key) const noexcept; ~doc_result()=default; doc_result(document &doc, error_code error) noexcept; friend class document::parser; friend class document::stream; }; // class document::doc_result namespace internal { /** * The possible types in the tape. Internal only. */ enum class tape_type { ROOT = 'r', START_ARRAY = '[', START_OBJECT = '{', END_ARRAY = ']', END_OBJECT = '}', STRING = '"', INT64 = 'l', UINT64 = 'u', DOUBLE = 'd', TRUE_VALUE = 't', FALSE_VALUE = 'f', NULL_VALUE = 'n' }; /** * A reference to an element on the tape. Internal only.
*/ class tape_ref { protected: really_inline tape_ref() noexcept; really_inline tape_ref(const document *_doc, size_t _json_index) noexcept; inline size_t after_element() const noexcept; really_inline tape_type type() const noexcept; really_inline uint64_t tape_value() const noexcept; template really_inline T next_tape_value() const noexcept; inline std::string_view get_string_view() const noexcept; /** The document this element references. */ const document *doc; /** The index of this element on `doc.tape[]` */ size_t json_index; friend class simdjson::document::key_value_pair; template friend class simdjson::minify; }; } // namespace simdjson::internal /** * A JSON element. * * References an element in a JSON document, representing a JSON null, boolean, string, number, * array or object. */ class document::element : protected internal::tape_ref { public: /** Create a new, invalid element. */ really_inline element() noexcept; /** Whether this element is a json `null`. */ really_inline bool is_null() const noexcept; /** Whether this is a JSON `true` or `false` */ really_inline bool is_bool() const noexcept; /** Whether this is a JSON number (e.g. 1, 1.0 or 1e2) */ really_inline bool is_number() const noexcept; /** Whether this is a JSON integer (e.g. 1 or -1, but *not* 1.0 or 1e2) */ really_inline bool is_integer() const noexcept; /** Whether this is a JSON string (e.g. "abc") */ really_inline bool is_string() const noexcept; /** Whether this is a JSON array (e.g. []) */ really_inline bool is_array() const noexcept; /** Whether this is a JSON object (e.g. {}) */ really_inline bool is_object() const noexcept; /** * Read this element as a boolean (json `true` or `false`). * * @return The boolean value, or: * - INCORRECT_TYPE error if the JSON element is not a boolean */ inline simdjson_result as_bool() const noexcept; /** * Read this element as a null-terminated string.
* * Does *not* convert other types to a string; requires that the JSON type of the element was * an actual string. * * @return A `string_view` into the string, or: * - UNEXPECTED_TYPE error if the JSON element is not a string */ inline simdjson_result as_c_str() const noexcept; /** * Read this element as a C++ string_view (string with length). * * Does *not* convert other types to a string; requires that the JSON type of the element was * an actual string. * * @return A `string_view` into the string, or: * - INCORRECT_TYPE error if the JSON element is not a string */ inline simdjson_result as_string() const noexcept; /** * Read this element as an unsigned integer. * * @return The unsigned integer value, or: * - INCORRECT_TYPE if the JSON element is not an integer * - NUMBER_OUT_OF_RANGE if the integer doesn't fit in 64 bits or is negative */ inline simdjson_result as_uint64_t() const noexcept; /** * Read this element as a signed integer. * * @return The integer value, or: * - INCORRECT_TYPE if the JSON element is not an integer * - NUMBER_OUT_OF_RANGE if the integer doesn't fit in 64 bits */ inline simdjson_result as_int64_t() const noexcept; /** * Read this element as a floating point value. * * @return The double value, or: * - INCORRECT_TYPE if the JSON element is not a number */ inline simdjson_result as_double() const noexcept; /** * Read this element as a JSON array. * * @return The array value, or: * - INCORRECT_TYPE if the JSON element is not an array */ inline array_result as_array() const noexcept; /** * Read this element as a JSON object (key/value pairs). * * @return The object value, or: * - INCORRECT_TYPE if the JSON element is not an object */ inline object_result as_object() const noexcept; #if SIMDJSON_EXCEPTIONS /** * Read this element as a boolean. * * @return The boolean value * @exception simdjson_error(UNEXPECTED_TYPE) if the JSON element is not a boolean.
*/ inline operator bool() const noexcept(false); /** * Read this element as a null-terminated string. * * Does *not* convert other types to a string; requires that the JSON type of the element was * an actual string. * * @return The string value. * @exception simdjson_error(UNEXPECTED_TYPE) if the JSON element is not a string. */ inline explicit operator const char*() const noexcept(false); /** * Read this element as a C++ string_view (string with length). * * Does *not* convert other types to a string; requires that the JSON type of the element was * an actual string. * * @return The string value. * @exception simdjson_error(UNEXPECTED_TYPE) if the JSON element is not a string. */ inline operator std::string_view() const noexcept(false); /** * Read this element as an unsigned integer. * * @return The integer value. * @exception simdjson_error(UNEXPECTED_TYPE) if the JSON element is not an integer * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative */ inline operator uint64_t() const noexcept(false); /** * Read this element as a signed integer. * * @return The integer value. * @exception simdjson_error(UNEXPECTED_TYPE) if the JSON element is not an integer * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits */ inline operator int64_t() const noexcept(false); /** * Read this element as a double. * * @return The double value. * @exception simdjson_error(UNEXPECTED_TYPE) if the JSON element is not a number * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative */ inline operator double() const noexcept(false); /** * Read this element as a JSON array. * * @return The JSON array. * @exception simdjson_error(UNEXPECTED_TYPE) if the JSON element is not an array */ inline operator document::array() const noexcept(false); /** * Read this element as a JSON object (key/value pairs). * * @return The JSON object.
* @exception simdjson_error(UNEXPECTED_TYPE) if the JSON element is not an object */ inline operator document::object() const noexcept(false); #endif // SIMDJSON_EXCEPTIONS /** * Get the value associated with the given key. * * The key will be matched against **unescaped** JSON: * * document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1 * document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD * * @return The value associated with this field, or: * - NO_SUCH_FIELD if the field does not exist in the object * - UNEXPECTED_TYPE if this element is not an object */ inline element_result operator[](const std::string_view &s) const noexcept; /** * Get the value associated with the given key. * * Note: The key will be matched against **unescaped** JSON: * * document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1 * document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD * * @return The value associated with this field, or: * - NO_SUCH_FIELD if the field does not exist in the object * - UNEXPECTED_TYPE if this element is not an object */ inline element_result operator[](const char *s) const noexcept; private: really_inline element(const document *_doc, size_t _json_index) noexcept; friend class document; friend class document::element_result; template friend class minify; }; /** * Represents a JSON array. */ class document::array : protected internal::tape_ref { public: /** Create a new, invalid array */ really_inline array() noexcept; class iterator : tape_ref { public: /** * Get the actual value */ inline element operator*() const noexcept; /** * Get the next value. * * Part of the std::iterator interface. */ inline void operator++() noexcept; /** * Check if these values come from the same place in the JSON. * * Part of the std::iterator interface.
*/ inline bool operator!=(const iterator& other) const noexcept; private: really_inline iterator(const document *_doc, size_t _json_index) noexcept; friend class array; }; /** * Return the first array element. * * Part of the std::iterable interface. */ inline iterator begin() const noexcept; /** * One past the last array element. * * Part of the std::iterable interface. */ inline iterator end() const noexcept; private: really_inline array(const document *_doc, size_t _json_index) noexcept; friend class document::element; friend class document::element_result; template friend class minify; }; /** * Represents a JSON object. */ class document::object : protected internal::tape_ref { public: /** Create a new, invalid object */ really_inline object() noexcept; class iterator : protected internal::tape_ref { public: /** * Get the actual key/value pair */ inline const document::key_value_pair operator*() const noexcept; /** * Get the next key/value pair. * * Part of the std::iterator interface. */ inline void operator++() noexcept; /** * Check if these key value pairs come from the same place in the JSON. * * Part of the std::iterator interface. */ inline bool operator!=(const iterator& other) const noexcept; /** * Get the key of this key/value pair as a string_view. */ inline std::string_view key() const noexcept; /** * Get the key of this key/value pair as a C string. */ inline const char *key_c_str() const noexcept; /** * Get the value of this key/value pair. */ inline element value() const noexcept; private: really_inline iterator(const document *_doc, size_t _json_index) noexcept; friend class document::object; }; /** * Return the first key/value pair. * * Part of the std::iterable interface. */ inline iterator begin() const noexcept; /** * One past the last key/value pair. * * Part of the std::iterable interface. */ inline iterator end() const noexcept; /** * Get the value associated with the given key.
* * The key will be matched against **unescaped** JSON: * * document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1 * document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD * * @return The value associated with this field, or: * - NO_SUCH_FIELD if the field does not exist in the object */ inline element_result operator[](const std::string_view &s) const noexcept; /** * Get the value associated with the given key. * * Note: The key will be matched against **unescaped** JSON: * * document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1 * document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD * * @return The value associated with this field, or: * - NO_SUCH_FIELD if the field does not exist in the object */ inline element_result operator[](const char *s) const noexcept; private: really_inline object(const document *_doc, size_t _json_index) noexcept; friend class document::element; friend class document::element_result; template friend class minify; }; /** * Key/value pair in an object. */ class document::key_value_pair { public: /** The key of this pair. */ std::string_view key; /** The value of this pair. */ document::element value; private: really_inline key_value_pair(std::string_view _key, document::element _value) noexcept; friend class document::object; }; /** The result of a JSON navigation that may fail.
*/ class document::element_result : public simdjson_result { public: /** Construct a result from an element value. */ really_inline element_result(element value) noexcept; /** Construct a result from an error code. */ really_inline element_result(error_code error) noexcept; /** Whether this is a JSON `null` */ inline simdjson_result is_null() const noexcept; inline simdjson_result as_bool() const noexcept; inline simdjson_result as_string() const noexcept; inline simdjson_result as_c_str() const noexcept; inline simdjson_result as_uint64_t() const noexcept; inline simdjson_result as_int64_t() const noexcept; inline simdjson_result as_double() const noexcept; inline array_result as_array() const noexcept; inline object_result as_object() const noexcept; inline element_result operator[](const std::string_view &s) const noexcept; inline element_result operator[](const char *s) const noexcept; #if SIMDJSON_EXCEPTIONS /** Conversion operators, compiled only when SIMDJSON_EXCEPTIONS is set; all are declared noexcept(false). */ inline operator bool() const noexcept(false); inline explicit operator const char*() const noexcept(false); inline operator std::string_view() const noexcept(false); inline operator uint64_t() const noexcept(false); inline operator int64_t() const noexcept(false); inline operator double() const noexcept(false); inline operator array() const noexcept(false); inline operator object() const noexcept(false); #endif // SIMDJSON_EXCEPTIONS }; /** The result of a JSON conversion to an array that may fail. */ class document::array_result : public simdjson_result { public: /** Construct a result from an array value. */ really_inline array_result(array value) noexcept; /** Construct a result from an error code. */ really_inline array_result(error_code error) noexcept; #if SIMDJSON_EXCEPTIONS inline array::iterator begin() const noexcept(false); inline array::iterator end() const noexcept(false); #endif // SIMDJSON_EXCEPTIONS }; /** The result of a JSON conversion that may fail.
*/ class document::object_result : public simdjson_result { public: really_inline object_result(object value) noexcept; really_inline object_result(error_code error) noexcept; inline element_result operator[](const std::string_view &s) const noexcept; inline element_result operator[](const char *s) const noexcept; #if SIMDJSON_EXCEPTIONS inline object::iterator begin() const noexcept(false); inline object::iterator end() const noexcept(false); #endif // SIMDJSON_EXCEPTIONS }; /** * A persistent document parser. * * The parser is designed to be reused, holding the internal buffers necessary to do parsing, * as well as memory for a single document. The parsed document is overwritten on each parse. * * This class cannot be copied, only moved, to avoid unintended allocations. * * @note This is not thread safe: one parser cannot produce two documents at the same time! */ class document::parser { public: /** * Create a JSON parser. * * The new parser will have zero capacity. * * @param max_capacity The maximum document length the parser can automatically handle. The parser * will allocate more capacity on an as-needed basis (when it sees documents too big to handle) * up to this amount. The parser still starts with zero capacity no matter what this number is: * to allocate an initial capacity, call set_capacity() after constructing the parser. Defaults * to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process). * @param max_depth The maximum depth--number of nested objects and arrays--this parser can handle. * This will not be allocated until parse() is called for the first time. Defaults to * DEFAULT_MAX_DEPTH. */ really_inline parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; ~parser()=default; /** * Take another parser's buffers and state. * * @param other The parser to take. Its capacity is zeroed.
*/ parser(document::parser &&other) = default; parser(const document::parser &) = delete; // Disallow copying /** * Take another parser's buffers and state. * * @param other The parser to take. Its capacity is zeroed. * * NOTE(review): this operator is defaulted, and a defaulted move copies scalar members * instead of zeroing them -- confirm the capacity claim above. */ parser &operator=(document::parser &&other) = default; parser &operator=(const document::parser &) = delete; // Disallow copying /** * Load a JSON document from a file and return a reference to it. * * document::parser parser; * const document &doc = parser.load("jsonexamples/twitter.json"); * * ### IMPORTANT: Document Lifetime * * The JSON document still lives in the parser: this is the most efficient way to parse JSON * documents because it reuses the same buffers, but you *must* use the document before you * destroy the parser or call parse() again. * * ### Parser Capacity * * If the parser's current capacity is less than the file length, it will allocate enough capacity * to handle it (up to max_capacity). * * @param path The path to load. * @return The document, or an error: * - IO_ERROR if there was an error opening or reading the file. * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and len > max_capacity. * - other json errors if parsing fails. */ inline doc_result load(const std::string& path) noexcept; /** * Load a file containing many JSON documents. * * document::parser parser; * for (const document &doc : parser.load_many(path)) { * cout << std::string(doc["title"]) << endl; * } * * ### Format * * The file must contain a series of one or more JSON documents, concatenated into a single * buffer, separated by whitespace. It effectively parses until it has a fully valid document, * then starts parsing the next document at that point. (It does this with more parallelism and * lookahead than you might think, though.) * * Documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator.
Documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with whitespace. * * ### Error Handling * * All errors are returned during iteration: if there is a global error such as memory allocation, * it will be yielded as the first result. Iteration always stops after the first error. * * As with all other simdjson methods, non-exception error handling is readily available through * the same interface, requiring you to check the error before using the document: * * document::parser parser; * for (auto [doc, error] : parser.load_many(path)) { * if (error) { cerr << error << endl; exit(1); } * cout << std::string(doc["title"]) << endl; * } * * ### Threads * * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the * hood to do some lookahead. * * ### Parser Capacity * * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * * @param path The path to the file to load. * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to 10MB, which has been a reasonable sweet spot in our tests. * @return The stream. If there is an error, it will be returned during iteration. An empty input * will yield 0 documents rather than an EMPTY error. Errors: * - IO_ERROR if there was an error opening or reading the file. * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other json errors if parsing fails.
*/ inline document::stream load_many(const std::string& path, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; /** * Parse a JSON document and return a temporary reference to it. * * document::parser parser; * const document &doc = parser.parse(buf, len); * * ### IMPORTANT: Document Lifetime * * The JSON document still lives in the parser: this is the most efficient way to parse JSON * documents because it reuses the same buffers, but you *must* use the document before you * destroy the parser or call parse() again. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. * * If realloc_if_needed is true (the default), it is assumed that the buffer does *not* have enough padding, * and it is copied into an enlarged temporary buffer before parsing. * * ### Parser Capacity * * If the parser's current capacity is less than len, it will allocate enough capacity * to handle it (up to max_capacity). * * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless * realloc_if_needed is true. * @param len The length of the JSON. * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding. Defaults to true. * @return The document, or an error: * - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity, * and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and len > max_capacity. * - other json errors if parsing fails. */ inline doc_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept; /** * Parse a JSON document and return a temporary reference to it.
* * document::parser parser; * const document &doc = parser.parse(buf, len); * * ### IMPORTANT: Document Lifetime * * The JSON document still lives in the parser: this is the most efficient way to parse JSON * documents because it reuses the same buffers, but you *must* use the document before you * destroy the parser or call parse() again. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. * * If realloc_if_needed is true (the default), it is assumed that the buffer does *not* have enough padding, * and it is copied into an enlarged temporary buffer before parsing. * * ### Parser Capacity * * If the parser's current capacity is less than len, it will allocate enough capacity * to handle it (up to max_capacity). * * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless * realloc_if_needed is true. * @param len The length of the JSON. * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding. Defaults to true. * @return The document, or an error: * - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity, * and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and len > max_capacity. * - other json errors if parsing fails. */ really_inline doc_result parse(const char *buf, size_t len, bool realloc_if_needed = true) noexcept; /** * Parse a JSON document and return a temporary reference to it. * * document::parser parser; * const document &doc = parser.parse(s); * * ### IMPORTANT: Document Lifetime * * The JSON document still lives in the parser: this is the most efficient way to parse JSON * documents because it reuses the same buffers, but you *must* use the document before you * destroy the parser or call parse() again.
* * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. * * If s.capacity() is less than SIMDJSON_PADDING, the string will be copied into an enlarged * temporary buffer before parsing. * * ### Parser Capacity * * If the parser's current capacity is less than len, it will allocate enough capacity * to handle it (up to max_capacity). * * @param s The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, or * a new string will be created with the extra padding. * @return The document, or an error: * - MEMALLOC if the string does not have enough padding or the parser does not have * enough capacity, and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and len > max_capacity. * - other json errors if parsing fails. */ really_inline doc_result parse(const std::string &s) noexcept; /** * Parse a JSON document and return a temporary reference to it. * * document::parser parser; * const document &doc = parser.parse(s); * * ### IMPORTANT: Document Lifetime * * The JSON document still lives in the parser: this is the most efficient way to parse JSON * documents because it reuses the same buffers, but you *must* use the document before you * destroy the parser or call parse() again. * * ### Parser Capacity * * If the parser's current capacity is less than the length of s, it will allocate enough capacity * to handle it (up to max_capacity). * * @param s The JSON to parse. * @return The document, or an error: * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and len > max_capacity. * - other json errors if parsing fails. */ really_inline doc_result parse(const padded_string &s) noexcept; // We do not want to allow implicit conversion from C string to std::string.
really_inline doc_result parse(const char *buf) noexcept = delete; /** * Parse a buffer containing many JSON documents. * * document::parser parser; * for (const document &doc : parser.parse_many(buf, len)) { * cout << std::string(doc["title"]) << endl; * } * * ### Format * * The buffer must contain a series of one or more JSON documents, concatenated into a single * buffer, separated by whitespace. It effectively parses until it has a fully valid document, * then starts parsing the next document at that point. (It does this with more parallelism and * lookahead than you might think, though.) * * Documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. Documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with whitespace. * * ### Error Handling * * All errors are returned during iteration: if there is a global error such as memory allocation, * it will be yielded as the first result. Iteration always stops after the first error. * * As with all other simdjson methods, non-exception error handling is readily available through * the same interface, requiring you to check the error before using the document: * * document::parser parser; * for (auto [doc, error] : parser.parse_many(buf, len)) { * if (error) { cerr << error << endl; exit(1); } * cout << std::string(doc["title"]) << endl; * } * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. * * ### Threads * * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the * hood to do some lookahead. * * ### Parser Capacity * * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * * @param buf The concatenated JSON to parse.
Must have at least len + SIMDJSON_PADDING allocated bytes. * @param len The length of the concatenated JSON. * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to 10MB, which has been a reasonable sweet spot in our tests. * @return The stream. If there is an error, it will be returned during iteration. An empty input * will yield 0 documents rather than an EMPTY error. Errors: * - MEMALLOC if the parser does not have enough capacity and memory allocation fails * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other json errors if parsing fails. */ inline stream parse_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; /** * Parse a buffer containing many JSON documents. * * document::parser parser; * for (const document &doc : parser.parse_many(buf, len)) { * cout << std::string(doc["title"]) << endl; * } * * ### Format * * The buffer must contain a series of one or more JSON documents, concatenated into a single * buffer, separated by whitespace. It effectively parses until it has a fully valid document, * then starts parsing the next document at that point. (It does this with more parallelism and * lookahead than you might think, though.) * * Documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. Documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with whitespace. * * ### Error Handling * * All errors are returned during iteration: if there is a global error such as memory allocation, * it will be yielded as the first result. Iteration always stops after the first error.
* * As with all other simdjson methods, non-exception error handling is readily available through * the same interface, requiring you to check the error before using the document: * * document::parser parser; * for (auto [doc, error] : parser.parse_many(buf, len)) { * if (error) { cerr << error << endl; exit(1); } * cout << std::string(doc["title"]) << endl; * } * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. * * ### Threads * * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the * hood to do some lookahead. * * ### Parser Capacity * * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * * @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes. * @param len The length of the concatenated JSON. * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to 10MB, which has been a reasonable sweet spot in our tests. * @return The stream. If there is an error, it will be returned during iteration. An empty input * will yield 0 documents rather than an EMPTY error. Errors: * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other JSON errors if parsing fails. */ inline stream parse_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; /** * Parse a buffer containing many JSON documents.
* * document::parser parser; * for (const document &doc : parser.parse_many(s)) { * cout << std::string(doc["title"]) << endl; * } * * ### Format * * The buffer must contain a series of one or more JSON documents, concatenated into a single * buffer, separated by whitespace. It effectively parses until it has a fully valid document, * then starts parsing the next document at that point. (It does this with more parallelism and * lookahead than you might think, though.) * * Documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. Documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with whitespace. * * ### Error Handling * * All errors are returned during iteration: if there is a global error such as memory allocation, * it will be yielded as the first result. Iteration always stops after the first error. * * As with all other simdjson methods, non-exception error handling is readily available through * the same interface, requiring you to check the error before using the document: * * document::parser parser; * for (auto [doc, error] : parser.parse_many(s)) { * if (error) { cerr << error << endl; exit(1); } * cout << std::string(doc["title"]) << endl; * } * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. * * ### Threads * * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the * hood to do some lookahead. * * ### Parser Capacity * * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * * @param s The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes. * @param batch_size The batch size to use. MUST be larger than the largest document.
The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to 10MB, which has been a reasonable sweet spot in our tests. * @return The stream. If there is an error, it will be returned during iteration. An empty input * will yield 0 documents rather than an EMPTY error. Errors: * - MEMALLOC if the parser does not have enough capacity and memory allocation fails * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other json errors if parsing fails */ inline stream parse_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; /** * Parse a buffer containing many JSON documents. * * document::parser parser; * for (const document &doc : parser.parse_many(s)) { * cout << std::string(doc["title"]) << endl; * } * * ### Format * * The buffer must contain a series of one or more JSON documents, concatenated into a single * buffer, separated by whitespace. It effectively parses until it has a fully valid document, * then starts parsing the next document at that point. (It does this with more parallelism and * lookahead than you might think, though.) * * Documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. Documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with whitespace. * * ### Error Handling * * All errors are returned during iteration: if there is a global error such as memory allocation, * it will be yielded as the first result. Iteration always stops after the first error.
* * As with all other simdjson methods, non-exception error handling is readily available through * the same interface, requiring you to check the error before using the document: * * document::parser parser; * for (auto [doc, error] : parser.parse_many(s)) { * if (error) { cerr << error << endl; exit(1); } * cout << std::string(doc["title"]) << endl; * } * * ### Threads * * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the * hood to do some lookahead. * * ### Parser Capacity * * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * * @param s The concatenated JSON to parse. * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to 10MB, which has been a reasonable sweet spot in our tests. * @return The stream. If there is an error, it will be returned during iteration. An empty input * will yield 0 documents rather than an EMPTY error. Errors: * - MEMALLOC if the parser does not have enough capacity and memory allocation fails * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other json errors if parsing fails */ inline stream parse_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; // We do not want to allow implicit conversion from C string to std::string. really_inline doc_result parse_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** * The largest document this parser can automatically support. * * The parser may reallocate internal buffers as needed up to this amount. * * @return Maximum capacity, in bytes. */ really_inline size_t max_capacity() const noexcept; /** * The largest document this parser can support without reallocating.
* * @return Current capacity, in bytes. */ really_inline size_t capacity() const noexcept; /** * The maximum level of nested objects and arrays supported by this parser. * * @return Maximum depth, in nesting levels. */ really_inline size_t max_depth() const noexcept; /** * Set max_capacity. This is the largest document this parser can automatically support. * * The parser may reallocate internal buffers as needed up to this amount. * * This call will not allocate or deallocate, even if capacity is currently above max_capacity. * * @param max_capacity The new maximum capacity, in bytes. */ really_inline void set_max_capacity(size_t max_capacity) noexcept; /** * Set capacity. This is the largest document this parser can support without reallocating. * * This will allocate or deallocate as necessary. * * @param capacity The new capacity, in bytes. * * @return MEMALLOC if unsuccessful, SUCCESS otherwise. */ WARN_UNUSED inline error_code set_capacity(size_t capacity) noexcept; /** * Set the maximum level of nested objects and arrays supported by this parser. * * This will allocate or deallocate as necessary. * * @param max_depth The new maximum depth, in nesting levels. * * @return MEMALLOC if unsuccessful, SUCCESS otherwise. */ WARN_UNUSED inline error_code set_max_depth(size_t max_depth) noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length * and `max_depth` depth. * * Equivalent to calling set_capacity() and set_max_depth(). * * @param capacity The new capacity. * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. * @return true if successful, false if allocation failed.
*/ WARN_UNUSED inline bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; // type aliases for backcompat using Iterator = document::iterator; using InvalidJSON = simdjson_error; // Next location to write to in the tape uint32_t current_loc{0}; // structural indices passed from stage 1 to stage 2 uint32_t n_structural_indexes{0}; std::unique_ptr structural_indexes; // location and return address of each open { or [ std::unique_ptr containing_scope_offset; #ifdef SIMDJSON_USE_COMPUTED_GOTO std::unique_ptr ret_address; #else std::unique_ptr ret_address; #endif // Next place to write a string uint8_t *current_string_buf_loc; bool valid{false}; error_code error{UNINITIALIZED}; // Document we're writing to document doc; // // TODO these are deprecated; use the results of parse instead. // // returns true if the document parsed was valid inline bool is_valid() const noexcept; // return an error code corresponding to the last parsing attempt, see // simdjson.h will return UNINITIALIZED if no parsing was attempted inline int get_error_code() const noexcept; // return the string equivalent of "get_error_code" inline std::string get_error_message() const noexcept; // print the json to std::ostream (should be valid) // return false if the tape is likely wrong (e.g., you did not parse a valid // JSON). inline bool print_json(std::ostream &os) const noexcept; inline bool dump_raw_tape(std::ostream &os) const noexcept; // // Parser callbacks: these are internal!
// // TODO find a way to do this without exposing the interface or crippling performance // // this should be called when parsing (right before writing the tapes) inline void init_stage2() noexcept; really_inline error_code on_error(error_code new_error_code) noexcept; really_inline error_code on_success(error_code success_code) noexcept; really_inline bool on_start_document(uint32_t depth) noexcept; really_inline bool on_start_object(uint32_t depth) noexcept; really_inline bool on_start_array(uint32_t depth) noexcept; // TODO we're not checking this bool really_inline bool on_end_document(uint32_t depth) noexcept; really_inline bool on_end_object(uint32_t depth) noexcept; really_inline bool on_end_array(uint32_t depth) noexcept; really_inline bool on_true_atom() noexcept; really_inline bool on_false_atom() noexcept; really_inline bool on_null_atom() noexcept; really_inline uint8_t *on_start_string() noexcept; really_inline bool on_end_string(uint8_t *dst) noexcept; really_inline bool on_number_s64(int64_t value) noexcept; really_inline bool on_number_u64(uint64_t value) noexcept; really_inline bool on_number_double(double value) noexcept; private: // // The maximum document length this parser supports. // // Buffers are large enough to handle any document up to this length. // size_t _capacity{0}; // // The maximum document length this parser will automatically support. // // The parser will not be automatically allocated above this amount. // size_t _max_capacity; // // The maximum depth (number of nested objects and arrays) supported by this parser. // // Defaults to DEFAULT_MAX_DEPTH. // size_t _max_depth; // all nodes are stored on the doc.tape using a 64-bit word. // // strings, double and ints are stored as // a 64-bit word with a pointer to the actual value // // // // for objects or arrays, store [ or { at the beginning and } and ] at the // end. 
For the openings ([ or {), we annotate them with a reference to the // location on the doc.tape of the end, and for then closings (} and ]), we // annotate them with a reference to the location of the opening // // inline void write_tape(uint64_t val, internal::tape_type t) noexcept; inline void annotate_previous_loc(uint32_t saved_loc, uint64_t val) noexcept; // Ensure we have enough capacity to handle at least desired_capacity bytes, // and auto-allocate if not. inline error_code ensure_capacity(size_t desired_capacity) noexcept; #if SIMDJSON_EXCEPTIONS // Used internally to get the document inline const document &get_document() const noexcept(false); #endif // SIMDJSON_EXCEPTIONS template friend class document_iterator; friend class document::stream; }; // class parser /** * Minifies a JSON element or document, printing the smallest possible valid JSON. * * document doc = document::parse(" [ 1 , 2 , 3 ] "_pad); * cout << minify(doc) << endl; // prints [1,2,3] * */ template class minify { public: /** * Create a new minifier. * * @param _value The document or element to minify. */ inline minify(const T &_value) noexcept : value{_value} {} /** * Minify JSON to a string. */ inline operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); } /** * Minify JSON to an output stream. */ inline std::ostream& print(std::ostream& out); private: const T &value; }; /** * Minify JSON to an output stream. * * @param out The output stream. * @param formatter The minifier. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ template inline std::ostream& operator<<(std::ostream& out, minify formatter) { return formatter.print(out); } /** * Print JSON to an output stream. * * By default, the document will be printed minified. * * @param out The output stream. * @param value The document to print. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
*/ inline std::ostream& operator<<(std::ostream& out, const document &value) { return out << minify(value); } /** * Print JSON to an output stream. * * By default, the value will be printed minified. * * @param out The output stream. * @param value The value to print. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, const document::element &value) { return out << minify(value); }; /** * Print JSON to an output stream. * * By default, the value will be printed minified. * * @param out The output stream. * @param value The value to print. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, const document::array &value) { return out << minify(value); } /** * Print JSON to an output stream. * * By default, the value will be printed minified. * * @param out The output stream. * @param value The value to print. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, const document::object &value) { return out << minify(value); } /** * Print JSON to an output stream. * * By default, the value will be printed minified. * * @param out The output stream. * @param value The value to print. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, const document::key_value_pair &value) { return out << minify(value); } #if SIMDJSON_EXCEPTIONS /** * Print JSON to an output stream. * * By default, the value will be printed minified. * * @param out The output stream. * @param value The value to print. * @throw simdjson_error if the result being printed has an error. If there is an error with the * underlying output stream, that error will be propagated (simdjson_error will not be * thrown). 
*/ inline std::ostream& operator<<(std::ostream& out, const document::doc_move_result &value) noexcept(false) { return out << minify(value); } /** * Print JSON to an output stream. * * By default, the value will be printed minified. * * @param out The output stream. * @param value The value to print. * @throw simdjson_error if the result being printed has an error. If there is an error with the * underlying output stream, that error will be propagated (simdjson_error will not be * thrown). */ inline std::ostream& operator<<(std::ostream& out, const document::doc_result &value) noexcept(false) { return out << minify(value); } /** * Print JSON to an output stream. * * By default, the value will be printed minified. * * @param out The output stream. * @param value The value to print. * @throw simdjson_error if the result being printed has an error. If there is an error with the * underlying output stream, that error will be propagated (simdjson_error will not be * thrown). */ inline std::ostream& operator<<(std::ostream& out, const document::element_result &value) noexcept(false) { return out << minify(value); } /** * Print JSON to an output stream. * * By default, the value will be printed minified. * * @param out The output stream. * @param value The value to print. * @throw simdjson_error if the result being printed has an error. If there is an error with the * underlying output stream, that error will be propagated (simdjson_error will not be * thrown). */ inline std::ostream& operator<<(std::ostream& out, const document::array_result &value) noexcept(false) { return out << minify(value); } /** * Print JSON to an output stream. * * By default, the value will be printed minified. * * @param out The output stream. * @param value The value to print. * @throw simdjson_error if the result being printed has an error. If there is an error with the * underlying output stream, that error will be propagated (simdjson_error will not be * thrown). 
*/ inline std::ostream& operator<<(std::ostream& out, const document::object_result &value) noexcept(false) { return out << minify(value); } #endif } // namespace simdjson #endif // SIMDJSON_DOCUMENT_H /* end file include/simdjson/simdjson.h */ namespace simdjson { /** * An implementation of simdjson for a particular CPU architecture. * * Also used to maintain the currently active implementation. The active implementation is * automatically initialized on first use to the most advanced implementation supported by the host. */ class implementation { public: /** * The name of this implementation. * * const implementation *impl = simdjson::active_implementation; * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; * * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" */ virtual const std::string &name() const { return _name; } /** * The description of this implementation. * * const implementation *impl = simdjson::active_implementation; * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; * * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" */ virtual const std::string &description() const { return _description; } /** * The instruction sets this implementation is compiled against. * * @return a mask of all required `instruction_set` values */ virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; }; /** * Run a full document parse (ensure_capacity, stage1 and stage2). * * Overridden by each implementation. * * @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. * @param len the length of the json document. * @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity. * @return the error code, or SUCCESS if there was no error. 
*/ WARN_UNUSED virtual error_code parse(const uint8_t *buf, size_t len, document::parser &parser) const noexcept = 0; /** * Stage 1 of the document parser. * * Overridden by each implementation. * * @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. * @param len the length of the json document. * @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity. * @param streaming whether this is being called by document::parser::parse_many. * @return the error code, or SUCCESS if there was no error. */ WARN_UNUSED virtual error_code stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept = 0; /** * Stage 2 of the document parser. * * Overridden by each implementation. * * @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. * @param len the length of the json document. * @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity. * @return the error code, or SUCCESS if there was no error. */ WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, document::parser &parser) const noexcept = 0; /** * Stage 2 of the document parser for document::parser::parse_many. * * Overridden by each implementation. * * @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. * @param len the length of the json document. * @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity. * @param next_json the next structural index. Start this at 0 the first time, and it will be updated to the next value to pass each time. * @return the error code, SUCCESS if there was no error, or SUCCESS_AND_HAS_MORE if there was no error and stage2 can be called again. 
*/ WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json) const noexcept = 0; protected: really_inline implementation( const std::string &name, const std::string &description, uint32_t required_instruction_sets ) : _name(name), _description(description), _required_instruction_sets(required_instruction_sets) { } private: /** * The name of this implementation. */ const std::string _name; /** * The description of this implementation. */ const std::string _description; /** * Instruction sets required for this implementation. */ const uint32_t _required_instruction_sets; }; namespace internal { /** * The list of available implementations compiled into simdjson. */ class available_implementation_list { public: /** Get the list of available implementations compiled into simdjson */ really_inline available_implementation_list() {} /** Number of implementations */ size_t size() const noexcept; /** STL const begin() iterator */ const implementation * const *begin() const noexcept; /** STL const end() iterator */ const implementation * const *end() const noexcept; /** * Get the implementation with the given name. * * Case sensitive. * * const implementation *impl = simdjson::available_implementations["westmere"]; * if (!impl) { exit(1); } * simdjson::active_implementation = impl; * * @param name the implementation to find, e.g. "westmere", "haswell", "arm64" * @return the implementation, or nullptr if the parse failed. */ const implementation * operator[](const std::string& name) const noexcept { for (const implementation * impl : *this) { if (impl->name() == name) { return impl; } } return nullptr; } /** * Detect the most advanced implementation supported by the current host. * * This is used to initialize the implementation on startup. 
* * const implementation *impl = simdjson::available_implementation::detect_best_supported(); * simdjson::active_implementation = impl; * * @return the most advanced supported implementation for the current host, or an * implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported * implementation. Will never return nullptr. */ const implementation *detect_best_supported() const noexcept; }; // Detects best supported implementation on first use, and sets it class detect_best_supported_implementation_on_first_use final : public implementation { public: const std::string& name() const noexcept final { return set_best()->name(); } const std::string& description() const noexcept final { return set_best()->description(); } uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); } WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final { return set_best()->parse(buf, len, parser); } WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept final { return set_best()->stage1(buf, len, parser, streaming); } WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final { return set_best()->stage2(buf, len, parser); } WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json) const noexcept final { return set_best()->stage2(buf, len, parser, next_json); } really_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {} private: const implementation *set_best() const noexcept; }; inline const detect_best_supported_implementation_on_first_use detect_best_supported_implementation_on_first_use_singleton; template class atomic_ptr { public: atomic_ptr(T *_ptr) : ptr{_ptr} {} operator const T*() 
const { return ptr.load(); } const T& operator*() const { return *ptr; } const T* operator->() const { return ptr.load(); } operator T*() { return ptr.load(); } T& operator*() { return *ptr; } T* operator->() { return ptr.load(); } T* operator=(T *_ptr) { return ptr = _ptr; } private: std::atomic ptr; }; } // namespace [simdjson::]internal /** * The list of available implementations compiled into simdjson. */ inline const internal::available_implementation_list available_implementations; /** * The active implementation. * * Automatically initialized on first use to the most advanced implementation supported by this hardware. */ inline internal::atomic_ptr active_implementation = &internal::detect_best_supported_implementation_on_first_use_singleton; } // namespace simdjson #endif // SIMDJSON_IMPLEMENTATION_H /* end file include/simdjson/simdjson.h */ /* begin file include/simdjson/document_stream.h */ #ifndef SIMDJSON_DOCUMENT_STREAM_H #define SIMDJSON_DOCUMENT_STREAM_H #include namespace simdjson { template class JsonStream; /** * A forward-only stream of documents. * * Produced by document::parser::parse_many. * */ class document::stream { public: really_inline ~stream() noexcept; /** * An iterator through a forward-only stream of documents. */ class iterator { public: /** * Get the current document (or error). */ really_inline doc_result operator*() noexcept; /** * Advance to the next document. */ inline iterator& operator++() noexcept; /** * Check if we're at the end yet. * @param other the end iterator to compare to. */ really_inline bool operator!=(const iterator &other) const noexcept; private: iterator(stream& stream, bool finished) noexcept; /** The stream parser we're iterating through. */ stream& _stream; /** Whether we're finished or not. */ bool finished; friend class stream; }; /** * Start iterating the documents in the stream. */ really_inline iterator begin() noexcept; /** * The end of the stream, for iterator comparison purposes. 
*/ really_inline iterator end() noexcept; private: stream &operator=(const document::stream &) = delete; // Disallow copying stream(document::stream &other) = delete; // Disallow copying really_inline stream(document::parser &parser, const uint8_t *buf, size_t len, size_t batch_size, error_code error = SUCCESS) noexcept; /** * Parse the next document found in the buffer previously given to stream. * * The content should be a valid JSON document encoded as UTF-8. If there is a * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are * discouraged. * * You do NOT need to pre-allocate a parser. This function takes care of * pre-allocating a capacity defined by the batch_size defined when creating the * stream object. * * The function returns simdjson::SUCCESS_AND_HAS_MORE (an integer = 1) in case * of success and indicates that the buffer still contains more data to be parsed, * meaning this function can be called again to return the next JSON document * after this one. * * The function returns simdjson::SUCCESS (as integer = 0) in case of success * and indicates that the buffer has successfully been parsed to the end. * Every document it contained has been parsed without error. * * The function returns an error code from simdjson/simdjson.h in case of failure * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; * the simdjson::error_message function converts these error codes into a string). * * You can also check validity by calling parser.is_valid(). The same parser can * and should be reused for the other documents in the buffer. */ inline error_code json_parse() noexcept; /** * Returns the location (index) of where the next document should be in the * buffer. 
* Can be used for debugging, it tells the user the position of the end of the * last * valid JSON document parsed */ inline size_t get_current_buffer_loc() const { return current_buffer_loc; } /** * Returns the total amount of complete documents parsed by the stream, * in the current buffer, at the given time. */ inline size_t get_n_parsed_docs() const { return n_parsed_docs; } /** * Returns the total amount of data (in bytes) parsed by the stream, * in the current buffer, at the given time. */ inline size_t get_n_bytes_parsed() const { return n_bytes_parsed; } inline const uint8_t *buf() const { return _buf + buf_start; } inline void advance(size_t offset) { buf_start += offset; } inline size_t remaining() const { return _len - buf_start; } document::parser &parser; const uint8_t *_buf; const size_t _len; size_t _batch_size; // this is actually variable! size_t buf_start{0}; size_t next_json{0}; bool load_next_batch{true}; size_t current_buffer_loc{0}; #ifdef SIMDJSON_THREADS_ENABLED size_t last_json_buffer_loc{0}; #endif size_t n_parsed_docs{0}; size_t n_bytes_parsed{0}; error_code error{SUCCESS_AND_HAS_MORE}; #ifdef SIMDJSON_THREADS_ENABLED error_code stage1_is_ok_thread{SUCCESS}; std::thread stage_1_thread; document::parser parser_thread; #endif template friend class JsonStream; friend class document::parser; }; // class document::stream } // end of namespace simdjson #endif // SIMDJSON_DOCUMENT_STREAM_H /* end file include/simdjson/document_stream.h */ /* begin file include/simdjson/jsonminifier.h */ #ifndef SIMDJSON_JSONMINIFIER_H #define SIMDJSON_JSONMINIFIER_H #include #include #include namespace simdjson { // Take input from buf and remove useless whitespace, write it to out; buf and // out can be the same pointer. Result is null terminated, // return the string length (minus the null termination). // The accelerated version of this function only runs on AVX2 hardware. 
size_t json_minify(const uint8_t *buf, size_t len, uint8_t *out); static inline size_t json_minify(const char *buf, size_t len, char *out) { return json_minify(reinterpret_cast(buf), len, reinterpret_cast(out)); } static inline size_t json_minify(const std::string_view &p, char *out) { return json_minify(p.data(), p.size(), out); } static inline size_t json_minify(const padded_string &p, char *out) { return json_minify(p.data(), p.size(), out); } } // namespace simdjson #endif // SIMDJSON_JSONMINIFIER_H /* end file include/simdjson/jsonminifier.h */ // Deprecated API /* begin file include/simdjson/parsedjsoniterator.h */ // TODO Remove this -- deprecated API and files #ifndef SIMDJSON_PARSEDJSONITERATOR_H #define SIMDJSON_PARSEDJSONITERATOR_H /* begin file include/simdjson/document_iterator.h */ #ifndef SIMDJSON_DOCUMENT_ITERATOR_H #define SIMDJSON_DOCUMENT_ITERATOR_H #include #include #include #include #include #include /* begin file include/simdjson/internal/jsonformatutils.h */ #ifndef SIMDJSON_INTERNAL_JSONFORMATUTILS_H #define SIMDJSON_INTERNAL_JSONFORMATUTILS_H #include #include #include namespace simdjson::internal { class escape_json_string; inline std::ostream& operator<<(std::ostream& out, const escape_json_string &str); class escape_json_string { public: escape_json_string(std::string_view _str) noexcept : str{_str} {} operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); } private: std::string_view str; friend std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped); }; inline std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped) { for (size_t i=0; i(unescaped.str[i]); out.flags(f); } else { out << unescaped.str[i]; } } } return out; } } // namespace simdjson::internal #endif // SIMDJSON_INTERNAL_JSONFORMATUTILS_H /* end file include/simdjson/internal/jsonformatutils.h */ namespace simdjson { template class document_iterator { public: #if SIMDJSON_EXCEPTIONS 
document_iterator(const document::parser &parser) noexcept(false); #endif document_iterator(const document &doc) noexcept; document_iterator(const document_iterator &o) noexcept; document_iterator &operator=(const document_iterator &o) noexcept; inline bool is_ok() const; // useful for debugging purposes inline size_t get_tape_location() const; // useful for debugging purposes inline size_t get_tape_length() const; // returns the current depth (start at 1 with 0 reserved for the fictitious // root node) inline size_t get_depth() const; // A scope is a series of nodes at the same depth, typically it is either an // object ({) or an array ([). The root node has type 'r'. inline uint8_t get_scope_type() const; // move forward in document order inline bool move_forward(); // retrieve the character code of what we're looking at: // [{"slutfn are the possibilities inline uint8_t get_type() const { return current_type; // short functions should be inlined! } // get the int64_t value at this node; valid only if get_type is "l" inline int64_t get_integer() const { if (location + 1 >= tape_length) { return 0; // default value in case of error } return static_cast(doc.tape[location + 1]); } // get the value as uint64; valid only if if get_type is "u" inline uint64_t get_unsigned_integer() const { if (location + 1 >= tape_length) { return 0; // default value in case of error } return doc.tape[location + 1]; } // get the string value at this node (NULL ended); valid only if get_type is " // note that tabs, and line endings are escaped in the returned value (see // print_with_escapes) return value is valid UTF-8, it may contain NULL chars // within the string: get_string_length determines the true string length. 
inline const char *get_string() const { return reinterpret_cast( doc.string_buf.get() + (current_val & internal::JSON_VALUE_MASK) + sizeof(uint32_t)); } // return the length of the string in bytes inline uint32_t get_string_length() const { uint32_t answer; memcpy(&answer, reinterpret_cast(doc.string_buf.get() + (current_val & internal::JSON_VALUE_MASK)), sizeof(uint32_t)); return answer; } // get the double value at this node; valid only if // get_type() is "d" inline double get_double() const { if (location + 1 >= tape_length) { return std::numeric_limits::quiet_NaN(); // default value in // case of error } double answer; memcpy(&answer, &doc.tape[location + 1], sizeof(answer)); return answer; } inline bool is_object_or_array() const { return is_object() || is_array(); } inline bool is_object() const { return get_type() == '{'; } inline bool is_array() const { return get_type() == '['; } inline bool is_string() const { return get_type() == '"'; } // Returns true if the current type of node is an signed integer. // You can get its value with `get_integer()`. inline bool is_integer() const { return get_type() == 'l'; } // Returns true if the current type of node is an unsigned integer. // You can get its value with `get_unsigned_integer()`. // // NOTE: // Only a large value, which is out of range of a 64-bit signed integer, is // represented internally as an unsigned node. On the other hand, a typical // positive integer, such as 1, 42, or 1000000, is as a signed node. // Be aware this function returns false for a signed node. 
inline bool is_unsigned_integer() const { return get_type() == 'u'; } inline bool is_double() const { return get_type() == 'd'; } inline bool is_number() const { return is_integer() || is_unsigned_integer() || is_double(); } inline bool is_true() const { return get_type() == 't'; } inline bool is_false() const { return get_type() == 'f'; } inline bool is_null() const { return get_type() == 'n'; } static bool is_object_or_array(uint8_t type) { return ((type == '[') || (type == '{')); } // when at {, go one level deep, looking for a given key // if successful, we are left pointing at the value, // if not, we are still pointing at the object ({) // (in case of repeated keys, this only finds the first one). // We seek the key using C's strcmp so if your JSON strings contain // NULL chars, this would trigger a false positive: if you expect that // to be the case, take extra precautions. // Furthermore, we do the comparison character-by-character // without taking into account Unicode equivalence. inline bool move_to_key(const char *key); // as above, but case insensitive lookup (strcmpi instead of strcmp) inline bool move_to_key_insensitive(const char *key); // when at {, go one level deep, looking for a given key // if successful, we are left pointing at the value, // if not, we are still pointing at the object ({) // (in case of repeated keys, this only finds the first one). // The string we search for can contain NULL values. // Furthermore, we do the comparison character-by-character // without taking into account Unicode equivalence. inline bool move_to_key(const char *key, uint32_t length); // when at a key location within an object, this moves to the accompanying // value (located next to it). This is equivalent but much faster than // calling "next()". inline void move_to_value(); // when at [, go one level deep, and advance to the given index. 
// if successful, we are left pointing at the value, // if not, we are still pointing at the array ([) inline bool move_to_index(uint32_t index); // Moves the iterator to the value corresponding to the json pointer. // Always search from the root of the document. // if successful, we are left pointing at the value, // if not, we are still pointing the same value we were pointing before the // call. The json pointer follows the rfc6901 standard's syntax: // https://tools.ietf.org/html/rfc6901 However, the standard says "If a // referenced member name is not unique in an object, the member that is // referenced is undefined, and evaluation fails". Here we just return the // first corresponding value. The length parameter is the length of the // jsonpointer string ('pointer'). bool move_to(const char *pointer, uint32_t length); // Moves the iterator to the value corresponding to the json pointer. // Always search from the root of the document. // if successful, we are left pointing at the value, // if not, we are still pointing the same value we were pointing before the // call. The json pointer implementation follows the rfc6901 standard's // syntax: https://tools.ietf.org/html/rfc6901 However, the standard says // "If a referenced member name is not unique in an object, the member that // is referenced is undefined, and evaluation fails". Here we just return // the first corresponding value. inline bool move_to(const std::string &pointer) { return move_to(pointer.c_str(), pointer.length()); } private: // Almost the same as move_to(), except it searches from the current // position. The pointer's syntax is identical, though that case is not // handled by the rfc6901 standard. The '/' is still required at the // beginning. However, contrary to move_to(), the URI Fragment Identifier // Representation is not supported here. Also, in case of failure, we are // left pointing at the closest value it could reach. For these reasons it // is private. 
It exists because it is used by move_to(). bool relative_move_to(const char *pointer, uint32_t length); public: // throughout return true if we can do the navigation, false // otherwise // Withing a given scope (series of nodes at the same depth within either an // array or an object), we move forward. // Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, { // and [. At the object ({) or at the array ([), you can issue a "down" to // visit their content. valid if we're not at the end of a scope (returns // true). inline bool next(); // Within a given scope (series of nodes at the same depth within either an // array or an object), we move backward. // Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true // when starting at the end of the scope. At the object ({) or at the array // ([), you can issue a "down" to visit their content. // Performance warning: This function is implemented by starting again // from the beginning of the scope and scanning forward. You should expect // it to be relatively slow. inline bool prev(); // Moves back to either the containing array or object (type { or [) from // within a contained scope. // Valid unless we are at the first level of the document inline bool up(); // Valid if we're at a [ or { and it starts a non-empty scope; moves us to // start of that deeper scope if it not empty. Thus, given [true, null, // {"a":1}, [1,2]], if we are at the { node, we would move to the "a" node. 
inline bool down(); // move us to the start of our current scope, // a scope is a series of nodes at the same level inline void to_start_scope(); inline void rewind() { while (up()) ; } // void to_end_scope(); // move us to // the start of our current scope; always succeeds // print the node we are currently pointing at bool print(std::ostream &os, bool escape_strings = true) const; typedef struct { size_t start_of_scope; uint8_t scope_type; } scopeindex_t; private: const document &doc; size_t depth; size_t location; // our current location on a tape size_t tape_length; uint8_t current_type; uint64_t current_val; scopeindex_t depth_index[max_depth]; }; } // namespace simdjson #endif // SIMDJSON_DOCUMENT_ITERATOR_H /* end file include/simdjson/internal/jsonformatutils.h */ #endif /* end file include/simdjson/internal/jsonformatutils.h */ /* begin file include/simdjson/jsonparser.h */ // TODO Remove this -- deprecated API and files #ifndef SIMDJSON_JSONPARSER_H #define SIMDJSON_JSONPARSER_H /* begin file include/simdjson/parsedjson.h */ // TODO Remove this -- deprecated API and files #ifndef SIMDJSON_PARSEDJSON_H #define SIMDJSON_PARSEDJSON_H namespace simdjson { using ParsedJson = document::parser; } // namespace simdjson #endif /* end file include/simdjson/parsedjson.h */ /* begin file include/simdjson/jsonioutil.h */ #ifndef SIMDJSON_JSONIOUTIL_H #define SIMDJSON_JSONIOUTIL_H #include #include #include #include #include #include namespace simdjson { #if SIMDJSON_EXCEPTIONS inline padded_string get_corpus(const std::string &filename) { return padded_string::load(filename); } #endif // SIMDJSON_EXCEPTIONS } // namespace simdjson #endif // SIMDJSON_JSONIOUTIL_H /* end file include/simdjson/jsonioutil.h */ namespace simdjson { // // C API (json_parse and build_parsed_json) declarations // inline int json_parse(const uint8_t *buf, size_t len, document::parser &parser, bool realloc_if_needed = true) noexcept { error_code code = parser.parse(buf, len, 
realloc_if_needed).error(); // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid // bits in the parser instead of heeding the result code. The normal parser unsets those in // anticipation of making the error code ephemeral. // Here we put the code back into the parser, until we've removed this method. parser.valid = code == SUCCESS; parser.error = code; return code; } inline int json_parse(const char *buf, size_t len, document::parser &parser, bool realloc_if_needed = true) noexcept { return json_parse(reinterpret_cast(buf), len, parser, realloc_if_needed); } inline int json_parse(const std::string &s, document::parser &parser, bool realloc_if_needed = true) noexcept { return json_parse(s.data(), s.length(), parser, realloc_if_needed); } inline int json_parse(const padded_string &s, document::parser &parser) noexcept { return json_parse(s.data(), s.length(), parser, false); } WARN_UNUSED static document::parser build_parsed_json(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept { document::parser parser; json_parse(buf, len, parser, realloc_if_needed); return parser; } WARN_UNUSED inline document::parser build_parsed_json(const char *buf, size_t len, bool realloc_if_needed = true) noexcept { return build_parsed_json(reinterpret_cast(buf), len, realloc_if_needed); } WARN_UNUSED inline document::parser build_parsed_json(const std::string &s, bool realloc_if_needed = true) noexcept { return build_parsed_json(s.data(), s.length(), realloc_if_needed); } WARN_UNUSED inline document::parser build_parsed_json(const padded_string &s) noexcept { return build_parsed_json(s.data(), s.length(), false); } // We do not want to allow implicit conversion from C string to std::string. 
int json_parse(const char *buf, document::parser &parser) noexcept = delete; document::parser build_parsed_json(const char *buf) noexcept = delete; } // namespace simdjson #endif /* end file include/simdjson/jsonioutil.h */ /* begin file include/simdjson/jsonstream.h */ // TODO Remove this -- deprecated API and files #ifndef SIMDJSON_JSONSTREAM_H #define SIMDJSON_JSONSTREAM_H namespace simdjson { /** * @deprecated use document::stream instead. * * The main motivation for this piece of software is to achieve maximum speed and offer * good quality of life while parsing files containing multiple JSON documents. * * Since we want to offer flexibility and not restrict ourselves to a specific file * format, we support any file that contains any valid JSON documents separated by one * or more character that is considered a whitespace by the JSON spec. * Namely: space, nothing, linefeed, carriage return, horizontal tab. * Anything that is not whitespace will be parsed as a JSON document and could lead * to failure. * * To offer maximum parsing speed, our implementation processes the data inside the * buffer by batches and their size is defined by the parameter "batch_size". * By loading data in batches, we can optimize the time spent allocating data in the * parser and can also open the possibility of multi-threading. * The batch_size must be at least as large as the biggest document in the file, but * not too large in order to submerge the chached memory. We found that 1MB is * somewhat a sweet spot for now. Eventually, this batch_size could be fully * automated and be optimal at all times. * * The template parameter (string_container) must * support the data() and size() methods, returning a pointer * to a char* and to the number of bytes respectively. * The simdjson parser may read up to SIMDJSON_PADDING bytes beyond the end * of the string, so if you do not use a padded_string container, * you have the responsibility to overallocated. 
If you fail to * do so, your software may crash if you cross a page boundary, * and you should expect memory checkers to object. * Most users should use a simdjson::padded_string. */ template class JsonStream { public: /* Create a JsonStream object that can be used to parse sequentially the valid * JSON documents found in the buffer "buf". * * The batch_size must be at least as large as the biggest document in the * file, but * not too large to submerge the cached memory. We found that 1MB is * somewhat a sweet spot for now. * * The user is expected to call the following json_parse method to parse the * next * valid JSON document found in the buffer. This method can and is expected * to be * called in a loop. * * Various methods are offered to keep track of the status, like * get_current_buffer_loc, * get_n_parsed_docs, get_n_bytes_parsed, etc. * * */ JsonStream(const string_container &s, size_t _batch_size = 1000000) noexcept; ~JsonStream() noexcept; /* Parse the next document found in the buffer previously given to JsonStream. * The content should be a valid JSON document encoded as UTF-8. If there is a * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are * discouraged. * * You do NOT need to pre-allocate a parser. This function takes care of * pre-allocating a capacity defined by the batch_size defined when creating the * JsonStream object. * * The function returns simdjson::SUCCESS_AND_HAS_MORE (an integer = 1) in case * of success and indicates that the buffer still contains more data to be parsed, * meaning this function can be called again to return the next JSON document * after this one. * * The function returns simdjson::SUCCESS (as integer = 0) in case of success * and indicates that the buffer has successfully been parsed to the end. * Every document it contained has been parsed without error. 
* * The function returns an error code from simdjson/simdjson.h in case of failure * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; * the simdjson::error_message function converts these error codes into a * string). * * You can also check validity by calling parser.is_valid(). The same parser can * and should be reused for the other documents in the buffer. */ int json_parse(document::parser &parser) noexcept; /* Returns the location (index) of where the next document should be in the * buffer. * Can be used for debugging, it tells the user the position of the end of the * last * valid JSON document parsed*/ inline size_t get_current_buffer_loc() const noexcept { return stream ? stream->current_buffer_loc : 0; } /* Returns the total amount of complete documents parsed by the JsonStream, * in the current buffer, at the given time.*/ inline size_t get_n_parsed_docs() const noexcept { return stream ? stream->n_parsed_docs : 0; } /* Returns the total amount of data (in bytes) parsed by the JsonStream, * in the current buffer, at the given time.*/ inline size_t get_n_bytes_parsed() const noexcept { return stream ? stream->n_bytes_parsed : 0; } private: const string_container &str; const size_t batch_size; document::stream *stream{nullptr}; }; // end of class JsonStream } // end of namespace simdjson #endif // SIMDJSON_JSONSTREAM_H /* end file include/simdjson/jsonstream.h */ // Inline functions /* begin file include/simdjson/inline/document.h */ #ifndef SIMDJSON_INLINE_DOCUMENT_H #define SIMDJSON_INLINE_DOCUMENT_H // Inline implementations go in here. 
#include namespace simdjson { // // element_result inline implementation // really_inline document::element_result::element_result(element value) noexcept : simdjson_result(value) {} really_inline document::element_result::element_result(error_code error) noexcept : simdjson_result(error) {} inline simdjson_result document::element_result::is_null() const noexcept { if (error()) { return error(); } return first.is_null(); } inline simdjson_result document::element_result::as_bool() const noexcept { if (error()) { return error(); } return first.as_bool(); } inline simdjson_result document::element_result::as_c_str() const noexcept { if (error()) { return error(); } return first.as_c_str(); } inline simdjson_result document::element_result::as_string() const noexcept { if (error()) { return error(); } return first.as_string(); } inline simdjson_result document::element_result::as_uint64_t() const noexcept { if (error()) { return error(); } return first.as_uint64_t(); } inline simdjson_result document::element_result::as_int64_t() const noexcept { if (error()) { return error(); } return first.as_int64_t(); } inline simdjson_result document::element_result::as_double() const noexcept { if (error()) { return error(); } return first.as_double(); } inline document::array_result document::element_result::as_array() const noexcept { if (error()) { return error(); } return first.as_array(); } inline document::object_result document::element_result::as_object() const noexcept { if (error()) { return error(); } return first.as_object(); } inline document::element_result document::element_result::operator[](const std::string_view &key) const noexcept { if (error()) { return *this; } return first[key]; } inline document::element_result document::element_result::operator[](const char *key) const noexcept { if (error()) { return *this; } return first[key]; } #if SIMDJSON_EXCEPTIONS inline document::element_result::operator bool() const noexcept(false) { return as_bool(); } inline 
document::element_result::operator const char *() const noexcept(false) { return as_c_str(); } inline document::element_result::operator std::string_view() const noexcept(false) { return as_string(); } inline document::element_result::operator uint64_t() const noexcept(false) { return as_uint64_t(); } inline document::element_result::operator int64_t() const noexcept(false) { return as_int64_t(); } inline document::element_result::operator double() const noexcept(false) { return as_double(); } inline document::element_result::operator document::array() const noexcept(false) { return as_array(); } inline document::element_result::operator document::object() const noexcept(false) { return as_object(); } #endif // // array_result inline implementation // really_inline document::array_result::array_result(array value) noexcept : simdjson_result(value) {} really_inline document::array_result::array_result(error_code error) noexcept : simdjson_result(error) {} #if SIMDJSON_EXCEPTIONS inline document::array::iterator document::array_result::begin() const noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.begin(); } inline document::array::iterator document::array_result::end() const noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.end(); } #endif // SIMDJSON_EXCEPTIONS // // object_result inline implementation // really_inline document::object_result::object_result(object value) noexcept : simdjson_result(value) {} really_inline document::object_result::object_result(error_code error) noexcept : simdjson_result(error) {} inline document::element_result document::object_result::operator[](const std::string_view &key) const noexcept { if (error()) { return error(); } return first[key]; } inline document::element_result document::object_result::operator[](const char *key) const noexcept { if (error()) { return error(); } return first[key]; } #if SIMDJSON_EXCEPTIONS inline document::object::iterator 
document::object_result::begin() const noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.begin(); } inline document::object::iterator document::object_result::end() const noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.end(); } #endif // SIMDJSON_EXCEPTIONS // // document inline implementation // inline document::element document::root() const noexcept { return element(this, 1); } inline document::array_result document::as_array() const noexcept { return root().as_array(); } inline document::object_result document::as_object() const noexcept { return root().as_object(); } inline document::operator element() const noexcept { return root(); } #if SIMDJSON_EXCEPTIONS inline document::operator document::array() const noexcept(false) { return root(); } inline document::operator document::object() const noexcept(false) { return root(); } #endif inline document::element_result document::operator[](const std::string_view &key) const noexcept { return root()[key]; } inline document::element_result document::operator[](const char *key) const noexcept { return root()[key]; } inline document::doc_move_result document::load(const std::string &path) noexcept { document::parser parser; auto [doc, error] = parser.load(path); return doc_move_result((document &&)doc, error); } inline document::doc_move_result document::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept { document::parser parser; auto [doc, error] = parser.parse(buf, len, realloc_if_needed); return doc_move_result((document &&)doc, error); } really_inline document::doc_move_result document::parse(const char *buf, size_t len, bool realloc_if_needed) noexcept { return parse((const uint8_t *)buf, len, realloc_if_needed); } really_inline document::doc_move_result document::parse(const std::string &s) noexcept { return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); } really_inline document::doc_move_result 
document::parse(const padded_string &s) noexcept { return parse(s.data(), s.length(), false); } WARN_UNUSED inline error_code document::set_capacity(size_t capacity) noexcept { if (capacity == 0) { string_buf.reset(); tape.reset(); return SUCCESS; } // a pathological input like "[[[[..." would generate len tape elements, so // need a capacity of at least len + 1, but it is also possible to do // worse with "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6" //where len + 1 tape elements are // generated, see issue https://github.com/lemire/simdjson/issues/345 size_t tape_capacity = ROUNDUP_N(capacity + 2, 64); // a document with only zero-length strings... could have len/3 string // and we would need len/3 * 5 bytes on the string buffer size_t string_capacity = ROUNDUP_N(5 * capacity / 3 + 32, 64); string_buf.reset( new (std::nothrow) uint8_t[string_capacity]); tape.reset(new (std::nothrow) uint64_t[tape_capacity]); return string_buf && tape ? SUCCESS : MEMALLOC; } inline bool document::dump_raw_tape(std::ostream &os) const noexcept { uint32_t string_length; size_t tape_idx = 0; uint64_t tape_val = tape[tape_idx]; uint8_t type = (tape_val >> 56); os << tape_idx << " : " << type; tape_idx++; size_t how_many = 0; if (type == 'r') { how_many = tape_val & internal::JSON_VALUE_MASK; } else { // Error: no starting root node? 
return false; } os << "\t// pointing to " << how_many << " (right after last node)\n"; uint64_t payload; for (; tape_idx < how_many; tape_idx++) { os << tape_idx << " : "; tape_val = tape[tape_idx]; payload = tape_val & internal::JSON_VALUE_MASK; type = (tape_val >> 56); switch (type) { case '"': // we have a string os << "string \""; memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t)); os << internal::escape_json_string(std::string_view( (const char *)(string_buf.get() + payload + sizeof(uint32_t)), string_length )); os << '"'; os << '\n'; break; case 'l': // we have a long int if (tape_idx + 1 >= how_many) { return false; } os << "integer " << static_cast(tape[++tape_idx]) << "\n"; break; case 'u': // we have a long uint if (tape_idx + 1 >= how_many) { return false; } os << "unsigned integer " << tape[++tape_idx] << "\n"; break; case 'd': // we have a double os << "float "; if (tape_idx + 1 >= how_many) { return false; } double answer; memcpy(&answer, &tape[++tape_idx], sizeof(answer)); os << answer << '\n'; break; case 'n': // we have a null os << "null\n"; break; case 't': // we have a true os << "true\n"; break; case 'f': // we have a false os << "false\n"; break; case '{': // we have an object os << "{\t// pointing to next tape location " << payload << " (first node after the scope) \n"; break; case '}': // we end an object os << "}\t// pointing to previous tape location " << payload << " (start of the scope) \n"; break; case '[': // we start an array os << "[\t// pointing to next tape location " << payload << " (first node after the scope) \n"; break; case ']': // we end an array os << "]\t// pointing to previous tape location " << payload << " (start of the scope) \n"; break; case 'r': // we start and end with the root node // should we be hitting the root node? 
return false; default: return false; } } tape_val = tape[tape_idx]; payload = tape_val & internal::JSON_VALUE_MASK; type = (tape_val >> 56); os << tape_idx << " : " << type << "\t// pointing to " << payload << " (start root)\n"; return true; } // // doc_result inline implementation // inline document::doc_result::doc_result(document &doc, error_code error) noexcept : simdjson_result(doc, error) { } inline document::array_result document::doc_result::as_array() const noexcept { if (error()) { return error(); } return first.root().as_array(); } inline document::object_result document::doc_result::as_object() const noexcept { if (error()) { return error(); } return first.root().as_object(); } inline document::element_result document::doc_result::operator[](const std::string_view &key) const noexcept { if (error()) { return error(); } return first[key]; } inline document::element_result document::doc_result::operator[](const char *key) const noexcept { if (error()) { return error(); } return first[key]; } // // doc_move_result inline implementation // inline document::doc_move_result::doc_move_result(document &&doc, error_code error) noexcept : simdjson_move_result(std::move(doc), error) { } inline document::doc_move_result::doc_move_result(document &&doc) noexcept : simdjson_move_result(std::move(doc)) { } inline document::doc_move_result::doc_move_result(error_code error) noexcept : simdjson_move_result(error) { } inline document::array_result document::doc_move_result::as_array() const noexcept { if (error()) { return error(); } return first.root().as_array(); } inline document::object_result document::doc_move_result::as_object() const noexcept { if (error()) { return error(); } return first.root().as_object(); } inline document::element_result document::doc_move_result::operator[](const std::string_view &key) const noexcept { if (error()) { return error(); } return first[key]; } inline document::element_result document::doc_move_result::operator[](const char *key) 
const noexcept { if (error()) { return error(); } return first[key]; } // // document::parser inline implementation // really_inline document::parser::parser(size_t max_capacity, size_t max_depth) noexcept : _max_capacity{max_capacity}, _max_depth{max_depth} { } inline bool document::parser::is_valid() const noexcept { return valid; } inline int document::parser::get_error_code() const noexcept { return error; } inline std::string document::parser::get_error_message() const noexcept { return error_message(int(error)); } inline bool document::parser::print_json(std::ostream &os) const noexcept { if (!is_valid()) { return false; } os << minify(doc); return true; } inline bool document::parser::dump_raw_tape(std::ostream &os) const noexcept { return is_valid() ? doc.dump_raw_tape(os) : false; } #if SIMDJSON_EXCEPTIONS inline const document &document::parser::get_document() const noexcept(false) { if (!is_valid()) { throw simdjson_error(error); } return doc; } #endif // SIMDJSON_EXCEPTIONS inline document::doc_result document::parser::load(const std::string &path) noexcept { auto [json, _error] = padded_string::load(path); if (_error) { return doc_result(doc, _error); } return parse(json); } inline document::stream document::parser::load_many(const std::string &path, size_t batch_size) noexcept { auto [json, _error] = padded_string::load(path); return stream(*this, reinterpret_cast(json.data()), json.length(), batch_size, _error); } inline document::doc_result document::parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept { error_code code = ensure_capacity(len); if (code) { return doc_result(doc, code); } if (realloc_if_needed) { const uint8_t *tmp_buf = buf; buf = (uint8_t *)internal::allocate_padded_buffer(len); if (buf == nullptr) return doc_result(doc, MEMALLOC); memcpy((void *)buf, tmp_buf, len); } code = simdjson::active_implementation->parse(buf, len, *this); // We're indicating validity via the doc_result, so set the parse state back 
to invalid valid = false; error = UNINITIALIZED; if (realloc_if_needed) { aligned_free((void *)buf); // must free before we exit } return doc_result(doc, code); } really_inline document::doc_result document::parser::parse(const char *buf, size_t len, bool realloc_if_needed) noexcept { return parse((const uint8_t *)buf, len, realloc_if_needed); } really_inline document::doc_result document::parser::parse(const std::string &s) noexcept { return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); } really_inline document::doc_result document::parser::parse(const padded_string &s) noexcept { return parse(s.data(), s.length(), false); } inline document::stream document::parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept { return stream(*this, buf, len, batch_size); } inline document::stream document::parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept { return parse_many((const uint8_t *)buf, len, batch_size); } inline document::stream document::parser::parse_many(const std::string &s, size_t batch_size) noexcept { return parse_many(s.data(), s.length(), batch_size); } inline document::stream document::parser::parse_many(const padded_string &s, size_t batch_size) noexcept { return parse_many(s.data(), s.length(), batch_size); } really_inline size_t document::parser::capacity() const noexcept { return _capacity; } really_inline size_t document::parser::max_capacity() const noexcept { return _max_capacity; } really_inline size_t document::parser::max_depth() const noexcept { return _max_depth; } WARN_UNUSED inline error_code document::parser::set_capacity(size_t capacity) noexcept { if (_capacity == capacity) { return SUCCESS; } // Set capacity to 0 until we finish, in case there's an error _capacity = 0; // // Reallocate the document // error_code err = doc.set_capacity(capacity); if (err) { return err; } // // Don't allocate 0 bytes, just return. 
// if (capacity == 0) { structural_indexes.reset(); return SUCCESS; } // // Initialize stage 1 output // uint32_t max_structures = ROUNDUP_N(capacity, 64) + 2 + 7; structural_indexes.reset( new (std::nothrow) uint32_t[max_structures]); // TODO realloc if (!structural_indexes) { return MEMALLOC; } _capacity = capacity; return SUCCESS; } really_inline void document::parser::set_max_capacity(size_t max_capacity) noexcept { _max_capacity = max_capacity; } WARN_UNUSED inline error_code document::parser::set_max_depth(size_t max_depth) noexcept { if (max_depth == _max_depth && ret_address) { return SUCCESS; } _max_depth = 0; if (max_depth == 0) { ret_address.reset(); containing_scope_offset.reset(); return SUCCESS; } // // Initialize stage 2 state // containing_scope_offset.reset(new (std::nothrow) uint32_t[max_depth]); // TODO realloc #ifdef SIMDJSON_USE_COMPUTED_GOTO ret_address.reset(new (std::nothrow) void *[max_depth]); #else ret_address.reset(new (std::nothrow) char[max_depth]); #endif if (!ret_address || !containing_scope_offset) { // Could not allocate memory return MEMALLOC; } _max_depth = max_depth; return SUCCESS; } WARN_UNUSED inline bool document::parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept { return !set_capacity(capacity) && !set_max_depth(max_depth); } inline error_code document::parser::ensure_capacity(size_t desired_capacity) noexcept { // If we don't have enough capacity, (try to) automatically bump it. if (unlikely(desired_capacity > capacity())) { if (desired_capacity > max_capacity()) { return error = CAPACITY; } error = set_capacity(desired_capacity); if (error) { return error; } } // Allocate depth-based buffers if they aren't already. 
error = set_max_depth(max_depth()); if (error) { return error; } // If the last doc was taken, we need to allocate a new one if (!doc.tape) { error = doc.set_capacity(desired_capacity); if (error) { return error; } } return SUCCESS; } // // tape_ref inline implementation // really_inline internal::tape_ref::tape_ref() noexcept : doc{nullptr}, json_index{0} {} really_inline internal::tape_ref::tape_ref(const document *_doc, size_t _json_index) noexcept : doc{_doc}, json_index{_json_index} {} inline size_t internal::tape_ref::after_element() const noexcept { switch (type()) { case tape_type::START_ARRAY: case tape_type::START_OBJECT: return tape_value(); case tape_type::UINT64: case tape_type::INT64: case tape_type::DOUBLE: return json_index + 2; default: return json_index + 1; } } really_inline internal::tape_type internal::tape_ref::type() const noexcept { return static_cast(doc->tape[json_index] >> 56); } really_inline uint64_t internal::tape_ref::tape_value() const noexcept { return doc->tape[json_index] & internal::JSON_VALUE_MASK; } template really_inline T internal::tape_ref::next_tape_value() const noexcept { static_assert(sizeof(T) == sizeof(uint64_t)); return *reinterpret_cast(&doc->tape[json_index + 1]); } inline std::string_view internal::tape_ref::get_string_view() const noexcept { size_t string_buf_index = tape_value(); uint32_t len; memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); return std::string_view( reinterpret_cast(&doc->string_buf[string_buf_index + sizeof(uint32_t)]), len ); } // // array inline implementation // really_inline document::array::array() noexcept : internal::tape_ref() {} really_inline document::array::array(const document *_doc, size_t _json_index) noexcept : internal::tape_ref(_doc, _json_index) {} inline document::array::iterator document::array::begin() const noexcept { return iterator(doc, json_index + 1); } inline document::array::iterator document::array::end() const noexcept { return iterator(doc, 
after_element() - 1); } // // document::array::iterator inline implementation // really_inline document::array::iterator::iterator(const document *_doc, size_t _json_index) noexcept : internal::tape_ref(_doc, _json_index) { } inline document::element document::array::iterator::operator*() const noexcept { return element(doc, json_index); } inline bool document::array::iterator::operator!=(const document::array::iterator& other) const noexcept { return json_index != other.json_index; } inline void document::array::iterator::operator++() noexcept { json_index = after_element(); } // // object inline implementation // really_inline document::object::object() noexcept : internal::tape_ref() {} really_inline document::object::object(const document *_doc, size_t _json_index) noexcept : internal::tape_ref(_doc, _json_index) { }; inline document::object::iterator document::object::begin() const noexcept { return iterator(doc, json_index + 1); } inline document::object::iterator document::object::end() const noexcept { return iterator(doc, after_element() - 1); } inline document::element_result document::object::operator[](const std::string_view &key) const noexcept { iterator end_field = end(); for (iterator field = begin(); field != end_field; ++field) { if (key == field.key()) { return field.value(); } } return NO_SUCH_FIELD; } inline document::element_result document::object::operator[](const char *key) const noexcept { iterator end_field = end(); for (iterator field = begin(); field != end_field; ++field) { if (!strcmp(key, field.key_c_str())) { return field.value(); } } return NO_SUCH_FIELD; } // // document::object::iterator inline implementation // really_inline document::object::iterator::iterator(const document *_doc, size_t _json_index) noexcept : internal::tape_ref(_doc, _json_index) { } inline const document::key_value_pair document::object::iterator::operator*() const noexcept { return key_value_pair(key(), value()); } inline bool 
document::object::iterator::operator!=(const document::object::iterator& other) const noexcept { return json_index != other.json_index; } inline void document::object::iterator::operator++() noexcept { json_index++; json_index = after_element(); } inline std::string_view document::object::iterator::key() const noexcept { size_t string_buf_index = tape_value(); uint32_t len; memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); return std::string_view( reinterpret_cast(&doc->string_buf[string_buf_index + sizeof(uint32_t)]), len ); } inline const char* document::object::iterator::key_c_str() const noexcept { return reinterpret_cast(&doc->string_buf[tape_value() + sizeof(uint32_t)]); } inline document::element document::object::iterator::value() const noexcept { return element(doc, json_index + 1); } // // document::key_value_pair inline implementation // inline document::key_value_pair::key_value_pair(std::string_view _key, element _value) noexcept : key(_key), value(_value) {} // // element inline implementation // really_inline document::element::element() noexcept : internal::tape_ref() {} really_inline document::element::element(const document *_doc, size_t _json_index) noexcept : internal::tape_ref(_doc, _json_index) { } really_inline bool document::element::is_null() const noexcept { return type() == internal::tape_type::NULL_VALUE; } really_inline bool document::element::is_bool() const noexcept { return type() == internal::tape_type::TRUE_VALUE || type() == internal::tape_type::FALSE_VALUE; } really_inline bool document::element::is_number() const noexcept { return type() == internal::tape_type::UINT64 || type() == internal::tape_type::INT64 || type() == internal::tape_type::DOUBLE; } really_inline bool document::element::is_integer() const noexcept { return type() == internal::tape_type::UINT64 || type() == internal::tape_type::INT64; } really_inline bool document::element::is_string() const noexcept { return type() == internal::tape_type::STRING; } 
really_inline bool document::element::is_array() const noexcept { return type() == internal::tape_type::START_ARRAY; } really_inline bool document::element::is_object() const noexcept { return type() == internal::tape_type::START_OBJECT; } #if SIMDJSON_EXCEPTIONS inline document::element::operator bool() const noexcept(false) { return as_bool(); } inline document::element::operator const char*() const noexcept(false) { return as_c_str(); } inline document::element::operator std::string_view() const noexcept(false) { return as_string(); } inline document::element::operator uint64_t() const noexcept(false) { return as_uint64_t(); } inline document::element::operator int64_t() const noexcept(false) { return as_int64_t(); } inline document::element::operator double() const noexcept(false) { return as_double(); } inline document::element::operator document::array() const noexcept(false) { return as_array(); } inline document::element::operator document::object() const noexcept(false) { return as_object(); } #endif inline simdjson_result document::element::as_bool() const noexcept { switch (type()) { case internal::tape_type::TRUE_VALUE: return true; case internal::tape_type::FALSE_VALUE: return false; default: return INCORRECT_TYPE; } } inline simdjson_result document::element::as_c_str() const noexcept { switch (type()) { case internal::tape_type::STRING: { size_t string_buf_index = tape_value(); return reinterpret_cast(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); } default: return INCORRECT_TYPE; } } inline simdjson_result document::element::as_string() const noexcept { switch (type()) { case internal::tape_type::STRING: return get_string_view(); default: return INCORRECT_TYPE; } } inline simdjson_result document::element::as_uint64_t() const noexcept { switch (type()) { case internal::tape_type::UINT64: return next_tape_value(); case internal::tape_type::INT64: { int64_t result = next_tape_value(); if (result < 0) { return NUMBER_OUT_OF_RANGE; } return 
static_cast(result); } default: return INCORRECT_TYPE; } } inline simdjson_result document::element::as_int64_t() const noexcept { switch (type()) { case internal::tape_type::UINT64: { uint64_t result = next_tape_value(); // Wrapping max in parens to handle Windows issue: https://stackoverflow.com/questions/11544073/how-do-i-deal-with-the-max-macro-in-windows-h-colliding-with-max-in-std if (result > (std::numeric_limits::max)()) { return NUMBER_OUT_OF_RANGE; } return static_cast(result); } case internal::tape_type::INT64: return next_tape_value(); default: return INCORRECT_TYPE; } } inline simdjson_result document::element::as_double() const noexcept { switch (type()) { case internal::tape_type::UINT64: return next_tape_value(); case internal::tape_type::INT64: { return next_tape_value(); int64_t result = tape_value(); if (result < 0) { return NUMBER_OUT_OF_RANGE; } return result; } case internal::tape_type::DOUBLE: return next_tape_value(); default: return INCORRECT_TYPE; } } inline document::array_result document::element::as_array() const noexcept { switch (type()) { case internal::tape_type::START_ARRAY: return array(doc, json_index); default: return INCORRECT_TYPE; } } inline document::object_result document::element::as_object() const noexcept { switch (type()) { case internal::tape_type::START_OBJECT: return object(doc, json_index); default: return INCORRECT_TYPE; } } inline document::element_result document::element::operator[](const std::string_view &key) const noexcept { auto [obj, error] = as_object(); if (error) { return error; } return obj[key]; } inline document::element_result document::element::operator[](const char *key) const noexcept { auto [obj, error] = as_object(); if (error) { return error; } return obj[key]; } // // minify inline implementation // template<> inline std::ostream& minify::print(std::ostream& out) { return out << minify(value.root()); } template<> inline std::ostream& minify::print(std::ostream& out) { using 
tape_type=internal::tape_type; size_t depth = 0; constexpr size_t MAX_DEPTH = 16; bool is_object[MAX_DEPTH]; is_object[0] = false; bool after_value = false; internal::tape_ref iter(value.doc, value.json_index); do { // print commas after each value if (after_value) { out << ","; } // If we are in an object, print the next key and :, and skip to the next value. if (is_object[depth]) { out << '"' << internal::escape_json_string(iter.get_string_view()) << "\":"; iter.json_index++; } switch (iter.type()) { // Arrays case tape_type::START_ARRAY: { // If we're too deep, we need to recurse to go deeper. depth++; if (unlikely(depth >= MAX_DEPTH)) { out << minify(document::array(iter.doc, iter.json_index)); iter.json_index = iter.tape_value() - 1; // Jump to the ] depth--; break; } // Output start [ out << '['; iter.json_index++; // Handle empty [] (we don't want to come back around and print commas) if (iter.type() == tape_type::END_ARRAY) { out << ']'; depth--; break; } is_object[depth] = false; after_value = false; continue; } // Objects case tape_type::START_OBJECT: { // If we're too deep, we need to recurse to go deeper. 
depth++; if (unlikely(depth >= MAX_DEPTH)) { out << minify(document::object(iter.doc, iter.json_index)); iter.json_index = iter.tape_value() - 1; // Jump to the } depth--; break; } // Output start { out << '{'; iter.json_index++; // Handle empty {} (we don't want to come back around and print commas) if (iter.type() == tape_type::END_OBJECT) { out << '}'; depth--; break; } is_object[depth] = true; after_value = false; continue; } // Scalars case tape_type::STRING: out << '"' << internal::escape_json_string(iter.get_string_view()) << '"'; break; case tape_type::INT64: out << iter.next_tape_value(); iter.json_index++; // numbers take up 2 spots, so we need to increment extra break; case tape_type::UINT64: out << iter.next_tape_value(); iter.json_index++; // numbers take up 2 spots, so we need to increment extra break; case tape_type::DOUBLE: out << iter.next_tape_value(); iter.json_index++; // numbers take up 2 spots, so we need to increment extra break; case tape_type::TRUE_VALUE: out << "true"; break; case tape_type::FALSE_VALUE: out << "false"; break; case tape_type::NULL_VALUE: out << "null"; break; // These are impossible case tape_type::END_ARRAY: case tape_type::END_OBJECT: case tape_type::ROOT: abort(); } iter.json_index++; after_value = true; // Handle multiple ends in a row while (depth != 0 && (iter.type() == tape_type::END_ARRAY || iter.type() == tape_type::END_OBJECT)) { out << char(iter.type()); depth--; iter.json_index++; } // Stop when we're at depth 0 } while (depth != 0); return out; } template<> inline std::ostream& minify::print(std::ostream& out) { out << '{'; auto pair = value.begin(); auto end = value.end(); if (pair != end) { out << minify(*pair); for (++pair; pair != end; ++pair) { out << "," << minify(*pair); } } return out << '}'; } template<> inline std::ostream& minify::print(std::ostream& out) { out << '['; auto element = value.begin(); auto end = value.end(); if (element != end) { out << minify(*element); for (++element; element != end; 
++element) { out << "," << minify(*element); } } return out << ']'; } template<> inline std::ostream& minify::print(std::ostream& out) { return out << '"' << internal::escape_json_string(value.key) << "\":" << value.value; } #if SIMDJSON_EXCEPTIONS template<> inline std::ostream& minify::print(std::ostream& out) { if (value.error()) { throw simdjson_error(value.error()); } return out << minify(value.first); } template<> inline std::ostream& minify::print(std::ostream& out) { if (value.error()) { throw simdjson_error(value.error()); } return out << minify(value.first); } template<> inline std::ostream& minify::print(std::ostream& out) { if (value.error()) { throw simdjson_error(value.error()); } return out << minify(value.first); } template<> inline std::ostream& minify::print(std::ostream& out) { if (value.error()) { throw simdjson_error(value.error()); } return out << minify(value.first); } template<> inline std::ostream& minify::print(std::ostream& out) { if (value.error()) { throw simdjson_error(value.error()); } return out << minify(value.first); } #endif } // namespace simdjson #endif // SIMDJSON_INLINE_DOCUMENT_H /* end file include/simdjson/inline/document.h */ /* begin file include/simdjson/inline/document_iterator.h */ #ifndef SIMDJSON_INLINE_DOCUMENT_ITERATOR_H #define SIMDJSON_INLINE_DOCUMENT_ITERATOR_H namespace simdjson { // Because of template weirdness, the actual class definition is inline in the document class template WARN_UNUSED bool document_iterator::is_ok() const { return location < tape_length; } // useful for debugging purposes template size_t document_iterator::get_tape_location() const { return location; } // useful for debugging purposes template size_t document_iterator::get_tape_length() const { return tape_length; } // returns the current depth (start at 1 with 0 reserved for the fictitious root // node) template size_t document_iterator::get_depth() const { return depth; } // A scope is a series of nodes at the same depth, typically 
it is either an // object ({) or an array ([). The root node has type 'r'. template uint8_t document_iterator::get_scope_type() const { return depth_index[depth].scope_type; } template bool document_iterator::move_forward() { if (location + 1 >= tape_length) { return false; // we are at the end! } if ((current_type == '[') || (current_type == '{')) { // We are entering a new scope depth++; assert(depth < max_depth); depth_index[depth].start_of_scope = location; depth_index[depth].scope_type = current_type; } else if ((current_type == ']') || (current_type == '}')) { // Leaving a scope. depth--; } else if (is_number()) { // these types use 2 locations on the tape, not just one. location += 1; } location += 1; current_val = doc.tape[location]; current_type = (current_val >> 56); return true; } template void document_iterator::move_to_value() { // assume that we are on a key, so move by 1. location += 1; current_val = doc.tape[location]; current_type = (current_val >> 56); } template bool document_iterator::move_to_key(const char *key) { if (down()) { do { const bool right_key = (strcmp(get_string(), key) == 0); move_to_value(); if (right_key) { return true; } } while (next()); up(); } return false; } template bool document_iterator::move_to_key_insensitive( const char *key) { if (down()) { do { const bool right_key = (simdjson_strcasecmp(get_string(), key) == 0); move_to_value(); if (right_key) { return true; } } while (next()); up(); } return false; } template bool document_iterator::move_to_key(const char *key, uint32_t length) { if (down()) { do { bool right_key = ((get_string_length() == length) && (memcmp(get_string(), key, length) == 0)); move_to_value(); if (right_key) { return true; } } while (next()); up(); } return false; } template bool document_iterator::move_to_index(uint32_t index) { if (down()) { uint32_t i = 0; for (; i < index; i++) { if (!next()) { break; } } if (i == index) { return true; } up(); } return false; } template bool 
document_iterator::prev() { size_t target_location = location; to_start_scope(); size_t npos = location; if (target_location == npos) { return false; // we were already at the start } size_t oldnpos; // we have that npos < target_location here do { oldnpos = npos; if ((current_type == '[') || (current_type == '{')) { // we need to jump npos = (current_val & internal::JSON_VALUE_MASK); } else { npos = npos + ((current_type == 'd' || current_type == 'l') ? 2 : 1); } } while (npos < target_location); location = oldnpos; current_val = doc.tape[location]; current_type = current_val >> 56; return true; } template bool document_iterator::up() { if (depth == 1) { return false; // don't allow moving back to root } to_start_scope(); // next we just move to the previous value depth--; location -= 1; current_val = doc.tape[location]; current_type = (current_val >> 56); return true; } template bool document_iterator::down() { if (location + 1 >= tape_length) { return false; } if ((current_type == '[') || (current_type == '{')) { size_t npos = (current_val & internal::JSON_VALUE_MASK); if (npos == location + 2) { return false; // we have an empty scope } depth++; assert(depth < max_depth); location = location + 1; depth_index[depth].start_of_scope = location; depth_index[depth].scope_type = current_type; current_val = doc.tape[location]; current_type = (current_val >> 56); return true; } return false; } template void document_iterator::to_start_scope() { location = depth_index[depth].start_of_scope; current_val = doc.tape[location]; current_type = (current_val >> 56); } template bool document_iterator::next() { size_t npos; if ((current_type == '[') || (current_type == '{')) { // we need to jump npos = (current_val & internal::JSON_VALUE_MASK); } else { npos = location + (is_number() ? 
2 : 1); } uint64_t next_val = doc.tape[npos]; uint8_t next_type = (next_val >> 56); if ((next_type == ']') || (next_type == '}')) { return false; // we reached the end of the scope } location = npos; current_val = next_val; current_type = next_type; return true; } template document_iterator::document_iterator(const document &doc_) noexcept : doc(doc_), depth(0), location(0), tape_length(0) { depth_index[0].start_of_scope = location; current_val = doc.tape[location++]; current_type = (current_val >> 56); depth_index[0].scope_type = current_type; tape_length = current_val & internal::JSON_VALUE_MASK; if (location < tape_length) { // If we make it here, then depth_capacity must >=2, but the compiler // may not know this. current_val = doc.tape[location]; current_type = (current_val >> 56); depth++; assert(depth < max_depth); depth_index[depth].start_of_scope = location; depth_index[depth].scope_type = current_type; } } #if SIMDJSON_EXCEPTIONS template document_iterator::document_iterator(const document::parser &parser) noexcept(false) : document_iterator(parser.get_document()) {} #endif template document_iterator::document_iterator( const document_iterator &o) noexcept : doc(o.doc), depth(o.depth), location(o.location), tape_length(o.tape_length), current_type(o.current_type), current_val(o.current_val) { memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0])); } template document_iterator &document_iterator:: operator=(const document_iterator &o) noexcept { doc = o.doc; depth = o.depth; location = o.location; tape_length = o.tape_length; current_type = o.current_type; current_val = o.current_val; memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0])); return *this; } template bool document_iterator::print(std::ostream &os, bool escape_strings) const { if (!is_ok()) { return false; } switch (current_type) { case '"': // we have a string os << '"'; if (escape_strings) { os << internal::escape_json_string(std::string_view(get_string(), 
get_string_length())); } else { // was: os << get_string();, but given that we can include null chars, we // have to do something crazier: std::copy(get_string(), get_string() + get_string_length(), std::ostream_iterator(os)); } os << '"'; break; case 'l': // we have a long int os << get_integer(); break; case 'u': os << get_unsigned_integer(); break; case 'd': os << get_double(); break; case 'n': // we have a null os << "null"; break; case 't': // we have a true os << "true"; break; case 'f': // we have a false os << "false"; break; case '{': // we have an object case '}': // we end an object case '[': // we start an array case ']': // we end an array os << static_cast(current_type); break; default: return false; } return true; } template bool document_iterator::move_to(const char *pointer, uint32_t length) { char *new_pointer = nullptr; if (pointer[0] == '#') { // Converting fragment representation to string representation new_pointer = new char[length]; uint32_t new_length = 0; for (uint32_t i = 1; i < length; i++) { if (pointer[i] == '%' && pointer[i + 1] == 'x') { #if __cpp_exceptions try { #endif int fragment = std::stoi(std::string(&pointer[i + 2], 2), nullptr, 16); if (fragment == '\\' || fragment == '"' || (fragment <= 0x1F)) { // escaping the character new_pointer[new_length] = '\\'; new_length++; } new_pointer[new_length] = fragment; i += 3; #if __cpp_exceptions } catch (std::invalid_argument &) { delete[] new_pointer; return false; // the fragment is invalid } #endif } else { new_pointer[new_length] = pointer[i]; } new_length++; } length = new_length; pointer = new_pointer; } // saving the current state size_t depth_s = depth; size_t location_s = location; uint8_t current_type_s = current_type; uint64_t current_val_s = current_val; rewind(); // The json pointer is used from the root of the document. 
bool found = relative_move_to(pointer, length); delete[] new_pointer; if (!found) { // since the pointer has found nothing, we get back to the original // position. depth = depth_s; location = location_s; current_type = current_type_s; current_val = current_val_s; } return found; } template bool document_iterator::relative_move_to(const char *pointer, uint32_t length) { if (length == 0) { // returns the whole document return true; } if (pointer[0] != '/') { // '/' must be the first character return false; } // finding the key in an object or the index in an array std::string key_or_index; uint32_t offset = 1; // checking for the "-" case if (is_array() && pointer[1] == '-') { if (length != 2) { // the pointer must be exactly "/-" // there can't be anything more after '-' as an index return false; } key_or_index = '-'; offset = length; // will skip the loop coming right after } // We either transform the first reference token to a valid json key // or we make sure it is a valid index in an array. for (; offset < length; offset++) { if (pointer[offset] == '/') { // beginning of the next key or index break; } if (is_array() && (pointer[offset] < '0' || pointer[offset] > '9')) { // the index of an array must be an integer // we also make sure std::stoi won't discard whitespaces later return false; } if (pointer[offset] == '~') { // "~1" represents "/" if (pointer[offset + 1] == '1') { key_or_index += '/'; offset++; continue; } // "~0" represents "~" if (pointer[offset + 1] == '0') { key_or_index += '~'; offset++; continue; } } if (pointer[offset] == '\\') { if (pointer[offset + 1] == '\\' || pointer[offset + 1] == '"' || (pointer[offset + 1] <= 0x1F)) { key_or_index += pointer[offset + 1]; offset++; continue; } return false; // invalid escaped character } if (pointer[offset] == '\"') { // unescaped quote character. this is an invalid case. // lets do nothing and assume most pointers will be valid. // it won't find any corresponding json key anyway. 
// return false; } key_or_index += pointer[offset]; } bool found = false; if (is_object()) { if (move_to_key(key_or_index.c_str(), key_or_index.length())) { found = relative_move_to(pointer + offset, length - offset); } } else if (is_array()) { if (key_or_index == "-") { // handling "-" case first if (down()) { while (next()) ; // moving to the end of the array // moving to the nonexistent value right after... size_t npos; if ((current_type == '[') || (current_type == '{')) { // we need to jump npos = (current_val & internal::JSON_VALUE_MASK); } else { npos = location + ((current_type == 'd' || current_type == 'l') ? 2 : 1); } location = npos; current_val = doc.tape[npos]; current_type = (current_val >> 56); return true; // how could it fail ? } } else { // regular numeric index // The index can't have a leading '0' if (key_or_index[0] == '0' && key_or_index.length() > 1) { return false; } // it cannot be empty if (key_or_index.length() == 0) { return false; } // we already checked the index contains only valid digits uint32_t index = std::stoi(key_or_index); if (move_to_index(index)) { found = relative_move_to(pointer + offset, length - offset); } } } return found; } } // namespace simdjson #endif // SIMDJSON_INLINE_DOCUMENT_ITERATOR_H /* end file include/simdjson/inline/document_iterator.h */ /* begin file include/simdjson/inline/document_stream.h */ #ifndef SIMDJSON_INLINE_DOCUMENT_STREAM_H #define SIMDJSON_INLINE_DOCUMENT_STREAM_H #include #include #include #include namespace simdjson::internal { /** * This algorithm is used to quickly identify the buffer position of * the last JSON document inside the current batch. * * It does its work by finding the last pair of structural characters * that represent the end followed by the start of a document. * * Simply put, we iterate over the structural characters, starting from * the end. 
We consider that we found the end of a JSON document when the * first element of the pair is NOT one of these characters: '{' '[' ';' ',' * and when the second element is NOT one of these characters: '}' '}' ';' ','. * * This simple comparison works most of the time, but it does not cover cases * where the batch's structural indexes contain a perfect amount of documents. * In such a case, we do not have access to the structural index which follows * the last document, therefore, we do not have access to the second element in * the pair, and means that we cannot identify the last document. To fix this * issue, we keep a count of the open and closed curly/square braces we found * while searching for the pair. When we find a pair AND the count of open and * closed curly/square braces is the same, we know that we just passed a * complete * document, therefore the last json buffer location is the end of the batch * */ inline size_t find_last_json_buf_idx(const uint8_t *buf, size_t size, const document::parser &parser) { // this function can be generally useful if (parser.n_structural_indexes == 0) return 0; auto last_i = parser.n_structural_indexes - 1; if (parser.structural_indexes[last_i] == size) { if (last_i == 0) return 0; last_i = parser.n_structural_indexes - 2; } auto arr_cnt = 0; auto obj_cnt = 0; for (auto i = last_i; i > 0; i--) { auto idxb = parser.structural_indexes[i]; switch (buf[idxb]) { case ':': case ',': continue; case '}': obj_cnt--; continue; case ']': arr_cnt--; continue; case '{': obj_cnt++; break; case '[': arr_cnt++; break; } auto idxa = parser.structural_indexes[i - 1]; switch (buf[idxa]) { case '{': case '[': case ':': case ',': continue; } if (!arr_cnt && !obj_cnt) { return last_i + 1; } return i; } return 0; } // returns true if the provided byte value is an ASCII character static inline bool is_ascii(char c) { return ((unsigned char)c) <= 127; } // if the string ends with UTF-8 values, backtrack // up to the first ASCII character. 
May return 0. static inline size_t trimmed_length_safe_utf8(const char * c, size_t len) { while ((len > 0) and (not is_ascii(c[len - 1]))) { len--; } return len; } } // namespace simdjson::internal namespace simdjson { really_inline document::stream::stream( document::parser &_parser, const uint8_t *buf, size_t len, size_t batch_size, error_code _error ) noexcept : parser{_parser}, _buf{buf}, _len{len}, _batch_size(batch_size), error{_error} { if (!error) { error = json_parse(); } } inline document::stream::~stream() noexcept { #ifdef SIMDJSON_THREADS_ENABLED if (stage_1_thread.joinable()) { stage_1_thread.join(); } #endif } really_inline document::stream::iterator document::stream::begin() noexcept { return iterator(*this, false); } really_inline document::stream::iterator document::stream::end() noexcept { return iterator(*this, true); } really_inline document::stream::iterator::iterator(stream& stream, bool _is_end) noexcept : _stream{stream}, finished{_is_end} { } really_inline document::doc_result document::stream::iterator::operator*() noexcept { return doc_result(_stream.parser.doc, _stream.error == SUCCESS_AND_HAS_MORE ? 
SUCCESS : _stream.error); } really_inline document::stream::iterator& document::stream::iterator::operator++() noexcept { if (_stream.error == SUCCESS_AND_HAS_MORE) { _stream.error = _stream.json_parse(); } else { finished = true; } return *this; } really_inline bool document::stream::iterator::operator!=(const document::stream::iterator &other) const noexcept { return finished != other.finished; } #ifdef SIMDJSON_THREADS_ENABLED // threaded version of json_parse // todo: simplify this code further inline error_code document::stream::json_parse() noexcept { error = parser.ensure_capacity(_batch_size); if (error) { return error; } error = parser_thread.ensure_capacity(_batch_size); if (error) { return error; } if (unlikely(load_next_batch)) { // First time loading if (!stage_1_thread.joinable()) { _batch_size = (std::min)(_batch_size, remaining()); _batch_size = internal::trimmed_length_safe_utf8((const char *)buf(), _batch_size); if (_batch_size == 0) { return simdjson::UTF8_ERROR; } auto stage1_is_ok = error_code(simdjson::active_implementation->stage1(buf(), _batch_size, parser, true)); if (stage1_is_ok != simdjson::SUCCESS) { return stage1_is_ok; } size_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser); if (last_index == 0) { if (parser.n_structural_indexes == 0) { return simdjson::EMPTY; } } else { parser.n_structural_indexes = last_index + 1; } } // the second thread is running or done. 
else { stage_1_thread.join(); if (stage1_is_ok_thread != simdjson::SUCCESS) { return stage1_is_ok_thread; } std::swap(parser.structural_indexes, parser_thread.structural_indexes); parser.n_structural_indexes = parser_thread.n_structural_indexes; advance(last_json_buffer_loc); n_bytes_parsed += last_json_buffer_loc; } // let us decide whether we will start a new thread if (remaining() - _batch_size > 0) { last_json_buffer_loc = parser.structural_indexes[internal::find_last_json_buf_idx(buf(), _batch_size, parser)]; _batch_size = (std::min)(_batch_size, remaining() - last_json_buffer_loc); if (_batch_size > 0) { _batch_size = internal::trimmed_length_safe_utf8( (const char *)(buf() + last_json_buffer_loc), _batch_size); if (_batch_size == 0) { return simdjson::UTF8_ERROR; } // let us capture read-only variables const uint8_t *const b = buf() + last_json_buffer_loc; const size_t bs = _batch_size; // we call the thread on a lambda that will update // this->stage1_is_ok_thread // there is only one thread that may write to this value stage_1_thread = std::thread([this, b, bs] { this->stage1_is_ok_thread = error_code(simdjson::active_implementation->stage1(b, bs, this->parser_thread, true)); }); } } next_json = 0; load_next_batch = false; } // load_next_batch error_code res = simdjson::active_implementation->stage2(buf(), remaining(), parser, next_json); if (res == simdjson::SUCCESS_AND_HAS_MORE) { n_parsed_docs++; current_buffer_loc = parser.structural_indexes[next_json]; load_next_batch = (current_buffer_loc == last_json_buffer_loc); } else if (res == simdjson::SUCCESS) { n_parsed_docs++; if (remaining() > _batch_size) { current_buffer_loc = parser.structural_indexes[next_json - 1]; load_next_batch = true; res = simdjson::SUCCESS_AND_HAS_MORE; } } return res; } #else // SIMDJSON_THREADS_ENABLED // single-threaded version of json_parse inline error_code document::stream::json_parse() noexcept { error = parser.ensure_capacity(_batch_size); if (error) { return error; } if 
(unlikely(load_next_batch)) { advance(current_buffer_loc); n_bytes_parsed += current_buffer_loc; _batch_size = (std::min)(_batch_size, remaining()); _batch_size = internal::trimmed_length_safe_utf8((const char *)buf(), _batch_size); auto stage1_is_ok = (error_code)simdjson::active_implementation->stage1(buf(), _batch_size, parser, true); if (stage1_is_ok != simdjson::SUCCESS) { return stage1_is_ok; } size_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser); if (last_index == 0) { if (parser.n_structural_indexes == 0) { return EMPTY; } } else { parser.n_structural_indexes = last_index + 1; } load_next_batch = false; } // load_next_batch error_code res = simdjson::active_implementation->stage2(buf(), remaining(), parser, next_json); if (likely(res == simdjson::SUCCESS_AND_HAS_MORE)) { n_parsed_docs++; current_buffer_loc = parser.structural_indexes[next_json]; } else if (res == simdjson::SUCCESS) { n_parsed_docs++; if (remaining() > _batch_size) { current_buffer_loc = parser.structural_indexes[next_json - 1]; next_json = 1; load_next_batch = true; res = simdjson::SUCCESS_AND_HAS_MORE; } } return res; } #endif // SIMDJSON_THREADS_ENABLED } // end of namespace simdjson #endif // SIMDJSON_INLINE_DOCUMENT_STREAM_H /* end file include/simdjson/inline/document_stream.h */ /* begin file include/simdjson/inline/error.h */ #ifndef SIMDJSON_INLINE_ERROR_H #define SIMDJSON_INLINE_ERROR_H #include namespace simdjson::internal { // We store the error code so we can validate the error message is associated with the right code struct error_code_info { error_code code; std::string message; }; // These MUST match the codes in error_code. We check this constraint in basictests. 
inline const error_code_info error_codes[] { { SUCCESS, "No error" }, { SUCCESS_AND_HAS_MORE, "No error and buffer still has more data" }, { CAPACITY, "This parser can't support a document that big" }, { MEMALLOC, "Error allocating memory, we're most likely out of memory" }, { TAPE_ERROR, "Something went wrong while writing to the tape" }, { DEPTH_ERROR, "The JSON document was too deep (too many nested objects and arrays)" }, { STRING_ERROR, "Problem while parsing a string" }, { T_ATOM_ERROR, "Problem while parsing an atom starting with the letter 't'" }, { F_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'f'" }, { N_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'n'" }, { NUMBER_ERROR, "Problem while parsing a number" }, { UTF8_ERROR, "The input is not valid UTF-8" }, { UNINITIALIZED, "Uninitialized" }, { EMPTY, "Empty: no JSON found" }, { UNESCAPED_CHARS, "Within strings, some characters must be escaped, we found unescaped characters" }, { UNCLOSED_STRING, "A string is opened, but never closed." }, { UNSUPPORTED_ARCHITECTURE, "simdjson does not have an implementation supported by this CPU architecture (perhaps it's a non-SIMD CPU?)." }, { INCORRECT_TYPE, "The JSON element does not have the requested type." }, { NUMBER_OUT_OF_RANGE, "The JSON number is too large or too small to fit within the requested type." }, { NO_SUCH_FIELD, "The JSON field referenced does not exist in this object." }, { IO_ERROR, "Error reading the file." }, { UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson" } }; // error_messages[] } // namespace simdjson::internal namespace simdjson { inline const char *error_message(error_code error) noexcept { // If you're using error_code, we're trusting you got it from the enum. 
return internal::error_codes[int(error)].message.c_str(); } inline const std::string &error_message(int error) noexcept { if (error < 0 || error >= error_code::NUM_ERROR_CODES) { return internal::error_codes[UNEXPECTED_ERROR].message; } return internal::error_codes[error].message; } inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept { return out << error_message(error); } } // namespace simdjson #endif // SIMDJSON_INLINE_ERROR_H /* end file include/simdjson/inline/error.h */ /* begin file include/simdjson/inline/jsonstream.h */ // TODO Remove this -- deprecated API and files #ifndef SIMDJSON_INLINE_JSONSTREAM_H #define SIMDJSON_INLINE_JSONSTREAM_H namespace simdjson { template inline JsonStream::JsonStream(const string_container &s, size_t _batch_size) noexcept : str(s), batch_size(_batch_size) { } template inline JsonStream::~JsonStream() noexcept { if (stream) { delete stream; } } template inline int JsonStream::json_parse(document::parser &parser) noexcept { if (unlikely(stream == nullptr)) { stream = new document::stream(parser, reinterpret_cast(str.data()), str.length(), batch_size); } else { if (&parser != &stream->parser) { return stream->error = TAPE_ERROR; } stream->error = stream->json_parse(); } return stream->error; } } // namespace simdjson #endif // SIMDJSON_INLINE_JSONSTREAM_H /* end file include/simdjson/inline/jsonstream.h */ /* begin file include/simdjson/inline/padded_string.h */ #ifndef SIMDJSON_INLINE_PADDED_STRING_H #define SIMDJSON_INLINE_PADDED_STRING_H #include #include #include #include namespace simdjson::internal { // low-level function to allocate memory with padding so we can read past the // "length" bytes safely. if you must provide a pointer to some data, create it // with this function: length is the max. 
size in bytes of the string caller is // responsible to free the memory (free(...)) inline char *allocate_padded_buffer(size_t length) noexcept { // we could do a simple malloc // return (char *) malloc(length + SIMDJSON_PADDING); // However, we might as well align to cache lines... size_t totalpaddedlength = length + SIMDJSON_PADDING; char *padded_buffer = aligned_malloc_char(64, totalpaddedlength); #ifndef NDEBUG if (padded_buffer == nullptr) { return nullptr; } #endif // NDEBUG memset(padded_buffer + length, 0, totalpaddedlength - length); return padded_buffer; } // allocate_padded_buffer() } // namespace simdjson::internal namespace simdjson { inline padded_string::padded_string() noexcept : viable_size(0), data_ptr(nullptr) {} inline padded_string::padded_string(size_t length) noexcept : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { if (data_ptr != nullptr) data_ptr[length] = '\0'; // easier when you need a c_str } inline padded_string::padded_string(const char *data, size_t length) noexcept : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { if ((data != nullptr) and (data_ptr != nullptr)) { memcpy(data_ptr, data, length); data_ptr[length] = '\0'; // easier when you need a c_str } } // note: do not pass std::string arguments by value inline padded_string::padded_string(const std::string & str_ ) noexcept : viable_size(str_.size()), data_ptr(internal::allocate_padded_buffer(str_.size())) { if (data_ptr != nullptr) { memcpy(data_ptr, str_.data(), str_.size()); data_ptr[str_.size()] = '\0'; // easier when you need a c_str } } // note: do pass std::string_view arguments by value inline padded_string::padded_string(std::string_view sv_) noexcept : viable_size(sv_.size()), data_ptr(internal::allocate_padded_buffer(sv_.size())) { if (data_ptr != nullptr) { memcpy(data_ptr, sv_.data(), sv_.size()); data_ptr[sv_.size()] = '\0'; // easier when you need a c_str } } inline padded_string::padded_string(padded_string &&o) 
noexcept : viable_size(o.viable_size), data_ptr(o.data_ptr) { o.data_ptr = nullptr; // we take ownership } inline padded_string &padded_string::operator=(padded_string &&o) noexcept { aligned_free_char(data_ptr); data_ptr = o.data_ptr; viable_size = o.viable_size; o.data_ptr = nullptr; // we take ownership o.viable_size = 0; return *this; } inline void padded_string::swap(padded_string &o) noexcept { size_t tmp_viable_size = viable_size; char *tmp_data_ptr = data_ptr; viable_size = o.viable_size; data_ptr = o.data_ptr; o.data_ptr = tmp_data_ptr; o.viable_size = tmp_viable_size; } inline padded_string::~padded_string() noexcept { aligned_free_char(data_ptr); } inline size_t padded_string::size() const noexcept { return viable_size; } inline size_t padded_string::length() const noexcept { return viable_size; } inline const char *padded_string::data() const noexcept { return data_ptr; } inline char *padded_string::data() noexcept { return data_ptr; } inline simdjson_move_result padded_string::load(const std::string &filename) noexcept { // Open the file std::FILE *fp = std::fopen(filename.c_str(), "rb"); if (fp == nullptr) { return IO_ERROR; } // Get the file size if(std::fseek(fp, 0, SEEK_END) < 0) { std::fclose(fp); return IO_ERROR; } long llen = std::ftell(fp); if((llen < 0) || (llen == LONG_MAX)) { std::fclose(fp); return IO_ERROR; } // Allocate the padded_string size_t len = (size_t) llen; padded_string s(len); if (s.data() == nullptr) { std::fclose(fp); return MEMALLOC; } // Read the padded_string std::rewind(fp); size_t bytes_read = std::fread(s.data(), 1, len, fp); if (std::fclose(fp) != 0 || bytes_read != len) { return IO_ERROR; } return std::move(s); } } // namespace simdjson #endif // SIMDJSON_INLINE_PADDED_STRING_H /* end file include/simdjson/inline/padded_string.h */ #endif // SIMDJSON_H /* end file include/simdjson/inline/padded_string.h */