// simdjson.h (single-header amalgamation): 5351 lines, 180 KiB, C++
/* auto-generated on Fri Mar 20 11:47:31 PDT 2020. Do not edit! */
|
|
/* begin file include/simdjson.h */
|
|
#ifndef SIMDJSON_H
|
|
#define SIMDJSON_H
|
|
|
|
/* begin file include/simdjson/compiler_check.h */
#ifndef SIMDJSON_COMPILER_CHECK_H
#define SIMDJSON_COMPILER_CHECK_H

// simdjson is a C++ library: refuse to build with a C compiler.
#ifndef __cplusplus
#error simdjson requires a C++ compiler
#endif

// SIMDJSON_CPLUSPLUS is the language-standard value tested below. A
// user-supplied definition wins. Otherwise, when _MSVC_LANG is defined and the
// compiler is not clang, _MSVC_LANG is used (with _MSC_VER 1900 treated as
// C++11); every other compiler uses __cplusplus.
#ifndef SIMDJSON_CPLUSPLUS
#if defined(_MSVC_LANG) && !defined(__clang__)
#define SIMDJSON_CPLUSPLUS (_MSC_VER == 1900 ? 201103L : _MSVC_LANG)
#else
#define SIMDJSON_CPLUSPLUS __cplusplus
#endif
#endif

// Hard requirement: C++17 or newer.
#if (SIMDJSON_CPLUSPLUS < 201703L)
#error simdjson requires a compiler compliant with the C++17 standard
#endif

#endif // SIMDJSON_COMPILER_CHECK_H
/* end file include/simdjson/compiler_check.h */
|
|
|
|
// Public API
|
|
/* begin file include/simdjson/simdjson_version.h */
// /include/simdjson/simdjson_version.h automatically generated by release.py,
// do not change by hand
#ifndef SIMDJSON_SIMDJSON_VERSION_H
#define SIMDJSON_SIMDJSON_VERSION_H

/**
 * The version of simdjson being used (major.minor.revision).
 *
 * Note: this is a bare token sequence, not a string literal.
 */
#define SIMDJSON_VERSION 0.2.1

namespace simdjson {
enum {
  /**
   * The major version (MAJOR.minor.revision) of simdjson being used.
   */
  SIMDJSON_VERSION_MAJOR = 0,
  /**
   * The minor version (major.MINOR.revision) of simdjson being used.
   */
  SIMDJSON_VERSION_MINOR = 2,
  /**
   * The revision (major.minor.REVISION) of simdjson being used.
   */
  SIMDJSON_VERSION_REVISION = 1
};
} // namespace simdjson

#endif // SIMDJSON_SIMDJSON_VERSION_H
/* end file include/simdjson/simdjson_version.h */
|
|
/* begin file include/simdjson/error.h */
|
|
#ifndef SIMDJSON_ERROR_H
|
|
#define SIMDJSON_ERROR_H
|
|
|
|
/* begin file include/simdjson/common_defs.h */
|
|
#ifndef SIMDJSON_COMMON_DEFS_H
|
|
#define SIMDJSON_COMMON_DEFS_H
|
|
|
|
#include <cassert>
|
|
/* begin file include/simdjson/portability.h */
|
|
#ifndef SIMDJSON_PORTABILITY_H
|
|
#define SIMDJSON_PORTABILITY_H
|
|
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <cstdlib>
|
|
#ifdef _MSC_VER
|
|
#include <iso646.h>
|
|
#endif
|
|
|
|
// Architecture detection.
#if defined(__x86_64__) || defined(_M_AMD64)
#define IS_X86_64 1
#endif
#if defined(__aarch64__) || defined(_M_ARM64)
#define IS_ARM64 1
#endif

// Two-level stringification so that macro arguments are expanded before being
// turned into a string literal. Any previous definitions are discarded.
#undef STRINGIFY_IMPLEMENTATION_
#undef STRINGIFY
#define STRINGIFY_IMPLEMENTATION_(a) #a
#define STRINGIFY(a) STRINGIFY_IMPLEMENTATION_(a)

// Implementation selection. Each SIMDJSON_IMPLEMENTATION_* flag is 1 when the
// implementation is compiled in and 0 when it is not. The fallback is enabled
// unless the user already defined the flag.
#ifndef SIMDJSON_IMPLEMENTATION_FALLBACK
#define SIMDJSON_IMPLEMENTATION_FALLBACK 1
#endif

#if IS_ARM64
#ifndef SIMDJSON_IMPLEMENTATION_ARM64
#define SIMDJSON_IMPLEMENTATION_ARM64 1
#endif
// The x86 implementations are never built on ARM64.
#define SIMDJSON_IMPLEMENTATION_HASWELL 0
#define SIMDJSON_IMPLEMENTATION_WESTMERE 0
#endif // IS_ARM64

#if IS_X86_64
#ifndef SIMDJSON_IMPLEMENTATION_HASWELL
#define SIMDJSON_IMPLEMENTATION_HASWELL 1
#endif
#ifndef SIMDJSON_IMPLEMENTATION_WESTMERE
#define SIMDJSON_IMPLEMENTATION_WESTMERE 1
#endif
// The ARM64 implementation is never built on x86-64.
#define SIMDJSON_IMPLEMENTATION_ARM64 0
#endif // IS_X86_64

// we are going to use runtime dispatch
#ifdef IS_X86_64
#ifdef __clang__
// clang does not have GCC push pop
// warning: clang attribute push can't be used within a namespace in clang up
// til 8.0 so TARGET_REGION and UNTARGET_REGION must be *outside* of a
// namespace.
#define TARGET_REGION(T)                                                       \
  _Pragma(STRINGIFY(                                                           \
      clang attribute push(__attribute__((target(T))), apply_to = function)))
#define UNTARGET_REGION _Pragma("clang attribute pop")
#elif defined(__GNUC__)
// GCC is easier
#define TARGET_REGION(T)                                                       \
  _Pragma("GCC push_options") _Pragma(STRINGIFY(GCC target(T)))
#define UNTARGET_REGION _Pragma("GCC pop_options")
#endif // clang then gcc

#endif // x86

// Default target region macros don't do anything.
#ifndef TARGET_REGION
#define TARGET_REGION(T)
#define UNTARGET_REGION
#endif

// under GCC and CLANG, we use these two macros
#define TARGET_HASWELL TARGET_REGION("avx2,bmi,pclmul,lzcnt")
#define TARGET_WESTMERE TARGET_REGION("sse4.2,pclmul")
#define TARGET_ARM64

// Threading: start from a clean state (any earlier definition is dropped),
// then enable it only when one of the toolchain's threading macros is defined.
#undef SIMDJSON_THREADS_ENABLED
// Is threading enabled?
#if defined(BOOST_HAS_THREADS) || defined(_REENTRANT) || defined(_MT)
#define SIMDJSON_THREADS_ENABLED
#endif

// Attribute that disables the undefined-behaviour sanitizer on a function.
// Expands to nothing on compilers other than clang and GCC.
#if defined(__clang__)
#define NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined")))
#elif defined(__GNUC__)
#define NO_SANITIZE_UNDEFINED __attribute__((no_sanitize_undefined))
#else
#define NO_SANITIZE_UNDEFINED
#endif

#ifdef _MSC_VER
#include <intrin.h> // visual studio
#endif

// Case-insensitive C-string comparison under a single name.
#ifdef _MSC_VER
#define simdjson_strcasecmp _stricmp
#else
#define simdjson_strcasecmp strcasecmp
#endif
|
|
|
|
namespace simdjson {

/**
 * Portable version of posix_memalign.
 *
 * @param alignment Requested alignment in bytes; forwarded unchanged to the
 *                  platform allocator.
 * @param size Number of bytes to allocate.
 * @return The allocated block. On the posix_memalign path, nullptr is returned
 *         when the call reports failure; on MSVC and MinGW the allocator's own
 *         return value is passed through.
 */
static inline void *aligned_malloc(size_t alignment, size_t size) {
#ifdef _MSC_VER
  return _aligned_malloc(size, alignment);
#elif defined(__MINGW32__) || defined(__MINGW64__)
  return __mingw_aligned_malloc(size, alignment);
#else
  // somehow, if this is used before including "x86intrin.h", it creates an
  // implicit defined warning.
  void *block = nullptr;
  if (posix_memalign(&block, alignment, size) != 0) {
    return nullptr;
  }
  return block;
#endif
}

/**
 * Same as aligned_malloc(), with the result typed as char*.
 */
static inline char *aligned_malloc_char(size_t alignment, size_t size) {
  return static_cast<char *>(aligned_malloc(alignment, size));
}

/**
 * Release a block obtained from aligned_malloc().
 *
 * @param mem_block The block to free; nullptr is accepted and ignored.
 */
static inline void aligned_free(void *mem_block) {
  if (mem_block == nullptr) {
    return;
  }
  // Each platform pairs its aligned allocator with a matching deallocator.
#ifdef _MSC_VER
  _aligned_free(mem_block);
#elif defined(__MINGW32__) || defined(__MINGW64__)
  __mingw_aligned_free(mem_block);
#else
  free(mem_block);
#endif
}

/**
 * Same as aligned_free(), for blocks held through a char*.
 */
static inline void aligned_free_char(char *mem_block) {
  aligned_free(static_cast<void *>(mem_block));
}

} // namespace simdjson
|
|
#endif // SIMDJSON_PORTABILITY_H
|
|
/* end file include/simdjson/portability.h */
|
|
|
|
namespace simdjson {

// SIMDJSON_EXCEPTIONS controls whether the throwing parts of the API are
// compiled. A user-supplied definition wins; otherwise it is 1 when the
// compiler reports exception support through __cpp_exceptions and 0 if not.
#ifndef SIMDJSON_EXCEPTIONS
#if __cpp_exceptions
#define SIMDJSON_EXCEPTIONS 1
#else
#define SIMDJSON_EXCEPTIONS 0
#endif
#endif

/** The maximum document size supported by simdjson. */
constexpr size_t SIMDJSON_MAXSIZE_BYTES = 0xFFFFFFFF;

/**
 * The amount of padding needed in a buffer to parse JSON.
 *
 * the input buf should be readable up to buf + SIMDJSON_PADDING
 * this is a stopgap; there should be a better description of the
 * main loop and its behavior that abstracts over this
 * See https://github.com/lemire/simdjson/issues/174
 */
constexpr size_t SIMDJSON_PADDING = 32;

/**
 * By default, simdjson supports this many nested objects and arrays.
 *
 * This is the default for document::parser::max_depth().
 */
constexpr size_t DEFAULT_MAX_DEPTH = 1024;

} // namespace simdjson
|
|
|
|
#if defined(__GNUC__)
// Marks a block with a name so that MCA analysis can see it.
#define BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name);
#define END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name);
#define DEBUG_BLOCK(name, block) BEGIN_DEBUG_BLOCK(name); block; END_DEBUG_BLOCK(name);
#else
// No markers on other compilers. DEBUG_BLOCK must still run `block`:
// expanding to nothing would silently drop the wrapped code.
#define BEGIN_DEBUG_BLOCK(name)
#define END_DEBUG_BLOCK(name)
#define DEBUG_BLOCK(name, block) block;
#endif

#if !defined(_MSC_VER) && !defined(SIMDJSON_NO_COMPUTED_GOTO)
// Implemented using Labels as Values which works in GCC and CLANG (and maybe
// also in Intel's compiler), but won't work in MSVC.
#define SIMDJSON_USE_COMPUTED_GOTO
#endif

// Align to N-byte boundary. The mask arithmetic is only meaningful when n is
// a power of two.
#define ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1))
#define ROUNDDOWN_N(a, n) ((a) & ~((n)-1))

// True when the address `ptr` has none of the low (n-1) bits set.
#define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0)

#ifdef _MSC_VER

#define really_inline __forceinline
#define never_inline __declspec(noinline)

// No MSVC equivalent is provided for these two attributes.
#define UNUSED
#define WARN_UNUSED

// Branch hints are no-ops here. The argument is parenthesised so the expansion
// stays a single expression whatever operators surround it.
#ifndef likely
#define likely(x) (x)
#endif
#ifndef unlikely
#define unlikely(x) (x)
#endif

#else

#define really_inline inline __attribute__((always_inline, unused))
#define never_inline inline __attribute__((noinline, unused))

#define UNUSED __attribute__((unused))
#define WARN_UNUSED __attribute__((warn_unused_result))

// Branch-prediction hints; `!!` normalises the condition to 0 or 1.
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#endif
#ifndef unlikely
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif

#endif // MSC_VER
|
|
|
|
#endif // SIMDJSON_COMMON_DEFS_H
|
|
/* end file include/simdjson/common_defs.h */
|
|
#include <string>
|
|
#include <utility>
|
|
|
|
namespace simdjson {
|
|
|
|
/**
 * All possible errors returned by simdjson.
 *
 * SUCCESS is 0, so any value other than SUCCESS converts to true in a
 * condition. (SUCCESS_AND_HAS_MORE is documented as a non-error but is
 * nonzero.)
 */
enum error_code {
  SUCCESS = 0,              ///< No error
  SUCCESS_AND_HAS_MORE,     ///< No error and buffer still has more data
  CAPACITY,                 ///< This parser can't support a document that big
  MEMALLOC,                 ///< Error allocating memory, most likely out of memory
  TAPE_ERROR,               ///< Something went wrong while writing to the tape (stage 2), this is a generic error
  DEPTH_ERROR,              ///< Your document exceeds the user-specified depth limitation
  STRING_ERROR,             ///< Problem while parsing a string
  T_ATOM_ERROR,             ///< Problem while parsing an atom starting with the letter 't'
  F_ATOM_ERROR,             ///< Problem while parsing an atom starting with the letter 'f'
  N_ATOM_ERROR,             ///< Problem while parsing an atom starting with the letter 'n'
  NUMBER_ERROR,             ///< Problem while parsing a number
  UTF8_ERROR,               ///< the input is not valid UTF-8
  UNINITIALIZED,            ///< unknown error, or uninitialized document
  EMPTY,                    ///< no structural element found
  UNESCAPED_CHARS,          ///< found unescaped characters in a string.
  UNCLOSED_STRING,          ///< missing quote at the end
  UNSUPPORTED_ARCHITECTURE, ///< unsupported architecture
  INCORRECT_TYPE,           ///< JSON element has a different type than user expected
  NUMBER_OUT_OF_RANGE,      ///< JSON number does not fit in 64 bits
  NO_SUCH_FIELD,            ///< JSON field not found in object
  IO_ERROR,                 ///< Error reading a file
  UNEXPECTED_ERROR,         ///< indicative of a bug in simdjson
  /** @private Number of error codes */
  NUM_ERROR_CODES
};
|
|
|
|
/**
|
|
* Get the error message for the given error code.
|
|
*
|
|
* auto [doc, error] = document::parse("foo");
|
|
* if (error) { printf("Error: %s\n", error_message(error)); }
|
|
*
|
|
* @return The error message.
|
|
*/
|
|
inline const char *error_message(error_code error) noexcept;
|
|
|
|
/**
|
|
* Write the error message to the output stream
|
|
*/
|
|
inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept;
|
|
|
|
/**
|
|
* Exception thrown when an exception-supporting simdjson method is called
|
|
*/
|
|
struct simdjson_error : public std::exception {
|
|
/**
|
|
* Create an exception from a simdjson error code.
|
|
* @param error The error code
|
|
*/
|
|
simdjson_error(error_code error) noexcept : _error{error} { }
|
|
/** The error message */
|
|
const char *what() const noexcept { return error_message(error()); }
|
|
/** The error code */
|
|
error_code error() const noexcept { return _error; }
|
|
private:
|
|
/** The error code that was used */
|
|
error_code _error;
|
|
};
|
|
|
|
/**
|
|
* The result of a simd operation that could fail.
|
|
*
|
|
* Gives the option of reading error codes, or throwing an exception by casting to the desired result.
|
|
*/
|
|
template<typename T>
|
|
struct simdjson_result : public std::pair<T, error_code> {
|
|
/**
|
|
* The error.
|
|
*/
|
|
error_code error() const { return this->second; }
|
|
|
|
#if SIMDJSON_EXCEPTIONS
|
|
|
|
/**
|
|
* The value of the function.
|
|
*
|
|
* @throw simdjson_error if there was an error.
|
|
*/
|
|
T get() noexcept(false) {
|
|
if (error()) { throw simdjson_error(error()); }
|
|
return this->first;
|
|
};
|
|
|
|
/**
|
|
* Cast to the value (will throw on error).
|
|
*
|
|
* @throw simdjson_error if there was an error.
|
|
*/
|
|
operator T() noexcept(false) { return get(); }
|
|
|
|
#endif // SIMDJSON_EXCEPTIONS
|
|
|
|
/**
|
|
* Create a new error result.
|
|
*/
|
|
simdjson_result(error_code _error) noexcept : std::pair<T, error_code>({}, _error) {}
|
|
|
|
/**
|
|
* Create a new successful result.
|
|
*/
|
|
simdjson_result(T _value) noexcept : std::pair<T, error_code>(_value, SUCCESS) {}
|
|
|
|
/**
|
|
* Create a new result with both things (use if you don't want to branch when creating the result).
|
|
*/
|
|
simdjson_result(T value, error_code error) noexcept : std::pair<T, error_code>(value, error) {}
|
|
};
|
|
|
|
/**
|
|
* The result of a simd operation that could fail.
|
|
*
|
|
* This class is for values that must be *moved*, like padded_string and document.
|
|
*
|
|
* Gives the option of reading error codes, or throwing an exception by casting to the desired result.
|
|
*/
|
|
template<typename T>
|
|
struct simdjson_move_result : std::pair<T, error_code> {
|
|
/**
|
|
* Move the value and the error to the provided variables.
|
|
*/
|
|
void tie(T& t, error_code & e) {
|
|
// on the clang compiler that comes with current macOS (Apple clang version 11.0.0),
|
|
// std::tie(this->json, error) = padded_string::load(filename);
|
|
// fails with "benchmark/benchmarker.h:266:33: error: no viable overloaded '='""
|
|
t = std::move(this->first);
|
|
e = std::move(this->second);
|
|
}
|
|
/**
|
|
* The error.
|
|
*/
|
|
error_code error() const { return this->second; }
|
|
|
|
#if SIMDJSON_EXCEPTIONS
|
|
|
|
/**
|
|
* The value of the function.
|
|
*
|
|
* @throw simdjson_error if there was an error.
|
|
*/
|
|
T move() noexcept(false) {
|
|
if (error()) { throw simdjson_error(error()); }
|
|
return std::move(this->first);
|
|
};
|
|
|
|
/**
|
|
* Cast to the value (will throw on error).
|
|
*
|
|
* @throw simdjson_error if there was an error.
|
|
*/
|
|
operator T() noexcept(false) { return move(); }
|
|
|
|
#endif
|
|
|
|
/**
|
|
* Create a new error result.
|
|
*/
|
|
simdjson_move_result(error_code error) noexcept : std::pair<T, error_code>(T(), error) {}
|
|
|
|
/**
|
|
* Create a new successful result.
|
|
*/
|
|
simdjson_move_result(T value) noexcept : std::pair<T, error_code>(std::move(value), SUCCESS) {}
|
|
|
|
/**
|
|
* Create a new result with both things (use if you don't want to branch when creating the result).
|
|
*/
|
|
simdjson_move_result(T value, error_code error) noexcept : std::pair<T, error_code>(std::move(value), error) {}
|
|
};
|
|
|
|
/**
|
|
* @deprecated This is an alias and will be removed, use error_code instead
|
|
*/
|
|
using ErrorValues = error_code;
|
|
|
|
/**
|
|
* @deprecated Error codes should be stored and returned as `error_code`, use `error_message()` instead.
|
|
*/
|
|
inline const std::string &error_message(int error) noexcept;
|
|
|
|
} // namespace simdjson
|
|
|
|
#endif // SIMDJSON_ERROR_H
|
|
/* end file include/simdjson/error.h */
|
|
/* begin file include/simdjson/padded_string.h */
|
|
#ifndef SIMDJSON_PADDED_STRING_H
|
|
#define SIMDJSON_PADDED_STRING_H
|
|
|
|
#include <cstring>
|
|
#include <memory>
|
|
#include <string>
|
|
|
|
namespace simdjson {
|
|
|
|
/**
|
|
* String with extra allocation for ease of use with document::parser::parse()
|
|
*
|
|
* This is a move-only class, it cannot be copied.
|
|
*/
|
|
struct padded_string final {
|
|
|
|
/**
|
|
* Create a new, empty padded string.
|
|
*/
|
|
explicit inline padded_string() noexcept;
|
|
/**
|
|
* Create a new padded string buffer.
|
|
*
|
|
* @param length the size of the string.
|
|
*/
|
|
explicit inline padded_string(size_t length) noexcept;
|
|
/**
|
|
* Create a new padded string by copying the given input.
|
|
*
|
|
* @param data the buffer to copy
|
|
* @param length the number of bytes to copy
|
|
*/
|
|
explicit inline padded_string(const char *data, size_t length) noexcept;
|
|
/**
|
|
* Create a new padded string by copying the given input.
|
|
*
|
|
* @param str_ the string to copy
|
|
*/
|
|
inline padded_string(const std::string & str_ ) noexcept;
|
|
/**
|
|
* Create a new padded string by copying the given input.
|
|
*
|
|
* @param str_ the string to copy
|
|
*/
|
|
inline padded_string(std::string_view sv_) noexcept;
|
|
/**
|
|
* Move one padded string into another.
|
|
*
|
|
* The original padded string will be reduced to zero capacity.
|
|
*
|
|
* @param o the string to move.
|
|
*/
|
|
inline padded_string(padded_string &&o) noexcept;
|
|
/**
|
|
* Move one padded string into another.
|
|
*
|
|
* The original padded string will be reduced to zero capacity.
|
|
*
|
|
* @param o the string to move.
|
|
*/
|
|
inline padded_string &operator=(padded_string &&o) noexcept;
|
|
inline void swap(padded_string &o) noexcept;
|
|
~padded_string() noexcept;
|
|
|
|
/**
|
|
* The length of the string.
|
|
*
|
|
* Does not include padding.
|
|
*/
|
|
size_t size() const noexcept;
|
|
|
|
/**
|
|
* The length of the string.
|
|
*
|
|
* Does not include padding.
|
|
*/
|
|
size_t length() const noexcept;
|
|
|
|
/**
|
|
* The string data.
|
|
**/
|
|
const char *data() const noexcept;
|
|
|
|
/**
|
|
* The string data.
|
|
**/
|
|
char *data() noexcept;
|
|
|
|
/**
|
|
* Load this padded string from a file.
|
|
*
|
|
* @param path the path to the file.
|
|
**/
|
|
inline static simdjson_move_result<padded_string> load(const std::string &path) noexcept;
|
|
|
|
private:
|
|
padded_string &operator=(const padded_string &o) = delete;
|
|
padded_string(const padded_string &o) = delete;
|
|
|
|
size_t viable_size;
|
|
char *data_ptr{nullptr};
|
|
|
|
}; // padded_string
|
|
|
|
} // namespace simdjson
|
|
|
|
namespace simdjson::internal {

// Low-level function to allocate memory with padding so we can read past the
// "length" bytes safely. If you must provide a pointer to some data, create it
// with this function. `length` is the maximum size in bytes of the string.
// The caller is responsible for freeing the memory (free(...)).
inline char *allocate_padded_buffer(size_t length) noexcept;

} // namespace simdjson::internal
|
|
|
|
#endif // SIMDJSON_PADDED_STRING_H
|
|
/* end file include/simdjson/padded_string.h */
|
|
/* begin file include/simdjson/implementation.h */
|
|
#ifndef SIMDJSON_IMPLEMENTATION_H
|
|
#define SIMDJSON_IMPLEMENTATION_H
|
|
|
|
#include <optional>
|
|
#include <string>
|
|
#include <atomic>
|
|
#include <vector>
|
|
/* begin file include/simdjson/document.h */
|
|
#ifndef SIMDJSON_DOCUMENT_H
|
|
#define SIMDJSON_DOCUMENT_H
|
|
|
|
#include <cstring>
|
|
#include <memory>
|
|
#include <string>
|
|
#include <limits>
|
|
#include <sstream>
|
|
/* begin file include/simdjson/simdjson.h */
/**
 * @file
 * @deprecated We'll be removing this file so it isn't confused with the top level simdjson.h
 */
#ifndef SIMDJSON_SIMDJSON_H
#define SIMDJSON_SIMDJSON_H

#endif // SIMDJSON_SIMDJSON_H
/* end file include/simdjson/simdjson.h */
|
|
|
|
namespace simdjson::internal {
// Mask selecting the low 56 bits of a 64-bit word.
// NOTE(review): presumably the payload part of a tape entry, with the top byte
// holding the tape_type tag; confirm against the tape writer.
constexpr const uint64_t JSON_VALUE_MASK = 0x00FFFFFFFFFFFFFF;
// Forward declarations; both are defined further down in this file.
enum class tape_type;
class tape_ref;
} // namespace simdjson::internal
|
|
|
|
namespace simdjson {
|
|
|
|
template<size_t max_depth> class document_iterator;
|
|
|
|
/**
|
|
* A parsed JSON document.
|
|
*
|
|
* This class cannot be copied, only moved, to avoid unintended allocations.
|
|
*/
|
|
class document {
|
|
public:
|
|
/**
|
|
* Create a document container with zero capacity.
|
|
*
|
|
* The parser will allocate capacity as needed.
|
|
*/
|
|
document() noexcept=default;
|
|
~document() noexcept=default;
|
|
|
|
/**
|
|
* Take another document's buffers.
|
|
*
|
|
* @param other The document to take. Its capacity is zeroed and it is invalidated.
|
|
*/
|
|
document(document &&other) noexcept = default;
|
|
document(const document &) = delete; // Disallow copying
|
|
/**
|
|
* Take another document's buffers.
|
|
*
|
|
* @param other The document to take. Its capacity is zeroed.
|
|
*/
|
|
document &operator=(document &&other) noexcept = default;
|
|
document &operator=(const document &) = delete; // Disallow copying
|
|
|
|
/** The default batch size for parse_many and load_many */
|
|
static constexpr size_t DEFAULT_BATCH_SIZE = 1000000;
|
|
|
|
// Nested classes
|
|
class element;
|
|
class array;
|
|
class object;
|
|
class key_value_pair;
|
|
class parser;
|
|
class stream;
|
|
|
|
class doc_move_result;
|
|
class doc_result;
|
|
class element_result;
|
|
class array_result;
|
|
class object_result;
|
|
class stream_result;
|
|
|
|
// Nested classes. See definitions later in file.
|
|
using iterator = document_iterator<DEFAULT_MAX_DEPTH>;
|
|
|
|
/**
|
|
* Get the root element of this document as a JSON array.
|
|
*/
|
|
element root() const noexcept;
|
|
/**
|
|
* Get the root element of this document as a JSON array.
|
|
*/
|
|
array_result as_array() const noexcept;
|
|
/**
|
|
* Get the root element of this document as a JSON object.
|
|
*/
|
|
object_result as_object() const noexcept;
|
|
/**
|
|
* Get the root element of this document.
|
|
*/
|
|
operator element() const noexcept;
|
|
|
|
#if SIMDJSON_EXCEPTIONS
|
|
/**
|
|
* Read the root element of this document as a JSON array.
|
|
*
|
|
* @return The JSON array.
|
|
* @exception simdjson_error(UNEXPECTED_TYPE) if the JSON element is not an array
|
|
*/
|
|
operator array() const noexcept(false);
|
|
/**
|
|
* Read this element as a JSON object (key/value pairs).
|
|
*
|
|
* @return The JSON object.
|
|
* @exception simdjson_error(UNEXPECTED_TYPE) if the JSON element is not an object
|
|
*/
|
|
operator object() const noexcept(false);
|
|
#endif // SIMDJSON_EXCEPTIONS
|
|
|
|
/**
|
|
* Get the value associated with the given key.
|
|
*
|
|
* The key will be matched against **unescaped** JSON:
|
|
*
|
|
* document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1
|
|
* document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD
|
|
*
|
|
* @return The value associated with the given key, or:
|
|
* - NO_SUCH_FIELD if the field does not exist in the object
|
|
* - UNEXPECTED_TYPE if the document is not an object
|
|
*/
|
|
element_result operator[](const std::string_view &s) const noexcept;
|
|
/**
|
|
* Get the value associated with the given key.
|
|
*
|
|
* The key will be matched against **unescaped** JSON:
|
|
*
|
|
* document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1
|
|
* document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD
|
|
*
|
|
* @return The value associated with this field, or:
|
|
* - NO_SUCH_FIELD if the field does not exist in the object
|
|
* - UNEXPECTED_TYPE if the document is not an object
|
|
*/
|
|
element_result operator[](const char *s) const noexcept;
|
|
|
|
/**
|
|
* Dump the raw tape for debugging.
|
|
*
|
|
* @param os the stream to output to.
|
|
* @return false if the tape is likely wrong (e.g., you did not parse a valid JSON).
|
|
*/
|
|
bool dump_raw_tape(std::ostream &os) const noexcept;
|
|
|
|
/**
|
|
* Load a JSON document from a file and return it.
|
|
*
|
|
* document doc = document::load("jsonexamples/twitter.json");
|
|
*
|
|
* ### Parser Capacity
|
|
*
|
|
* If the parser's current capacity is less than the file length, it will allocate enough capacity
|
|
* to handle it (up to max_capacity).
|
|
*
|
|
* @param path The path to load.
|
|
* @return The document, or an error:
|
|
* - IO_ERROR if there was an error opening or reading the file.
|
|
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails.
|
|
* - CAPACITY if the parser does not have enough capacity and len > max_capacity.
|
|
* - other json errors if parsing fails.
|
|
*/
|
|
inline static doc_move_result load(const std::string& path) noexcept;
|
|
|
|
/**
|
|
* Parse a JSON document and return a reference to it.
|
|
*
|
|
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
|
* those bytes are initialized to, as long as they are allocated. If realloc_if_needed is true,
|
|
* it is assumed that the buffer does *not* have enough padding, and it is reallocated, enlarged
|
|
* and copied before parsing.
|
|
*
|
|
* @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless
|
|
* realloc_if_needed is true.
|
|
* @param len The length of the JSON.
|
|
* @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding.
|
|
* @return the document, or an error if the JSON is invalid.
|
|
*/
|
|
inline static doc_move_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept;
|
|
|
|
/**
|
|
* Parse a JSON document.
|
|
*
|
|
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
|
* those bytes are initialized to, as long as they are allocated. If realloc_if_needed is true,
|
|
* it is assumed that the buffer does *not* have enough padding, and it is reallocated, enlarged
|
|
* and copied before parsing.
|
|
*
|
|
* @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless
|
|
* realloc_if_needed is true.
|
|
* @param len The length of the JSON.
|
|
* @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding.
|
|
* @return the document, or an error if the JSON is invalid.
|
|
*/
|
|
really_inline static doc_move_result parse(const char *buf, size_t len, bool realloc_if_needed = true) noexcept;
|
|
|
|
/**
|
|
* Parse a JSON document.
|
|
*
|
|
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
|
* those bytes are initialized to, as long as they are allocated. If `str.capacity() - str.size()
|
|
* < SIMDJSON_PADDING`, the string will be copied to a string with larger capacity before parsing.
|
|
*
|
|
* @param s The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, or
|
|
* a new string will be created with the extra padding.
|
|
* @return the document, or an error if the JSON is invalid.
|
|
*/
|
|
really_inline static doc_move_result parse(const std::string &s) noexcept;
|
|
|
|
/**
|
|
* Parse a JSON document.
|
|
*
|
|
* @param s The JSON to parse.
|
|
* @return the document, or an error if the JSON is invalid.
|
|
*/
|
|
really_inline static doc_move_result parse(const padded_string &s) noexcept;
|
|
|
|
// We do not want to allow implicit conversion from C string to std::string.
|
|
doc_result parse(const char *buf, bool realloc_if_needed = true) noexcept = delete;
|
|
|
|
std::unique_ptr<uint64_t[]> tape;
|
|
std::unique_ptr<uint8_t[]> string_buf;// should be at least byte_capacity
|
|
|
|
private:
|
|
inline error_code set_capacity(size_t len) noexcept;
|
|
template<typename T>
|
|
friend class minify;
|
|
}; // class document
|
|
|
|
template<typename T>
|
|
class minify;
|
|
|
|
/**
|
|
* A parsed, *owned* document, or an error if the parse failed.
|
|
*
|
|
* document &doc = document::parse(json);
|
|
*
|
|
* Returns an owned `document`. When the doc_move_result (or the document retrieved from it) goes out of
|
|
* scope, the document's memory is deallocated.
|
|
*
|
|
* ## Error Codes vs. Exceptions
|
|
*
|
|
* This result type allows the user to pick whether to use exceptions or not.
|
|
*
|
|
* Use like this to avoid exceptions:
|
|
*
|
|
* auto [doc, error] = document::parse(json);
|
|
* if (error) { exit(1); }
|
|
*
|
|
* Use like this if you'd prefer to use exceptions:
|
|
*
|
|
* document doc = document::parse(json);
|
|
*
|
|
*/
|
|
class document::doc_move_result : public simdjson_move_result<document> {
|
|
public:
|
|
|
|
/**
|
|
* Read this document as a JSON objec.
|
|
*
|
|
* @return The object value, or:
|
|
* - UNEXPECTED_TYPE if the JSON document is not an object
|
|
*/
|
|
inline object_result as_object() const noexcept;
|
|
|
|
/**
|
|
* Read this document as a JSON array.
|
|
*
|
|
* @return The array value, or:
|
|
* - UNEXPECTED_TYPE if the JSON document is not an array
|
|
*/
|
|
inline array_result as_array() const noexcept;
|
|
|
|
/**
|
|
* Get the value associated with the given key.
|
|
*
|
|
* The key will be matched against **unescaped** JSON:
|
|
*
|
|
* document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1
|
|
* document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD
|
|
*
|
|
* @return The value associated with this field, or:
|
|
* - NO_SUCH_FIELD if the field does not exist in the object
|
|
* - UNEXPECTED_TYPE if the document is not an object
|
|
*/
|
|
inline element_result operator[](const std::string_view &key) const noexcept;
|
|
/**
|
|
* Get the value associated with the given key.
|
|
*
|
|
* The key will be matched against **unescaped** JSON:
|
|
*
|
|
* document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1
|
|
* document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD
|
|
*
|
|
* @return The value associated with this field, or:
|
|
* - NO_SUCH_FIELD if the field does not exist in the object
|
|
* - UNEXPECTED_TYPE if the document is not an object
|
|
*/
|
|
inline element_result operator[](const char *key) const noexcept;
|
|
|
|
~doc_move_result() noexcept=default;
|
|
doc_move_result(document &&doc, error_code error) noexcept;
|
|
doc_move_result(document &&doc) noexcept;
|
|
doc_move_result(error_code error) noexcept;
|
|
friend class document;
|
|
}; // class document::doc_move_result
|
|
|
|
/**
|
|
* A parsed document reference, or an error if the parse failed.
|
|
*
|
|
* document &doc = document::parse(json);
|
|
*
|
|
* ## Document Ownership
|
|
*
|
|
* The `document &` refers to an internal document the parser reuses on each `parse()` call. It will
|
|
* become invalidated on the next `parse()`.
|
|
*
|
|
* This is more efficient for common cases where documents are parsed and used one at a time. If you
|
|
* need to keep the document around longer, you may *take* it from the parser by casting it:
|
|
*
|
|
* document doc = parser.parse(); // take ownership
|
|
*
|
|
* If you do this, the parser will automatically allocate a new document on the next `parse()` call.
|
|
*
|
|
* ## Error Codes vs. Exceptions
|
|
*
|
|
* This result type allows the user to pick whether to use exceptions or not.
|
|
*
|
|
* Use like this to avoid exceptions:
|
|
*
|
|
* auto [doc, error] = parser.parse(json);
|
|
* if (error) { exit(1); }
|
|
*
|
|
* Use like this if you'd prefer to use exceptions:
|
|
*
|
|
* document &doc = document::parse(json);
|
|
*
|
|
*/
|
|
class document::doc_result : public simdjson_result<document&> {
|
|
public:
|
|
/**
|
|
* Read this document as a JSON objec.
|
|
*
|
|
* @return The object value, or:
|
|
* - UNEXPECTED_TYPE if the JSON document is not an object
|
|
*/
|
|
inline object_result as_object() const noexcept;
|
|
|
|
/**
|
|
* Read this document as a JSON array.
|
|
*
|
|
* @return The array value, or:
|
|
* - UNEXPECTED_TYPE if the JSON document is not an array
|
|
*/
|
|
inline array_result as_array() const noexcept;
|
|
|
|
/**
|
|
* Get the value associated with the given key.
|
|
*
|
|
* The key will be matched against **unescaped** JSON:
|
|
*
|
|
* document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1
|
|
* document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD
|
|
*
|
|
* @return The value associated with this field, or:
|
|
* - NO_SUCH_FIELD if the field does not exist in the object
|
|
* - UNEXPECTED_TYPE if the document is not an object
|
|
*/
|
|
inline element_result operator[](const std::string_view &key) const noexcept;
|
|
|
|
/**
|
|
* Get the value associated with the given key.
|
|
*
|
|
* The key will be matched against **unescaped** JSON:
|
|
*
|
|
* document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1
|
|
* document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD
|
|
*
|
|
* @return The value associated with this field, or:
|
|
* - NO_SUCH_FIELD if the field does not exist in the object
|
|
* - UNEXPECTED_TYPE if the document is not an object
|
|
*/
|
|
inline element_result operator[](const char *key) const noexcept;
|
|
|
|
~doc_result()=default;
|
|
doc_result(document &doc, error_code error) noexcept;
|
|
friend class document::parser;
|
|
friend class document::stream;
|
|
}; // class document::doc_result
|
|
|
|
namespace internal {
|
|
/**
 * The possible types in the tape. Internal only.
 *
 * Each enumerator's value is a single ASCII character. For arrays, objects and
 * strings it is the JSON character that opens or closes that kind of value.
 */
enum class tape_type {
  ROOT = 'r',
  START_ARRAY = '[',
  START_OBJECT = '{',
  END_ARRAY = ']',
  END_OBJECT = '}',
  STRING = '"',
  INT64 = 'l',
  UINT64 = 'u',
  DOUBLE = 'd',
  TRUE_VALUE = 't',
  FALSE_VALUE = 'f',
  NULL_VALUE = 'n'
};
|
|
|
|
/**
|
|
* A reference to an element on the tape. Internal only.
|
|
*/
|
|
class tape_ref {
|
|
protected:
|
|
really_inline tape_ref() noexcept;
|
|
really_inline tape_ref(const document *_doc, size_t _json_index) noexcept;
|
|
inline size_t after_element() const noexcept;
|
|
really_inline tape_type type() const noexcept;
|
|
really_inline uint64_t tape_value() const noexcept;
|
|
template<typename T>
|
|
really_inline T next_tape_value() const noexcept;
|
|
inline std::string_view get_string_view() const noexcept;
|
|
|
|
/** The document this element references. */
|
|
const document *doc;
|
|
|
|
/** The index of this element on `doc.tape[]` */
|
|
size_t json_index;
|
|
|
|
friend class simdjson::document::key_value_pair;
|
|
template<typename T>
|
|
friend class simdjson::minify;
|
|
};
|
|
} // namespace simdjson::internal
|
|
|
|
/**
|
|
* A JSON element.
|
|
*
|
|
* References an element in a JSON document, representing a JSON null, boolean, string, number,
|
|
* array or object.
|
|
*/
|
|
class document::element : protected internal::tape_ref {
|
|
public:
|
|
/** Create a new, invalid element. */
|
|
really_inline element() noexcept;
|
|
|
|
/** Whether this element is a json `null`. */
|
|
really_inline bool is_null() const noexcept;
|
|
/** Whether this is a JSON `true` or `false` */
|
|
really_inline bool is_bool() const noexcept;
|
|
/** Whether this is a JSON number (e.g. 1, 1.0 or 1e2) */
|
|
really_inline bool is_number() const noexcept;
|
|
/** Whether this is a JSON integer (e.g. 1 or -1, but *not* 1.0 or 1e2) */
|
|
really_inline bool is_integer() const noexcept;
|
|
/** Whether this is a JSON string (e.g. "abc") */
|
|
really_inline bool is_string() const noexcept;
|
|
/** Whether this is a JSON array (e.g. []) */
|
|
really_inline bool is_array() const noexcept;
|
|
/** Whether this is a JSON array (e.g. []) */
|
|
really_inline bool is_object() const noexcept;
|
|
|
|
/**
|
|
* Read this element as a boolean (json `true` or `false`).
|
|
*
|
|
* @return The boolean value, or:
|
|
* - UNEXPECTED_TYPE error if the JSON element is not a boolean
|
|
*/
|
|
inline simdjson_result<bool> as_bool() const noexcept;
|
|
|
|
/**
|
|
* Read this element as a null-terminated string.
|
|
*
|
|
* Does *not* convert other types to a string; requires that the JSON type of the element was
|
|
* an actual string.
|
|
*
|
|
* @return A `string_view` into the string, or:
|
|
* - UNEXPECTED_TYPE error if the JSON element is not a string
|
|
*/
|
|
inline simdjson_result<const char *> as_c_str() const noexcept;
|
|
|
|
/**
|
|
* Read this element as a C++ string_view (string with length).
|
|
*
|
|
* Does *not* convert other types to a string; requires that the JSON type of the element was
|
|
* an actual string.
|
|
*
|
|
* @return A `string_view` into the string, or:
|
|
* - UNEXPECTED_TYPE error if the JSON element is not a string
|
|
*/
|
|
inline simdjson_result<std::string_view> as_string() const noexcept;
|
|
|
|
/**
|
|
* Read this element as an unsigned integer.
|
|
*
|
|
* @return The uninteger value, or:
|
|
* - UNEXPECTED_TYPE if the JSON element is not an integer
|
|
* - NUMBER_OUT_OF_RANGE if the integer doesn't fit in 64 bits or is negative
|
|
*/
|
|
inline simdjson_result<uint64_t> as_uint64_t() const noexcept;
|
|
|
|
/**
|
|
* Read this element as a signed integer.
|
|
*
|
|
* @return The integer value, or:
|
|
* - UNEXPECTED_TYPE if the JSON element is not an integer
|
|
* - NUMBER_OUT_OF_RANGE if the integer doesn't fit in 64 bits
|
|
*/
|
|
inline simdjson_result<int64_t> as_int64_t() const noexcept;
|
|
|
|
/**
|
|
* Read this element as a floating point value.
|
|
*
|
|
* @return The double value, or:
|
|
* - UNEXPECTED_TYPE if the JSON element is not a number
|
|
*/
|
|
inline simdjson_result<double> as_double() const noexcept;
|
|
|
|
/**
|
|
* Read this element as a JSON array.
|
|
*
|
|
* @return The array value, or:
|
|
* - UNEXPECTED_TYPE if the JSON element is not an array
|
|
*/
|
|
inline array_result as_array() const noexcept;
|
|
|
|
/**
|
|
* Read this element as a JSON object (key/value pairs).
|
|
*
|
|
* @return The object value, or:
|
|
* - UNEXPECTED_TYPE if the JSON element is not an object
|
|
*/
|
|
inline object_result as_object() const noexcept;
|
|
|
|
#if SIMDJSON_EXCEPTIONS
|
|
/**
|
|
* Read this element as a boolean.
|
|
*
|
|
* @return The boolean value
|
|
* @exception simdjson_error(UNEXPECTED_TYPE) if the JSON element is not a boolean.
|
|
*/
|
|
inline operator bool() const noexcept(false);
|
|
|
|
/**
|
|
* Read this element as a null-terminated string.
|
|
*
|
|
* Does *not* convert other types to a string; requires that the JSON type of the element was
|
|
* an actual string.
|
|
*
|
|
* @return The string value.
|
|
* @exception simdjson_error(UNEXPECTED_TYPE) if the JSON element is not a string.
|
|
*/
|
|
inline explicit operator const char*() const noexcept(false);
|
|
|
|
/**
|
|
* Read this element as a null-terminated string.
|
|
*
|
|
* Does *not* convert other types to a string; requires that the JSON type of the element was
|
|
* an actual string.
|
|
*
|
|
* @return The string value.
|
|
* @exception simdjson_error(UNEXPECTED_TYPE) if the JSON element is not a string.
|
|
*/
|
|
inline operator std::string_view() const noexcept(false);
|
|
|
|
/**
|
|
* Read this element as an unsigned integer.
|
|
*
|
|
* @return The integer value.
|
|
* @exception simdjson_error(UNEXPECTED_TYPE) if the JSON element is not an integer
|
|
* @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative
|
|
*/
|
|
inline operator uint64_t() const noexcept(false);
|
|
/**
|
|
* Read this element as an signed integer.
|
|
*
|
|
* @return The integer value.
|
|
* @exception simdjson_error(UNEXPECTED_TYPE) if the JSON element is not an integer
|
|
* @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits
|
|
*/
|
|
inline operator int64_t() const noexcept(false);
|
|
/**
|
|
* Read this element as an double.
|
|
*
|
|
* @return The double value.
|
|
* @exception simdjson_error(UNEXPECTED_TYPE) if the JSON element is not a number
|
|
* @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative
|
|
*/
|
|
inline operator double() const noexcept(false);
|
|
/**
|
|
* Read this element as a JSON array.
|
|
*
|
|
* @return The JSON array.
|
|
* @exception simdjson_error(UNEXPECTED_TYPE) if the JSON element is not an array
|
|
*/
|
|
inline operator document::array() const noexcept(false);
|
|
/**
|
|
* Read this element as a JSON object (key/value pairs).
|
|
*
|
|
* @return The JSON object.
|
|
* @exception simdjson_error(UNEXPECTED_TYPE) if the JSON element is not an object
|
|
*/
|
|
inline operator document::object() const noexcept(false);
|
|
#endif // SIMDJSON_EXCEPTIONS
|
|
|
|
/**
|
|
* Get the value associated with the given key.
|
|
*
|
|
* The key will be matched against **unescaped** JSON:
|
|
*
|
|
* document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1
|
|
* document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD
|
|
*
|
|
* @return The value associated with this field, or:
|
|
* - NO_SUCH_FIELD if the field does not exist in the object
|
|
* - UNEXPECTED_TYPE if the document is not an object
|
|
*/
|
|
inline element_result operator[](const std::string_view &s) const noexcept;
|
|
|
|
/**
|
|
* Get the value associated with the given key.
|
|
*
|
|
* Note: The key will be matched against **unescaped** JSON:
|
|
*
|
|
* document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1
|
|
* document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD
|
|
*
|
|
* @return The value associated with this field, or:
|
|
* - NO_SUCH_FIELD if the field does not exist in the object
|
|
* - UNEXPECTED_TYPE if the document is not an object
|
|
*/
|
|
inline element_result operator[](const char *s) const noexcept;
|
|
|
|
private:
|
|
really_inline element(const document *_doc, size_t _json_index) noexcept;
|
|
friend class document;
|
|
friend class document::element_result;
|
|
template<typename T>
|
|
friend class minify;
|
|
};
|
|
|
|
/**
|
|
* Represents a JSON array.
|
|
*/
|
|
class document::array : protected internal::tape_ref {
|
|
public:
|
|
/** Create a new, invalid array */
|
|
really_inline array() noexcept;
|
|
|
|
class iterator : tape_ref {
|
|
public:
|
|
/**
|
|
* Get the actual value
|
|
*/
|
|
inline element operator*() const noexcept;
|
|
/**
|
|
* Get the next value.
|
|
*
|
|
* Part of the std::iterator interface.
|
|
*/
|
|
inline void operator++() noexcept;
|
|
/**
|
|
* Check if these values come from the same place in the JSON.
|
|
*
|
|
* Part of the std::iterator interface.
|
|
*/
|
|
inline bool operator!=(const iterator& other) const noexcept;
|
|
private:
|
|
really_inline iterator(const document *_doc, size_t _json_index) noexcept;
|
|
friend class array;
|
|
};
|
|
|
|
/**
|
|
* Return the first array element.
|
|
*
|
|
* Part of the std::iterable interface.
|
|
*/
|
|
inline iterator begin() const noexcept;
|
|
/**
|
|
* One past the last array element.
|
|
*
|
|
* Part of the std::iterable interface.
|
|
*/
|
|
inline iterator end() const noexcept;
|
|
|
|
private:
|
|
really_inline array(const document *_doc, size_t _json_index) noexcept;
|
|
friend class document::element;
|
|
friend class document::element_result;
|
|
template<typename T>
|
|
friend class minify;
|
|
};
|
|
|
|
/**
|
|
* Represents a JSON object.
|
|
*/
|
|
class document::object : protected internal::tape_ref {
|
|
public:
|
|
/** Create a new, invalid object */
|
|
really_inline object() noexcept;
|
|
|
|
class iterator : protected internal::tape_ref {
|
|
public:
|
|
/**
|
|
* Get the actual key/value pair
|
|
*/
|
|
inline const document::key_value_pair operator*() const noexcept;
|
|
/**
|
|
* Get the next key/value pair.
|
|
*
|
|
* Part of the std::iterator interface.
|
|
*/
|
|
inline void operator++() noexcept;
|
|
/**
|
|
* Check if these key value pairs come from the same place in the JSON.
|
|
*
|
|
* Part of the std::iterator interface.
|
|
*/
|
|
inline bool operator!=(const iterator& other) const noexcept;
|
|
/**
|
|
* Get the key of this key/value pair.
|
|
*/
|
|
inline std::string_view key() const noexcept;
|
|
/**
|
|
* Get the key of this key/value pair.
|
|
*/
|
|
inline const char *key_c_str() const noexcept;
|
|
/**
|
|
* Get the value of this key/value pair.
|
|
*/
|
|
inline element value() const noexcept;
|
|
private:
|
|
really_inline iterator(const document *_doc, size_t _json_index) noexcept;
|
|
friend class document::object;
|
|
};
|
|
|
|
/**
|
|
* Return the first key/value pair.
|
|
*
|
|
* Part of the std::iterable interface.
|
|
*/
|
|
inline iterator begin() const noexcept;
|
|
/**
|
|
* One past the last key/value pair.
|
|
*
|
|
* Part of the std::iterable interface.
|
|
*/
|
|
inline iterator end() const noexcept;
|
|
|
|
/**
|
|
* Get the value associated with the given key.
|
|
*
|
|
* The key will be matched against **unescaped** JSON:
|
|
*
|
|
* document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1
|
|
* document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD
|
|
*
|
|
* @return The value associated with this field, or:
|
|
* - NO_SUCH_FIELD if the field does not exist in the object
|
|
*/
|
|
inline element_result operator[](const std::string_view &s) const noexcept;
|
|
|
|
/**
|
|
* Get the value associated with the given key.
|
|
*
|
|
* Note: The key will be matched against **unescaped** JSON:
|
|
*
|
|
* document::parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1
|
|
* document::parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD
|
|
*
|
|
* @return The value associated with this field, or:
|
|
* - NO_SUCH_FIELD if the field does not exist in the object
|
|
*/
|
|
inline element_result operator[](const char *s) const noexcept;
|
|
|
|
private:
|
|
really_inline object(const document *_doc, size_t _json_index) noexcept;
|
|
friend class document::element;
|
|
friend class document::element_result;
|
|
template<typename T>
|
|
friend class minify;
|
|
};
|
|
|
|
/**
|
|
* Key/value pair in an object.
|
|
*/
|
|
class document::key_value_pair {
|
|
public:
|
|
std::string_view key;
|
|
document::element value;
|
|
|
|
private:
|
|
really_inline key_value_pair(std::string_view _key, document::element _value) noexcept;
|
|
friend class document::object;
|
|
};
|
|
|
|
|
|
/** The result of a JSON navigation that may fail. */
|
|
class document::element_result : public simdjson_result<document::element> {
|
|
public:
|
|
really_inline element_result(element value) noexcept;
|
|
really_inline element_result(error_code error) noexcept;
|
|
|
|
/** Whether this is a JSON `null` */
|
|
inline simdjson_result<bool> is_null() const noexcept;
|
|
inline simdjson_result<bool> as_bool() const noexcept;
|
|
inline simdjson_result<std::string_view> as_string() const noexcept;
|
|
inline simdjson_result<const char *> as_c_str() const noexcept;
|
|
inline simdjson_result<uint64_t> as_uint64_t() const noexcept;
|
|
inline simdjson_result<int64_t> as_int64_t() const noexcept;
|
|
inline simdjson_result<double> as_double() const noexcept;
|
|
inline array_result as_array() const noexcept;
|
|
inline object_result as_object() const noexcept;
|
|
|
|
inline element_result operator[](const std::string_view &s) const noexcept;
|
|
inline element_result operator[](const char *s) const noexcept;
|
|
|
|
#if SIMDJSON_EXCEPTIONS
|
|
inline operator bool() const noexcept(false);
|
|
inline explicit operator const char*() const noexcept(false);
|
|
inline operator std::string_view() const noexcept(false);
|
|
inline operator uint64_t() const noexcept(false);
|
|
inline operator int64_t() const noexcept(false);
|
|
inline operator double() const noexcept(false);
|
|
inline operator array() const noexcept(false);
|
|
inline operator object() const noexcept(false);
|
|
#endif // SIMDJSON_EXCEPTIONS
|
|
};
|
|
|
|
/** The result of a JSON conversion that may fail. */
|
|
class document::array_result : public simdjson_result<document::array> {
|
|
public:
|
|
really_inline array_result(array value) noexcept;
|
|
really_inline array_result(error_code error) noexcept;
|
|
|
|
#if SIMDJSON_EXCEPTIONS
|
|
inline array::iterator begin() const noexcept(false);
|
|
inline array::iterator end() const noexcept(false);
|
|
#endif // SIMDJSON_EXCEPTIONS
|
|
};
|
|
|
|
/** The result of a JSON conversion that may fail. */
|
|
class document::object_result : public simdjson_result<document::object> {
|
|
public:
|
|
really_inline object_result(object value) noexcept;
|
|
really_inline object_result(error_code error) noexcept;
|
|
|
|
inline element_result operator[](const std::string_view &s) const noexcept;
|
|
inline element_result operator[](const char *s) const noexcept;
|
|
|
|
#if SIMDJSON_EXCEPTIONS
|
|
inline object::iterator begin() const noexcept(false);
|
|
inline object::iterator end() const noexcept(false);
|
|
#endif // SIMDJSON_EXCEPTIONS
|
|
};
|
|
|
|
/**
|
|
* A persistent document parser.
|
|
*
|
|
* The parser is designed to be reused, holding the internal buffers necessary to do parsing,
|
|
* as well as memory for a single document. The parsed document is overwritten on each parse.
|
|
*
|
|
* This class cannot be copied, only moved, to avoid unintended allocations.
|
|
*
|
|
* @note This is not thread safe: one parser cannot produce two documents at the same time!
|
|
*/
|
|
class document::parser {
|
|
public:
|
|
/**
|
|
* Create a JSON parser.
|
|
*
|
|
* The new parser will have zero capacity.
|
|
*
|
|
* @param max_capacity The maximum document length the parser can automatically handle. The parser
|
|
* will allocate more capacity on an as needed basis (when it sees documents too big to handle)
|
|
* up to this amount. The parser still starts with zero capacity no matter what this number is:
|
|
* to allocate an initial capacity, call set_capacity() after constructing the parser. Defaults
|
|
* to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process).
|
|
* @param max_depth The maximum depth--number of nested objects and arrays--this parser can handle.
|
|
* This will not be allocated until parse() is called for the first time. Defaults to
|
|
* DEFAULT_MAX_DEPTH.
|
|
*/
|
|
really_inline parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept;
|
|
~parser()=default;
|
|
|
|
/**
|
|
* Take another parser's buffers and state.
|
|
*
|
|
* @param other The parser to take. Its capacity is zeroed.
|
|
*/
|
|
parser(document::parser &&other) = default;
|
|
parser(const document::parser &) = delete; // Disallow copying
|
|
/**
|
|
* Take another parser's buffers and state.
|
|
*
|
|
* @param other The parser to take. Its capacity is zeroed.
|
|
*/
|
|
parser &operator=(document::parser &&other) = default;
|
|
parser &operator=(const document::parser &) = delete; // Disallow copying
|
|
|
|
/**
|
|
* Load a JSON document from a file and return a reference to it.
|
|
*
|
|
* document::parser parser;
|
|
* const document &doc = parser.load("jsonexamples/twitter.json");
|
|
*
|
|
* ### IMPORTANT: Document Lifetime
|
|
*
|
|
* The JSON document still lives in the parser: this is the most efficient way to parse JSON
|
|
* documents because it reuses the same buffers, but you *must* use the document before you
|
|
* destroy the parser or call parse() again.
|
|
*
|
|
* ### Parser Capacity
|
|
*
|
|
* If the parser's current capacity is less than the file length, it will allocate enough capacity
|
|
* to handle it (up to max_capacity).
|
|
*
|
|
* @param path The path to load.
|
|
* @return The document, or an error:
|
|
* - IO_ERROR if there was an error opening or reading the file.
|
|
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails.
|
|
* - CAPACITY if the parser does not have enough capacity and len > max_capacity.
|
|
* - other json errors if parsing fails.
|
|
*/
|
|
inline doc_result load(const std::string& path) noexcept;
|
|
|
|
/**
|
|
* Load a file containing many JSON documents.
|
|
*
|
|
* document::parser parser;
|
|
* for (const document &doc : parser.load_many(path)) {
|
|
* cout << std::string(doc["title"]) << endl;
|
|
* }
|
|
*
|
|
* ### Format
|
|
*
|
|
* The file must contain a series of one or more JSON documents, concatenated into a single
|
|
* buffer, separated by whitespace. It effectively parses until it has a fully valid document,
|
|
* then starts parsing the next document at that point. (It does this with more parallelism and
|
|
* lookahead than you might think, though.)
|
|
*
|
|
* documents that consist of an object or array may omit the whitespace between them, concatenating
|
|
* with no separator. documents that consist of a single primitive (i.e. documents that are not
|
|
* arrays or objects) MUST be separated with whitespace.
|
|
*
|
|
* ### Error Handling
|
|
*
|
|
* All errors are returned during iteration: if there is a global error such as memory allocation,
|
|
* it will be yielded as the first result. Iteration always stops after the first error.
|
|
*
|
|
* As with all other simdjson methods, non-exception error handling is readily available through
|
|
* the same interface, requiring you to check the error before using the document:
|
|
*
|
|
* document::parser parser;
|
|
* for (auto [doc, error] : parser.load_many(path)) {
|
|
* if (error) { cerr << error << endl; exit(1); }
|
|
* cout << std::string(doc["title"]) << endl;
|
|
* }
|
|
*
|
|
* ### Threads
|
|
*
|
|
* When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
|
|
* hood to do some lookahead.
|
|
*
|
|
* ### Parser Capacity
|
|
*
|
|
* If the parser's current capacity is less than batch_size, it will allocate enough capacity
|
|
* to handle it (up to max_capacity).
|
|
*
|
|
* @param path The path of the file to load. The file must contain the concatenated JSON documents.
|
|
* @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
|
|
* spot is cache-related: small enough to fit in cache, yet big enough to
|
|
* parse as many documents as possible in one tight loop.
|
|
* Defaults to 10MB, which has been a reasonable sweet spot in our tests.
|
|
* @return The stream. If there is an error, it will be returned during iteration. An empty input
|
|
* will yield 0 documents rather than an EMPTY error. Errors:
|
|
* - IO_ERROR if there was an error opening or reading the file.
|
|
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails.
|
|
* - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
|
|
* - other json errors if parsing fails.
|
|
*/
|
|
inline document::stream load_many(const std::string& path, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
|
|
|
|
/**
|
|
* Parse a JSON document and return a temporary reference to it.
|
|
*
|
|
* document::parser parser;
|
|
* const document &doc = parser.parse(buf, len);
|
|
*
|
|
* ### IMPORTANT: Document Lifetime
|
|
*
|
|
* The JSON document still lives in the parser: this is the most efficient way to parse JSON
|
|
* documents because it reuses the same buffers, but you *must* use the document before you
|
|
* destroy the parser or call parse() again.
|
|
*
|
|
* ### REQUIRED: Buffer Padding
|
|
*
|
|
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
|
* those bytes are initialized to, as long as they are allocated.
|
|
*
|
|
* If realloc_if_needed is true, it is assumed that the buffer does *not* have enough padding,
|
|
* and it is copied into an enlarged temporary buffer before parsing.
|
|
*
|
|
* ### Parser Capacity
|
|
*
|
|
* If the parser's current capacity is less than len, it will allocate enough capacity
|
|
* to handle it (up to max_capacity).
|
|
*
|
|
* @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless
|
|
* realloc_if_needed is true.
|
|
* @param len The length of the JSON.
|
|
* @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding.
|
|
* @return The document, or an error:
|
|
* - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity,
|
|
* and memory allocation fails.
|
|
* - CAPACITY if the parser does not have enough capacity and len > max_capacity.
|
|
* - other json errors if parsing fails.
|
|
*/
|
|
inline doc_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept;
|
|
|
|
/**
|
|
* Parse a JSON document and return a temporary reference to it.
|
|
*
|
|
* document::parser parser;
|
|
* const document &doc = parser.parse(buf, len);
|
|
*
|
|
* ### IMPORTANT: Document Lifetime
|
|
*
|
|
* The JSON document still lives in the parser: this is the most efficient way to parse JSON
|
|
* documents because it reuses the same buffers, but you *must* use the document before you
|
|
* destroy the parser or call parse() again.
|
|
*
|
|
* ### REQUIRED: Buffer Padding
|
|
*
|
|
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
|
* those bytes are initialized to, as long as they are allocated.
|
|
*
|
|
* If realloc_if_needed is true, it is assumed that the buffer does *not* have enough padding,
|
|
* and it is copied into an enlarged temporary buffer before parsing.
|
|
*
|
|
* ### Parser Capacity
|
|
*
|
|
* If the parser's current capacity is less than len, it will allocate enough capacity
|
|
* to handle it (up to max_capacity).
|
|
*
|
|
* @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless
|
|
* realloc_if_needed is true.
|
|
* @param len The length of the JSON.
|
|
* @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding.
|
|
* @return The document, or an error:
|
|
* - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity,
|
|
* and memory allocation fails.
|
|
* - CAPACITY if the parser does not have enough capacity and len > max_capacity.
|
|
* - other json errors if parsing fails.
|
|
*/
|
|
really_inline doc_result parse(const char *buf, size_t len, bool realloc_if_needed = true) noexcept;
|
|
|
|
/**
|
|
* Parse a JSON document and return a temporary reference to it.
|
|
*
|
|
* document::parser parser;
|
|
* const document &doc = parser.parse(s);
|
|
*
|
|
* ### IMPORTANT: Document Lifetime
|
|
*
|
|
* The JSON document still lives in the parser: this is the most efficient way to parse JSON
|
|
* documents because it reuses the same buffers, but you *must* use the document before you
|
|
* destroy the parser or call parse() again.
|
|
*
|
|
* ### REQUIRED: Buffer Padding
|
|
*
|
|
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
|
* those bytes are initialized to, as long as they are allocated.
|
|
*
|
|
* If s.capacity() is less than SIMDJSON_PADDING, the string will be copied into an enlarged
|
|
* temporary buffer before parsing.
|
|
*
|
|
* ### Parser Capacity
|
|
*
|
|
* If the parser's current capacity is less than len, it will allocate enough capacity
|
|
* to handle it (up to max_capacity).
|
|
*
|
|
* @param s The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, or
|
|
* a new string will be created with the extra padding.
|
|
* @return The document, or an error:
|
|
* - MEMALLOC if the string does not have enough padding or the parser does not have
|
|
* enough capacity, and memory allocation fails.
|
|
* - CAPACITY if the parser does not have enough capacity and len > max_capacity.
|
|
* - other json errors if parsing fails.
|
|
*/
|
|
really_inline doc_result parse(const std::string &s) noexcept;
|
|
|
|
/**
|
|
* Parse a JSON document and return a temporary reference to it.
|
|
*
|
|
* document::parser parser;
|
|
* const document &doc = parser.parse(s);
|
|
*
|
|
* ### IMPORTANT: Document Lifetime
|
|
*
|
|
* The JSON document still lives in the parser: this is the most efficient way to parse JSON
|
|
* documents because it reuses the same buffers, but you *must* use the document before you
|
|
* destroy the parser or call parse() again.
|
|
*
|
|
* ### Parser Capacity
|
|
*
|
|
* If the parser's current capacity is less than the length of s, it will allocate enough capacity
|
|
* to handle it (up to max_capacity).
|
|
*
|
|
* @param s The JSON to parse.
|
|
* @return The document, or an error:
|
|
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails.
|
|
* - CAPACITY if the parser does not have enough capacity and len > max_capacity.
|
|
* - other json errors if parsing fails.
|
|
*/
|
|
really_inline doc_result parse(const padded_string &s) noexcept;
|
|
|
|
// We do not want to allow implicit conversion from C string to std::string.
|
|
really_inline doc_result parse(const char *buf) noexcept = delete;
|
|
|
|
/**
|
|
* Parse a buffer containing many JSON documents.
|
|
*
|
|
* document::parser parser;
|
|
* for (const document &doc : parser.parse_many(buf, len)) {
|
|
* cout << std::string(doc["title"]) << endl;
|
|
* }
|
|
*
|
|
* ### Format
|
|
*
|
|
* The buffer must contain a series of one or more JSON documents, concatenated into a single
|
|
* buffer, separated by whitespace. It effectively parses until it has a fully valid document,
|
|
* then starts parsing the next document at that point. (It does this with more parallelism and
|
|
* lookahead than you might think, though.)
|
|
*
|
|
* documents that consist of an object or array may omit the whitespace between them, concatenating
|
|
* with no separator. documents that consist of a single primitive (i.e. documents that are not
|
|
* arrays or objects) MUST be separated with whitespace.
|
|
*
|
|
* ### Error Handling
|
|
*
|
|
* All errors are returned during iteration: if there is a global error such as memory allocation,
|
|
* it will be yielded as the first result. Iteration always stops after the first error.
|
|
*
|
|
* As with all other simdjson methods, non-exception error handling is readily available through
|
|
* the same interface, requiring you to check the error before using the document:
|
|
*
|
|
* document::parser parser;
|
|
* for (auto [doc, error] : parser.parse_many(buf, len)) {
|
|
* if (error) { cerr << error << endl; exit(1); }
|
|
* cout << std::string(doc["title"]) << endl;
|
|
* }
|
|
*
|
|
* ### REQUIRED: Buffer Padding
|
|
*
|
|
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
|
* those bytes are initialized to, as long as they are allocated.
|
|
*
|
|
* ### Threads
|
|
*
|
|
* When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
|
|
* hood to do some lookahead.
|
|
*
|
|
* ### Parser Capacity
|
|
*
|
|
* If the parser's current capacity is less than batch_size, it will allocate enough capacity
|
|
* to handle it (up to max_capacity).
|
|
*
|
|
* @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes.
|
|
* @param len The length of the concatenated JSON.
|
|
* @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
|
|
* spot is cache-related: small enough to fit in cache, yet big enough to
|
|
* parse as many documents as possible in one tight loop.
|
|
* Defaults to 10MB, which has been a reasonable sweet spot in our tests.
|
|
* @return The stream. If there is an error, it will be returned during iteration. An empty input
|
|
* will yield 0 documents rather than an EMPTY error. Errors:
|
|
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails
|
|
* - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
|
|
* - other json errors if parsing fails.
|
|
*/
|
|
inline stream parse_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
|
|
|
|
/**
|
|
* Parse a buffer containing many JSON documents.
|
|
*
|
|
* document::parser parser;
|
|
* for (const document &doc : parser.parse_many(buf, len)) {
|
|
* cout << std::string(doc["title"]) << endl;
|
|
* }
|
|
*
|
|
* ### Format
|
|
*
|
|
* The buffer must contain a series of one or more JSON documents, concatenated into a single
|
|
* buffer, separated by whitespace. It effectively parses until it has a fully valid document,
|
|
* then starts parsing the next document at that point. (It does this with more parallelism and
|
|
* lookahead than you might think, though.)
|
|
*
|
|
* documents that consist of an object or array may omit the whitespace between them, concatenating
|
|
* with no separator. documents that consist of a single primitive (i.e. documents that are not
|
|
* arrays or objects) MUST be separated with whitespace.
|
|
*
|
|
* ### Error Handling
|
|
*
|
|
* All errors are returned during iteration: if there is a global error such as memory allocation,
|
|
* it will be yielded as the first result. Iteration always stops after the first error.
|
|
*
|
|
* As with all other simdjson methods, non-exception error handling is readily available through
|
|
* the same interface, requiring you to check the error before using the document:
|
|
*
|
|
* document::parser parser;
|
|
* for (auto [doc, error] : parser.parse_many(buf, len)) {
|
|
* if (error) { cerr << error << endl; exit(1); }
|
|
* cout << std::string(doc["title"]) << endl;
|
|
* }
|
|
*
|
|
* ### REQUIRED: Buffer Padding
|
|
*
|
|
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
|
* those bytes are initialized to, as long as they are allocated.
|
|
*
|
|
* ### Threads
|
|
*
|
|
* When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
|
|
* hood to do some lookahead.
|
|
*
|
|
* ### Parser Capacity
|
|
*
|
|
* If the parser's current capacity is less than batch_size, it will allocate enough capacity
|
|
* to handle it (up to max_capacity).
|
|
*
|
|
* @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes.
|
|
* @param len The length of the concatenated JSON.
|
|
* @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
|
|
* spot is cache-related: small enough to fit in cache, yet big enough to
|
|
* parse as many documents as possible in one tight loop.
|
|
* Defaults to 10MB, which has been a reasonable sweet spot in our tests.
|
|
* @return The stream. If there is an error, it will be returned during iteration. An empty input
|
|
* will yield 0 documents rather than an EMPTY error. Errors:
|
|
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails
|
|
* - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
|
|
* - other json errors if parsing fails
|
|
*/
|
|
inline stream parse_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
|
|
|
|
/**
|
|
* Parse a buffer containing many JSON documents.
|
|
*
|
|
* document::parser parser;
|
|
* for (const document &doc : parser.parse_many(s)) {
|
|
* cout << std::string(doc["title"]) << endl;
|
|
* }
|
|
*
|
|
* ### Format
|
|
*
|
|
* The buffer must contain a series of one or more JSON documents, concatenated into a single
|
|
* buffer, separated by whitespace. It effectively parses until it has a fully valid document,
|
|
* then starts parsing the next document at that point. (It does this with more parallelism and
|
|
* lookahead than you might think, though.)
|
|
*
|
|
* documents that consist of an object or array may omit the whitespace between them, concatenating
|
|
* with no separator. documents that consist of a single primitive (i.e. documents that are not
|
|
* arrays or objects) MUST be separated with whitespace.
|
|
*
|
|
* ### Error Handling
|
|
*
|
|
* All errors are returned during iteration: if there is a global error such as memory allocation,
|
|
* it will be yielded as the first result. Iteration always stops after the first error.
|
|
*
|
|
* As with all other simdjson methods, non-exception error handling is readily available through
|
|
* the same interface, requiring you to check the error before using the document:
|
|
*
|
|
* document::parser parser;
|
|
* for (auto [doc, error] : parser.parse_many(s)) {
|
|
* if (error) { cerr << error << endl; exit(1); }
|
|
* cout << std::string(doc["title"]) << endl;
|
|
* }
|
|
*
|
|
* ### REQUIRED: Buffer Padding
|
|
*
|
|
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
|
* those bytes are initialized to, as long as they are allocated.
|
|
*
|
|
* ### Threads
|
|
*
|
|
* When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
|
|
* hood to do some lookahead.
|
|
*
|
|
* ### Parser Capacity
|
|
*
|
|
* If the parser's current capacity is less than batch_size, it will allocate enough capacity
|
|
* to handle it (up to max_capacity).
|
|
*
|
|
* @param s The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes.
|
|
* @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
|
|
* spot is cache-related: small enough to fit in cache, yet big enough to
|
|
* parse as many documents as possible in one tight loop.
|
|
* Defaults to 10MB, which has been a reasonable sweet spot in our tests.
|
|
* @return The stream. If there is an error, it will be returned during iteration. An empty input
|
|
* will yield 0 documents rather than an EMPTY error. Errors:
|
|
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails
|
|
* - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
|
|
* - other json errors if parsing fails
|
|
*/
|
|
inline stream parse_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
|
|
|
|
/**
|
|
* Parse a buffer containing many JSON documents.
|
|
*
|
|
* document::parser parser;
|
|
* for (const document &doc : parser.parse_many(s)) {
|
|
* cout << std::string(doc["title"]) << endl;
|
|
* }
|
|
*
|
|
* ### Format
|
|
*
|
|
* The buffer must contain a series of one or more JSON documents, concatenated into a single
|
|
* buffer, separated by whitespace. It effectively parses until it has a fully valid document,
|
|
* then starts parsing the next document at that point. (It does this with more parallelism and
|
|
* lookahead than you might think, though.)
|
|
*
|
|
* documents that consist of an object or array may omit the whitespace between them, concatenating
|
|
* with no separator. documents that consist of a single primitive (i.e. documents that are not
|
|
* arrays or objects) MUST be separated with whitespace.
|
|
*
|
|
* ### Error Handling
|
|
*
|
|
* All errors are returned during iteration: if there is a global error such as memory allocation,
|
|
* it will be yielded as the first result. Iteration always stops after the first error.
|
|
*
|
|
* As with all other simdjson methods, non-exception error handling is readily available through
|
|
* the same interface, requiring you to check the error before using the document:
|
|
*
|
|
* document::parser parser;
|
|
* for (auto [doc, error] : parser.parse_many(s)) {
|
|
* if (error) { cerr << error << endl; exit(1); }
|
|
* cout << std::string(doc["title"]) << endl;
|
|
* }
|
|
*
|
|
* ### Threads
|
|
*
|
|
* When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
|
|
* hood to do some lookahead.
|
|
*
|
|
* ### Parser Capacity
|
|
*
|
|
* If the parser's current capacity is less than batch_size, it will allocate enough capacity
|
|
* to handle it (up to max_capacity).
|
|
*
|
|
* @param s The concatenated JSON to parse.
|
|
* @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
|
|
* spot is cache-related: small enough to fit in cache, yet big enough to
|
|
* parse as many documents as possible in one tight loop.
|
|
* Defaults to 10MB, which has been a reasonable sweet spot in our tests.
|
|
* @return The stream. If there is an error, it will be returned during iteration. An empty input
|
|
* will yield 0 documents rather than an EMPTY error. Errors:
|
|
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails
|
|
* - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
|
|
* - other json errors if parsing fails
|
|
*/
|
|
inline stream parse_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
|
|
|
|
// We do not want to allow implicit conversion from C string to std::string.
|
|
really_inline doc_result parse_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete;
|
|
|
|
/**
|
|
* The largest document this parser can automatically support.
|
|
*
|
|
* The parser may reallocate internal buffers as needed up to this amount.
|
|
*
|
|
* @return Maximum capacity, in bytes.
|
|
*/
|
|
really_inline size_t max_capacity() const noexcept;
|
|
|
|
/**
|
|
* The largest document this parser can support without reallocating.
|
|
*
|
|
* @return Current capacity, in bytes.
|
|
*/
|
|
really_inline size_t capacity() const noexcept;
|
|
|
|
/**
|
|
* The maximum level of nested object and arrays supported by this parser.
|
|
*
|
|
* @return Maximum depth, as a number of nested objects and arrays.
|
|
*/
|
|
really_inline size_t max_depth() const noexcept;
|
|
|
|
/**
|
|
* Set max_capacity. This is the largest document this parser can automatically support.
|
|
*
|
|
* The parser may reallocate internal buffers as needed up to this amount.
|
|
*
|
|
* This call will not allocate or deallocate, even if capacity is currently above max_capacity.
|
|
*
|
|
* @param max_capacity The new maximum capacity, in bytes.
|
|
*/
|
|
really_inline void set_max_capacity(size_t max_capacity) noexcept;
|
|
|
|
/**
|
|
* Set capacity. This is the largest document this parser can support without reallocating.
|
|
*
|
|
* This will allocate or deallocate as necessary.
|
|
*
|
|
* @param capacity The new capacity, in bytes.
|
|
*
|
|
* @return MEMALLOC if unsuccessful, SUCCESS otherwise.
|
|
*/
|
|
WARN_UNUSED inline error_code set_capacity(size_t capacity) noexcept;
|
|
|
|
/**
|
|
* Set the maximum level of nested object and arrays supported by this parser.
|
|
*
|
|
* This will allocate or deallocate as necessary.
|
|
*
|
|
* @param max_depth The new maximum depth, as a number of nested objects and arrays.
|
|
*
|
|
* @return MEMALLOC if unsuccessful, SUCCESS otherwise.
|
|
*/
|
|
WARN_UNUSED inline error_code set_max_depth(size_t max_depth) noexcept;
|
|
|
|
/**
|
|
* Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
|
|
* and `max_depth` depth.
|
|
*
|
|
* Equivalent to calling set_capacity() and set_max_depth().
|
|
*
|
|
* @param capacity The new capacity.
|
|
* @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH.
|
|
* @return true if successful, false if allocation failed.
|
|
*/
|
|
WARN_UNUSED inline bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept;
|
|
|
|
// type aliases for backcompat
|
|
using Iterator = document::iterator;
|
|
using InvalidJSON = simdjson_error;
|
|
|
|
// Next location to write to in the tape
|
|
uint32_t current_loc{0};
|
|
|
|
// structural indices passed from stage 1 to stage 2
|
|
uint32_t n_structural_indexes{0};
|
|
std::unique_ptr<uint32_t[]> structural_indexes;
|
|
|
|
// location and return address of each open { or [
|
|
std::unique_ptr<uint32_t[]> containing_scope_offset;
|
|
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
|
std::unique_ptr<void*[]> ret_address;
|
|
#else
|
|
std::unique_ptr<char[]> ret_address;
|
|
#endif
|
|
|
|
// Next place to write a string
|
|
uint8_t *current_string_buf_loc;
|
|
|
|
bool valid{false};
|
|
error_code error{UNINITIALIZED};
|
|
|
|
// Document we're writing to
|
|
document doc;
|
|
|
|
//
|
|
// TODO these are deprecated; use the results of parse instead.
|
|
//
|
|
|
|
// returns true if the document parsed was valid
|
|
inline bool is_valid() const noexcept;
|
|
|
|
// return an error code corresponding to the last parsing attempt, see
|
|
// simdjson.h. Will return UNINITIALIZED if no parsing was attempted.
|
|
inline int get_error_code() const noexcept;
|
|
|
|
// return the string equivalent of "get_error_code"
|
|
inline std::string get_error_message() const noexcept;
|
|
|
|
// print the json to std::ostream (should be valid)
|
|
// return false if the tape is likely wrong (e.g., you did not parse a valid
|
|
// JSON).
|
|
inline bool print_json(std::ostream &os) const noexcept;
|
|
inline bool dump_raw_tape(std::ostream &os) const noexcept;
|
|
|
|
//
|
|
// Parser callbacks: these are internal!
|
|
//
|
|
// TODO find a way to do this without exposing the interface or crippling performance
|
|
//
|
|
|
|
// this should be called when parsing (right before writing the tapes)
|
|
inline void init_stage2() noexcept;
|
|
really_inline error_code on_error(error_code new_error_code) noexcept;
|
|
really_inline error_code on_success(error_code success_code) noexcept;
|
|
really_inline bool on_start_document(uint32_t depth) noexcept;
|
|
really_inline bool on_start_object(uint32_t depth) noexcept;
|
|
really_inline bool on_start_array(uint32_t depth) noexcept;
|
|
// TODO we're not checking this bool
|
|
really_inline bool on_end_document(uint32_t depth) noexcept;
|
|
really_inline bool on_end_object(uint32_t depth) noexcept;
|
|
really_inline bool on_end_array(uint32_t depth) noexcept;
|
|
really_inline bool on_true_atom() noexcept;
|
|
really_inline bool on_false_atom() noexcept;
|
|
really_inline bool on_null_atom() noexcept;
|
|
really_inline uint8_t *on_start_string() noexcept;
|
|
really_inline bool on_end_string(uint8_t *dst) noexcept;
|
|
really_inline bool on_number_s64(int64_t value) noexcept;
|
|
really_inline bool on_number_u64(uint64_t value) noexcept;
|
|
really_inline bool on_number_double(double value) noexcept;
|
|
|
|
private:
|
|
//
|
|
// The maximum document length this parser supports.
|
|
//
|
|
// Buffers are large enough to handle any document up to this length.
|
|
//
|
|
size_t _capacity{0};
|
|
|
|
//
|
|
// The maximum document length this parser will automatically support.
|
|
//
|
|
// The parser will not be automatically allocated above this amount.
|
|
//
|
|
size_t _max_capacity;
|
|
|
|
//
|
|
// The maximum depth (number of nested objects and arrays) supported by this parser.
|
|
//
|
|
// Defaults to DEFAULT_MAX_DEPTH.
|
|
//
|
|
size_t _max_depth;
|
|
|
|
// all nodes are stored on the doc.tape using a 64-bit word.
|
|
//
|
|
// strings, double and ints are stored as
|
|
// a 64-bit word with a pointer to the actual value
|
|
//
|
|
//
|
|
//
|
|
// for objects or arrays, store [ or { at the beginning and } and ] at the
|
|
// end. For the openings ([ or {), we annotate them with a reference to the
|
|
// location on the doc.tape of the end, and for the closings (} and ]), we
|
|
// annotate them with a reference to the location of the opening
|
|
//
|
|
//
|
|
|
|
inline void write_tape(uint64_t val, internal::tape_type t) noexcept;
|
|
inline void annotate_previous_loc(uint32_t saved_loc, uint64_t val) noexcept;
|
|
|
|
// Ensure we have enough capacity to handle at least desired_capacity bytes,
|
|
// and auto-allocate if not.
|
|
inline error_code ensure_capacity(size_t desired_capacity) noexcept;
|
|
|
|
#if SIMDJSON_EXCEPTIONS
|
|
// Used internally to get the document
|
|
inline const document &get_document() const noexcept(false);
|
|
#endif // SIMDJSON_EXCEPTIONS
|
|
|
|
template<size_t max_depth> friend class document_iterator;
|
|
friend class document::stream;
|
|
}; // class parser
|
|
|
|
/**
 * Formatter that prints a JSON element or document in its most compact valid form.
 *
 *   document doc = document::parse("   [ 1 , 2 , 3 ] "_pad);
 *   cout << minify(doc) << endl; // prints [1,2,3]
 *
 * The formatter only keeps a reference to the wrapped value, so the value must stay alive for
 * as long as the formatter is used.
 */
template<typename T>
class minify {
public:
  /**
   * Wrap a value for minified printing.
   *
   * @param _value The document or element to minify. Held by reference, not copied.
   */
  inline minify(const T &_value) noexcept : value(_value) {}

  /**
   * Render the minified JSON into a std::string.
   *
   * The text is produced by streaming this formatter into a temporary std::stringstream.
   */
  inline operator std::string() const noexcept {
    std::stringstream buffer;
    buffer << *this;
    return buffer.str();
  }

  /**
   * Write the minified JSON to an output stream.
   *
   * @param out The stream to write to.
   */
  inline std::ostream& print(std::ostream& out);

private:
  // Non-owning reference to the value being printed.
  const T &value;
};
|
|
|
|
/**
|
|
* Minify JSON to an output stream.
|
|
*
|
|
* @param out The output stream.
|
|
* @param formatter The minifier.
|
|
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
|
|
*/
|
|
template<typename T>
|
|
inline std::ostream& operator<<(std::ostream& out, minify<T> formatter) { return formatter.print(out); }
|
|
|
|
/**
|
|
* Print JSON to an output stream.
|
|
*
|
|
* By default, the document will be printed minified.
|
|
*
|
|
* @param out The output stream.
|
|
* @param value The document to print.
|
|
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
|
|
*/
|
|
inline std::ostream& operator<<(std::ostream& out, const document &value) { return out << minify(value); }
|
|
/**
|
|
* Print JSON to an output stream.
|
|
*
|
|
* By default, the value will be printed minified.
|
|
*
|
|
* @param out The output stream.
|
|
* @param value The value to print.
|
|
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
|
|
*/
|
|
inline std::ostream& operator<<(std::ostream& out, const document::element &value) { return out << minify(value); };
|
|
/**
|
|
* Print JSON to an output stream.
|
|
*
|
|
* By default, the value will be printed minified.
|
|
*
|
|
* @param out The output stream.
|
|
* @param value The value to print.
|
|
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
|
|
*/
|
|
inline std::ostream& operator<<(std::ostream& out, const document::array &value) { return out << minify(value); }
|
|
/**
|
|
* Print JSON to an output stream.
|
|
*
|
|
* By default, the value will be printed minified.
|
|
*
|
|
* @param out The output stream.
|
|
* @param value The value to print.
|
|
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
|
|
*/
|
|
inline std::ostream& operator<<(std::ostream& out, const document::object &value) { return out << minify(value); }
|
|
/**
|
|
* Print JSON to an output stream.
|
|
*
|
|
* By default, the value will be printed minified.
|
|
*
|
|
* @param out The output stream.
|
|
* @param value The value to print.
|
|
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
|
|
*/
|
|
inline std::ostream& operator<<(std::ostream& out, const document::key_value_pair &value) { return out << minify(value); }
|
|
|
|
#if SIMDJSON_EXCEPTIONS
|
|
|
|
/**
|
|
* Print JSON to an output stream.
|
|
*
|
|
* By default, the value will be printed minified.
|
|
*
|
|
* @param out The output stream.
|
|
* @param value The value to print.
|
|
* @throw simdjson_error if the result being printed has an error. If there is an error with the
|
|
* underlying output stream, that error will be propagated (simdjson_error will not be
|
|
* thrown).
|
|
*/
|
|
inline std::ostream& operator<<(std::ostream& out, const document::doc_move_result &value) noexcept(false) { return out << minify(value); }
|
|
/**
|
|
* Print JSON to an output stream.
|
|
*
|
|
* By default, the value will be printed minified.
|
|
*
|
|
* @param out The output stream.
|
|
* @param value The value to print.
|
|
* @throw simdjson_error if the result being printed has an error. If there is an error with the
|
|
* underlying output stream, that error will be propagated (simdjson_error will not be
|
|
* thrown).
|
|
*/
|
|
inline std::ostream& operator<<(std::ostream& out, const document::doc_result &value) noexcept(false) { return out << minify(value); }
|
|
/**
|
|
* Print JSON to an output stream.
|
|
*
|
|
* By default, the value will be printed minified.
|
|
*
|
|
* @param out The output stream.
|
|
* @param value The value to print.
|
|
* @throw simdjson_error if the result being printed has an error. If there is an error with the
|
|
* underlying output stream, that error will be propagated (simdjson_error will not be
|
|
* thrown).
|
|
*/
|
|
inline std::ostream& operator<<(std::ostream& out, const document::element_result &value) noexcept(false) { return out << minify(value); }
|
|
/**
|
|
* Print JSON to an output stream.
|
|
*
|
|
* By default, the value will be printed minified.
|
|
*
|
|
* @param out The output stream.
|
|
* @param value The value to print.
|
|
* @throw simdjson_error if the result being printed has an error. If there is an error with the
|
|
* underlying output stream, that error will be propagated (simdjson_error will not be
|
|
* thrown).
|
|
*/
|
|
inline std::ostream& operator<<(std::ostream& out, const document::array_result &value) noexcept(false) { return out << minify(value); }
|
|
/**
|
|
* Print JSON to an output stream.
|
|
*
|
|
* By default, the value will be printed minified.
|
|
*
|
|
* @param out The output stream.
|
|
* @param value The value to print.
|
|
* @throw simdjson_error if the result being printed has an error. If there is an error with the
|
|
* underlying output stream, that error will be propagated (simdjson_error will not be
|
|
* thrown).
|
|
*/
|
|
inline std::ostream& operator<<(std::ostream& out, const document::object_result &value) noexcept(false) { return out << minify(value); }
|
|
|
|
#endif
|
|
|
|
} // namespace simdjson
|
|
|
|
#endif // SIMDJSON_DOCUMENT_H
|
|
/* end file include/simdjson/simdjson.h */
|
|
|
|
namespace simdjson {
|
|
|
|
/**
|
|
* An implementation of simdjson for a particular CPU architecture.
|
|
*
|
|
* Also used to maintain the currently active implementation. The active implementation is
|
|
* automatically initialized on first use to the most advanced implementation supported by the host.
|
|
*/
|
|
class implementation {
|
|
public:
|
|
/**
|
|
* The name of this implementation.
|
|
*
|
|
* const implementation *impl = simdjson::active_implementation;
|
|
* cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl;
|
|
*
|
|
* @return the name of the implementation, e.g. "haswell", "westmere", "arm64"
|
|
*/
|
|
virtual const std::string &name() const { return _name; }
|
|
|
|
/**
|
|
* The description of this implementation.
|
|
*
|
|
* const implementation *impl = simdjson::active_implementation;
|
|
* cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl;
|
|
*
|
|
* @return the name of the implementation, e.g. "haswell", "westmere", "arm64"
|
|
*/
|
|
virtual const std::string &description() const { return _description; }
|
|
|
|
/**
|
|
* The instruction sets this implementation is compiled against.
|
|
*
|
|
* @return a mask of all required `instruction_set` values
|
|
*/
|
|
virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; };
|
|
|
|
/**
|
|
* Run a full document parse (ensure_capacity, stage1 and stage2).
|
|
*
|
|
* Overridden by each implementation.
|
|
*
|
|
* @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
|
|
* @param len the length of the json document.
|
|
* @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity.
|
|
* @return the error code, or SUCCESS if there was no error.
|
|
*/
|
|
WARN_UNUSED virtual error_code parse(const uint8_t *buf, size_t len, document::parser &parser) const noexcept = 0;
|
|
|
|
/**
|
|
* Stage 1 of the document parser.
|
|
*
|
|
* Overridden by each implementation.
|
|
*
|
|
* @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
|
|
* @param len the length of the json document.
|
|
* @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity.
|
|
* @param streaming whether this is being called by document::parser::parse_many.
|
|
* @return the error code, or SUCCESS if there was no error.
|
|
*/
|
|
WARN_UNUSED virtual error_code stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept = 0;
|
|
|
|
/**
|
|
* Stage 2 of the document parser.
|
|
*
|
|
* Overridden by each implementation.
|
|
*
|
|
* @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
|
|
* @param len the length of the json document.
|
|
* @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity.
|
|
* @return the error code, or SUCCESS if there was no error.
|
|
*/
|
|
WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, document::parser &parser) const noexcept = 0;
|
|
|
|
/**
|
|
* Stage 2 of the document parser for document::parser::parse_many.
|
|
*
|
|
* Overridden by each implementation.
|
|
*
|
|
* @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
|
|
* @param len the length of the json document.
|
|
* @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity.
|
|
* @param next_json the next structural index. Start this at 0 the first time, and it will be updated to the next value to pass each time.
|
|
* @return the error code, SUCCESS if there was no error, or SUCCESS_AND_HAS_MORE if there was no error and stage2 can be called again.
|
|
*/
|
|
WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json) const noexcept = 0;
|
|
|
|
protected:
|
|
really_inline implementation(
|
|
const std::string &name,
|
|
const std::string &description,
|
|
uint32_t required_instruction_sets
|
|
) :
|
|
_name(name),
|
|
_description(description),
|
|
_required_instruction_sets(required_instruction_sets)
|
|
{
|
|
}
|
|
|
|
private:
|
|
/**
|
|
* The name of this implementation.
|
|
*/
|
|
const std::string _name;
|
|
|
|
/**
|
|
* The description of this implementation.
|
|
*/
|
|
const std::string _description;
|
|
|
|
/**
|
|
* Instruction sets required for this implementation.
|
|
*/
|
|
const uint32_t _required_instruction_sets;
|
|
};
|
|
|
|
namespace internal {
|
|
|
|
/**
|
|
* The list of available implementations compiled into simdjson.
|
|
*/
|
|
class available_implementation_list {
|
|
public:
|
|
/** Get the list of available implementations compiled into simdjson */
|
|
really_inline available_implementation_list() {}
|
|
/** Number of implementations */
|
|
size_t size() const noexcept;
|
|
/** STL const begin() iterator */
|
|
const implementation * const *begin() const noexcept;
|
|
/** STL const end() iterator */
|
|
const implementation * const *end() const noexcept;
|
|
|
|
/**
|
|
* Get the implementation with the given name.
|
|
*
|
|
* Case sensitive.
|
|
*
|
|
* const implementation *impl = simdjson::available_implementations["westmere"];
|
|
* if (!impl) { exit(1); }
|
|
* simdjson::active_implementation = impl;
|
|
*
|
|
* @param name the implementation to find, e.g. "westmere", "haswell", "arm64"
|
|
* @return the implementation, or nullptr if the parse failed.
|
|
*/
|
|
const implementation * operator[](const std::string& name) const noexcept {
|
|
for (const implementation * impl : *this) {
|
|
if (impl->name() == name) { return impl; }
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
/**
|
|
* Detect the most advanced implementation supported by the current host.
|
|
*
|
|
* This is used to initialize the implementation on startup.
|
|
*
|
|
* const implementation *impl = simdjson::available_implementation::detect_best_supported();
|
|
* simdjson::active_implementation = impl;
|
|
*
|
|
* @return the most advanced supported implementation for the current host, or an
|
|
* implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported
|
|
* implementation. Will never return nullptr.
|
|
*/
|
|
const implementation *detect_best_supported() const noexcept;
|
|
};
|
|
|
|
// Detects best supported implementation on first use, and sets it
|
|
class detect_best_supported_implementation_on_first_use final : public implementation {
|
|
public:
|
|
const std::string& name() const noexcept final { return set_best()->name(); }
|
|
const std::string& description() const noexcept final { return set_best()->description(); }
|
|
uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); }
|
|
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final {
|
|
return set_best()->parse(buf, len, parser);
|
|
}
|
|
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept final {
|
|
return set_best()->stage1(buf, len, parser, streaming);
|
|
}
|
|
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final {
|
|
return set_best()->stage2(buf, len, parser);
|
|
}
|
|
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json) const noexcept final {
|
|
return set_best()->stage2(buf, len, parser, next_json);
|
|
}
|
|
|
|
really_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {}
|
|
private:
|
|
const implementation *set_best() const noexcept;
|
|
};
|
|
|
|
// The one shared detector instance; active_implementation is initialized to point at it.
inline const detect_best_supported_implementation_on_first_use detect_best_supported_implementation_on_first_use_singleton;
|
|
|
|
/**
 * Thin wrapper around std::atomic<T*> that can be used like a plain pointer:
 * it converts to T*, dereferences, and accepts assignment from T*.
 * Every read goes through load() and every write through store().
 */
template<typename T>
class atomic_ptr {
public:
  /** Implicit on purpose, so `atomic_ptr<T> p = raw_pointer;` works. */
  atomic_ptr(T *_ptr) : ptr{_ptr} {}

  // Read-only access.
  operator const T*() const { return ptr.load(); }
  const T& operator*() const { return *ptr.load(); }
  const T* operator->() const { return ptr.load(); }

  // Mutable access.
  operator T*() { return ptr.load(); }
  T& operator*() { return *ptr.load(); }
  T* operator->() { return ptr.load(); }

  /** Atomically replaces the stored pointer and returns the new value. */
  T* operator=(T *_ptr) {
    ptr.store(_ptr);
    return _ptr;
  }

private:
  std::atomic<T*> ptr;
};
|
|
|
|
} // namespace [simdjson::]internal
|
|
|
|
/**
|
|
* The list of available implementations compiled into simdjson.
|
|
*/
|
|
// Usable in a range-for (begin()/end()) and searchable by name through operator[].
inline const internal::available_implementation_list available_implementations;
|
|
|
|
/**
|
|
* The active implementation.
|
|
*
|
|
* Automatically initialized on first use to the most advanced implementation supported by this hardware.
|
|
*/
|
|
// Starts out pointing at the detector singleton, whose methods forward to whatever set_best() returns.
inline internal::atomic_ptr<const implementation> active_implementation = &internal::detect_best_supported_implementation_on_first_use_singleton;
|
|
|
|
} // namespace simdjson
|
|
|
|
#endif // SIMDJSON_IMPLEMENTATION_H
|
|
/* end file include/simdjson/implementation.h */
|
|
/* begin file include/simdjson/document_stream.h */
|
|
#ifndef SIMDJSON_DOCUMENT_STREAM_H
|
|
#define SIMDJSON_DOCUMENT_STREAM_H
|
|
|
|
#include <thread>
|
|
|
|
namespace simdjson {
|
|
|
|
// Forward declaration (string_container defaults to padded_string); document::stream befriends it below.
template <class string_container = padded_string> class JsonStream;
|
|
|
|
/**
|
|
* A forward-only stream of documents.
|
|
*
|
|
* Produced by document::parser::parse_many.
|
|
*
|
|
*/
|
|
class document::stream {
public:
  really_inline ~stream() noexcept;

  /**
   * Forward-only iterator over the documents of a stream.
   */
  class iterator {
  public:
    /** Get the current document (or error). */
    really_inline doc_result operator*() noexcept;

    /** Advance to the next document. */
    inline iterator& operator++() noexcept;

    /**
     * Check if we're at the end yet.
     * @param other the end iterator to compare to.
     */
    really_inline bool operator!=(const iterator &other) const noexcept;

  private:
    /** Only stream (a friend) can create iterators. */
    iterator(stream& stream, bool finished) noexcept;

    /** The stream parser we're iterating through. */
    stream& _stream;
    /** Whether we're finished or not. */
    bool finished;

    friend class stream;
  };

  /** Start iterating the documents in the stream. */
  really_inline iterator begin() noexcept;

  /** The end of the stream, for iterator comparison purposes. */
  really_inline iterator end() noexcept;

private:
  // Copying is disallowed.
  stream &operator=(const document::stream &) = delete;
  stream(document::stream &other) = delete;

  really_inline stream(document::parser &parser, const uint8_t *buf, size_t len, size_t batch_size, error_code error = SUCCESS) noexcept;

  /**
   * Parse the next document found in the buffer previously given to stream.
   *
   * The content should be a valid JSON document encoded as UTF-8. If there is a
   * UTF-8 BOM, the caller is responsible for omitting it; UTF-8 BOMs are
   * discouraged.
   *
   * You do NOT need to pre-allocate a parser. This function takes care of
   * pre-allocating a capacity defined by the batch_size given when the stream
   * object was created.
   *
   * Return values:
   *  - simdjson::SUCCESS_AND_HAS_MORE (an integer = 1): success, and the buffer
   *    still contains more data, so this function can be called again to get
   *    the next JSON document after this one.
   *  - simdjson::SUCCESS (an integer = 0): success, and the buffer has been
   *    parsed to the end; every document it contained parsed without error.
   *  - any other error code (simdjson::CAPACITY, simdjson::MEMALLOC,
   *    simdjson::DEPTH_ERROR and so forth): failure. The
   *    simdjson::error_message function converts these codes into a string.
   *
   * You can also check validity by calling parser.is_valid(). The same parser
   * can and should be reused for the other documents in the buffer.
   */
  inline error_code json_parse() noexcept;

  /**
   * Returns the location (index) of where the next document should be in the
   * buffer. Can be used for debugging: it tells the user the position of the
   * end of the last valid JSON document parsed.
   */
  size_t get_current_buffer_loc() const { return current_buffer_loc; }

  /**
   * Returns the total number of complete documents parsed by the stream,
   * in the current buffer, at the given time.
   */
  size_t get_n_parsed_docs() const { return n_parsed_docs; }

  /**
   * Returns the total amount of data (in bytes) parsed by the stream,
   * in the current buffer, at the given time.
   */
  size_t get_n_bytes_parsed() const { return n_bytes_parsed; }

  /** Pointer to the not-yet-consumed part of the input (_buf offset by buf_start). */
  const uint8_t *buf() const { return _buf + buf_start; }

  /** Move the start of the unconsumed input forward by `offset` bytes. */
  void advance(size_t offset) { buf_start += offset; }

  /** Number of bytes between buf_start and the end of the input (_len). */
  size_t remaining() const { return _len - buf_start; }

  document::parser &parser;
  const uint8_t *_buf;
  const size_t _len;
  size_t _batch_size; // this is actually variable!
  size_t buf_start{0};
  size_t next_json{0};
  bool load_next_batch{true};
  size_t current_buffer_loc{0};
#ifdef SIMDJSON_THREADS_ENABLED
  size_t last_json_buffer_loc{0};
#endif
  size_t n_parsed_docs{0};
  size_t n_bytes_parsed{0};
  error_code error{SUCCESS_AND_HAS_MORE};
#ifdef SIMDJSON_THREADS_ENABLED
  error_code stage1_is_ok_thread{SUCCESS};
  std::thread stage_1_thread;
  document::parser parser_thread;
#endif
  template <class string_container> friend class JsonStream;
  friend class document::parser;
}; // class document::stream
|
|
|
|
} // end of namespace simdjson
|
|
#endif // SIMDJSON_DOCUMENT_STREAM_H
|
|
/* end file include/simdjson/document_stream.h */
|
|
/* begin file include/simdjson/jsonminifier.h */
|
|
#ifndef SIMDJSON_JSONMINIFIER_H
|
|
#define SIMDJSON_JSONMINIFIER_H
|
|
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <string_view>
|
|
|
|
namespace simdjson {
|
|
|
|
// Take input from buf and remove useless whitespace, write it to out; buf and
|
|
// out can be the same pointer. Result is null terminated,
|
|
// return the string length (minus the null termination).
|
|
// The accelerated version of this function only runs on AVX2 hardware.
|
|
size_t json_minify(const uint8_t *buf, size_t len, uint8_t *out);
|
|
|
|
static inline size_t json_minify(const char *buf, size_t len, char *out) {
|
|
return json_minify(reinterpret_cast<const uint8_t *>(buf), len,
|
|
reinterpret_cast<uint8_t *>(out));
|
|
}
|
|
|
|
static inline size_t json_minify(const std::string_view &p, char *out) {
|
|
return json_minify(p.data(), p.size(), out);
|
|
}
|
|
|
|
static inline size_t json_minify(const padded_string &p, char *out) {
|
|
return json_minify(p.data(), p.size(), out);
|
|
}
|
|
|
|
} // namespace simdjson
|
|
|
|
#endif // SIMDJSON_JSONMINIFIER_H
|
|
/* end file include/simdjson/jsonminifier.h */
|
|
|
|
// Deprecated API
|
|
/* begin file include/simdjson/parsedjsoniterator.h */
|
|
// TODO Remove this -- deprecated API and files
|
|
|
|
#ifndef SIMDJSON_PARSEDJSONITERATOR_H
|
|
#define SIMDJSON_PARSEDJSONITERATOR_H
|
|
|
|
/* begin file include/simdjson/document_iterator.h */
|
|
#ifndef SIMDJSON_DOCUMENT_ITERATOR_H
|
|
#define SIMDJSON_DOCUMENT_ITERATOR_H
|
|
|
|
#include <cstring>
|
|
#include <string>
|
|
#include <iostream>
|
|
#include <iterator>
|
|
#include <limits>
|
|
#include <stdexcept>
|
|
|
|
/* begin file include/simdjson/internal/jsonformatutils.h */
|
|
#ifndef SIMDJSON_INTERNAL_JSONFORMATUTILS_H
|
|
#define SIMDJSON_INTERNAL_JSONFORMATUTILS_H
|
|
|
|
#include <iomanip>
|
|
#include <iostream>
|
|
#include <sstream>
|
|
|
|
namespace simdjson::internal {
|
|
|
|
class escape_json_string;

inline std::ostream& operator<<(std::ostream& out, const escape_json_string &str);

/**
 * Wraps a string view so that streaming it (or converting it to std::string)
 * produces the JSON-escaped form of the text.
 *
 * The wrapper only stores the view; the viewed characters must outlive it.
 */
class escape_json_string {
public:
  escape_json_string(std::string_view _str) noexcept : str{_str} {}
  /** Build the escaped text as a std::string by streaming into a stringstream. */
  operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); }
private:
  std::string_view str;
  friend std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped);
};

/**
 * Write the JSON-escaped form of the wrapped string to `out`.
 *
 * Backspace, form feed, newline, carriage return, tab, double quote and
 * backslash get their two-character escapes; any other byte <= 0x1F is written
 * as \u00XX (lowercase hex); everything else is copied through unchanged.
 *
 * The hex digits are emitted directly rather than through std::hex /
 * std::setw / std::setfill, so the stream's formatting state (in particular
 * its fill character, which restoring flags() does not cover) is left exactly
 * as the caller set it.
 *
 * @param out       stream to write to
 * @param unescaped wrapper around the raw text
 * @return `out`, to allow chaining
 */
inline std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped) {
  static constexpr char hex_digits[] = "0123456789abcdef";
  for (const char c : unescaped.str) {
    switch (c) {
      case '\b':
        out << "\\b";
        break;
      case '\f':
        out << "\\f";
        break;
      case '\n':
        out << "\\n";
        break;
      case '\r':
        out << "\\r";
        break;
      case '\"':
        out << "\\\"";
        break;
      case '\t':
        out << "\\t";
        break;
      case '\\':
        out << "\\\\";
        break;
      default: {
        // Compare as unsigned so bytes >= 0x80 are never mistaken for control characters.
        const unsigned char byte = static_cast<unsigned char>(c);
        if (byte <= 0x1F) {
          // byte fits in two hex digits, so the code point is always 00XX.
          const char digits[5] = {'0', '0', hex_digits[byte >> 4], hex_digits[byte & 0x0F], '\0'};
          out << "\\u" << digits;
        } else {
          out << c;
        }
        break;
      }
    }
  }
  return out;
}
|
|
|
|
} // namespace simdjson::internal
|
|
|
|
#endif // SIMDJSON_INTERNAL_JSONFORMATUTILS_H
|
|
/* end file include/simdjson/internal/jsonformatutils.h */
|
|
|
|
namespace simdjson {
|
|
|
|
template <size_t max_depth> class document_iterator {
|
|
public:
|
|
#if SIMDJSON_EXCEPTIONS
|
|
document_iterator(const document::parser &parser) noexcept(false);
|
|
#endif
|
|
document_iterator(const document &doc) noexcept;
|
|
document_iterator(const document_iterator &o) noexcept;
|
|
document_iterator &operator=(const document_iterator &o) noexcept;
|
|
|
|
inline bool is_ok() const;
|
|
|
|
// useful for debugging purposes
|
|
inline size_t get_tape_location() const;
|
|
|
|
// useful for debugging purposes
|
|
inline size_t get_tape_length() const;
|
|
|
|
// returns the current depth (start at 1 with 0 reserved for the fictitious
|
|
// root node)
|
|
inline size_t get_depth() const;
|
|
|
|
// A scope is a series of nodes at the same depth, typically it is either an
|
|
// object ({) or an array ([). The root node has type 'r'.
|
|
inline uint8_t get_scope_type() const;
|
|
|
|
// move forward in document order
|
|
inline bool move_forward();
|
|
|
|
// retrieve the character code of what we're looking at:
|
|
// [{"slutfn are the possibilities
|
|
inline uint8_t get_type() const {
|
|
return current_type; // short functions should be inlined!
|
|
}
|
|
|
|
// get the int64_t value at this node; valid only if get_type is "l"
|
|
inline int64_t get_integer() const {
|
|
if (location + 1 >= tape_length) {
|
|
return 0; // default value in case of error
|
|
}
|
|
return static_cast<int64_t>(doc.tape[location + 1]);
|
|
}
|
|
|
|
// get the value as uint64; valid only if if get_type is "u"
|
|
inline uint64_t get_unsigned_integer() const {
|
|
if (location + 1 >= tape_length) {
|
|
return 0; // default value in case of error
|
|
}
|
|
return doc.tape[location + 1];
|
|
}
|
|
|
|
// get the string value at this node (NULL ended); valid only if get_type is "
|
|
// note that tabs, and line endings are escaped in the returned value (see
|
|
// print_with_escapes) return value is valid UTF-8, it may contain NULL chars
|
|
// within the string: get_string_length determines the true string length.
|
|
inline const char *get_string() const {
|
|
return reinterpret_cast<const char *>(
|
|
doc.string_buf.get() + (current_val & internal::JSON_VALUE_MASK) + sizeof(uint32_t));
|
|
}
|
|
|
|
// return the length of the string in bytes
|
|
inline uint32_t get_string_length() const {
|
|
uint32_t answer;
|
|
memcpy(&answer,
|
|
reinterpret_cast<const char *>(doc.string_buf.get() +
|
|
(current_val & internal::JSON_VALUE_MASK)),
|
|
sizeof(uint32_t));
|
|
return answer;
|
|
}
|
|
|
|
// get the double value at this node; valid only if
|
|
// get_type() is "d"
|
|
inline double get_double() const {
|
|
if (location + 1 >= tape_length) {
|
|
return std::numeric_limits<double>::quiet_NaN(); // default value in
|
|
// case of error
|
|
}
|
|
double answer;
|
|
memcpy(&answer, &doc.tape[location + 1], sizeof(answer));
|
|
return answer;
|
|
}
|
|
|
|
inline bool is_object_or_array() const { return is_object() || is_array(); }
|
|
|
|
inline bool is_object() const { return get_type() == '{'; }
|
|
|
|
inline bool is_array() const { return get_type() == '['; }
|
|
|
|
inline bool is_string() const { return get_type() == '"'; }
|
|
|
|
// Returns true if the current type of node is an signed integer.
|
|
// You can get its value with `get_integer()`.
|
|
inline bool is_integer() const { return get_type() == 'l'; }
|
|
|
|
// Returns true if the current type of node is an unsigned integer.
|
|
// You can get its value with `get_unsigned_integer()`.
|
|
//
|
|
// NOTE:
|
|
// Only a large value, which is out of range of a 64-bit signed integer, is
|
|
// represented internally as an unsigned node. On the other hand, a typical
|
|
// positive integer, such as 1, 42, or 1000000, is as a signed node.
|
|
// Be aware this function returns false for a signed node.
|
|
inline bool is_unsigned_integer() const { return get_type() == 'u'; }
|
|
|
|
inline bool is_double() const { return get_type() == 'd'; }
|
|
|
|
inline bool is_number() const {
|
|
return is_integer() || is_unsigned_integer() || is_double();
|
|
}
|
|
|
|
inline bool is_true() const { return get_type() == 't'; }
|
|
|
|
inline bool is_false() const { return get_type() == 'f'; }
|
|
|
|
inline bool is_null() const { return get_type() == 'n'; }
|
|
|
|
static bool is_object_or_array(uint8_t type) {
|
|
return ((type == '[') || (type == '{'));
|
|
}
|
|
|
|
// when at {, go one level deep, looking for a given key
|
|
// if successful, we are left pointing at the value,
|
|
// if not, we are still pointing at the object ({)
|
|
// (in case of repeated keys, this only finds the first one).
|
|
// We seek the key using C's strcmp so if your JSON strings contain
|
|
// NULL chars, this would trigger a false positive: if you expect that
|
|
// to be the case, take extra precautions.
|
|
// Furthermore, we do the comparison character-by-character
|
|
// without taking into account Unicode equivalence.
|
|
inline bool move_to_key(const char *key);
|
|
|
|
// as above, but case insensitive lookup (strcmpi instead of strcmp)
|
|
inline bool move_to_key_insensitive(const char *key);
|
|
|
|
// when at {, go one level deep, looking for a given key
|
|
// if successful, we are left pointing at the value,
|
|
// if not, we are still pointing at the object ({)
|
|
// (in case of repeated keys, this only finds the first one).
|
|
// The string we search for can contain NULL values.
|
|
// Furthermore, we do the comparison character-by-character
|
|
// without taking into account Unicode equivalence.
|
|
inline bool move_to_key(const char *key, uint32_t length);
|
|
|
|
// when at a key location within an object, this moves to the accompanying
|
|
// value (located next to it). This is equivalent but much faster than
|
|
// calling "next()".
|
|
inline void move_to_value();
|
|
|
|
// when at [, go one level deep, and advance to the given index.
|
|
// if successful, we are left pointing at the value,
|
|
// if not, we are still pointing at the array ([)
|
|
inline bool move_to_index(uint32_t index);
|
|
|
|
// Moves the iterator to the value corresponding to the json pointer.
|
|
// Always search from the root of the document.
|
|
// if successful, we are left pointing at the value,
|
|
// if not, we are still pointing the same value we were pointing before the
|
|
// call. The json pointer follows the rfc6901 standard's syntax:
|
|
// https://tools.ietf.org/html/rfc6901 However, the standard says "If a
|
|
// referenced member name is not unique in an object, the member that is
|
|
// referenced is undefined, and evaluation fails". Here we just return the
|
|
// first corresponding value. The length parameter is the length of the
|
|
// jsonpointer string ('pointer').
|
|
bool move_to(const char *pointer, uint32_t length);
|
|
|
|
// Moves the iterator to the value corresponding to the json pointer.
|
|
// Always search from the root of the document.
|
|
// if successful, we are left pointing at the value,
|
|
// if not, we are still pointing the same value we were pointing before the
|
|
// call. The json pointer implementation follows the rfc6901 standard's
|
|
// syntax: https://tools.ietf.org/html/rfc6901 However, the standard says
|
|
// "If a referenced member name is not unique in an object, the member that
|
|
// is referenced is undefined, and evaluation fails". Here we just return
|
|
// the first corresponding value.
|
|
inline bool move_to(const std::string &pointer) {
|
|
return move_to(pointer.c_str(), pointer.length());
|
|
}
|
|
|
|
private:
|
|
// Almost the same as move_to(), except it searches from the current
|
|
// position. The pointer's syntax is identical, though that case is not
|
|
// handled by the rfc6901 standard. The '/' is still required at the
|
|
// beginning. However, contrary to move_to(), the URI Fragment Identifier
|
|
// Representation is not supported here. Also, in case of failure, we are
|
|
// left pointing at the closest value it could reach. For these reasons it
|
|
// is private. It exists because it is used by move_to().
|
|
bool relative_move_to(const char *pointer, uint32_t length);
|
|
|
|
public:
|
|
// throughout return true if we can do the navigation, false
|
|
// otherwise
|
|
|
|
// Withing a given scope (series of nodes at the same depth within either an
|
|
// array or an object), we move forward.
|
|
// Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, {
|
|
// and [. At the object ({) or at the array ([), you can issue a "down" to
|
|
// visit their content. valid if we're not at the end of a scope (returns
|
|
// true).
|
|
inline bool next();
|
|
|
|
// Within a given scope (series of nodes at the same depth within either an
|
|
// array or an object), we move backward.
|
|
// Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true
|
|
// when starting at the end of the scope. At the object ({) or at the array
|
|
// ([), you can issue a "down" to visit their content.
|
|
// Performance warning: This function is implemented by starting again
|
|
// from the beginning of the scope and scanning forward. You should expect
|
|
// it to be relatively slow.
|
|
inline bool prev();
|
|
|
|
// Moves back to either the containing array or object (type { or [) from
|
|
// within a contained scope.
|
|
// Valid unless we are at the first level of the document
|
|
inline bool up();
|
|
|
|
// Valid if we're at a [ or { and it starts a non-empty scope; moves us to
|
|
// start of that deeper scope if it not empty. Thus, given [true, null,
|
|
// {"a":1}, [1,2]], if we are at the { node, we would move to the "a" node.
|
|
inline bool down();
|
|
|
|
// move us to the start of our current scope,
|
|
// a scope is a series of nodes at the same level
|
|
inline void to_start_scope();
|
|
|
|
inline void rewind() {
|
|
while (up())
|
|
;
|
|
}
|
|
|
|
// void to_end_scope(); // move us to
|
|
// the start of our current scope; always succeeds
|
|
|
|
// print the node we are currently pointing at
|
|
bool print(std::ostream &os, bool escape_strings = true) const;
|
|
typedef struct {
|
|
size_t start_of_scope;
|
|
uint8_t scope_type;
|
|
} scopeindex_t;
|
|
|
|
private:
|
|
const document &doc;
|
|
size_t depth;
|
|
size_t location; // our current location on a tape
|
|
size_t tape_length;
|
|
uint8_t current_type;
|
|
uint64_t current_val;
|
|
scopeindex_t depth_index[max_depth];
|
|
};
|
|
|
|
} // namespace simdjson
|
|
|
|
#endif // SIMDJSON_DOCUMENT_ITERATOR_H
|
|
/* end file include/simdjson/document_iterator.h */
|
|
|
|
#endif
|
|
/* end file include/simdjson/parsedjsoniterator.h */
|
|
/* begin file include/simdjson/jsonparser.h */
|
|
// TODO Remove this -- deprecated API and files
|
|
|
|
#ifndef SIMDJSON_JSONPARSER_H
|
|
#define SIMDJSON_JSONPARSER_H
|
|
|
|
/* begin file include/simdjson/parsedjson.h */
|
|
// TODO Remove this -- deprecated API and files
|
|
|
|
#ifndef SIMDJSON_PARSEDJSON_H
|
|
#define SIMDJSON_PARSEDJSON_H
|
|
|
|
|
|
namespace simdjson {
|
|
|
|
// Alias kept for the deprecated API: ParsedJson is simply another name for document::parser.
using ParsedJson = document::parser;
|
|
|
|
} // namespace simdjson
|
|
#endif
|
|
/* end file include/simdjson/parsedjson.h */
|
|
/* begin file include/simdjson/jsonioutil.h */
|
|
#ifndef SIMDJSON_JSONIOUTIL_H
|
|
#define SIMDJSON_JSONIOUTIL_H
|
|
|
|
#include <exception>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <sstream>
|
|
#include <stdexcept>
|
|
#include <string>
|
|
|
|
|
|
namespace simdjson {
|
|
|
|
#if SIMDJSON_EXCEPTIONS
|
|
|
|
/**
 * Load the file named `filename` into a padded_string.
 *
 * Thin wrapper that delegates to padded_string::load(). Only available when
 * SIMDJSON_EXCEPTIONS is enabled.
 * NOTE(review): load failures are presumably reported by throwing -- confirm
 * against padded_string::load.
 */
inline padded_string get_corpus(const std::string &filename) {
  return padded_string::load(filename);
}
|
|
|
|
#endif // SIMDJSON_EXCEPTIONS
|
|
|
|
} // namespace simdjson
|
|
|
|
#endif // SIMDJSON_JSONIOUTIL_H
|
|
/* end file include/simdjson/jsonioutil.h */
|
|
|
|
namespace simdjson {
|
|
|
|
//
|
|
// C API (json_parse and build_parsed_json) declarations
|
|
//
|
|
|
|
inline int json_parse(const uint8_t *buf, size_t len, document::parser &parser, bool realloc_if_needed = true) noexcept {
|
|
error_code code = parser.parse(buf, len, realloc_if_needed).error();
|
|
// The deprecated json_parse API is a signal that the user plans to *use* the error code / valid
|
|
// bits in the parser instead of heeding the result code. The normal parser unsets those in
|
|
// anticipation of making the error code ephemeral.
|
|
// Here we put the code back into the parser, until we've removed this method.
|
|
parser.valid = code == SUCCESS;
|
|
parser.error = code;
|
|
return code;
|
|
}
|
|
inline int json_parse(const char *buf, size_t len, document::parser &parser, bool realloc_if_needed = true) noexcept {
|
|
return json_parse(reinterpret_cast<const uint8_t *>(buf), len, parser, realloc_if_needed);
|
|
}
|
|
inline int json_parse(const std::string &s, document::parser &parser, bool realloc_if_needed = true) noexcept {
|
|
return json_parse(s.data(), s.length(), parser, realloc_if_needed);
|
|
}
|
|
inline int json_parse(const padded_string &s, document::parser &parser) noexcept {
|
|
return json_parse(s.data(), s.length(), parser, false);
|
|
}
|
|
|
|
WARN_UNUSED static document::parser build_parsed_json(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept {
|
|
document::parser parser;
|
|
json_parse(buf, len, parser, realloc_if_needed);
|
|
return parser;
|
|
}
|
|
WARN_UNUSED inline document::parser build_parsed_json(const char *buf, size_t len, bool realloc_if_needed = true) noexcept {
|
|
return build_parsed_json(reinterpret_cast<const uint8_t *>(buf), len, realloc_if_needed);
|
|
}
|
|
WARN_UNUSED inline document::parser build_parsed_json(const std::string &s, bool realloc_if_needed = true) noexcept {
|
|
return build_parsed_json(s.data(), s.length(), realloc_if_needed);
|
|
}
|
|
WARN_UNUSED inline document::parser build_parsed_json(const padded_string &s) noexcept {
|
|
return build_parsed_json(s.data(), s.length(), false);
|
|
}
|
|
|
|
// We do not want to allow implicit conversion from C string to std::string.
|
|
int json_parse(const char *buf, document::parser &parser) noexcept = delete;
|
|
document::parser build_parsed_json(const char *buf) noexcept = delete;
|
|
|
|
} // namespace simdjson
|
|
|
|
#endif
|
|
/* end file include/simdjson/jsonparser.h */
|
|
/* begin file include/simdjson/jsonstream.h */
|
|
// TODO Remove this -- deprecated API and files
|
|
|
|
#ifndef SIMDJSON_JSONSTREAM_H
|
|
#define SIMDJSON_JSONSTREAM_H
|
|
|
|
|
|
namespace simdjson {
|
|
|
|
/**
|
|
* @deprecated use document::stream instead.
|
|
*
|
|
* The main motivation for this piece of software is to achieve maximum speed and offer
|
|
* good quality of life while parsing files containing multiple JSON documents.
|
|
*
|
|
* Since we want to offer flexibility and not restrict ourselves to a specific file
|
|
* format, we support any file that contains any valid JSON documents separated by one
|
|
* or more character that is considered a whitespace by the JSON spec.
|
|
* Namely: space, nothing, linefeed, carriage return, horizontal tab.
|
|
* Anything that is not whitespace will be parsed as a JSON document and could lead
|
|
* to failure.
|
|
*
|
|
* To offer maximum parsing speed, our implementation processes the data inside the
|
|
* buffer by batches and their size is defined by the parameter "batch_size".
|
|
* By loading data in batches, we can optimize the time spent allocating data in the
|
|
* parser and can also open the possibility of multi-threading.
|
|
* The batch_size must be at least as large as the biggest document in the file, but
|
|
* not so large that it floods the cache memory. We found that 1MB is
|
|
* somewhat a sweet spot for now. Eventually, this batch_size could be fully
|
|
* automated and be optimal at all times.
|
|
*
|
|
* The template parameter (string_container) must
|
|
* support the data() and size() methods, returning a pointer
|
|
* to a char* and to the number of bytes respectively.
|
|
* The simdjson parser may read up to SIMDJSON_PADDING bytes beyond the end
|
|
* of the string, so if you do not use a padded_string container,
|
|
* you have the responsibility to overallocate. If you fail to
|
|
* do so, your software may crash if you cross a page boundary,
|
|
* and you should expect memory checkers to object.
|
|
* Most users should use a simdjson::padded_string.
|
|
*/
|
|
template <class string_container> class JsonStream {
|
|
public:
|
|
/* Create a JsonStream object that can be used to parse sequentially the valid
|
|
* JSON documents found in the buffer "buf".
|
|
*
|
|
* The batch_size must be at least as large as the biggest document in the
|
|
* file, but
|
|
* not too large to submerge the cached memory. We found that 1MB is
|
|
* somewhat a sweet spot for now.
|
|
*
|
|
* The user is expected to call the following json_parse method to parse the
|
|
* next
|
|
* valid JSON document found in the buffer. This method can and is expected
|
|
* to be
|
|
* called in a loop.
|
|
*
|
|
* Various methods are offered to keep track of the status, like
|
|
* get_current_buffer_loc,
|
|
* get_n_parsed_docs, get_n_bytes_parsed, etc.
|
|
*
|
|
* */
|
|
JsonStream(const string_container &s, size_t _batch_size = 1000000) noexcept;
|
|
|
|
~JsonStream() noexcept;
|
|
|
|
/* Parse the next document found in the buffer previously given to JsonStream.
|
|
|
|
* The content should be a valid JSON document encoded as UTF-8. If there is a
|
|
* UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
|
|
* discouraged.
|
|
*
|
|
* You do NOT need to pre-allocate a parser. This function takes care of
|
|
* pre-allocating a capacity defined by the batch_size defined when creating
|
|
the
|
|
* JsonStream object.
|
|
*
|
|
* The function returns simdjson::SUCCESS_AND_HAS_MORE (an integer = 1) in
|
|
case
|
|
* of success and indicates that the buffer still contains more data to be
|
|
parsed,
|
|
* meaning this function can be called again to return the next JSON document
|
|
* after this one.
|
|
*
|
|
* The function returns simdjson::SUCCESS (as integer = 0) in case of success
|
|
* and indicates that the buffer has successfully been parsed to the end.
|
|
* Every document it contained has been parsed without error.
|
|
*
|
|
* The function returns an error code from simdjson/simdjson.h in case of
|
|
failure
|
|
* such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and
|
|
so forth;
|
|
* the simdjson::error_message function converts these error codes into a
|
|
* string).
|
|
*
|
|
* You can also check validity by calling parser.is_valid(). The same parser
|
|
can
|
|
* and should be reused for the other documents in the buffer. */
|
|
int json_parse(document::parser &parser) noexcept;
|
|
|
|
/* Returns the location (index) of where the next document should be in the
|
|
* buffer.
|
|
* Can be used for debugging, it tells the user the position of the end of the
|
|
* last
|
|
* valid JSON document parsed*/
|
|
inline size_t get_current_buffer_loc() const noexcept { return stream ? stream->current_buffer_loc : 0; }
|
|
|
|
/* Returns the total amount of complete documents parsed by the JsonStream,
|
|
* in the current buffer, at the given time.*/
|
|
inline size_t get_n_parsed_docs() const noexcept { return stream ? stream->n_parsed_docs : 0; }
|
|
|
|
/* Returns the total amount of data (in bytes) parsed by the JsonStream,
|
|
* in the current buffer, at the given time.*/
|
|
inline size_t get_n_bytes_parsed() const noexcept { return stream ? stream->n_bytes_parsed : 0; }
|
|
|
|
private:
|
|
const string_container &str;
|
|
const size_t batch_size;
|
|
document::stream *stream{nullptr};
|
|
}; // end of class JsonStream
|
|
|
|
} // end of namespace simdjson
|
|
|
|
#endif // SIMDJSON_JSONSTREAM_H
|
|
/* end file include/simdjson/jsonstream.h */
|
|
|
|
// Inline functions
|
|
/* begin file include/simdjson/inline/document.h */
|
|
#ifndef SIMDJSON_INLINE_DOCUMENT_H
|
|
#define SIMDJSON_INLINE_DOCUMENT_H
|
|
|
|
// Inline implementations go in here.
|
|
|
|
#include <iostream>
|
|
|
|
namespace simdjson {
|
|
|
|
//
|
|
// element_result inline implementation
|
|
//
|
|
really_inline document::element_result::element_result(element value) noexcept : simdjson_result<element>(value) {}
|
|
really_inline document::element_result::element_result(error_code error) noexcept : simdjson_result<element>(error) {}
|
|
/** Forwards to element::is_null(); if this result carries an error, returns that error instead. */
inline simdjson_result<bool> document::element_result::is_null() const noexcept {
  if (!error()) { return first.is_null(); }
  return error();
}
|
|
/** Forwards to element::as_bool(); if this result carries an error, returns that error instead. */
inline simdjson_result<bool> document::element_result::as_bool() const noexcept {
  if (!error()) { return first.as_bool(); }
  return error();
}
|
|
/** Forwards to element::as_c_str(); if this result carries an error, returns that error instead. */
inline simdjson_result<const char*> document::element_result::as_c_str() const noexcept {
  if (!error()) { return first.as_c_str(); }
  return error();
}
|
|
/** Forwards to element::as_string(); if this result carries an error, returns that error instead. */
inline simdjson_result<std::string_view> document::element_result::as_string() const noexcept {
  if (!error()) { return first.as_string(); }
  return error();
}
|
|
/** Forwards to element::as_uint64_t(); if this result carries an error, returns that error instead. */
inline simdjson_result<uint64_t> document::element_result::as_uint64_t() const noexcept {
  if (!error()) { return first.as_uint64_t(); }
  return error();
}
|
|
/** Forwards to element::as_int64_t(); if this result carries an error, returns that error instead. */
inline simdjson_result<int64_t> document::element_result::as_int64_t() const noexcept {
  if (!error()) { return first.as_int64_t(); }
  return error();
}
|
|
/** Forwards to element::as_double(); if this result carries an error, returns that error instead. */
inline simdjson_result<double> document::element_result::as_double() const noexcept {
  if (!error()) { return first.as_double(); }
  return error();
}
|
|
/** Forwards to element::as_array(); if this result carries an error, returns that error instead. */
inline document::array_result document::element_result::as_array() const noexcept {
  if (!error()) { return first.as_array(); }
  return error();
}
|
|
/** Forwards to element::as_object(); if this result carries an error, returns that error instead. */
inline document::object_result document::element_result::as_object() const noexcept {
  if (!error()) { return first.as_object(); }
  return error();
}
|
|
|
|
/** Looks up `key` on the wrapped element; an errored result is returned unchanged. */
inline document::element_result document::element_result::operator[](const std::string_view &key) const noexcept {
  if (!error()) { return first[key]; }
  return *this;
}
/** Same lookup, for a C-string key. */
inline document::element_result document::element_result::operator[](const char *key) const noexcept {
  if (!error()) { return first[key]; }
  return *this;
}
|
|
|
|
#if SIMDJSON_EXCEPTIONS
|
|
|
|
// Conversion operators: each one returns the matching as_*() result, which is
// then converted to the target type (declared noexcept(false)).
inline document::element_result::operator bool() const noexcept(false) { return as_bool(); }
inline document::element_result::operator const char *() const noexcept(false) { return as_c_str(); }
inline document::element_result::operator std::string_view() const noexcept(false) { return as_string(); }
inline document::element_result::operator uint64_t() const noexcept(false) { return as_uint64_t(); }
inline document::element_result::operator int64_t() const noexcept(false) { return as_int64_t(); }
inline document::element_result::operator double() const noexcept(false) { return as_double(); }
inline document::element_result::operator document::array() const noexcept(false) { return as_array(); }
inline document::element_result::operator document::object() const noexcept(false) { return as_object(); }
|
|
|
|
#endif
|
|
|
|
//
|
|
// array_result inline implementation
|
|
//
|
|
really_inline document::array_result::array_result(array value) noexcept : simdjson_result<array>(value) {}
|
|
really_inline document::array_result::array_result(error_code error) noexcept : simdjson_result<array>(error) {}
|
|
|
|
#if SIMDJSON_EXCEPTIONS
|
|
|
|
/** Iterator to the first array element; throws simdjson_error if this result carries an error. */
inline document::array::iterator document::array_result::begin() const noexcept(false) {
  if (!error()) { return first.begin(); }
  throw simdjson_error(error());
}
/** Past-the-end iterator; throws simdjson_error if this result carries an error. */
inline document::array::iterator document::array_result::end() const noexcept(false) {
  if (!error()) { return first.end(); }
  throw simdjson_error(error());
}
|
|
|
|
#endif // SIMDJSON_EXCEPTIONS
|
|
|
|
//
|
|
// object_result inline implementation
|
|
//
|
|
really_inline document::object_result::object_result(object value) noexcept : simdjson_result<object>(value) {}
|
|
really_inline document::object_result::object_result(error_code error) noexcept : simdjson_result<object>(error) {}
|
|
|
|
/** Looks up `key` in the wrapped object, or propagates the stored error. */
inline document::element_result document::object_result::operator[](const std::string_view &key) const noexcept {
  if (!error()) { return first[key]; }
  return error();
}
/** Same lookup, for a C-string key. */
inline document::element_result document::object_result::operator[](const char *key) const noexcept {
  if (!error()) { return first[key]; }
  return error();
}
|
|
|
|
#if SIMDJSON_EXCEPTIONS
|
|
|
|
/** Iterator to the first field; throws simdjson_error if this result carries an error. */
inline document::object::iterator document::object_result::begin() const noexcept(false) {
  if (!error()) { return first.begin(); }
  throw simdjson_error(error());
}
/** Past-the-end iterator; throws simdjson_error if this result carries an error. */
inline document::object::iterator document::object_result::end() const noexcept(false) {
  if (!error()) { return first.end(); }
  throw simdjson_error(error());
}
|
|
|
|
#endif // SIMDJSON_EXCEPTIONS
|
|
|
|
//
|
|
// document inline implementation
|
|
//
|
|
/** Returns an element referring to tape index 1 of this document. */
inline document::element document::root() const noexcept {
  constexpr size_t ROOT_ELEMENT_INDEX = 1;
  return element(this, ROOT_ELEMENT_INDEX);
}
|
|
/** Interprets the root element as an array. */
inline document::array_result document::as_array() const noexcept {
  const element top = root();
  return top.as_array();
}
/** Interprets the root element as an object. */
inline document::object_result document::as_object() const noexcept {
  const element top = root();
  return top.as_object();
}
/** Converts the document to its root element. */
inline document::operator element() const noexcept { return root(); }
|
|
|
|
#if SIMDJSON_EXCEPTIONS
|
|
|
|
// Conversions of the whole document go through the root element's own
// conversion operators (declared noexcept(false)).
inline document::operator document::array() const noexcept(false) { return root(); }
inline document::operator document::object() const noexcept(false) { return root(); }
|
|
|
|
#endif
|
|
|
|
/** Looks up `key` on the root element. */
inline document::element_result document::operator[](const std::string_view &key) const noexcept {
  const element top = root();
  return top[key];
}
/** Same lookup, for a C-string key. */
inline document::element_result document::operator[](const char *key) const noexcept {
  const element top = root();
  return top[key];
}
|
|
|
|
/**
 * Loads and parses the file at `path` with a temporary parser, then moves the
 * parsed document out of that parser into the returned result.
 */
inline document::doc_move_result document::load(const std::string &path) noexcept {
  document::parser parser;
  auto [doc, error] = parser.load(path);
  // `doc` refers to the parser's own document; hand its contents to the caller.
  return doc_move_result(std::move(doc), error);
}
|
|
|
|
/**
 * Parses `len` bytes at `buf` with a temporary parser, then moves the parsed
 * document out of that parser into the returned result.
 */
inline document::doc_move_result document::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept {
  document::parser parser;
  auto [doc, error] = parser.parse(buf, len, realloc_if_needed);
  // `doc` refers to the parser's own document; hand its contents to the caller.
  return doc_move_result(std::move(doc), error);
}
|
|
/** char-buffer overload: reinterprets the bytes and forwards to the uint8_t overload. */
really_inline document::doc_move_result document::parse(const char *buf, size_t len, bool realloc_if_needed) noexcept {
  return parse(reinterpret_cast<const uint8_t *>(buf), len, realloc_if_needed);
}
/** std::string overload: requests a padded copy when the spare capacity is below SIMDJSON_PADDING. */
really_inline document::doc_move_result document::parse(const std::string &s) noexcept {
  const bool needs_padded_copy = s.capacity() - s.length() < SIMDJSON_PADDING;
  return parse(s.data(), s.length(), needs_padded_copy);
}
/** padded_string overload: never requests a copy. */
really_inline document::doc_move_result document::parse(const padded_string &s) noexcept {
  constexpr bool needs_padded_copy = false;
  return parse(s.data(), s.length(), needs_padded_copy);
}
|
|
|
|
/**
 * (Re)allocates the tape and string buffer for inputs of up to `capacity`
 * bytes. A capacity of 0 releases both buffers.
 *
 * @return SUCCESS, or MEMALLOC if either allocation failed.
 */
WARN_UNUSED
inline error_code document::set_capacity(size_t capacity) noexcept {
  if (capacity == 0) {
    string_buf.reset();
    tape.reset();
    return SUCCESS;
  }

  // A pathological input like "[[[[..." generates one tape element per input
  // byte, and inputs such as
  // "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6"
  // can generate len + 1 of them; see
  // https://github.com/lemire/simdjson/issues/345
  const size_t tape_capacity = ROUNDUP_N(capacity + 2, 64);
  // A document made only of zero-length strings could hold len/3 strings,
  // needing len/3 * 5 bytes in the string buffer.
  const size_t string_capacity = ROUNDUP_N(5 * capacity / 3 + 32, 64);

  string_buf.reset(new (std::nothrow) uint8_t[string_capacity]);
  tape.reset(new (std::nothrow) uint64_t[tape_capacity]);
  if (string_buf && tape) { return SUCCESS; }
  return MEMALLOC;
}
|
|
|
|
inline bool document::dump_raw_tape(std::ostream &os) const noexcept {
|
|
uint32_t string_length;
|
|
size_t tape_idx = 0;
|
|
uint64_t tape_val = tape[tape_idx];
|
|
uint8_t type = (tape_val >> 56);
|
|
os << tape_idx << " : " << type;
|
|
tape_idx++;
|
|
size_t how_many = 0;
|
|
if (type == 'r') {
|
|
how_many = tape_val & internal::JSON_VALUE_MASK;
|
|
} else {
|
|
// Error: no starting root node?
|
|
return false;
|
|
}
|
|
os << "\t// pointing to " << how_many << " (right after last node)\n";
|
|
uint64_t payload;
|
|
for (; tape_idx < how_many; tape_idx++) {
|
|
os << tape_idx << " : ";
|
|
tape_val = tape[tape_idx];
|
|
payload = tape_val & internal::JSON_VALUE_MASK;
|
|
type = (tape_val >> 56);
|
|
switch (type) {
|
|
case '"': // we have a string
|
|
os << "string \"";
|
|
memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t));
|
|
os << internal::escape_json_string(std::string_view(
|
|
(const char *)(string_buf.get() + payload + sizeof(uint32_t)),
|
|
string_length
|
|
));
|
|
os << '"';
|
|
os << '\n';
|
|
break;
|
|
case 'l': // we have a long int
|
|
if (tape_idx + 1 >= how_many) {
|
|
return false;
|
|
}
|
|
os << "integer " << static_cast<int64_t>(tape[++tape_idx]) << "\n";
|
|
break;
|
|
case 'u': // we have a long uint
|
|
if (tape_idx + 1 >= how_many) {
|
|
return false;
|
|
}
|
|
os << "unsigned integer " << tape[++tape_idx] << "\n";
|
|
break;
|
|
case 'd': // we have a double
|
|
os << "float ";
|
|
if (tape_idx + 1 >= how_many) {
|
|
return false;
|
|
}
|
|
double answer;
|
|
memcpy(&answer, &tape[++tape_idx], sizeof(answer));
|
|
os << answer << '\n';
|
|
break;
|
|
case 'n': // we have a null
|
|
os << "null\n";
|
|
break;
|
|
case 't': // we have a true
|
|
os << "true\n";
|
|
break;
|
|
case 'f': // we have a false
|
|
os << "false\n";
|
|
break;
|
|
case '{': // we have an object
|
|
os << "{\t// pointing to next tape location " << payload
|
|
<< " (first node after the scope) \n";
|
|
break;
|
|
case '}': // we end an object
|
|
os << "}\t// pointing to previous tape location " << payload
|
|
<< " (start of the scope) \n";
|
|
break;
|
|
case '[': // we start an array
|
|
os << "[\t// pointing to next tape location " << payload
|
|
<< " (first node after the scope) \n";
|
|
break;
|
|
case ']': // we end an array
|
|
os << "]\t// pointing to previous tape location " << payload
|
|
<< " (start of the scope) \n";
|
|
break;
|
|
case 'r': // we start and end with the root node
|
|
// should we be hitting the root node?
|
|
return false;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
tape_val = tape[tape_idx];
|
|
payload = tape_val & internal::JSON_VALUE_MASK;
|
|
type = (tape_val >> 56);
|
|
os << tape_idx << " : " << type << "\t// pointing to " << payload
|
|
<< " (start root)\n";
|
|
return true;
|
|
}
|
|
|
|
//
|
|
// doc_result inline implementation
|
|
//
|
|
inline document::doc_result::doc_result(document &doc, error_code error) noexcept : simdjson_result<document&>(doc, error) { }
|
|
|
|
/** Interprets the document root as an array, or propagates the stored error. */
inline document::array_result document::doc_result::as_array() const noexcept {
  if (!error()) { return first.root().as_array(); }
  return error();
}
/** Interprets the document root as an object, or propagates the stored error. */
inline document::object_result document::doc_result::as_object() const noexcept {
  if (!error()) { return first.root().as_object(); }
  return error();
}
|
|
|
|
/** Looks up `key` in the referenced document, or propagates the stored error. */
inline document::element_result document::doc_result::operator[](const std::string_view &key) const noexcept {
  if (!error()) { return first[key]; }
  return error();
}
/** Same lookup, for a C-string key. */
inline document::element_result document::doc_result::operator[](const char *key) const noexcept {
  if (!error()) { return first[key]; }
  return error();
}
|
|
|
|
//
|
|
// doc_move_result inline implementation
|
|
//
|
|
inline document::doc_move_result::doc_move_result(document &&doc, error_code error) noexcept : simdjson_move_result<document>(std::move(doc), error) { }
|
|
inline document::doc_move_result::doc_move_result(document &&doc) noexcept : simdjson_move_result<document>(std::move(doc)) { }
|
|
inline document::doc_move_result::doc_move_result(error_code error) noexcept : simdjson_move_result<document>(error) { }
|
|
|
|
/** Interprets the owned document's root as an array, or propagates the stored error. */
inline document::array_result document::doc_move_result::as_array() const noexcept {
  if (!error()) { return first.root().as_array(); }
  return error();
}
/** Interprets the owned document's root as an object, or propagates the stored error. */
inline document::object_result document::doc_move_result::as_object() const noexcept {
  if (!error()) { return first.root().as_object(); }
  return error();
}
|
|
|
|
/** Looks up `key` in the owned document, or propagates the stored error. */
inline document::element_result document::doc_move_result::operator[](const std::string_view &key) const noexcept {
  if (!error()) { return first[key]; }
  return error();
}
/** Same lookup, for a C-string key. */
inline document::element_result document::doc_move_result::operator[](const char *key) const noexcept {
  if (!error()) { return first[key]; }
  return error();
}
|
|
|
|
//
|
|
// document::parser inline implementation
|
|
//
|
|
/** Records the capacity and depth limits; the constructor body itself performs no allocation. */
really_inline document::parser::parser(size_t max_capacity, size_t max_depth) noexcept
    : _max_capacity{max_capacity},
      _max_depth{max_depth} {}
|
|
inline bool document::parser::is_valid() const noexcept { return valid; }
|
|
inline int document::parser::get_error_code() const noexcept { return error; }
|
|
inline std::string document::parser::get_error_message() const noexcept { return error_message(int(error)); }
|
|
inline bool document::parser::print_json(std::ostream &os) const noexcept {
|
|
if (!is_valid()) { return false; }
|
|
os << minify(doc);
|
|
return true;
|
|
}
|
|
inline bool document::parser::dump_raw_tape(std::ostream &os) const noexcept {
|
|
return is_valid() ? doc.dump_raw_tape(os) : false;
|
|
}
|
|
|
|
#if SIMDJSON_EXCEPTIONS
|
|
|
|
/** Returns the parser's document; throws simdjson_error with the stored error when the parser is not valid. */
inline const document &document::parser::get_document() const noexcept(false) {
  if (is_valid()) {
    return doc;
  }
  throw simdjson_error(error);
}
|
|
|
|
#endif // SIMDJSON_EXCEPTIONS
|
|
|
|
/** Loads the file at `path` into a padded string and parses it; a load failure is reported through the result. */
inline document::doc_result document::parser::load(const std::string &path) noexcept {
  auto [json, _error] = padded_string::load(path);
  if (!_error) {
    return parse(json);
  }
  return doc_result(doc, _error);
}
|
|
|
|
/**
 * Loads the file at `path` and returns a stream over its contents, passing
 * along any load error.
 *
 * NOTE(review): `json` is a local destroyed when this function returns, yet a
 * pointer into it is handed to the stream. If the stream keeps that pointer
 * rather than copying the bytes, it dangles -- confirm against the stream
 * constructor.
 */
inline document::stream document::parser::load_many(const std::string &path, size_t batch_size) noexcept {
  auto [json, _error] = padded_string::load(path);
  const uint8_t *bytes = reinterpret_cast<const uint8_t*>(json.data());
  return stream(*this, bytes, json.length(), batch_size, _error);
}
|
|
|
|
/**
 * Parses `len` bytes at `buf` into the parser's document.
 *
 * @param realloc_if_needed when true, the input is first copied into a freshly
 *        allocated padded buffer, which is freed before returning.
 * @return a doc_result referring to the parser's document, carrying the
 *         capacity, allocation or parse error code.
 */
inline document::doc_result document::parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept {
  error_code code = ensure_capacity(len);
  if (code) { return doc_result(doc, code); }

  uint8_t *padded_copy = nullptr;
  if (realloc_if_needed) {
    padded_copy = reinterpret_cast<uint8_t *>(internal::allocate_padded_buffer(len));
    if (padded_copy == nullptr) {
      return doc_result(doc, MEMALLOC);
    }
    memcpy(padded_copy, buf, len);
    buf = padded_copy;
  }

  code = simdjson::active_implementation->parse(buf, len, *this);

  // We're indicating validity via the doc_result, so set the parse state back to invalid
  valid = false;
  error = UNINITIALIZED;

  if (realloc_if_needed) {
    aligned_free(padded_copy); // must free before we exit
  }
  return doc_result(doc, code);
}
|
|
/** char-buffer overload: reinterprets the bytes and forwards to the uint8_t overload. */
really_inline document::doc_result document::parser::parse(const char *buf, size_t len, bool realloc_if_needed) noexcept {
  return parse(reinterpret_cast<const uint8_t *>(buf), len, realloc_if_needed);
}
/** std::string overload: requests a padded copy when the spare capacity is below SIMDJSON_PADDING. */
really_inline document::doc_result document::parser::parse(const std::string &s) noexcept {
  const bool needs_padded_copy = s.capacity() - s.length() < SIMDJSON_PADDING;
  return parse(s.data(), s.length(), needs_padded_copy);
}
/** padded_string overload: never requests a copy. */
really_inline document::doc_result document::parser::parse(const padded_string &s) noexcept {
  constexpr bool needs_padded_copy = false;
  return parse(s.data(), s.length(), needs_padded_copy);
}
|
|
|
|
/** Creates a stream over `len` bytes at `buf`, bound to this parser. */
inline document::stream document::parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept {
  return stream(*this, buf, len, batch_size);
}
/** char-buffer overload: reinterprets the bytes and forwards to the uint8_t overload. */
inline document::stream document::parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept {
  return parse_many(reinterpret_cast<const uint8_t *>(buf), len, batch_size);
}
/** std::string overload: forwards the string's data pointer and length. */
inline document::stream document::parser::parse_many(const std::string &s, size_t batch_size) noexcept {
  const size_t len = s.length();
  return parse_many(s.data(), len, batch_size);
}
/** padded_string overload: forwards the string's data pointer and length. */
inline document::stream document::parser::parse_many(const padded_string &s, size_t batch_size) noexcept {
  const size_t len = s.length();
  return parse_many(s.data(), len, batch_size);
}
|
|
|
|
/** Returns the current capacity (`_capacity`). */
really_inline size_t document::parser::capacity() const noexcept { return _capacity; }
/** Returns the capacity limit (`_max_capacity`). */
really_inline size_t document::parser::max_capacity() const noexcept { return _max_capacity; }
/** Returns the depth limit (`_max_depth`). */
really_inline size_t document::parser::max_depth() const noexcept { return _max_depth; }
|
|
|
|
/**
 * Resizes the parser (document buffers and stage 1 structural indexes) for
 * inputs of up to `capacity` bytes. A capacity of 0 releases the buffers.
 *
 * While the resize is in progress the recorded capacity is 0, so a failure
 * leaves the parser reporting no capacity.
 *
 * @return SUCCESS, or MEMALLOC if an allocation failed.
 */
WARN_UNUSED
inline error_code document::parser::set_capacity(size_t capacity) noexcept {
  if (_capacity == capacity) {
    return SUCCESS;
  }

  // Set capacity to 0 until we finish, in case there's an error
  _capacity = 0;

  //
  // Reallocate the document
  //
  error_code err = doc.set_capacity(capacity);
  if (err) { return err; }

  //
  // Don't allocate 0 bytes, just return.
  //
  if (capacity == 0) {
    structural_indexes.reset();
    return SUCCESS;
  }

  //
  // Initialize stage 1 output
  //
  // Computed in size_t: a 32-bit count would wrap around for capacities close
  // to 4 GiB and the allocation below would be far too small.
  size_t max_structures = ROUNDUP_N(capacity, 64) + 2 + 7;
  structural_indexes.reset( new (std::nothrow) uint32_t[max_structures]); // TODO realloc
  if (!structural_indexes) {
    return MEMALLOC;
  }

  _capacity = capacity;
  return SUCCESS;
}
|
|
|
|
/** Updates the capacity limit; only the stored limit changes here. */
really_inline void document::parser::set_max_capacity(size_t max_capacity) noexcept { _max_capacity = max_capacity; }
|
|
|
|
/**
 * (Re)allocates the depth-indexed stage 2 buffers for `max_depth` levels.
 * A depth of 0 releases them. Nothing is done when the depth is unchanged and
 * the buffers already exist.
 *
 * While the resize is in progress the recorded depth is 0, so a failure
 * leaves the parser reporting no depth.
 *
 * @return SUCCESS, or MEMALLOC if an allocation failed.
 */
WARN_UNUSED inline error_code document::parser::set_max_depth(size_t max_depth) noexcept {
  const bool already_allocated = (max_depth == _max_depth) && ret_address;
  if (already_allocated) { return SUCCESS; }

  _max_depth = 0;

  if (max_depth == 0) {
    ret_address.reset();
    containing_scope_offset.reset();
    return SUCCESS;
  }

  //
  // Initialize stage 2 state
  //
  containing_scope_offset.reset(new (std::nothrow) uint32_t[max_depth]); // TODO realloc
#ifdef SIMDJSON_USE_COMPUTED_GOTO
  ret_address.reset(new (std::nothrow) void *[max_depth]);
#else
  ret_address.reset(new (std::nothrow) char[max_depth]);
#endif

  const bool allocated = ret_address && containing_scope_offset;
  if (!allocated) {
    // Could not allocate memory
    return MEMALLOC;
  }

  _max_depth = max_depth;
  return SUCCESS;
}
|
|
|
|
/**
 * Sets the capacity, then the maximum depth.
 * @return true only when both steps succeed; the depth is not touched if the capacity step fails.
 */
WARN_UNUSED inline bool document::parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept {
  if (set_capacity(capacity)) { return false; }
  return !set_max_depth(max_depth);
}
|
|
|
|
/**
 * Makes sure the parser can handle an input of `desired_capacity` bytes:
 * grows the capacity when needed (up to max_capacity()), allocates the
 * depth-based buffers if missing, and reallocates the document buffers when
 * the document has no tape.
 *
 * The outcome is also stored in the parser's `error` member.
 *
 * @return SUCCESS, CAPACITY when `desired_capacity` exceeds max_capacity(),
 *         or the error reported by the allocation helpers.
 */
inline error_code document::parser::ensure_capacity(size_t desired_capacity) noexcept {
  // If we don't have enough capacity, (try to) automatically bump it.
  if (unlikely(desired_capacity > capacity())) {
    if (desired_capacity > max_capacity()) {
      return error = CAPACITY;
    }

    error = set_capacity(desired_capacity);
    if (error) { return error; }
  }

  // Allocate depth-based buffers if they aren't already.
  error = set_max_depth(max_depth());
  if (error) { return error; }

  // If the last doc was taken, we need to allocate a new one.
  // Size it for the parser's full capacity rather than just this input: later
  // inputs up to capacity() skip the growth branch above, and the document
  // would not be reallocated again because its tape is no longer null.
  if (!doc.tape) {
    error = doc.set_capacity(capacity());
    if (error) { return error; }
  }

  return SUCCESS;
}
|
|
|
|
//
|
|
// tape_ref inline implementation
|
|
//
|
|
/** Default: refers to no document, at index 0. */
really_inline internal::tape_ref::tape_ref() noexcept
    : doc{nullptr}, json_index{0} {}
/** Refers to tape position `_json_index` of `_doc`. */
really_inline internal::tape_ref::tape_ref(const document *_doc, size_t _json_index) noexcept
    : doc{_doc}, json_index{_json_index} {}
|
|
|
|
inline size_t internal::tape_ref::after_element() const noexcept {
|
|
switch (type()) {
|
|
case tape_type::START_ARRAY:
|
|
case tape_type::START_OBJECT:
|
|
return tape_value();
|
|
case tape_type::UINT64:
|
|
case tape_type::INT64:
|
|
case tape_type::DOUBLE:
|
|
return json_index + 2;
|
|
default:
|
|
return json_index + 1;
|
|
}
|
|
}
|
|
/** Returns the entry type, stored in the top byte of the current tape word. */
really_inline internal::tape_type internal::tape_ref::type() const noexcept {
  const uint64_t word = doc->tape[json_index];
  return static_cast<tape_type>(word >> 56);
}
/** Returns the current tape word masked with JSON_VALUE_MASK. */
really_inline uint64_t internal::tape_ref::tape_value() const noexcept {
  const uint64_t word = doc->tape[json_index];
  return word & internal::JSON_VALUE_MASK;
}
|
|
template<typename T>
|
|
really_inline T internal::tape_ref::next_tape_value() const noexcept {
|
|
static_assert(sizeof(T) == sizeof(uint64_t));
|
|
return *reinterpret_cast<const T*>(&doc->tape[json_index + 1]);
|
|
}
|
|
inline std::string_view internal::tape_ref::get_string_view() const noexcept {
|
|
size_t string_buf_index = tape_value();
|
|
uint32_t len;
|
|
memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len));
|
|
return std::string_view(
|
|
reinterpret_cast<const char *>(&doc->string_buf[string_buf_index + sizeof(uint32_t)]),
|
|
len
|
|
);
|
|
}
|
|
|
|
//
|
|
// array inline implementation
|
|
//
|
|
/** Default-constructs the underlying tape reference. */
really_inline document::array::array() noexcept
    : internal::tape_ref() {}
/** Refers to the array at tape position `_json_index` of `_doc`. */
really_inline document::array::array(const document *_doc, size_t _json_index) noexcept
    : internal::tape_ref(_doc, _json_index) {}
|
|
/** Iterator positioned on the tape slot right after the array's start entry. */
inline document::array::iterator document::array::begin() const noexcept {
  const size_t first_slot = json_index + 1;
  return iterator(doc, first_slot);
}
/** Iterator positioned one slot before after_element(). */
inline document::array::iterator document::array::end() const noexcept {
  const size_t last_slot = after_element() - 1;
  return iterator(doc, last_slot);
}
|
|
|
|
|
|
//
|
|
// document::array::iterator inline implementation
|
|
//
|
|
really_inline document::array::iterator::iterator(const document *_doc, size_t _json_index) noexcept : internal::tape_ref(_doc, _json_index) { }
|
|
inline document::element document::array::iterator::operator*() const noexcept {
|
|
return element(doc, json_index);
|
|
}
|
|
inline bool document::array::iterator::operator!=(const document::array::iterator& other) const noexcept {
|
|
return json_index != other.json_index;
|
|
}
|
|
inline void document::array::iterator::operator++() noexcept {
|
|
json_index = after_element();
|
|
}
|
|
|
|
//
|
|
// object inline implementation
|
|
//
|
|
/** Default-constructs the underlying tape reference. */
really_inline document::object::object() noexcept : internal::tape_ref() {}
/** Refers to the object at tape position `_json_index` of `_doc`. */
really_inline document::object::object(const document *_doc, size_t _json_index) noexcept : internal::tape_ref(_doc, _json_index) { }
|
|
/** Iterator positioned on the tape slot right after the object's start entry. */
inline document::object::iterator document::object::begin() const noexcept {
  const size_t first_slot = json_index + 1;
  return iterator(doc, first_slot);
}
/** Iterator positioned one slot before after_element(). */
inline document::object::iterator document::object::end() const noexcept {
  const size_t last_slot = after_element() - 1;
  return iterator(doc, last_slot);
}
|
|
/**
 * Linear search for the first field whose key equals `key`.
 * @return the field's value, or NO_SUCH_FIELD when no key matches.
 */
inline document::element_result document::object::operator[](const std::string_view &key) const noexcept {
  const iterator stop = end();
  iterator cursor = begin();
  while (cursor != stop) {
    if (key == cursor.key()) {
      return cursor.value();
    }
    ++cursor;
  }
  return NO_SUCH_FIELD;
}
|
|
/**
 * Linear search for the first field whose key compares equal to `key` under
 * strcmp.
 * @return the field's value, or NO_SUCH_FIELD when no key matches.
 */
inline document::element_result document::object::operator[](const char *key) const noexcept {
  const iterator stop = end();
  iterator cursor = begin();
  while (cursor != stop) {
    if (strcmp(key, cursor.key_c_str()) == 0) {
      return cursor.value();
    }
    ++cursor;
  }
  return NO_SUCH_FIELD;
}
|
|
|
|
//
|
|
// document::object::iterator inline implementation
|
|
//
|
|
really_inline document::object::iterator::iterator(const document *_doc, size_t _json_index) noexcept : internal::tape_ref(_doc, _json_index) { }
|
|
inline const document::key_value_pair document::object::iterator::operator*() const noexcept {
|
|
return key_value_pair(key(), value());
|
|
}
|
|
inline bool document::object::iterator::operator!=(const document::object::iterator& other) const noexcept {
|
|
return json_index != other.json_index;
|
|
}
|
|
inline void document::object::iterator::operator++() noexcept {
|
|
json_index++;
|
|
json_index = after_element();
|
|
}
|
|
inline std::string_view document::object::iterator::key() const noexcept {
|
|
size_t string_buf_index = tape_value();
|
|
uint32_t len;
|
|
memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len));
|
|
return std::string_view(
|
|
reinterpret_cast<const char *>(&doc->string_buf[string_buf_index + sizeof(uint32_t)]),
|
|
len
|
|
);
|
|
}
|
|
inline const char* document::object::iterator::key_c_str() const noexcept {
|
|
return reinterpret_cast<const char *>(&doc->string_buf[tape_value() + sizeof(uint32_t)]);
|
|
}
|
|
inline document::element document::object::iterator::value() const noexcept {
|
|
return element(doc, json_index + 1);
|
|
}
|
|
|
|
//
|
|
// document::key_value_pair inline implementation
|
|
//
|
|
/** Stores the given key view and value element. */
inline document::key_value_pair::key_value_pair(std::string_view _key, element _value) noexcept
    : key(_key),
      value(_value) {}
|
|
|
|
//
|
|
// element inline implementation
|
|
//
|
|
/** Default-constructs the underlying tape reference. */
really_inline document::element::element() noexcept
    : internal::tape_ref() {}
/** Refers to the element at tape position `_json_index` of `_doc`. */
really_inline document::element::element(const document *_doc, size_t _json_index) noexcept
    : internal::tape_ref(_doc, _json_index) {}
|
|
|
|
// Type predicates: each one inspects the tape type of the current entry.
really_inline bool document::element::is_null() const noexcept {
  return internal::tape_type::NULL_VALUE == type();
}
really_inline bool document::element::is_bool() const noexcept {
  const internal::tape_type kind = type();
  return kind == internal::tape_type::TRUE_VALUE || kind == internal::tape_type::FALSE_VALUE;
}
really_inline bool document::element::is_number() const noexcept {
  const internal::tape_type kind = type();
  return kind == internal::tape_type::UINT64
      || kind == internal::tape_type::INT64
      || kind == internal::tape_type::DOUBLE;
}
really_inline bool document::element::is_integer() const noexcept {
  const internal::tape_type kind = type();
  return kind == internal::tape_type::UINT64 || kind == internal::tape_type::INT64;
}
really_inline bool document::element::is_string() const noexcept {
  return internal::tape_type::STRING == type();
}
really_inline bool document::element::is_array() const noexcept {
  return internal::tape_type::START_ARRAY == type();
}
really_inline bool document::element::is_object() const noexcept {
  return internal::tape_type::START_OBJECT == type();
}
|
|
|
|
#if SIMDJSON_EXCEPTIONS
|
|
|
|
// Conversion operators: each one returns the matching as_*() result, which is
// then converted to the target type (declared noexcept(false)).
inline document::element::operator bool() const noexcept(false) {
  return as_bool();
}
inline document::element::operator const char*() const noexcept(false) {
  return as_c_str();
}
inline document::element::operator std::string_view() const noexcept(false) {
  return as_string();
}
inline document::element::operator uint64_t() const noexcept(false) {
  return as_uint64_t();
}
inline document::element::operator int64_t() const noexcept(false) {
  return as_int64_t();
}
inline document::element::operator double() const noexcept(false) {
  return as_double();
}
inline document::element::operator document::array() const noexcept(false) {
  return as_array();
}
inline document::element::operator document::object() const noexcept(false) {
  return as_object();
}
|
|
|
|
#endif
|
|
|
|
/** Returns true/false for boolean entries, INCORRECT_TYPE for anything else. */
inline simdjson_result<bool> document::element::as_bool() const noexcept {
  const internal::tape_type kind = type();
  if (kind == internal::tape_type::TRUE_VALUE) { return true; }
  if (kind == internal::tape_type::FALSE_VALUE) { return false; }
  return INCORRECT_TYPE;
}
|
|
/**
 * For string entries, returns a pointer to the bytes that follow the 32-bit
 * length in the string buffer; INCORRECT_TYPE for anything else.
 */
inline simdjson_result<const char *> document::element::as_c_str() const noexcept {
  if (type() != internal::tape_type::STRING) {
    return INCORRECT_TYPE;
  }
  const size_t chars_offset = tape_value() + sizeof(uint32_t);
  return reinterpret_cast<const char *>(&doc->string_buf[chars_offset]);
}
|
|
/** Returns a view of the string for string entries, INCORRECT_TYPE for anything else. */
inline simdjson_result<std::string_view> document::element::as_string() const noexcept {
  if (type() != internal::tape_type::STRING) {
    return INCORRECT_TYPE;
  }
  return get_string_view();
}
|
|
/**
 * Returns the entry as an unsigned 64-bit integer.
 * Signed entries are accepted when non-negative (NUMBER_OUT_OF_RANGE
 * otherwise); non-integer entries yield INCORRECT_TYPE.
 */
inline simdjson_result<uint64_t> document::element::as_uint64_t() const noexcept {
  const internal::tape_type kind = type();
  if (kind == internal::tape_type::UINT64) {
    return next_tape_value<uint64_t>();
  }
  if (kind == internal::tape_type::INT64) {
    const int64_t signed_value = next_tape_value<int64_t>();
    if (signed_value < 0) {
      return NUMBER_OUT_OF_RANGE;
    }
    return static_cast<uint64_t>(signed_value);
  }
  return INCORRECT_TYPE;
}
|
|
/**
 * Returns the entry as a signed 64-bit integer.
 * Unsigned entries are accepted when they fit in int64_t
 * (NUMBER_OUT_OF_RANGE otherwise); non-integer entries yield INCORRECT_TYPE.
 */
inline simdjson_result<int64_t> document::element::as_int64_t() const noexcept {
  switch (type()) {
    case internal::tape_type::UINT64: {
      uint64_t result = next_tape_value<uint64_t>();
      // The bound is the largest int64_t: anything above it would wrap to a
      // negative number in the cast below.
      // Wrapping max in parens to handle Windows issue: https://stackoverflow.com/questions/11544073/how-do-i-deal-with-the-max-macro-in-windows-h-colliding-with-max-in-std
      if (result > static_cast<uint64_t>((std::numeric_limits<int64_t>::max)())) {
        return NUMBER_OUT_OF_RANGE;
      }
      return static_cast<int64_t>(result);
    }
    case internal::tape_type::INT64:
      return next_tape_value<int64_t>();
    default:
      return INCORRECT_TYPE;
  }
}
|
|
/**
 * Returns the entry as a double. Integer entries (signed or unsigned) are
 * converted to double; non-numeric entries yield INCORRECT_TYPE.
 */
inline simdjson_result<double> document::element::as_double() const noexcept {
  switch (type()) {
    case internal::tape_type::UINT64:
      return static_cast<double>(next_tape_value<uint64_t>());
    case internal::tape_type::INT64:
      return static_cast<double>(next_tape_value<int64_t>());
    case internal::tape_type::DOUBLE:
      return next_tape_value<double>();
    default:
      return INCORRECT_TYPE;
  }
}
|
|
/** Returns the entry as an array when it starts one, INCORRECT_TYPE otherwise. */
inline document::array_result document::element::as_array() const noexcept {
  if (type() != internal::tape_type::START_ARRAY) {
    return INCORRECT_TYPE;
  }
  return array(doc, json_index);
}
/** Returns the entry as an object when it starts one, INCORRECT_TYPE otherwise. */
inline document::object_result document::element::as_object() const noexcept {
  if (type() != internal::tape_type::START_OBJECT) {
    return INCORRECT_TYPE;
  }
  return object(doc, json_index);
}
|
|
/** Treats the element as an object and looks up `key`; the as_object() error is returned when it is not one. */
inline document::element_result document::element::operator[](const std::string_view &key) const noexcept {
  auto [obj, error] = as_object();
  if (!error) { return obj[key]; }
  return error;
}
/** Same lookup, for a C-string key. */
inline document::element_result document::element::operator[](const char *key) const noexcept {
  auto [obj, error] = as_object();
  if (!error) { return obj[key]; }
  return error;
}
|
|
|
|
//
|
|
// minify inline implementation
|
|
//
|
|
|
|
template<>
|
|
inline std::ostream& minify<document>::print(std::ostream& out) {
|
|
return out << minify<document::element>(value.root());
|
|
}
|
|
template<>
|
|
inline std::ostream& minify<document::element>::print(std::ostream& out) {
|
|
using tape_type=internal::tape_type;
|
|
size_t depth = 0;
|
|
constexpr size_t MAX_DEPTH = 16;
|
|
bool is_object[MAX_DEPTH];
|
|
is_object[0] = false;
|
|
bool after_value = false;
|
|
|
|
internal::tape_ref iter(value.doc, value.json_index);
|
|
do {
|
|
// print commas after each value
|
|
if (after_value) {
|
|
out << ",";
|
|
}
|
|
// If we are in an object, print the next key and :, and skip to the next value.
|
|
if (is_object[depth]) {
|
|
out << '"' << internal::escape_json_string(iter.get_string_view()) << "\":";
|
|
iter.json_index++;
|
|
}
|
|
switch (iter.type()) {
|
|
|
|
// Arrays
|
|
case tape_type::START_ARRAY: {
|
|
// If we're too deep, we need to recurse to go deeper.
|
|
depth++;
|
|
if (unlikely(depth >= MAX_DEPTH)) {
|
|
out << minify<document::array>(document::array(iter.doc, iter.json_index));
|
|
iter.json_index = iter.tape_value() - 1; // Jump to the ]
|
|
depth--;
|
|
break;
|
|
}
|
|
|
|
// Output start [
|
|
out << '[';
|
|
iter.json_index++;
|
|
|
|
// Handle empty [] (we don't want to come back around and print commas)
|
|
if (iter.type() == tape_type::END_ARRAY) {
|
|
out << ']';
|
|
depth--;
|
|
break;
|
|
}
|
|
|
|
is_object[depth] = false;
|
|
after_value = false;
|
|
continue;
|
|
}
|
|
|
|
// Objects
|
|
case tape_type::START_OBJECT: {
|
|
// If we're too deep, we need to recurse to go deeper.
|
|
depth++;
|
|
if (unlikely(depth >= MAX_DEPTH)) {
|
|
out << minify<document::object>(document::object(iter.doc, iter.json_index));
|
|
iter.json_index = iter.tape_value() - 1; // Jump to the }
|
|
depth--;
|
|
break;
|
|
}
|
|
|
|
// Output start {
|
|
out << '{';
|
|
iter.json_index++;
|
|
|
|
// Handle empty {} (we don't want to come back around and print commas)
|
|
if (iter.type() == tape_type::END_OBJECT) {
|
|
out << '}';
|
|
depth--;
|
|
break;
|
|
}
|
|
|
|
is_object[depth] = true;
|
|
after_value = false;
|
|
continue;
|
|
}
|
|
|
|
// Scalars
|
|
case tape_type::STRING:
|
|
out << '"' << internal::escape_json_string(iter.get_string_view()) << '"';
|
|
break;
|
|
case tape_type::INT64:
|
|
out << iter.next_tape_value<int64_t>();
|
|
iter.json_index++; // numbers take up 2 spots, so we need to increment extra
|
|
break;
|
|
case tape_type::UINT64:
|
|
out << iter.next_tape_value<uint64_t>();
|
|
iter.json_index++; // numbers take up 2 spots, so we need to increment extra
|
|
break;
|
|
case tape_type::DOUBLE:
|
|
out << iter.next_tape_value<double>();
|
|
iter.json_index++; // numbers take up 2 spots, so we need to increment extra
|
|
break;
|
|
case tape_type::TRUE_VALUE:
|
|
out << "true";
|
|
break;
|
|
case tape_type::FALSE_VALUE:
|
|
out << "false";
|
|
break;
|
|
case tape_type::NULL_VALUE:
|
|
out << "null";
|
|
break;
|
|
|
|
// These are impossible
|
|
case tape_type::END_ARRAY:
|
|
case tape_type::END_OBJECT:
|
|
case tape_type::ROOT:
|
|
abort();
|
|
}
|
|
iter.json_index++;
|
|
after_value = true;
|
|
|
|
// Handle multiple ends in a row
|
|
while (depth != 0 && (iter.type() == tape_type::END_ARRAY || iter.type() == tape_type::END_OBJECT)) {
|
|
out << char(iter.type());
|
|
depth--;
|
|
iter.json_index++;
|
|
}
|
|
|
|
// Stop when we're at depth 0
|
|
} while (depth != 0);
|
|
|
|
return out;
|
|
}
|
|
template<>
|
|
inline std::ostream& minify<document::object>::print(std::ostream& out) {
|
|
out << '{';
|
|
auto pair = value.begin();
|
|
auto end = value.end();
|
|
if (pair != end) {
|
|
out << minify<document::key_value_pair>(*pair);
|
|
for (++pair; pair != end; ++pair) {
|
|
out << "," << minify<document::key_value_pair>(*pair);
|
|
}
|
|
}
|
|
return out << '}';
|
|
}
|
|
template<>
|
|
inline std::ostream& minify<document::array>::print(std::ostream& out) {
|
|
out << '[';
|
|
auto element = value.begin();
|
|
auto end = value.end();
|
|
if (element != end) {
|
|
out << minify<document::element>(*element);
|
|
for (++element; element != end; ++element) {
|
|
out << "," << minify<document::element>(*element);
|
|
}
|
|
}
|
|
return out << ']';
|
|
}
|
|
template<>
|
|
inline std::ostream& minify<document::key_value_pair>::print(std::ostream& out) {
|
|
return out << '"' << internal::escape_json_string(value.key) << "\":" << value.value;
|
|
}
|
|
|
|
#if SIMDJSON_EXCEPTIONS
|
|
|
|
template<>
|
|
inline std::ostream& minify<document::doc_move_result>::print(std::ostream& out) {
|
|
if (value.error()) { throw simdjson_error(value.error()); }
|
|
return out << minify<document>(value.first);
|
|
}
|
|
template<>
|
|
inline std::ostream& minify<document::doc_result>::print(std::ostream& out) {
|
|
if (value.error()) { throw simdjson_error(value.error()); }
|
|
return out << minify<document>(value.first);
|
|
}
|
|
template<>
|
|
inline std::ostream& minify<document::element_result>::print(std::ostream& out) {
|
|
if (value.error()) { throw simdjson_error(value.error()); }
|
|
return out << minify<document::element>(value.first);
|
|
}
|
|
template<>
|
|
inline std::ostream& minify<document::array_result>::print(std::ostream& out) {
|
|
if (value.error()) { throw simdjson_error(value.error()); }
|
|
return out << minify<document::array>(value.first);
|
|
}
|
|
template<>
|
|
inline std::ostream& minify<document::object_result>::print(std::ostream& out) {
|
|
if (value.error()) { throw simdjson_error(value.error()); }
|
|
return out << minify<document::object>(value.first);
|
|
}
|
|
|
|
#endif
|
|
|
|
} // namespace simdjson
|
|
|
|
#endif // SIMDJSON_INLINE_DOCUMENT_H
|
|
/* end file include/simdjson/inline/document.h */
|
|
/* begin file include/simdjson/inline/document_iterator.h */
|
|
#ifndef SIMDJSON_INLINE_DOCUMENT_ITERATOR_H
|
|
#define SIMDJSON_INLINE_DOCUMENT_ITERATOR_H
|
|
|
|
|
|
namespace simdjson {
|
|
|
|
// Because of template weirdness, the actual class definition is inline in the document class
|
|
|
|
template <size_t max_depth>
|
|
WARN_UNUSED bool document_iterator<max_depth>::is_ok() const {
|
|
return location < tape_length;
|
|
}
|
|
|
|
// useful for debugging purposes
|
|
template <size_t max_depth>
|
|
size_t document_iterator<max_depth>::get_tape_location() const {
|
|
return location;
|
|
}
|
|
|
|
// useful for debugging purposes
|
|
template <size_t max_depth>
|
|
size_t document_iterator<max_depth>::get_tape_length() const {
|
|
return tape_length;
|
|
}
|
|
|
|
// returns the current depth (start at 1 with 0 reserved for the fictitious root
|
|
// node)
|
|
template <size_t max_depth>
|
|
size_t document_iterator<max_depth>::get_depth() const {
|
|
return depth;
|
|
}
|
|
|
|
// A scope is a series of nodes at the same depth, typically it is either an
|
|
// object ({) or an array ([). The root node has type 'r'.
|
|
template <size_t max_depth>
|
|
uint8_t document_iterator<max_depth>::get_scope_type() const {
|
|
return depth_index[depth].scope_type;
|
|
}
|
|
|
|
template <size_t max_depth>
|
|
bool document_iterator<max_depth>::move_forward() {
|
|
if (location + 1 >= tape_length) {
|
|
return false; // we are at the end!
|
|
}
|
|
|
|
if ((current_type == '[') || (current_type == '{')) {
|
|
// We are entering a new scope
|
|
depth++;
|
|
assert(depth < max_depth);
|
|
depth_index[depth].start_of_scope = location;
|
|
depth_index[depth].scope_type = current_type;
|
|
} else if ((current_type == ']') || (current_type == '}')) {
|
|
// Leaving a scope.
|
|
depth--;
|
|
} else if (is_number()) {
|
|
// these types use 2 locations on the tape, not just one.
|
|
location += 1;
|
|
}
|
|
|
|
location += 1;
|
|
current_val = doc.tape[location];
|
|
current_type = (current_val >> 56);
|
|
return true;
|
|
}
|
|
|
|
template <size_t max_depth>
|
|
void document_iterator<max_depth>::move_to_value() {
|
|
// assume that we are on a key, so move by 1.
|
|
location += 1;
|
|
current_val = doc.tape[location];
|
|
current_type = (current_val >> 56);
|
|
}
|
|
|
|
template <size_t max_depth>
|
|
bool document_iterator<max_depth>::move_to_key(const char *key) {
|
|
if (down()) {
|
|
do {
|
|
const bool right_key = (strcmp(get_string(), key) == 0);
|
|
move_to_value();
|
|
if (right_key) {
|
|
return true;
|
|
}
|
|
} while (next());
|
|
up();
|
|
}
|
|
return false;
|
|
}
|
|
|
|
template <size_t max_depth>
|
|
bool document_iterator<max_depth>::move_to_key_insensitive(
|
|
const char *key) {
|
|
if (down()) {
|
|
do {
|
|
const bool right_key = (simdjson_strcasecmp(get_string(), key) == 0);
|
|
move_to_value();
|
|
if (right_key) {
|
|
return true;
|
|
}
|
|
} while (next());
|
|
up();
|
|
}
|
|
return false;
|
|
}
|
|
|
|
template <size_t max_depth>
|
|
bool document_iterator<max_depth>::move_to_key(const char *key,
|
|
uint32_t length) {
|
|
if (down()) {
|
|
do {
|
|
bool right_key = ((get_string_length() == length) &&
|
|
(memcmp(get_string(), key, length) == 0));
|
|
move_to_value();
|
|
if (right_key) {
|
|
return true;
|
|
}
|
|
} while (next());
|
|
up();
|
|
}
|
|
return false;
|
|
}
|
|
|
|
template <size_t max_depth>
|
|
bool document_iterator<max_depth>::move_to_index(uint32_t index) {
|
|
if (down()) {
|
|
uint32_t i = 0;
|
|
for (; i < index; i++) {
|
|
if (!next()) {
|
|
break;
|
|
}
|
|
}
|
|
if (i == index) {
|
|
return true;
|
|
}
|
|
up();
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Move to the entry that precedes the current one within the current scope.
// Returns false when the iterator was already on the first entry of the
// scope. Note that the iterator is moved to the start of the scope in that
// case too (to_start_scope() runs before the check).
//
// The tape is only linked forwards, so the walk restarts from the beginning
// of the scope and advances entry by entry until the next hop would reach
// the original location.
template <size_t max_depth> bool document_iterator<max_depth>::prev() {
  const size_t target_location = location;
  to_start_scope();
  size_t npos = location;
  if (target_location == npos) {
    return false; // we were already at the start
  }
  size_t oldnpos;
  // we have that npos < target_location here
  do {
    oldnpos = npos;
    // Decode the entry we are standing on at every step. Reusing the scope's
    // first entry for each hop computes wrong strides and never advances when
    // that first entry is a container.
    const uint64_t entry_val = doc.tape[npos];
    const uint8_t entry_type = static_cast<uint8_t>(entry_val >> 56);
    if ((entry_type == '[') || (entry_type == '{')) {
      // a container stores the index that follows its closing bracket
      npos = (entry_val & internal::JSON_VALUE_MASK);
    } else {
      // doubles, signed and unsigned integers all take two tape slots
      const bool two_slots =
          (entry_type == 'd') || (entry_type == 'l') || (entry_type == 'u');
      npos = npos + (two_slots ? 2 : 1);
    }
  } while (npos < target_location);
  location = oldnpos;
  current_val = doc.tape[location];
  current_type = current_val >> 56;
  return true;
}
|
|
|
|
// Leave the current scope and land on the tape entry just before the scope's
// recorded start. Returns false at depth 1, so the iterator never moves back
// to the root.
template <size_t max_depth> bool document_iterator<max_depth>::up() {
  const bool at_top_level = (depth == 1);
  if (at_top_level) {
    return false;
  }
  to_start_scope();
  // The entry right before the start of the scope belongs to the parent.
  --depth;
  --location;
  current_val = doc.tape[location];
  current_type = (current_val >> 56);
  return true;
}
|
|
|
|
// Enter the array or object the iterator is on and stop on its first entry.
// Returns false, without moving, when the iterator is on the last tape entry,
// is not on '[' or '{', or the container is empty.
template <size_t max_depth> bool document_iterator<max_depth>::down() {
  if (location + 1 >= tape_length) {
    return false;
  }
  const bool on_container = (current_type == '[') || (current_type == '{');
  if (!on_container) {
    return false;
  }
  const size_t after_scope = (current_val & internal::JSON_VALUE_MASK);
  if (after_scope == location + 2) {
    return false; // empty scope: only the two brackets
  }
  depth++;
  assert(depth < max_depth);
  location = location + 1;
  depth_index[depth].start_of_scope = location;
  // current_type still holds the opening bracket here: that is the scope type.
  depth_index[depth].scope_type = current_type;
  current_val = doc.tape[location];
  current_type = (current_val >> 56);
  return true;
}
|
|
|
|
template <size_t max_depth>
|
|
void document_iterator<max_depth>::to_start_scope() {
|
|
location = depth_index[depth].start_of_scope;
|
|
current_val = doc.tape[location];
|
|
current_type = (current_val >> 56);
|
|
}
|
|
|
|
// Move to the following entry of the current scope, skipping over the whole
// body of a container. Returns false, without moving, when the following
// entry is a closing ']' or '}'.
template <size_t max_depth> bool document_iterator<max_depth>::next() {
  size_t candidate;
  const bool on_container = (current_type == '[') || (current_type == '{');
  if (on_container) {
    // the container entry stores where to land after it
    candidate = (current_val & internal::JSON_VALUE_MASK);
  } else {
    candidate = location + (is_number() ? 2 : 1);
  }
  const uint64_t candidate_val = doc.tape[candidate];
  const uint8_t candidate_type = (candidate_val >> 56);
  if ((candidate_type == ']') || (candidate_type == '}')) {
    return false; // end of the scope
  }
  location = candidate;
  current_val = candidate_val;
  current_type = candidate_type;
  return true;
}
|
|
|
|
template <size_t max_depth>
|
|
document_iterator<max_depth>::document_iterator(const document &doc_) noexcept
|
|
: doc(doc_), depth(0), location(0), tape_length(0) {
|
|
depth_index[0].start_of_scope = location;
|
|
current_val = doc.tape[location++];
|
|
current_type = (current_val >> 56);
|
|
depth_index[0].scope_type = current_type;
|
|
tape_length = current_val & internal::JSON_VALUE_MASK;
|
|
if (location < tape_length) {
|
|
// If we make it here, then depth_capacity must >=2, but the compiler
|
|
// may not know this.
|
|
current_val = doc.tape[location];
|
|
current_type = (current_val >> 56);
|
|
depth++;
|
|
assert(depth < max_depth);
|
|
depth_index[depth].start_of_scope = location;
|
|
depth_index[depth].scope_type = current_type;
|
|
}
|
|
}
|
|
|
|
#if SIMDJSON_EXCEPTIONS
|
|
|
|
template <size_t max_depth>
|
|
document_iterator<max_depth>::document_iterator(const document::parser &parser) noexcept(false)
|
|
: document_iterator(parser.get_document()) {}
|
|
|
|
#endif
|
|
|
|
template <size_t max_depth>
|
|
document_iterator<max_depth>::document_iterator(
|
|
const document_iterator &o) noexcept
|
|
: doc(o.doc), depth(o.depth), location(o.location),
|
|
tape_length(o.tape_length), current_type(o.current_type),
|
|
current_val(o.current_val) {
|
|
memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0]));
|
|
}
|
|
|
|
template <size_t max_depth>
|
|
document_iterator<max_depth> &document_iterator<max_depth>::
|
|
operator=(const document_iterator &o) noexcept {
|
|
doc = o.doc;
|
|
depth = o.depth;
|
|
location = o.location;
|
|
tape_length = o.tape_length;
|
|
current_type = o.current_type;
|
|
current_val = o.current_val;
|
|
memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0]));
|
|
return *this;
|
|
}
|
|
|
|
template <size_t max_depth>
|
|
bool document_iterator<max_depth>::print(std::ostream &os, bool escape_strings) const {
|
|
if (!is_ok()) {
|
|
return false;
|
|
}
|
|
switch (current_type) {
|
|
case '"': // we have a string
|
|
os << '"';
|
|
if (escape_strings) {
|
|
os << internal::escape_json_string(std::string_view(get_string(), get_string_length()));
|
|
} else {
|
|
// was: os << get_string();, but given that we can include null chars, we
|
|
// have to do something crazier:
|
|
std::copy(get_string(), get_string() + get_string_length(), std::ostream_iterator<char>(os));
|
|
}
|
|
os << '"';
|
|
break;
|
|
case 'l': // we have a long int
|
|
os << get_integer();
|
|
break;
|
|
case 'u':
|
|
os << get_unsigned_integer();
|
|
break;
|
|
case 'd':
|
|
os << get_double();
|
|
break;
|
|
case 'n': // we have a null
|
|
os << "null";
|
|
break;
|
|
case 't': // we have a true
|
|
os << "true";
|
|
break;
|
|
case 'f': // we have a false
|
|
os << "false";
|
|
break;
|
|
case '{': // we have an object
|
|
case '}': // we end an object
|
|
case '[': // we start an array
|
|
case ']': // we end an array
|
|
os << static_cast<char>(current_type);
|
|
break;
|
|
default:
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
template <size_t max_depth>
|
|
bool document_iterator<max_depth>::move_to(const char *pointer,
|
|
uint32_t length) {
|
|
char *new_pointer = nullptr;
|
|
if (pointer[0] == '#') {
|
|
// Converting fragment representation to string representation
|
|
new_pointer = new char[length];
|
|
uint32_t new_length = 0;
|
|
for (uint32_t i = 1; i < length; i++) {
|
|
if (pointer[i] == '%' && pointer[i + 1] == 'x') {
|
|
#if __cpp_exceptions
|
|
try {
|
|
#endif
|
|
int fragment =
|
|
std::stoi(std::string(&pointer[i + 2], 2), nullptr, 16);
|
|
if (fragment == '\\' || fragment == '"' || (fragment <= 0x1F)) {
|
|
// escaping the character
|
|
new_pointer[new_length] = '\\';
|
|
new_length++;
|
|
}
|
|
new_pointer[new_length] = fragment;
|
|
i += 3;
|
|
#if __cpp_exceptions
|
|
} catch (std::invalid_argument &) {
|
|
delete[] new_pointer;
|
|
return false; // the fragment is invalid
|
|
}
|
|
#endif
|
|
} else {
|
|
new_pointer[new_length] = pointer[i];
|
|
}
|
|
new_length++;
|
|
}
|
|
length = new_length;
|
|
pointer = new_pointer;
|
|
}
|
|
|
|
// saving the current state
|
|
size_t depth_s = depth;
|
|
size_t location_s = location;
|
|
uint8_t current_type_s = current_type;
|
|
uint64_t current_val_s = current_val;
|
|
|
|
rewind(); // The json pointer is used from the root of the document.
|
|
|
|
bool found = relative_move_to(pointer, length);
|
|
delete[] new_pointer;
|
|
|
|
if (!found) {
|
|
// since the pointer has found nothing, we get back to the original
|
|
// position.
|
|
depth = depth_s;
|
|
location = location_s;
|
|
current_type = current_type_s;
|
|
current_val = current_val_s;
|
|
}
|
|
|
|
return found;
|
|
}
|
|
|
|
template <size_t max_depth>
|
|
bool document_iterator<max_depth>::relative_move_to(const char *pointer,
|
|
uint32_t length) {
|
|
if (length == 0) {
|
|
// returns the whole document
|
|
return true;
|
|
}
|
|
|
|
if (pointer[0] != '/') {
|
|
// '/' must be the first character
|
|
return false;
|
|
}
|
|
|
|
// finding the key in an object or the index in an array
|
|
std::string key_or_index;
|
|
uint32_t offset = 1;
|
|
|
|
// checking for the "-" case
|
|
if (is_array() && pointer[1] == '-') {
|
|
if (length != 2) {
|
|
// the pointer must be exactly "/-"
|
|
// there can't be anything more after '-' as an index
|
|
return false;
|
|
}
|
|
key_or_index = '-';
|
|
offset = length; // will skip the loop coming right after
|
|
}
|
|
|
|
// We either transform the first reference token to a valid json key
|
|
// or we make sure it is a valid index in an array.
|
|
for (; offset < length; offset++) {
|
|
if (pointer[offset] == '/') {
|
|
// beginning of the next key or index
|
|
break;
|
|
}
|
|
if (is_array() && (pointer[offset] < '0' || pointer[offset] > '9')) {
|
|
// the index of an array must be an integer
|
|
// we also make sure std::stoi won't discard whitespaces later
|
|
return false;
|
|
}
|
|
if (pointer[offset] == '~') {
|
|
// "~1" represents "/"
|
|
if (pointer[offset + 1] == '1') {
|
|
key_or_index += '/';
|
|
offset++;
|
|
continue;
|
|
}
|
|
// "~0" represents "~"
|
|
if (pointer[offset + 1] == '0') {
|
|
key_or_index += '~';
|
|
offset++;
|
|
continue;
|
|
}
|
|
}
|
|
if (pointer[offset] == '\\') {
|
|
if (pointer[offset + 1] == '\\' || pointer[offset + 1] == '"' ||
|
|
(pointer[offset + 1] <= 0x1F)) {
|
|
key_or_index += pointer[offset + 1];
|
|
offset++;
|
|
continue;
|
|
}
|
|
return false; // invalid escaped character
|
|
}
|
|
if (pointer[offset] == '\"') {
|
|
// unescaped quote character. this is an invalid case.
|
|
// lets do nothing and assume most pointers will be valid.
|
|
// it won't find any corresponding json key anyway.
|
|
// return false;
|
|
}
|
|
key_or_index += pointer[offset];
|
|
}
|
|
|
|
bool found = false;
|
|
if (is_object()) {
|
|
if (move_to_key(key_or_index.c_str(), key_or_index.length())) {
|
|
found = relative_move_to(pointer + offset, length - offset);
|
|
}
|
|
} else if (is_array()) {
|
|
if (key_or_index == "-") { // handling "-" case first
|
|
if (down()) {
|
|
while (next())
|
|
; // moving to the end of the array
|
|
// moving to the nonexistent value right after...
|
|
size_t npos;
|
|
if ((current_type == '[') || (current_type == '{')) {
|
|
// we need to jump
|
|
npos = (current_val & internal::JSON_VALUE_MASK);
|
|
} else {
|
|
npos =
|
|
location + ((current_type == 'd' || current_type == 'l') ? 2 : 1);
|
|
}
|
|
location = npos;
|
|
current_val = doc.tape[npos];
|
|
current_type = (current_val >> 56);
|
|
return true; // how could it fail ?
|
|
}
|
|
} else { // regular numeric index
|
|
// The index can't have a leading '0'
|
|
if (key_or_index[0] == '0' && key_or_index.length() > 1) {
|
|
return false;
|
|
}
|
|
// it cannot be empty
|
|
if (key_or_index.length() == 0) {
|
|
return false;
|
|
}
|
|
// we already checked the index contains only valid digits
|
|
uint32_t index = std::stoi(key_or_index);
|
|
if (move_to_index(index)) {
|
|
found = relative_move_to(pointer + offset, length - offset);
|
|
}
|
|
}
|
|
}
|
|
|
|
return found;
|
|
}
|
|
|
|
} // namespace simdjson
|
|
|
|
#endif // SIMDJSON_INLINE_DOCUMENT_ITERATOR_H
|
|
/* end file include/simdjson/inline/document_iterator.h */
|
|
/* begin file include/simdjson/inline/document_stream.h */
|
|
#ifndef SIMDJSON_INLINE_DOCUMENT_STREAM_H
|
|
#define SIMDJSON_INLINE_DOCUMENT_STREAM_H
|
|
|
|
#include <algorithm>
|
|
#include <limits>
|
|
#include <stdexcept>
|
|
#include <thread>
|
|
|
|
namespace simdjson::internal {
|
|
|
|
/**
|
|
* This algorithm is used to quickly identify the buffer position of
|
|
* the last JSON document inside the current batch.
|
|
*
|
|
* It does its work by finding the last pair of structural characters
|
|
* that represent the end followed by the start of a document.
|
|
*
|
|
* Simply put, we iterate over the structural characters, starting from
|
|
* the end. We consider that we found the end of a JSON document when the
|
|
* first element of the pair is NOT one of these characters: '{' '[' ';' ','
|
|
* and when the second element is NOT one of these characters: '}' '}' ';' ','.
|
|
*
|
|
* This simple comparison works most of the time, but it does not cover cases
|
|
* where the batch's structural indexes contain a perfect amount of documents.
|
|
* In such a case, we do not have access to the structural index which follows
|
|
* the last document, therefore, we do not have access to the second element in
|
|
* the pair, and means that we cannot identify the last document. To fix this
|
|
* issue, we keep a count of the open and closed curly/square braces we found
|
|
* while searching for the pair. When we find a pair AND the count of open and
|
|
* closed curly/square braces is the same, we know that we just passed a
|
|
* complete
|
|
* document, therefore the last json buffer location is the end of the batch
|
|
* */
|
|
inline size_t find_last_json_buf_idx(const uint8_t *buf, size_t size, const document::parser &parser) {
|
|
// this function can be generally useful
|
|
if (parser.n_structural_indexes == 0)
|
|
return 0;
|
|
auto last_i = parser.n_structural_indexes - 1;
|
|
if (parser.structural_indexes[last_i] == size) {
|
|
if (last_i == 0)
|
|
return 0;
|
|
last_i = parser.n_structural_indexes - 2;
|
|
}
|
|
auto arr_cnt = 0;
|
|
auto obj_cnt = 0;
|
|
for (auto i = last_i; i > 0; i--) {
|
|
auto idxb = parser.structural_indexes[i];
|
|
switch (buf[idxb]) {
|
|
case ':':
|
|
case ',':
|
|
continue;
|
|
case '}':
|
|
obj_cnt--;
|
|
continue;
|
|
case ']':
|
|
arr_cnt--;
|
|
continue;
|
|
case '{':
|
|
obj_cnt++;
|
|
break;
|
|
case '[':
|
|
arr_cnt++;
|
|
break;
|
|
}
|
|
auto idxa = parser.structural_indexes[i - 1];
|
|
switch (buf[idxa]) {
|
|
case '{':
|
|
case '[':
|
|
case ':':
|
|
case ',':
|
|
continue;
|
|
}
|
|
if (!arr_cnt && !obj_cnt) {
|
|
return last_i + 1;
|
|
}
|
|
return i;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// Returns true when the byte value is in the ASCII range (0..127).
static inline bool is_ascii(char c) {
  return static_cast<unsigned char>(c) < 0x80;
}

// Returns `len` reduced so that the buffer ends on an ASCII character: any
// trailing non-ASCII (UTF-8 multi-byte) bytes are dropped. May return 0.
static inline size_t trimmed_length_safe_utf8(const char * c, size_t len) {
  size_t trimmed = len;
  for (; trimmed > 0; trimmed--) {
    if (is_ascii(c[trimmed - 1])) {
      break;
    }
  }
  return trimmed;
}
|
|
|
|
} // namespace simdjson::internal
|
|
|
|
namespace simdjson {
|
|
|
|
// Store the parser, buffer, length and batch size, then -- unless an error
// was handed in -- run json_parse() once and keep its result in `error`.
really_inline document::stream::stream(
  document::parser &_parser,
  const uint8_t *buf,
  size_t len,
  size_t batch_size,
  error_code _error
) noexcept : parser{_parser}, _buf{buf}, _len{len}, _batch_size(batch_size), error{_error} {
  if (error) {
    return;
  }
  error = json_parse();
}
|
|
|
|
// With threads enabled, join the stage 1 worker thread if it is joinable.
inline document::stream::~stream() noexcept {
#ifdef SIMDJSON_THREADS_ENABLED
  if (!stage_1_thread.joinable()) {
    return;
  }
  stage_1_thread.join();
#endif
}
|
|
|
|
// Iterator over this stream that is not marked as finished.
really_inline document::stream::iterator document::stream::begin() noexcept {
  constexpr bool is_end = false;
  return iterator(*this, is_end);
}
|
|
|
|
// Iterator over this stream that is marked as finished.
really_inline document::stream::iterator document::stream::end() noexcept {
  constexpr bool is_end = true;
  return iterator(*this, is_end);
}
|
|
|
|
// Bind the iterator to `stream`; `_is_end` sets the initial finished flag.
really_inline document::stream::iterator::iterator(stream& stream, bool _is_end) noexcept
    : _stream{stream},
      finished{_is_end} {
}
|
|
|
|
// Result for the current document: the parser's document together with the
// stream's error, where SUCCESS_AND_HAS_MORE is reported as SUCCESS.
really_inline document::doc_result document::stream::iterator::operator*() noexcept {
  error_code reported = _stream.error;
  if (reported == SUCCESS_AND_HAS_MORE) {
    reported = SUCCESS;
  }
  return doc_result(_stream.parser.doc, reported);
}
|
|
|
|
// Advance: parse the next document while the stream reports
// SUCCESS_AND_HAS_MORE; otherwise mark the iterator as finished.
really_inline document::stream::iterator& document::stream::iterator::operator++() noexcept {
  if (_stream.error != SUCCESS_AND_HAS_MORE) {
    finished = true;
    return *this;
  }
  _stream.error = _stream.json_parse();
  return *this;
}
|
|
|
|
// Two iterators differ exactly when their finished flags differ.
really_inline bool document::stream::iterator::operator!=(const document::stream::iterator &other) const noexcept {
  return !(finished == other.finished);
}
|
|
|
|
#ifdef SIMDJSON_THREADS_ENABLED
|
|
|
|
// threaded version of json_parse
|
|
// todo: simplify this code further
|
|
inline error_code document::stream::json_parse() noexcept {
|
|
error = parser.ensure_capacity(_batch_size);
|
|
if (error) { return error; }
|
|
error = parser_thread.ensure_capacity(_batch_size);
|
|
if (error) { return error; }
|
|
|
|
if (unlikely(load_next_batch)) {
|
|
// First time loading
|
|
if (!stage_1_thread.joinable()) {
|
|
_batch_size = (std::min)(_batch_size, remaining());
|
|
_batch_size = internal::trimmed_length_safe_utf8((const char *)buf(), _batch_size);
|
|
if (_batch_size == 0) {
|
|
return simdjson::UTF8_ERROR;
|
|
}
|
|
auto stage1_is_ok = error_code(simdjson::active_implementation->stage1(buf(), _batch_size, parser, true));
|
|
if (stage1_is_ok != simdjson::SUCCESS) {
|
|
return stage1_is_ok;
|
|
}
|
|
size_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser);
|
|
if (last_index == 0) {
|
|
if (parser.n_structural_indexes == 0) {
|
|
return simdjson::EMPTY;
|
|
}
|
|
} else {
|
|
parser.n_structural_indexes = last_index + 1;
|
|
}
|
|
}
|
|
// the second thread is running or done.
|
|
else {
|
|
stage_1_thread.join();
|
|
if (stage1_is_ok_thread != simdjson::SUCCESS) {
|
|
return stage1_is_ok_thread;
|
|
}
|
|
std::swap(parser.structural_indexes, parser_thread.structural_indexes);
|
|
parser.n_structural_indexes = parser_thread.n_structural_indexes;
|
|
advance(last_json_buffer_loc);
|
|
n_bytes_parsed += last_json_buffer_loc;
|
|
}
|
|
// let us decide whether we will start a new thread
|
|
if (remaining() - _batch_size > 0) {
|
|
last_json_buffer_loc =
|
|
parser.structural_indexes[internal::find_last_json_buf_idx(buf(), _batch_size, parser)];
|
|
_batch_size = (std::min)(_batch_size, remaining() - last_json_buffer_loc);
|
|
if (_batch_size > 0) {
|
|
_batch_size = internal::trimmed_length_safe_utf8(
|
|
(const char *)(buf() + last_json_buffer_loc), _batch_size);
|
|
if (_batch_size == 0) {
|
|
return simdjson::UTF8_ERROR;
|
|
}
|
|
// let us capture read-only variables
|
|
const uint8_t *const b = buf() + last_json_buffer_loc;
|
|
const size_t bs = _batch_size;
|
|
// we call the thread on a lambda that will update
|
|
// this->stage1_is_ok_thread
|
|
// there is only one thread that may write to this value
|
|
stage_1_thread = std::thread([this, b, bs] {
|
|
this->stage1_is_ok_thread = error_code(simdjson::active_implementation->stage1(b, bs, this->parser_thread, true));
|
|
});
|
|
}
|
|
}
|
|
next_json = 0;
|
|
load_next_batch = false;
|
|
} // load_next_batch
|
|
error_code res = simdjson::active_implementation->stage2(buf(), remaining(), parser, next_json);
|
|
if (res == simdjson::SUCCESS_AND_HAS_MORE) {
|
|
n_parsed_docs++;
|
|
current_buffer_loc = parser.structural_indexes[next_json];
|
|
load_next_batch = (current_buffer_loc == last_json_buffer_loc);
|
|
} else if (res == simdjson::SUCCESS) {
|
|
n_parsed_docs++;
|
|
if (remaining() > _batch_size) {
|
|
current_buffer_loc = parser.structural_indexes[next_json - 1];
|
|
load_next_batch = true;
|
|
res = simdjson::SUCCESS_AND_HAS_MORE;
|
|
}
|
|
}
|
|
return res;
|
|
}
|
|
|
|
#else // SIMDJSON_THREADS_ENABLED
|
|
|
|
// single-threaded version of json_parse
|
|
inline error_code document::stream::json_parse() noexcept {
|
|
error = parser.ensure_capacity(_batch_size);
|
|
if (error) { return error; }
|
|
|
|
if (unlikely(load_next_batch)) {
|
|
advance(current_buffer_loc);
|
|
n_bytes_parsed += current_buffer_loc;
|
|
_batch_size = (std::min)(_batch_size, remaining());
|
|
_batch_size = internal::trimmed_length_safe_utf8((const char *)buf(), _batch_size);
|
|
auto stage1_is_ok = (error_code)simdjson::active_implementation->stage1(buf(), _batch_size, parser, true);
|
|
if (stage1_is_ok != simdjson::SUCCESS) {
|
|
return stage1_is_ok;
|
|
}
|
|
size_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser);
|
|
if (last_index == 0) {
|
|
if (parser.n_structural_indexes == 0) {
|
|
return EMPTY;
|
|
}
|
|
} else {
|
|
parser.n_structural_indexes = last_index + 1;
|
|
}
|
|
load_next_batch = false;
|
|
} // load_next_batch
|
|
error_code res = simdjson::active_implementation->stage2(buf(), remaining(), parser, next_json);
|
|
if (likely(res == simdjson::SUCCESS_AND_HAS_MORE)) {
|
|
n_parsed_docs++;
|
|
current_buffer_loc = parser.structural_indexes[next_json];
|
|
} else if (res == simdjson::SUCCESS) {
|
|
n_parsed_docs++;
|
|
if (remaining() > _batch_size) {
|
|
current_buffer_loc = parser.structural_indexes[next_json - 1];
|
|
next_json = 1;
|
|
load_next_batch = true;
|
|
res = simdjson::SUCCESS_AND_HAS_MORE;
|
|
}
|
|
}
|
|
return res;
|
|
}
|
|
#endif // SIMDJSON_THREADS_ENABLED
|
|
|
|
} // end of namespace simdjson
|
|
#endif // SIMDJSON_INLINE_DOCUMENT_STREAM_H
|
|
/* end file include/simdjson/inline/document_stream.h */
|
|
/* begin file include/simdjson/inline/error.h */
|
|
#ifndef SIMDJSON_INLINE_ERROR_H
|
|
#define SIMDJSON_INLINE_ERROR_H
|
|
|
|
#include <string>
|
|
|
|
namespace simdjson::internal {
|
|
// We store the error code so we can validate the error message is associated with the right code
|
|
struct error_code_info {
|
|
error_code code;
|
|
std::string message;
|
|
};
|
|
// These MUST match the codes in error_code. We check this constraint in basictests.
|
|
inline const error_code_info error_codes[] {
|
|
{ SUCCESS, "No error" },
|
|
{ SUCCESS_AND_HAS_MORE, "No error and buffer still has more data" },
|
|
{ CAPACITY, "This parser can't support a document that big" },
|
|
{ MEMALLOC, "Error allocating memory, we're most likely out of memory" },
|
|
{ TAPE_ERROR, "Something went wrong while writing to the tape" },
|
|
{ DEPTH_ERROR, "The JSON document was too deep (too many nested objects and arrays)" },
|
|
{ STRING_ERROR, "Problem while parsing a string" },
|
|
{ T_ATOM_ERROR, "Problem while parsing an atom starting with the letter 't'" },
|
|
{ F_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'f'" },
|
|
{ N_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'n'" },
|
|
{ NUMBER_ERROR, "Problem while parsing a number" },
|
|
{ UTF8_ERROR, "The input is not valid UTF-8" },
|
|
{ UNINITIALIZED, "Uninitialized" },
|
|
{ EMPTY, "Empty: no JSON found" },
|
|
{ UNESCAPED_CHARS, "Within strings, some characters must be escaped, we found unescaped characters" },
|
|
{ UNCLOSED_STRING, "A string is opened, but never closed." },
|
|
{ UNSUPPORTED_ARCHITECTURE, "simdjson does not have an implementation supported by this CPU architecture (perhaps it's a non-SIMD CPU?)." },
|
|
{ INCORRECT_TYPE, "The JSON element does not have the requested type." },
|
|
{ NUMBER_OUT_OF_RANGE, "The JSON number is too large or too small to fit within the requested type." },
|
|
{ NO_SUCH_FIELD, "The JSON field referenced does not exist in this object." },
|
|
{ IO_ERROR, "Error reading the file." },
|
|
{ UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson" }
|
|
}; // error_messages[]
|
|
} // namespace simdjson::internal
|
|
|
|
namespace simdjson {
|
|
|
|
// Message for `error` as a C string. No range check is done: the value is
// trusted to come from the error_code enum.
inline const char *error_message(error_code error) noexcept {
  const auto &entry = internal::error_codes[int(error)];
  return entry.message.c_str();
}
|
|
|
|
inline const std::string &error_message(int error) noexcept {
|
|
if (error < 0 || error >= error_code::NUM_ERROR_CODES) {
|
|
return internal::error_codes[UNEXPECTED_ERROR].message;
|
|
}
|
|
return internal::error_codes[error].message;
|
|
}
|
|
|
|
inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept {
|
|
return out << error_message(error);
|
|
}
|
|
|
|
} // namespace simdjson
|
|
|
|
#endif // SIMDJSON_INLINE_ERROR_H
|
|
/* end file include/simdjson/inline/error.h */
|
|
/* begin file include/simdjson/inline/jsonstream.h */
|
|
// TODO Remove this -- deprecated API and files
|
|
|
|
#ifndef SIMDJSON_INLINE_JSONSTREAM_H
|
|
#define SIMDJSON_INLINE_JSONSTREAM_H
|
|
|
|
|
|
namespace simdjson {
|
|
|
|
template <class string_container>
|
|
inline JsonStream<string_container>::JsonStream(const string_container &s, size_t _batch_size) noexcept
|
|
: str(s), batch_size(_batch_size) {
|
|
}
|
|
|
|
template <class string_container>
|
|
inline JsonStream<string_container>::~JsonStream() noexcept {
|
|
if (stream) { delete stream; }
|
|
}
|
|
|
|
template <class string_container>
|
|
inline int JsonStream<string_container>::json_parse(document::parser &parser) noexcept {
|
|
if (unlikely(stream == nullptr)) {
|
|
stream = new document::stream(parser, reinterpret_cast<const uint8_t*>(str.data()), str.length(), batch_size);
|
|
} else {
|
|
if (&parser != &stream->parser) { return stream->error = TAPE_ERROR; }
|
|
stream->error = stream->json_parse();
|
|
}
|
|
return stream->error;
|
|
}
|
|
|
|
} // namespace simdjson
|
|
|
|
#endif // SIMDJSON_INLINE_JSONSTREAM_H
|
|
/* end file include/simdjson/inline/jsonstream.h */
|
|
/* begin file include/simdjson/inline/padded_string.h */
|
|
#ifndef SIMDJSON_INLINE_PADDED_STRING_H
|
|
#define SIMDJSON_INLINE_PADDED_STRING_H
|
|
|
|
|
|
#include <climits>
|
|
#include <cstring>
|
|
#include <memory>
|
|
#include <string>
|
|
|
|
namespace simdjson::internal {
|
|
|
|
// low-level function to allocate memory with padding so we can read past the
|
|
// "length" bytes safely. if you must provide a pointer to some data, create it
|
|
// with this function: length is the max. size in bytes of the string caller is
|
|
// responsible to free the memory (free(...))
|
|
inline char *allocate_padded_buffer(size_t length) noexcept {
|
|
// we could do a simple malloc
|
|
// return (char *) malloc(length + SIMDJSON_PADDING);
|
|
// However, we might as well align to cache lines...
|
|
size_t totalpaddedlength = length + SIMDJSON_PADDING;
|
|
char *padded_buffer = aligned_malloc_char(64, totalpaddedlength);
|
|
#ifndef NDEBUG
|
|
if (padded_buffer == nullptr) {
|
|
return nullptr;
|
|
}
|
|
#endif // NDEBUG
|
|
memset(padded_buffer + length, 0, totalpaddedlength - length);
|
|
return padded_buffer;
|
|
} // allocate_padded_buffer()
|
|
|
|
} // namespace simdjson::internal
|
|
|
|
namespace simdjson {
|
|
|
|
// Creates an empty padded_string: size zero and no buffer allocated.
inline padded_string::padded_string() noexcept
    : viable_size(0),
      data_ptr(nullptr) {
}
|
|
// Allocates a padded buffer able to hold `length` bytes of content.
// The content bytes are not filled in here. If the allocation fails,
// data() is null (size() still reports `length`).
inline padded_string::padded_string(size_t length) noexcept
    : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) {
  if (data_ptr == nullptr) { return; }
  // Terminate right after the content so the buffer is usable as a C string.
  data_ptr[length] = '\0';
}
|
|
// Allocates a padded buffer and copies `length` bytes from `data` into it.
// Nothing is copied when `data` is null or when the allocation failed.
inline padded_string::padded_string(const char *data, size_t length) noexcept
    : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) {
  const bool nothing_to_copy = (data == nullptr) || (data_ptr == nullptr);
  if (nothing_to_copy) { return; }

  memcpy(data_ptr, data, length);
  // Terminate right after the content so the buffer is usable as a C string.
  data_ptr[length] = '\0';
}
|
|
// note: do not pass std::string arguments by value
|
|
inline padded_string::padded_string(const std::string & str_ ) noexcept
|
|
: viable_size(str_.size()), data_ptr(internal::allocate_padded_buffer(str_.size())) {
|
|
if (data_ptr != nullptr) {
|
|
memcpy(data_ptr, str_.data(), str_.size());
|
|
data_ptr[str_.size()] = '\0'; // easier when you need a c_str
|
|
}
|
|
}
|
|
// note: do pass std::string_view arguments by value
|
|
inline padded_string::padded_string(std::string_view sv_) noexcept
|
|
: viable_size(sv_.size()), data_ptr(internal::allocate_padded_buffer(sv_.size())) {
|
|
if (data_ptr != nullptr) {
|
|
memcpy(data_ptr, sv_.data(), sv_.size());
|
|
data_ptr[sv_.size()] = '\0'; // easier when you need a c_str
|
|
}
|
|
}
|
|
// Move constructor: takes over the buffer owned by `o`.
// Afterwards `o` is empty (null data, size zero), which matches the state the
// move assignment operator leaves its source in.
inline padded_string::padded_string(padded_string &&o) noexcept
    : viable_size(o.viable_size), data_ptr(o.data_ptr) {
  o.data_ptr = nullptr; // we take ownership
  o.viable_size = 0;    // keep o.size() consistent with its now-null data()
}
|
|
|
|
// Move assignment: releases the buffer currently held and takes over the one
// owned by `o`, leaving `o` empty (null data, size zero).
// Self-assignment is a no-op; without the guard the object would free its own
// buffer and end up empty.
inline padded_string &padded_string::operator=(padded_string &&o) noexcept {
  if (this != &o) {
    aligned_free_char(data_ptr);
    data_ptr = o.data_ptr;
    viable_size = o.viable_size;
    o.data_ptr = nullptr; // we take ownership
    o.viable_size = 0;
  }
  return *this;
}
|
|
|
|
// Exchanges the buffer pointer and the size of this object with those of `o`.
// No allocation or copy of the content takes place.
inline void padded_string::swap(padded_string &o) noexcept {
  // Exchange the buffers...
  char *const previous_data = data_ptr;
  data_ptr = o.data_ptr;
  o.data_ptr = previous_data;

  // ...and the sizes that go with them.
  const size_t previous_size = viable_size;
  viable_size = o.viable_size;
  o.viable_size = previous_size;
}
|
|
|
|
// Releases the owned buffer by handing data_ptr to aligned_free_char.
inline padded_string::~padded_string() noexcept
{
  aligned_free_char(data_ptr);
}
|
|
|
|
// Number of content bytes (the padding is not counted).
inline size_t padded_string::size() const noexcept {
  return viable_size;
}
|
|
|
|
// Same value as size(): number of content bytes (the padding is not counted).
inline size_t padded_string::length() const noexcept {
  return viable_size;
}
|
|
|
|
// Read-only pointer to the start of the buffer; null when no buffer is held.
inline const char *padded_string::data() const noexcept {
  return data_ptr;
}
|
|
|
|
// Writable pointer to the start of the buffer; null when no buffer is held.
inline char *padded_string::data() noexcept {
  return data_ptr;
}
|
|
|
|
// Reads the whole file `filename` into a new padded_string.
//
// Returns:
//  - IO_ERROR if the file cannot be opened, its size cannot be determined
//    (seek/tell failure, or ftell reporting LONG_MAX), fewer bytes than
//    expected are read, or closing the file fails;
//  - MEMALLOC if the padded buffer cannot be allocated;
//  - the loaded padded_string otherwise.
inline simdjson_move_result<padded_string> padded_string::load(const std::string &filename) noexcept {
  // Open in binary mode so the byte count matches what fread delivers.
  std::FILE *file = std::fopen(filename.c_str(), "rb");
  if (file == nullptr) { return IO_ERROR; }

  // Determine the size by seeking to the end and asking for the offset.
  if (std::fseek(file, 0, SEEK_END) < 0) {
    std::fclose(file);
    return IO_ERROR;
  }
  const long reported_size = std::ftell(file);
  const bool size_unusable = (reported_size < 0) || (reported_size == LONG_MAX);
  if (size_unusable) {
    std::fclose(file);
    return IO_ERROR;
  }

  // Reserve a padded buffer large enough for the whole file.
  const size_t byte_count = static_cast<size_t>(reported_size);
  padded_string contents(byte_count);
  if (contents.data() == nullptr) {
    std::fclose(file);
    return MEMALLOC;
  }

  // Go back to the beginning and read everything in one call.
  std::rewind(file);
  const size_t bytes_read = std::fread(contents.data(), 1, byte_count, file);

  // The file is always closed before the outcome of the read is examined.
  const bool close_failed = (std::fclose(file) != 0);
  if (close_failed || bytes_read != byte_count) { return IO_ERROR; }

  return std::move(contents);
}
|
|
|
|
} // namespace simdjson
|
|
|
|
#endif // SIMDJSON_INLINE_PADDED_STRING_H
|
|
/* end file include/simdjson/inline/padded_string.h */
|
|
|
|
#endif // SIMDJSON_H
|
|
/* end file include/simdjson.h */
|