Last commit for version 0.9.0. (#1503)
* Last commit for version 0.9.0. * Removing space.
This commit is contained in:
parent
e35088d6ff
commit
2db4592571
|
@ -8,11 +8,11 @@ project(simdjson
|
||||||
)
|
)
|
||||||
|
|
||||||
set(PROJECT_VERSION_MAJOR 0)
|
set(PROJECT_VERSION_MAJOR 0)
|
||||||
set(PROJECT_VERSION_MINOR 8)
|
set(PROJECT_VERSION_MINOR 9)
|
||||||
set(PROJECT_VERSION_PATCH 0)
|
set(PROJECT_VERSION_PATCH 0)
|
||||||
set(SIMDJSON_SEMANTIC_VERSION "0.8.0" CACHE STRING "simdjson semantic version")
|
set(SIMDJSON_SEMANTIC_VERSION "0.9.0" CACHE STRING "simdjson semantic version")
|
||||||
set(SIMDJSON_LIB_VERSION "7.0.0" CACHE STRING "simdjson library version")
|
set(SIMDJSON_LIB_VERSION "8.0.0" CACHE STRING "simdjson library version")
|
||||||
set(SIMDJSON_LIB_SOVERSION "7" CACHE STRING "simdjson library soversion")
|
set(SIMDJSON_LIB_SOVERSION "8" CACHE STRING "simdjson library soversion")
|
||||||
set(SIMDJSON_GITHUB_REPOSITORY https://github.com/simdjson/simdjson)
|
set(SIMDJSON_GITHUB_REPOSITORY https://github.com/simdjson/simdjson)
|
||||||
|
|
||||||
include(GNUInstallDirs)
|
include(GNUInstallDirs)
|
||||||
|
|
2
Doxyfile
2
Doxyfile
|
@ -38,7 +38,7 @@ PROJECT_NAME = simdjson
|
||||||
# could be handy for archiving the generated documentation or if some version
|
# could be handy for archiving the generated documentation or if some version
|
||||||
# control system is used.
|
# control system is used.
|
||||||
|
|
||||||
PROJECT_NUMBER = "0.8.0"
|
PROJECT_NUMBER = "0.9.0"
|
||||||
|
|
||||||
# Using the PROJECT_BRIEF tag one can provide an optional one line description
|
# Using the PROJECT_BRIEF tag one can provide an optional one line description
|
||||||
# for a project that appears at the top of each page and should give viewer a
|
# for a project that appears at the top of each page and should give viewer a
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
[![Ubuntu 20.04 CI](https://github.com/simdjson/simdjson/workflows/Ubuntu%2020.04%20CI%20(GCC%209)/badge.svg)](https://simdjson.org/plots.html)
|
[![Ubuntu 20.04 CI](https://github.com/simdjson/simdjson/workflows/Ubuntu%2020.04%20CI%20(GCC%209)/badge.svg)](https://simdjson.org/plots.html)
|
||||||
![VS16-CI](https://github.com/simdjson/simdjson/workflows/VS16-CI/badge.svg)
|
![VS16-CI](https://github.com/simdjson/simdjson/workflows/VS16-CI/badge.svg)
|
||||||
![MinGW64-CI](https://github.com/simdjson/simdjson/workflows/MinGW64-CI/badge.svg)
|
![MinGW64-CI](https://github.com/simdjson/simdjson/workflows/MinGW64-CI/badge.svg)
|
||||||
[![][license img]][license] [![Doxygen Documentation](https://img.shields.io/badge/docs-doxygen-green.svg)](https://simdjson.org/api/0.8.0/index.html)
|
[![][license img]][license] [![Doxygen Documentation](https://img.shields.io/badge/docs-doxygen-green.svg)](https://simdjson.org/api/0.9.0/index.html)
|
||||||
|
|
||||||
simdjson : Parsing gigabytes of JSON per second
|
simdjson : Parsing gigabytes of JSON per second
|
||||||
===============================================
|
===============================================
|
||||||
|
@ -79,7 +79,7 @@ Usage documentation is available:
|
||||||
* [Performance](doc/performance.md) shows some more advanced scenarios and how to tune for them.
|
* [Performance](doc/performance.md) shows some more advanced scenarios and how to tune for them.
|
||||||
* [Implementation Selection](doc/implementation-selection.md) describes runtime CPU detection and
|
* [Implementation Selection](doc/implementation-selection.md) describes runtime CPU detection and
|
||||||
how you can work with it.
|
how you can work with it.
|
||||||
* [API](https://simdjson.org/api/0.8.0/annotated.html) contains the automatically generated API documentation.
|
* [API](https://simdjson.org/api/0.9.0/annotated.html) contains the automatically generated API documentation.
|
||||||
|
|
||||||
Performance results
|
Performance results
|
||||||
-------------------
|
-------------------
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
#define SIMDJSON_SIMDJSON_VERSION_H
|
#define SIMDJSON_SIMDJSON_VERSION_H
|
||||||
|
|
||||||
/** The version of simdjson being used (major.minor.revision) */
|
/** The version of simdjson being used (major.minor.revision) */
|
||||||
#define SIMDJSON_VERSION 0.8.0
|
#define SIMDJSON_VERSION 0.9.0
|
||||||
|
|
||||||
namespace simdjson {
|
namespace simdjson {
|
||||||
enum {
|
enum {
|
||||||
|
@ -15,7 +15,7 @@ enum {
|
||||||
/**
|
/**
|
||||||
* The minor version (major.MINOR.revision) of simdjson being used.
|
* The minor version (major.MINOR.revision) of simdjson being used.
|
||||||
*/
|
*/
|
||||||
SIMDJSON_VERSION_MINOR = 8,
|
SIMDJSON_VERSION_MINOR = 9,
|
||||||
/**
|
/**
|
||||||
* The revision (major.minor.REVISION) of simdjson being used.
|
* The revision (major.minor.REVISION) of simdjson being used.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* auto-generated on 2021-02-10 16:52:04 -0500. Do not edit! */
|
/* auto-generated on 2021-03-16 17:57:23 -0400. Do not edit! */
|
||||||
/* begin file src/simdjson.cpp */
|
/* begin file src/simdjson.cpp */
|
||||||
#include "simdjson.h"
|
#include "simdjson.h"
|
||||||
|
|
||||||
|
@ -1477,7 +1477,8 @@ namespace internal {
|
||||||
{ INVALID_URI_FRAGMENT, "Invalid URI fragment syntax." },
|
{ INVALID_URI_FRAGMENT, "Invalid URI fragment syntax." },
|
||||||
{ UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson" },
|
{ UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson" },
|
||||||
{ PARSER_IN_USE, "Cannot parse a new document while a document is still in use." },
|
{ PARSER_IN_USE, "Cannot parse a new document while a document is still in use." },
|
||||||
{ OUT_OF_ORDER_ITERATION, "Objects and arrays can only be iterated when they are first encountered." }
|
{ OUT_OF_ORDER_ITERATION, "Objects and arrays can only be iterated when they are first encountered." },
|
||||||
|
{ INSUFFICIENT_PADDING, "simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length." }
|
||||||
}; // error_messages[]
|
}; // error_messages[]
|
||||||
|
|
||||||
} // namespace internal
|
} // namespace internal
|
||||||
|
@ -2648,6 +2649,7 @@ simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept {
|
||||||
|
|
||||||
const implementation * builtin_implementation() {
|
const implementation * builtin_implementation() {
|
||||||
static const implementation * builtin_impl = available_implementations[STRINGIFY(SIMDJSON_BUILTIN_IMPLEMENTATION)];
|
static const implementation * builtin_impl = available_implementations[STRINGIFY(SIMDJSON_BUILTIN_IMPLEMENTATION)];
|
||||||
|
assert(builtin_impl);
|
||||||
return builtin_impl;
|
return builtin_impl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2681,7 +2683,6 @@ simdjson_warn_unused error_code implementation::create_dom_parser_implementation
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
/* begin file include/simdjson/arm64/end.h */
|
/* begin file include/simdjson/arm64/end.h */
|
||||||
#undef arm64
|
|
||||||
/* end file include/simdjson/arm64/end.h */
|
/* end file include/simdjson/arm64/end.h */
|
||||||
/* end file src/arm64/implementation.cpp */
|
/* end file src/arm64/implementation.cpp */
|
||||||
/* begin file src/arm64/dom_parser_implementation.cpp */
|
/* begin file src/arm64/dom_parser_implementation.cpp */
|
||||||
|
@ -2702,9 +2703,9 @@ using namespace simd;
|
||||||
struct json_character_block {
|
struct json_character_block {
|
||||||
static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in);
|
static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in);
|
||||||
|
|
||||||
simdjson_really_inline uint64_t whitespace() const { return _whitespace; }
|
simdjson_really_inline uint64_t whitespace() const noexcept { return _whitespace; }
|
||||||
simdjson_really_inline uint64_t op() const { return _op; }
|
simdjson_really_inline uint64_t op() const noexcept { return _op; }
|
||||||
simdjson_really_inline uint64_t scalar() { return ~(op() | whitespace()); }
|
simdjson_really_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); }
|
||||||
|
|
||||||
uint64_t _whitespace;
|
uint64_t _whitespace;
|
||||||
uint64_t _op;
|
uint64_t _op;
|
||||||
|
@ -3017,7 +3018,7 @@ private:
|
||||||
|
|
||||||
// Routines to print masks and text for debugging bitmask operations
|
// Routines to print masks and text for debugging bitmask operations
|
||||||
simdjson_unused static char * format_input_text_64(const uint8_t *text) {
|
simdjson_unused static char * format_input_text_64(const uint8_t *text) {
|
||||||
static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
|
static char buf[sizeof(simd8x64<uint8_t>) + 1];
|
||||||
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
|
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
|
||||||
buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
|
buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
|
||||||
}
|
}
|
||||||
|
@ -3027,7 +3028,7 @@ simdjson_unused static char * format_input_text_64(const uint8_t *text) {
|
||||||
|
|
||||||
// Routines to print masks and text for debugging bitmask operations
|
// Routines to print masks and text for debugging bitmask operations
|
||||||
simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
|
simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
|
||||||
static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
|
static char buf[sizeof(simd8x64<uint8_t>) + 1];
|
||||||
in.store(reinterpret_cast<uint8_t*>(buf));
|
in.store(reinterpret_cast<uint8_t*>(buf));
|
||||||
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
|
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
|
||||||
if (buf[i] < ' ') { buf[i] = '_'; }
|
if (buf[i] < ' ') { buf[i] = '_'; }
|
||||||
|
@ -3037,7 +3038,7 @@ simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_unused static char * format_mask(uint64_t mask) {
|
simdjson_unused static char * format_mask(uint64_t mask) {
|
||||||
static char *buf = reinterpret_cast<char*>(malloc(64 + 1));
|
static char buf[sizeof(simd8x64<uint8_t>) + 1];
|
||||||
for (size_t i=0; i<64; i++) {
|
for (size_t i=0; i<64; i++) {
|
||||||
buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
|
buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
|
||||||
}
|
}
|
||||||
|
@ -3085,6 +3086,10 @@ namespace {
|
||||||
namespace stage1 {
|
namespace stage1 {
|
||||||
|
|
||||||
struct json_string_block {
|
struct json_string_block {
|
||||||
|
// We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017
|
||||||
|
simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) :
|
||||||
|
_backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {}
|
||||||
|
|
||||||
// Escaped characters (characters following an escape() character)
|
// Escaped characters (characters following an escape() character)
|
||||||
simdjson_really_inline uint64_t escaped() const { return _escaped; }
|
simdjson_really_inline uint64_t escaped() const { return _escaped; }
|
||||||
// Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \)
|
// Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \)
|
||||||
|
@ -3206,12 +3211,15 @@ simdjson_really_inline json_string_block json_string_scanner::next(const simd::s
|
||||||
prev_in_string = uint64_t(static_cast<int64_t>(in_string) >> 63);
|
prev_in_string = uint64_t(static_cast<int64_t>(in_string) >> 63);
|
||||||
|
|
||||||
// Use ^ to turn the beginning quote off, and the end quote on.
|
// Use ^ to turn the beginning quote off, and the end quote on.
|
||||||
return {
|
|
||||||
|
// We are returning a function-local object so either we get a move constructor
|
||||||
|
// or we get copy elision.
|
||||||
|
return json_string_block(
|
||||||
backslash,
|
backslash,
|
||||||
escaped,
|
escaped,
|
||||||
quote,
|
quote,
|
||||||
in_string
|
in_string
|
||||||
};
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_really_inline error_code json_string_scanner::finish() {
|
simdjson_really_inline error_code json_string_scanner::finish() {
|
||||||
|
@ -3251,20 +3259,26 @@ namespace stage1 {
|
||||||
*/
|
*/
|
||||||
struct json_block {
|
struct json_block {
|
||||||
public:
|
public:
|
||||||
|
// We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017
|
||||||
|
simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
|
||||||
|
_string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
|
||||||
|
simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
|
||||||
|
_string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The start of structurals.
|
* The start of structurals.
|
||||||
* In simdjson prior to v0.3, these were called the pseudo-structural characters.
|
* In simdjson prior to v0.3, these were called the pseudo-structural characters.
|
||||||
**/
|
**/
|
||||||
simdjson_really_inline uint64_t structural_start() { return potential_structural_start() & ~_string.string_tail(); }
|
simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); }
|
||||||
/** All JSON whitespace (i.e. not in a string) */
|
/** All JSON whitespace (i.e. not in a string) */
|
||||||
simdjson_really_inline uint64_t whitespace() { return non_quote_outside_string(_characters.whitespace()); }
|
simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); }
|
||||||
|
|
||||||
// Helpers
|
// Helpers
|
||||||
|
|
||||||
/** Whether the given characters are inside a string (only works on non-quotes) */
|
/** Whether the given characters are inside a string (only works on non-quotes) */
|
||||||
simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) { return _string.non_quote_inside_string(mask); }
|
simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); }
|
||||||
/** Whether the given characters are outside a string (only works on non-quotes) */
|
/** Whether the given characters are outside a string (only works on non-quotes) */
|
||||||
simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) { return _string.non_quote_outside_string(mask); }
|
simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); }
|
||||||
|
|
||||||
// string and escape characters
|
// string and escape characters
|
||||||
json_string_block _string;
|
json_string_block _string;
|
||||||
|
@ -3279,12 +3293,12 @@ private:
|
||||||
* structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc".
|
* structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc".
|
||||||
* They may reside inside a string.
|
* They may reside inside a string.
|
||||||
**/
|
**/
|
||||||
simdjson_really_inline uint64_t potential_structural_start() { return _characters.op() | potential_scalar_start(); }
|
simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); }
|
||||||
/**
|
/**
|
||||||
* The start of non-operator runs, like 123, true and "abc".
|
* The start of non-operator runs, like 123, true and "abc".
|
||||||
* It may reside inside a string.
|
* It may reside inside a string.
|
||||||
**/
|
**/
|
||||||
simdjson_really_inline uint64_t potential_scalar_start() {
|
simdjson_really_inline uint64_t potential_scalar_start() const noexcept {
|
||||||
// The term "scalar" refers to anything except structural characters and white space
|
// The term "scalar" refers to anything except structural characters and white space
|
||||||
// (so letters, numbers, quotes).
|
// (so letters, numbers, quotes).
|
||||||
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space
|
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space
|
||||||
|
@ -3295,7 +3309,7 @@ private:
|
||||||
* Whether the given character is immediately after a non-operator like 123, true.
|
* Whether the given character is immediately after a non-operator like 123, true.
|
||||||
* The characters following a quote are not included.
|
* The characters following a quote are not included.
|
||||||
*/
|
*/
|
||||||
simdjson_really_inline uint64_t follows_potential_scalar() {
|
simdjson_really_inline uint64_t follows_potential_scalar() const noexcept {
|
||||||
// _follows_potential_nonquote_scalar: is defined as marking any character that follows a character
|
// _follows_potential_nonquote_scalar: is defined as marking any character that follows a character
|
||||||
// that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a
|
// that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a
|
||||||
// white space.
|
// white space.
|
||||||
|
@ -3361,11 +3375,13 @@ simdjson_really_inline json_block json_scanner::next(const simd::simd8x64<uint8_
|
||||||
// Performance: there are many ways to skin this cat.
|
// Performance: there are many ways to skin this cat.
|
||||||
const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote();
|
const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote();
|
||||||
uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar);
|
uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar);
|
||||||
return {
|
// We are returning a function-local object so either we get a move constructor
|
||||||
strings,
|
// or we get copy elision.
|
||||||
|
return json_block(
|
||||||
|
strings,// strings is a function-local object so either it moves or the copy is elided.
|
||||||
characters,
|
characters,
|
||||||
follows_nonquote_scalar
|
follows_nonquote_scalar
|
||||||
};
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_really_inline error_code json_scanner::finish() {
|
simdjson_really_inline error_code json_scanner::finish() {
|
||||||
|
@ -3399,13 +3415,13 @@ private:
|
||||||
{}
|
{}
|
||||||
template<size_t STEP_SIZE>
|
template<size_t STEP_SIZE>
|
||||||
simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
||||||
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block);
|
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, const json_block& block);
|
||||||
simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len);
|
simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len);
|
||||||
json_scanner scanner{};
|
json_scanner scanner{};
|
||||||
uint8_t *dst;
|
uint8_t *dst;
|
||||||
};
|
};
|
||||||
|
|
||||||
simdjson_really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, json_block block) {
|
simdjson_really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, const json_block& block) {
|
||||||
uint64_t mask = block.whitespace();
|
uint64_t mask = block.whitespace();
|
||||||
in.compress(mask, dst);
|
in.compress(mask, dst);
|
||||||
dst += 64 - count_ones(mask);
|
dst += 64 - count_ones(mask);
|
||||||
|
@ -3620,7 +3636,7 @@ private:
|
||||||
simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes);
|
simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes);
|
||||||
template<size_t STEP_SIZE>
|
template<size_t STEP_SIZE>
|
||||||
simdjson_really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
simdjson_really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
||||||
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx);
|
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, const json_block& block, size_t idx);
|
||||||
simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial);
|
simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial);
|
||||||
|
|
||||||
json_scanner scanner{};
|
json_scanner scanner{};
|
||||||
|
@ -3710,7 +3726,7 @@ simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *blo
|
||||||
reader.advance();
|
reader.advance();
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx) {
|
simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>& in, const json_block& block, size_t idx) {
|
||||||
uint64_t unescaped = in.lteq(0x1F);
|
uint64_t unescaped = in.lteq(0x1F);
|
||||||
checker.check_next_input(in);
|
checker.check_next_input(in);
|
||||||
indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser
|
indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser
|
||||||
|
@ -4518,12 +4534,11 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_
|
||||||
// practice unless you are in the strange scenario where you have many JSON
|
// practice unless you are in the strange scenario where you have many JSON
|
||||||
// documents made of single atoms.
|
// documents made of single atoms.
|
||||||
//
|
//
|
||||||
uint8_t *copy = static_cast<uint8_t *>(malloc(iter.remaining_len() + SIMDJSON_PADDING));
|
std::unique_ptr<uint8_t[]>copy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]);
|
||||||
if (copy == nullptr) { return MEMALLOC; }
|
if (copy.get() == nullptr) { return MEMALLOC; }
|
||||||
std::memcpy(copy, value, iter.remaining_len());
|
std::memcpy(copy.get(), value, iter.remaining_len());
|
||||||
std::memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING);
|
std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING);
|
||||||
error_code error = visit_number(iter, copy);
|
error_code error = visit_number(iter, copy.get());
|
||||||
free(copy);
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4675,7 +4690,6 @@ simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
/* begin file include/simdjson/arm64/end.h */
|
/* begin file include/simdjson/arm64/end.h */
|
||||||
#undef arm64
|
|
||||||
/* end file include/simdjson/arm64/end.h */
|
/* end file include/simdjson/arm64/end.h */
|
||||||
/* end file src/arm64/dom_parser_implementation.cpp */
|
/* end file src/arm64/dom_parser_implementation.cpp */
|
||||||
#endif
|
#endif
|
||||||
|
@ -4705,7 +4719,6 @@ simdjson_warn_unused error_code implementation::create_dom_parser_implementation
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
/* begin file include/simdjson/fallback/end.h */
|
/* begin file include/simdjson/fallback/end.h */
|
||||||
#undef fallback
|
|
||||||
/* end file include/simdjson/fallback/end.h */
|
/* end file include/simdjson/fallback/end.h */
|
||||||
/* end file src/fallback/implementation.cpp */
|
/* end file src/fallback/implementation.cpp */
|
||||||
/* begin file src/fallback/dom_parser_implementation.cpp */
|
/* begin file src/fallback/dom_parser_implementation.cpp */
|
||||||
|
@ -5795,12 +5808,11 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_
|
||||||
// practice unless you are in the strange scenario where you have many JSON
|
// practice unless you are in the strange scenario where you have many JSON
|
||||||
// documents made of single atoms.
|
// documents made of single atoms.
|
||||||
//
|
//
|
||||||
uint8_t *copy = static_cast<uint8_t *>(malloc(iter.remaining_len() + SIMDJSON_PADDING));
|
std::unique_ptr<uint8_t[]>copy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]);
|
||||||
if (copy == nullptr) { return MEMALLOC; }
|
if (copy.get() == nullptr) { return MEMALLOC; }
|
||||||
std::memcpy(copy, value, iter.remaining_len());
|
std::memcpy(copy.get(), value, iter.remaining_len());
|
||||||
std::memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING);
|
std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING);
|
||||||
error_code error = visit_number(iter, copy);
|
error_code error = visit_number(iter, copy.get());
|
||||||
free(copy);
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5923,7 +5935,6 @@ simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
/* begin file include/simdjson/fallback/end.h */
|
/* begin file include/simdjson/fallback/end.h */
|
||||||
#undef fallback
|
|
||||||
/* end file include/simdjson/fallback/end.h */
|
/* end file include/simdjson/fallback/end.h */
|
||||||
/* end file src/fallback/dom_parser_implementation.cpp */
|
/* end file src/fallback/dom_parser_implementation.cpp */
|
||||||
#endif
|
#endif
|
||||||
|
@ -5954,8 +5965,7 @@ simdjson_warn_unused error_code implementation::create_dom_parser_implementation
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
/* begin file include/simdjson/haswell/end.h */
|
/* begin file include/simdjson/haswell/end.h */
|
||||||
SIMDJSON_UNTARGET_REGION
|
SIMDJSON_UNTARGET_HASWELL
|
||||||
#undef haswell
|
|
||||||
/* end file include/simdjson/haswell/end.h */
|
/* end file include/simdjson/haswell/end.h */
|
||||||
|
|
||||||
/* end file src/haswell/implementation.cpp */
|
/* end file src/haswell/implementation.cpp */
|
||||||
|
@ -5979,19 +5989,19 @@ using namespace simd;
|
||||||
struct json_character_block {
|
struct json_character_block {
|
||||||
static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in);
|
static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in);
|
||||||
// ASCII white-space ('\r','\n','\t',' ')
|
// ASCII white-space ('\r','\n','\t',' ')
|
||||||
simdjson_really_inline uint64_t whitespace() const;
|
simdjson_really_inline uint64_t whitespace() const noexcept;
|
||||||
// non-quote structural characters (comma, colon, braces, brackets)
|
// non-quote structural characters (comma, colon, braces, brackets)
|
||||||
simdjson_really_inline uint64_t op() const;
|
simdjson_really_inline uint64_t op() const noexcept;
|
||||||
// neither a structural character nor a white-space, so letters, numbers and quotes
|
// neither a structural character nor a white-space, so letters, numbers and quotes
|
||||||
simdjson_really_inline uint64_t scalar() const;
|
simdjson_really_inline uint64_t scalar() const noexcept;
|
||||||
|
|
||||||
uint64_t _whitespace; // ASCII white-space ('\r','\n','\t',' ')
|
uint64_t _whitespace; // ASCII white-space ('\r','\n','\t',' ')
|
||||||
uint64_t _op; // structural characters (comma, colon, braces, brackets but not quotes)
|
uint64_t _op; // structural characters (comma, colon, braces, brackets but not quotes)
|
||||||
};
|
};
|
||||||
|
|
||||||
simdjson_really_inline uint64_t json_character_block::whitespace() const { return _whitespace; }
|
simdjson_really_inline uint64_t json_character_block::whitespace() const noexcept { return _whitespace; }
|
||||||
simdjson_really_inline uint64_t json_character_block::op() const { return _op; }
|
simdjson_really_inline uint64_t json_character_block::op() const noexcept { return _op; }
|
||||||
simdjson_really_inline uint64_t json_character_block::scalar() const { return ~(op() | whitespace()); }
|
simdjson_really_inline uint64_t json_character_block::scalar() const noexcept { return ~(op() | whitespace()); }
|
||||||
|
|
||||||
// This identifies structural characters (comma, colon, braces, brackets),
|
// This identifies structural characters (comma, colon, braces, brackets),
|
||||||
// and ASCII white-space ('\r','\n','\t',' ').
|
// and ASCII white-space ('\r','\n','\t',' ').
|
||||||
|
@ -6297,7 +6307,7 @@ private:
|
||||||
|
|
||||||
// Routines to print masks and text for debugging bitmask operations
|
// Routines to print masks and text for debugging bitmask operations
|
||||||
simdjson_unused static char * format_input_text_64(const uint8_t *text) {
|
simdjson_unused static char * format_input_text_64(const uint8_t *text) {
|
||||||
static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
|
static char buf[sizeof(simd8x64<uint8_t>) + 1];
|
||||||
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
|
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
|
||||||
buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
|
buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
|
||||||
}
|
}
|
||||||
|
@ -6307,7 +6317,7 @@ simdjson_unused static char * format_input_text_64(const uint8_t *text) {
|
||||||
|
|
||||||
// Routines to print masks and text for debugging bitmask operations
|
// Routines to print masks and text for debugging bitmask operations
|
||||||
simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
|
simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
|
||||||
static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
|
static char buf[sizeof(simd8x64<uint8_t>) + 1];
|
||||||
in.store(reinterpret_cast<uint8_t*>(buf));
|
in.store(reinterpret_cast<uint8_t*>(buf));
|
||||||
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
|
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
|
||||||
if (buf[i] < ' ') { buf[i] = '_'; }
|
if (buf[i] < ' ') { buf[i] = '_'; }
|
||||||
|
@ -6317,7 +6327,7 @@ simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_unused static char * format_mask(uint64_t mask) {
|
simdjson_unused static char * format_mask(uint64_t mask) {
|
||||||
static char *buf = reinterpret_cast<char*>(malloc(64 + 1));
|
static char buf[sizeof(simd8x64<uint8_t>) + 1];
|
||||||
for (size_t i=0; i<64; i++) {
|
for (size_t i=0; i<64; i++) {
|
||||||
buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
|
buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
|
||||||
}
|
}
|
||||||
|
@ -6365,6 +6375,10 @@ namespace {
|
||||||
namespace stage1 {
|
namespace stage1 {
|
||||||
|
|
||||||
struct json_string_block {
|
struct json_string_block {
|
||||||
|
// We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017
|
||||||
|
simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) :
|
||||||
|
_backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {}
|
||||||
|
|
||||||
// Escaped characters (characters following an escape() character)
|
// Escaped characters (characters following an escape() character)
|
||||||
simdjson_really_inline uint64_t escaped() const { return _escaped; }
|
simdjson_really_inline uint64_t escaped() const { return _escaped; }
|
||||||
// Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \)
|
// Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \)
|
||||||
|
@ -6486,12 +6500,15 @@ simdjson_really_inline json_string_block json_string_scanner::next(const simd::s
|
||||||
prev_in_string = uint64_t(static_cast<int64_t>(in_string) >> 63);
|
prev_in_string = uint64_t(static_cast<int64_t>(in_string) >> 63);
|
||||||
|
|
||||||
// Use ^ to turn the beginning quote off, and the end quote on.
|
// Use ^ to turn the beginning quote off, and the end quote on.
|
||||||
return {
|
|
||||||
|
// We are returning a function-local object so either we get a move constructor
|
||||||
|
// or we get copy elision.
|
||||||
|
return json_string_block(
|
||||||
backslash,
|
backslash,
|
||||||
escaped,
|
escaped,
|
||||||
quote,
|
quote,
|
||||||
in_string
|
in_string
|
||||||
};
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_really_inline error_code json_string_scanner::finish() {
|
simdjson_really_inline error_code json_string_scanner::finish() {
|
||||||
|
@ -6531,20 +6548,26 @@ namespace stage1 {
|
||||||
*/
|
*/
|
||||||
struct json_block {
|
struct json_block {
|
||||||
public:
|
public:
|
||||||
|
// We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017
|
||||||
|
simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
|
||||||
|
_string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
|
||||||
|
simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
|
||||||
|
_string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The start of structurals.
|
* The start of structurals.
|
||||||
* In simdjson prior to v0.3, these were called the pseudo-structural characters.
|
* In simdjson prior to v0.3, these were called the pseudo-structural characters.
|
||||||
**/
|
**/
|
||||||
simdjson_really_inline uint64_t structural_start() { return potential_structural_start() & ~_string.string_tail(); }
|
simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); }
|
||||||
/** All JSON whitespace (i.e. not in a string) */
|
/** All JSON whitespace (i.e. not in a string) */
|
||||||
simdjson_really_inline uint64_t whitespace() { return non_quote_outside_string(_characters.whitespace()); }
|
simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); }
|
||||||
|
|
||||||
// Helpers
|
// Helpers
|
||||||
|
|
||||||
/** Whether the given characters are inside a string (only works on non-quotes) */
|
/** Whether the given characters are inside a string (only works on non-quotes) */
|
||||||
simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) { return _string.non_quote_inside_string(mask); }
|
simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); }
|
||||||
/** Whether the given characters are outside a string (only works on non-quotes) */
|
/** Whether the given characters are outside a string (only works on non-quotes) */
|
||||||
simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) { return _string.non_quote_outside_string(mask); }
|
simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); }
|
||||||
|
|
||||||
// string and escape characters
|
// string and escape characters
|
||||||
json_string_block _string;
|
json_string_block _string;
|
||||||
|
@ -6559,12 +6582,12 @@ private:
|
||||||
* structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc".
|
* structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc".
|
||||||
* They may reside inside a string.
|
* They may reside inside a string.
|
||||||
**/
|
**/
|
||||||
simdjson_really_inline uint64_t potential_structural_start() { return _characters.op() | potential_scalar_start(); }
|
simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); }
|
||||||
/**
|
/**
|
||||||
* The start of non-operator runs, like 123, true and "abc".
|
* The start of non-operator runs, like 123, true and "abc".
|
||||||
* It main reside inside a string.
|
* It main reside inside a string.
|
||||||
**/
|
**/
|
||||||
simdjson_really_inline uint64_t potential_scalar_start() {
|
simdjson_really_inline uint64_t potential_scalar_start() const noexcept {
|
||||||
// The term "scalar" refers to anything except structural characters and white space
|
// The term "scalar" refers to anything except structural characters and white space
|
||||||
// (so letters, numbers, quotes).
|
// (so letters, numbers, quotes).
|
||||||
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space
|
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space
|
||||||
|
@ -6575,7 +6598,7 @@ private:
|
||||||
* Whether the given character is immediately after a non-operator like 123, true.
|
* Whether the given character is immediately after a non-operator like 123, true.
|
||||||
* The characters following a quote are not included.
|
* The characters following a quote are not included.
|
||||||
*/
|
*/
|
||||||
simdjson_really_inline uint64_t follows_potential_scalar() {
|
simdjson_really_inline uint64_t follows_potential_scalar() const noexcept {
|
||||||
// _follows_potential_nonquote_scalar: is defined as marking any character that follows a character
|
// _follows_potential_nonquote_scalar: is defined as marking any character that follows a character
|
||||||
// that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a
|
// that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a
|
||||||
// white space.
|
// white space.
|
||||||
|
@ -6641,11 +6664,13 @@ simdjson_really_inline json_block json_scanner::next(const simd::simd8x64<uint8_
|
||||||
// Performance: there are many ways to skin this cat.
|
// Performance: there are many ways to skin this cat.
|
||||||
const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote();
|
const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote();
|
||||||
uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar);
|
uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar);
|
||||||
return {
|
// We are returning a function-local object so either we get a move constructor
|
||||||
strings,
|
// or we get copy elision.
|
||||||
|
return json_block(
|
||||||
|
strings,// strings is a function-local object so either it moves or the copy is elided.
|
||||||
characters,
|
characters,
|
||||||
follows_nonquote_scalar
|
follows_nonquote_scalar
|
||||||
};
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_really_inline error_code json_scanner::finish() {
|
simdjson_really_inline error_code json_scanner::finish() {
|
||||||
|
@ -6679,13 +6704,13 @@ private:
|
||||||
{}
|
{}
|
||||||
template<size_t STEP_SIZE>
|
template<size_t STEP_SIZE>
|
||||||
simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
||||||
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block);
|
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, const json_block& block);
|
||||||
simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len);
|
simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len);
|
||||||
json_scanner scanner{};
|
json_scanner scanner{};
|
||||||
uint8_t *dst;
|
uint8_t *dst;
|
||||||
};
|
};
|
||||||
|
|
||||||
simdjson_really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, json_block block) {
|
simdjson_really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, const json_block& block) {
|
||||||
uint64_t mask = block.whitespace();
|
uint64_t mask = block.whitespace();
|
||||||
in.compress(mask, dst);
|
in.compress(mask, dst);
|
||||||
dst += 64 - count_ones(mask);
|
dst += 64 - count_ones(mask);
|
||||||
|
@ -6900,7 +6925,7 @@ private:
|
||||||
simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes);
|
simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes);
|
||||||
template<size_t STEP_SIZE>
|
template<size_t STEP_SIZE>
|
||||||
simdjson_really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
simdjson_really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
||||||
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx);
|
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, const json_block& block, size_t idx);
|
||||||
simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial);
|
simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial);
|
||||||
|
|
||||||
json_scanner scanner{};
|
json_scanner scanner{};
|
||||||
|
@ -6990,7 +7015,7 @@ simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *blo
|
||||||
reader.advance();
|
reader.advance();
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx) {
|
simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>& in, const json_block& block, size_t idx) {
|
||||||
uint64_t unescaped = in.lteq(0x1F);
|
uint64_t unescaped = in.lteq(0x1F);
|
||||||
checker.check_next_input(in);
|
checker.check_next_input(in);
|
||||||
indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser
|
indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser
|
||||||
|
@ -7797,12 +7822,11 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_
|
||||||
// practice unless you are in the strange scenario where you have many JSON
|
// practice unless you are in the strange scenario where you have many JSON
|
||||||
// documents made of single atoms.
|
// documents made of single atoms.
|
||||||
//
|
//
|
||||||
uint8_t *copy = static_cast<uint8_t *>(malloc(iter.remaining_len() + SIMDJSON_PADDING));
|
std::unique_ptr<uint8_t[]>copy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]);
|
||||||
if (copy == nullptr) { return MEMALLOC; }
|
if (copy.get() == nullptr) { return MEMALLOC; }
|
||||||
std::memcpy(copy, value, iter.remaining_len());
|
std::memcpy(copy.get(), value, iter.remaining_len());
|
||||||
std::memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING);
|
std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING);
|
||||||
error_code error = visit_number(iter, copy);
|
error_code error = visit_number(iter, copy.get());
|
||||||
free(copy);
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7952,8 +7976,7 @@ simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
/* begin file include/simdjson/haswell/end.h */
|
/* begin file include/simdjson/haswell/end.h */
|
||||||
SIMDJSON_UNTARGET_REGION
|
SIMDJSON_UNTARGET_HASWELL
|
||||||
#undef haswell
|
|
||||||
/* end file include/simdjson/haswell/end.h */
|
/* end file include/simdjson/haswell/end.h */
|
||||||
/* end file src/haswell/dom_parser_implementation.cpp */
|
/* end file src/haswell/dom_parser_implementation.cpp */
|
||||||
#endif
|
#endif
|
||||||
|
@ -7983,7 +8006,6 @@ simdjson_warn_unused error_code implementation::create_dom_parser_implementation
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
/* begin file include/simdjson/ppc64/end.h */
|
/* begin file include/simdjson/ppc64/end.h */
|
||||||
#undef ppc64
|
|
||||||
/* end file include/simdjson/ppc64/end.h */
|
/* end file include/simdjson/ppc64/end.h */
|
||||||
/* end file src/ppc64/implementation.cpp */
|
/* end file src/ppc64/implementation.cpp */
|
||||||
/* begin file src/ppc64/dom_parser_implementation.cpp */
|
/* begin file src/ppc64/dom_parser_implementation.cpp */
|
||||||
|
@ -8004,9 +8026,9 @@ using namespace simd;
|
||||||
struct json_character_block {
|
struct json_character_block {
|
||||||
static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in);
|
static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in);
|
||||||
|
|
||||||
simdjson_really_inline uint64_t whitespace() const { return _whitespace; }
|
simdjson_really_inline uint64_t whitespace() const noexcept { return _whitespace; }
|
||||||
simdjson_really_inline uint64_t op() const { return _op; }
|
simdjson_really_inline uint64_t op() const noexcept { return _op; }
|
||||||
simdjson_really_inline uint64_t scalar() { return ~(op() | whitespace()); }
|
simdjson_really_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); }
|
||||||
|
|
||||||
uint64_t _whitespace;
|
uint64_t _whitespace;
|
||||||
uint64_t _op;
|
uint64_t _op;
|
||||||
|
@ -8290,7 +8312,7 @@ private:
|
||||||
|
|
||||||
// Routines to print masks and text for debugging bitmask operations
|
// Routines to print masks and text for debugging bitmask operations
|
||||||
simdjson_unused static char * format_input_text_64(const uint8_t *text) {
|
simdjson_unused static char * format_input_text_64(const uint8_t *text) {
|
||||||
static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
|
static char buf[sizeof(simd8x64<uint8_t>) + 1];
|
||||||
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
|
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
|
||||||
buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
|
buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
|
||||||
}
|
}
|
||||||
|
@ -8300,7 +8322,7 @@ simdjson_unused static char * format_input_text_64(const uint8_t *text) {
|
||||||
|
|
||||||
// Routines to print masks and text for debugging bitmask operations
|
// Routines to print masks and text for debugging bitmask operations
|
||||||
simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
|
simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
|
||||||
static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
|
static char buf[sizeof(simd8x64<uint8_t>) + 1];
|
||||||
in.store(reinterpret_cast<uint8_t*>(buf));
|
in.store(reinterpret_cast<uint8_t*>(buf));
|
||||||
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
|
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
|
||||||
if (buf[i] < ' ') { buf[i] = '_'; }
|
if (buf[i] < ' ') { buf[i] = '_'; }
|
||||||
|
@ -8310,7 +8332,7 @@ simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_unused static char * format_mask(uint64_t mask) {
|
simdjson_unused static char * format_mask(uint64_t mask) {
|
||||||
static char *buf = reinterpret_cast<char*>(malloc(64 + 1));
|
static char buf[sizeof(simd8x64<uint8_t>) + 1];
|
||||||
for (size_t i=0; i<64; i++) {
|
for (size_t i=0; i<64; i++) {
|
||||||
buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
|
buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
|
||||||
}
|
}
|
||||||
|
@ -8358,6 +8380,10 @@ namespace {
|
||||||
namespace stage1 {
|
namespace stage1 {
|
||||||
|
|
||||||
struct json_string_block {
|
struct json_string_block {
|
||||||
|
// We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017
|
||||||
|
simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) :
|
||||||
|
_backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {}
|
||||||
|
|
||||||
// Escaped characters (characters following an escape() character)
|
// Escaped characters (characters following an escape() character)
|
||||||
simdjson_really_inline uint64_t escaped() const { return _escaped; }
|
simdjson_really_inline uint64_t escaped() const { return _escaped; }
|
||||||
// Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \)
|
// Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \)
|
||||||
|
@ -8479,12 +8505,15 @@ simdjson_really_inline json_string_block json_string_scanner::next(const simd::s
|
||||||
prev_in_string = uint64_t(static_cast<int64_t>(in_string) >> 63);
|
prev_in_string = uint64_t(static_cast<int64_t>(in_string) >> 63);
|
||||||
|
|
||||||
// Use ^ to turn the beginning quote off, and the end quote on.
|
// Use ^ to turn the beginning quote off, and the end quote on.
|
||||||
return {
|
|
||||||
|
// We are returning a function-local object so either we get a move constructor
|
||||||
|
// or we get copy elision.
|
||||||
|
return json_string_block(
|
||||||
backslash,
|
backslash,
|
||||||
escaped,
|
escaped,
|
||||||
quote,
|
quote,
|
||||||
in_string
|
in_string
|
||||||
};
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_really_inline error_code json_string_scanner::finish() {
|
simdjson_really_inline error_code json_string_scanner::finish() {
|
||||||
|
@ -8524,20 +8553,26 @@ namespace stage1 {
|
||||||
*/
|
*/
|
||||||
struct json_block {
|
struct json_block {
|
||||||
public:
|
public:
|
||||||
|
// We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017
|
||||||
|
simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
|
||||||
|
_string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
|
||||||
|
simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
|
||||||
|
_string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The start of structurals.
|
* The start of structurals.
|
||||||
* In simdjson prior to v0.3, these were called the pseudo-structural characters.
|
* In simdjson prior to v0.3, these were called the pseudo-structural characters.
|
||||||
**/
|
**/
|
||||||
simdjson_really_inline uint64_t structural_start() { return potential_structural_start() & ~_string.string_tail(); }
|
simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); }
|
||||||
/** All JSON whitespace (i.e. not in a string) */
|
/** All JSON whitespace (i.e. not in a string) */
|
||||||
simdjson_really_inline uint64_t whitespace() { return non_quote_outside_string(_characters.whitespace()); }
|
simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); }
|
||||||
|
|
||||||
// Helpers
|
// Helpers
|
||||||
|
|
||||||
/** Whether the given characters are inside a string (only works on non-quotes) */
|
/** Whether the given characters are inside a string (only works on non-quotes) */
|
||||||
simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) { return _string.non_quote_inside_string(mask); }
|
simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); }
|
||||||
/** Whether the given characters are outside a string (only works on non-quotes) */
|
/** Whether the given characters are outside a string (only works on non-quotes) */
|
||||||
simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) { return _string.non_quote_outside_string(mask); }
|
simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); }
|
||||||
|
|
||||||
// string and escape characters
|
// string and escape characters
|
||||||
json_string_block _string;
|
json_string_block _string;
|
||||||
|
@ -8552,12 +8587,12 @@ private:
|
||||||
* structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc".
|
* structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc".
|
||||||
* They may reside inside a string.
|
* They may reside inside a string.
|
||||||
**/
|
**/
|
||||||
simdjson_really_inline uint64_t potential_structural_start() { return _characters.op() | potential_scalar_start(); }
|
simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); }
|
||||||
/**
|
/**
|
||||||
* The start of non-operator runs, like 123, true and "abc".
|
* The start of non-operator runs, like 123, true and "abc".
|
||||||
* It main reside inside a string.
|
* It main reside inside a string.
|
||||||
**/
|
**/
|
||||||
simdjson_really_inline uint64_t potential_scalar_start() {
|
simdjson_really_inline uint64_t potential_scalar_start() const noexcept {
|
||||||
// The term "scalar" refers to anything except structural characters and white space
|
// The term "scalar" refers to anything except structural characters and white space
|
||||||
// (so letters, numbers, quotes).
|
// (so letters, numbers, quotes).
|
||||||
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space
|
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space
|
||||||
|
@ -8568,7 +8603,7 @@ private:
|
||||||
* Whether the given character is immediately after a non-operator like 123, true.
|
* Whether the given character is immediately after a non-operator like 123, true.
|
||||||
* The characters following a quote are not included.
|
* The characters following a quote are not included.
|
||||||
*/
|
*/
|
||||||
simdjson_really_inline uint64_t follows_potential_scalar() {
|
simdjson_really_inline uint64_t follows_potential_scalar() const noexcept {
|
||||||
// _follows_potential_nonquote_scalar: is defined as marking any character that follows a character
|
// _follows_potential_nonquote_scalar: is defined as marking any character that follows a character
|
||||||
// that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a
|
// that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a
|
||||||
// white space.
|
// white space.
|
||||||
|
@ -8634,11 +8669,13 @@ simdjson_really_inline json_block json_scanner::next(const simd::simd8x64<uint8_
|
||||||
// Performance: there are many ways to skin this cat.
|
// Performance: there are many ways to skin this cat.
|
||||||
const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote();
|
const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote();
|
||||||
uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar);
|
uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar);
|
||||||
return {
|
// We are returning a function-local object so either we get a move constructor
|
||||||
strings,
|
// or we get copy elision.
|
||||||
|
return json_block(
|
||||||
|
strings,// strings is a function-local object so either it moves or the copy is elided.
|
||||||
characters,
|
characters,
|
||||||
follows_nonquote_scalar
|
follows_nonquote_scalar
|
||||||
};
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_really_inline error_code json_scanner::finish() {
|
simdjson_really_inline error_code json_scanner::finish() {
|
||||||
|
@ -8672,13 +8709,13 @@ private:
|
||||||
{}
|
{}
|
||||||
template<size_t STEP_SIZE>
|
template<size_t STEP_SIZE>
|
||||||
simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
||||||
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block);
|
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, const json_block& block);
|
||||||
simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len);
|
simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len);
|
||||||
json_scanner scanner{};
|
json_scanner scanner{};
|
||||||
uint8_t *dst;
|
uint8_t *dst;
|
||||||
};
|
};
|
||||||
|
|
||||||
simdjson_really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, json_block block) {
|
simdjson_really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, const json_block& block) {
|
||||||
uint64_t mask = block.whitespace();
|
uint64_t mask = block.whitespace();
|
||||||
in.compress(mask, dst);
|
in.compress(mask, dst);
|
||||||
dst += 64 - count_ones(mask);
|
dst += 64 - count_ones(mask);
|
||||||
|
@ -8893,7 +8930,7 @@ private:
|
||||||
simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes);
|
simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes);
|
||||||
template<size_t STEP_SIZE>
|
template<size_t STEP_SIZE>
|
||||||
simdjson_really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
simdjson_really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
||||||
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx);
|
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, const json_block& block, size_t idx);
|
||||||
simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial);
|
simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial);
|
||||||
|
|
||||||
json_scanner scanner{};
|
json_scanner scanner{};
|
||||||
|
@ -8983,7 +9020,7 @@ simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *blo
|
||||||
reader.advance();
|
reader.advance();
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx) {
|
simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>& in, const json_block& block, size_t idx) {
|
||||||
uint64_t unescaped = in.lteq(0x1F);
|
uint64_t unescaped = in.lteq(0x1F);
|
||||||
checker.check_next_input(in);
|
checker.check_next_input(in);
|
||||||
indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser
|
indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser
|
||||||
|
@ -9791,12 +9828,11 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_
|
||||||
// practice unless you are in the strange scenario where you have many JSON
|
// practice unless you are in the strange scenario where you have many JSON
|
||||||
// documents made of single atoms.
|
// documents made of single atoms.
|
||||||
//
|
//
|
||||||
uint8_t *copy = static_cast<uint8_t *>(malloc(iter.remaining_len() + SIMDJSON_PADDING));
|
std::unique_ptr<uint8_t[]>copy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]);
|
||||||
if (copy == nullptr) { return MEMALLOC; }
|
if (copy.get() == nullptr) { return MEMALLOC; }
|
||||||
std::memcpy(copy, value, iter.remaining_len());
|
std::memcpy(copy.get(), value, iter.remaining_len());
|
||||||
std::memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING);
|
std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING);
|
||||||
error_code error = visit_number(iter, copy);
|
error_code error = visit_number(iter, copy.get());
|
||||||
free(copy);
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9948,7 +9984,6 @@ simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
/* begin file include/simdjson/ppc64/end.h */
|
/* begin file include/simdjson/ppc64/end.h */
|
||||||
#undef ppc64
|
|
||||||
/* end file include/simdjson/ppc64/end.h */
|
/* end file include/simdjson/ppc64/end.h */
|
||||||
/* end file src/ppc64/dom_parser_implementation.cpp */
|
/* end file src/ppc64/dom_parser_implementation.cpp */
|
||||||
#endif
|
#endif
|
||||||
|
@ -9979,8 +10014,7 @@ simdjson_warn_unused error_code implementation::create_dom_parser_implementation
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
/* begin file include/simdjson/westmere/end.h */
|
/* begin file include/simdjson/westmere/end.h */
|
||||||
SIMDJSON_UNTARGET_REGION
|
SIMDJSON_UNTARGET_WESTMERE
|
||||||
#undef westmere
|
|
||||||
/* end file include/simdjson/westmere/end.h */
|
/* end file include/simdjson/westmere/end.h */
|
||||||
/* end file src/westmere/implementation.cpp */
|
/* end file src/westmere/implementation.cpp */
|
||||||
/* begin file src/westmere/dom_parser_implementation.cpp */
|
/* begin file src/westmere/dom_parser_implementation.cpp */
|
||||||
|
@ -10003,9 +10037,9 @@ using namespace simd;
|
||||||
struct json_character_block {
|
struct json_character_block {
|
||||||
static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in);
|
static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in);
|
||||||
|
|
||||||
simdjson_really_inline uint64_t whitespace() const { return _whitespace; }
|
simdjson_really_inline uint64_t whitespace() const noexcept { return _whitespace; }
|
||||||
simdjson_really_inline uint64_t op() const { return _op; }
|
simdjson_really_inline uint64_t op() const noexcept { return _op; }
|
||||||
simdjson_really_inline uint64_t scalar() { return ~(op() | whitespace()); }
|
simdjson_really_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); }
|
||||||
|
|
||||||
uint64_t _whitespace;
|
uint64_t _whitespace;
|
||||||
uint64_t _op;
|
uint64_t _op;
|
||||||
|
@ -10319,7 +10353,7 @@ private:
|
||||||
|
|
||||||
// Routines to print masks and text for debugging bitmask operations
|
// Routines to print masks and text for debugging bitmask operations
|
||||||
simdjson_unused static char * format_input_text_64(const uint8_t *text) {
|
simdjson_unused static char * format_input_text_64(const uint8_t *text) {
|
||||||
static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
|
static char buf[sizeof(simd8x64<uint8_t>) + 1];
|
||||||
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
|
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
|
||||||
buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
|
buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
|
||||||
}
|
}
|
||||||
|
@ -10329,7 +10363,7 @@ simdjson_unused static char * format_input_text_64(const uint8_t *text) {
|
||||||
|
|
||||||
// Routines to print masks and text for debugging bitmask operations
|
// Routines to print masks and text for debugging bitmask operations
|
||||||
simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
|
simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
|
||||||
static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
|
static char buf[sizeof(simd8x64<uint8_t>) + 1];
|
||||||
in.store(reinterpret_cast<uint8_t*>(buf));
|
in.store(reinterpret_cast<uint8_t*>(buf));
|
||||||
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
|
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
|
||||||
if (buf[i] < ' ') { buf[i] = '_'; }
|
if (buf[i] < ' ') { buf[i] = '_'; }
|
||||||
|
@ -10339,7 +10373,7 @@ simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_unused static char * format_mask(uint64_t mask) {
|
simdjson_unused static char * format_mask(uint64_t mask) {
|
||||||
static char *buf = reinterpret_cast<char*>(malloc(64 + 1));
|
static char buf[sizeof(simd8x64<uint8_t>) + 1];
|
||||||
for (size_t i=0; i<64; i++) {
|
for (size_t i=0; i<64; i++) {
|
||||||
buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
|
buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
|
||||||
}
|
}
|
||||||
|
@ -10387,6 +10421,10 @@ namespace {
|
||||||
namespace stage1 {
|
namespace stage1 {
|
||||||
|
|
||||||
struct json_string_block {
|
struct json_string_block {
|
||||||
|
// We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017
|
||||||
|
simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) :
|
||||||
|
_backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {}
|
||||||
|
|
||||||
// Escaped characters (characters following an escape() character)
|
// Escaped characters (characters following an escape() character)
|
||||||
simdjson_really_inline uint64_t escaped() const { return _escaped; }
|
simdjson_really_inline uint64_t escaped() const { return _escaped; }
|
||||||
// Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \)
|
// Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \)
|
||||||
|
@ -10508,12 +10546,15 @@ simdjson_really_inline json_string_block json_string_scanner::next(const simd::s
|
||||||
prev_in_string = uint64_t(static_cast<int64_t>(in_string) >> 63);
|
prev_in_string = uint64_t(static_cast<int64_t>(in_string) >> 63);
|
||||||
|
|
||||||
// Use ^ to turn the beginning quote off, and the end quote on.
|
// Use ^ to turn the beginning quote off, and the end quote on.
|
||||||
return {
|
|
||||||
|
// We are returning a function-local object so either we get a move constructor
|
||||||
|
// or we get copy elision.
|
||||||
|
return json_string_block(
|
||||||
backslash,
|
backslash,
|
||||||
escaped,
|
escaped,
|
||||||
quote,
|
quote,
|
||||||
in_string
|
in_string
|
||||||
};
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_really_inline error_code json_string_scanner::finish() {
|
simdjson_really_inline error_code json_string_scanner::finish() {
|
||||||
|
@ -10553,20 +10594,26 @@ namespace stage1 {
|
||||||
*/
|
*/
|
||||||
struct json_block {
|
struct json_block {
|
||||||
public:
|
public:
|
||||||
|
// We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017
|
||||||
|
simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
|
||||||
|
_string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
|
||||||
|
simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
|
||||||
|
_string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The start of structurals.
|
* The start of structurals.
|
||||||
* In simdjson prior to v0.3, these were called the pseudo-structural characters.
|
* In simdjson prior to v0.3, these were called the pseudo-structural characters.
|
||||||
**/
|
**/
|
||||||
simdjson_really_inline uint64_t structural_start() { return potential_structural_start() & ~_string.string_tail(); }
|
simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); }
|
||||||
/** All JSON whitespace (i.e. not in a string) */
|
/** All JSON whitespace (i.e. not in a string) */
|
||||||
simdjson_really_inline uint64_t whitespace() { return non_quote_outside_string(_characters.whitespace()); }
|
simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); }
|
||||||
|
|
||||||
// Helpers
|
// Helpers
|
||||||
|
|
||||||
/** Whether the given characters are inside a string (only works on non-quotes) */
|
/** Whether the given characters are inside a string (only works on non-quotes) */
|
||||||
simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) { return _string.non_quote_inside_string(mask); }
|
simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); }
|
||||||
/** Whether the given characters are outside a string (only works on non-quotes) */
|
/** Whether the given characters are outside a string (only works on non-quotes) */
|
||||||
simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) { return _string.non_quote_outside_string(mask); }
|
simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); }
|
||||||
|
|
||||||
// string and escape characters
|
// string and escape characters
|
||||||
json_string_block _string;
|
json_string_block _string;
|
||||||
|
@ -10581,12 +10628,12 @@ private:
|
||||||
* structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc".
|
* structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc".
|
||||||
* They may reside inside a string.
|
* They may reside inside a string.
|
||||||
**/
|
**/
|
||||||
simdjson_really_inline uint64_t potential_structural_start() { return _characters.op() | potential_scalar_start(); }
|
simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); }
|
||||||
/**
|
/**
|
||||||
* The start of non-operator runs, like 123, true and "abc".
|
* The start of non-operator runs, like 123, true and "abc".
|
||||||
* It may reside inside a string.
|
* It may reside inside a string.
|
||||||
**/
|
**/
|
||||||
simdjson_really_inline uint64_t potential_scalar_start() {
|
simdjson_really_inline uint64_t potential_scalar_start() const noexcept {
|
||||||
// The term "scalar" refers to anything except structural characters and white space
|
// The term "scalar" refers to anything except structural characters and white space
|
||||||
// (so letters, numbers, quotes).
|
// (so letters, numbers, quotes).
|
||||||
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space
|
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space
|
||||||
|
@ -10597,7 +10644,7 @@ private:
|
||||||
* Whether the given character is immediately after a non-operator like 123, true.
|
* Whether the given character is immediately after a non-operator like 123, true.
|
||||||
* The characters following a quote are not included.
|
* The characters following a quote are not included.
|
||||||
*/
|
*/
|
||||||
simdjson_really_inline uint64_t follows_potential_scalar() {
|
simdjson_really_inline uint64_t follows_potential_scalar() const noexcept {
|
||||||
// _follows_potential_nonquote_scalar: is defined as marking any character that follows a character
|
// _follows_potential_nonquote_scalar: is defined as marking any character that follows a character
|
||||||
// that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a
|
// that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a
|
||||||
// white space.
|
// white space.
|
||||||
|
@ -10663,11 +10710,13 @@ simdjson_really_inline json_block json_scanner::next(const simd::simd8x64<uint8_
|
||||||
// Performance: there are many ways to skin this cat.
|
// Performance: there are many ways to skin this cat.
|
||||||
const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote();
|
const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote();
|
||||||
uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar);
|
uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar);
|
||||||
return {
|
// We are returning a function-local object so either we get a move constructor
|
||||||
strings,
|
// or we get copy elision.
|
||||||
|
return json_block(
|
||||||
|
strings,// strings is a function-local object so either it moves or the copy is elided.
|
||||||
characters,
|
characters,
|
||||||
follows_nonquote_scalar
|
follows_nonquote_scalar
|
||||||
};
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_really_inline error_code json_scanner::finish() {
|
simdjson_really_inline error_code json_scanner::finish() {
|
||||||
|
@ -10701,13 +10750,13 @@ private:
|
||||||
{}
|
{}
|
||||||
template<size_t STEP_SIZE>
|
template<size_t STEP_SIZE>
|
||||||
simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
||||||
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block);
|
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, const json_block& block);
|
||||||
simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len);
|
simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len);
|
||||||
json_scanner scanner{};
|
json_scanner scanner{};
|
||||||
uint8_t *dst;
|
uint8_t *dst;
|
||||||
};
|
};
|
||||||
|
|
||||||
simdjson_really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, json_block block) {
|
simdjson_really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, const json_block& block) {
|
||||||
uint64_t mask = block.whitespace();
|
uint64_t mask = block.whitespace();
|
||||||
in.compress(mask, dst);
|
in.compress(mask, dst);
|
||||||
dst += 64 - count_ones(mask);
|
dst += 64 - count_ones(mask);
|
||||||
|
@ -10922,7 +10971,7 @@ private:
|
||||||
simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes);
|
simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes);
|
||||||
template<size_t STEP_SIZE>
|
template<size_t STEP_SIZE>
|
||||||
simdjson_really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
simdjson_really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
||||||
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx);
|
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, const json_block& block, size_t idx);
|
||||||
simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial);
|
simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial);
|
||||||
|
|
||||||
json_scanner scanner{};
|
json_scanner scanner{};
|
||||||
|
@ -11012,7 +11061,7 @@ simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *blo
|
||||||
reader.advance();
|
reader.advance();
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx) {
|
simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>& in, const json_block& block, size_t idx) {
|
||||||
uint64_t unescaped = in.lteq(0x1F);
|
uint64_t unescaped = in.lteq(0x1F);
|
||||||
checker.check_next_input(in);
|
checker.check_next_input(in);
|
||||||
indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser
|
indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser
|
||||||
|
@ -11819,12 +11868,11 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_
|
||||||
// practice unless you are in the strange scenario where you have many JSON
|
// practice unless you are in the strange scenario where you have many JSON
|
||||||
// documents made of single atoms.
|
// documents made of single atoms.
|
||||||
//
|
//
|
||||||
uint8_t *copy = static_cast<uint8_t *>(malloc(iter.remaining_len() + SIMDJSON_PADDING));
|
std::unique_ptr<uint8_t[]>copy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]);
|
||||||
if (copy == nullptr) { return MEMALLOC; }
|
if (copy.get() == nullptr) { return MEMALLOC; }
|
||||||
std::memcpy(copy, value, iter.remaining_len());
|
std::memcpy(copy.get(), value, iter.remaining_len());
|
||||||
std::memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING);
|
std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING);
|
||||||
error_code error = visit_number(iter, copy);
|
error_code error = visit_number(iter, copy.get());
|
||||||
free(copy);
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11975,8 +12023,7 @@ simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *
|
||||||
} // namespace simdjson
|
} // namespace simdjson
|
||||||
|
|
||||||
/* begin file include/simdjson/westmere/end.h */
|
/* begin file include/simdjson/westmere/end.h */
|
||||||
SIMDJSON_UNTARGET_REGION
|
SIMDJSON_UNTARGET_WESTMERE
|
||||||
#undef westmere
|
|
||||||
/* end file include/simdjson/westmere/end.h */
|
/* end file include/simdjson/westmere/end.h */
|
||||||
/* end file src/westmere/dom_parser_implementation.cpp */
|
/* end file src/westmere/dom_parser_implementation.cpp */
|
||||||
#endif
|
#endif
|
||||||
|
|
24701
singleheader/simdjson.h
24701
singleheader/simdjson.h
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue