Last commit for version 0.9.0. (#1503)

* Last commit for version 0.9.0.

* Removing space.
This commit is contained in:
Daniel Lemire 2021-03-17 11:08:44 -04:00 committed by GitHub
parent e35088d6ff
commit 2db4592571
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 4006 additions and 21036 deletions

View File

@ -8,11 +8,11 @@ project(simdjson
) )
set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MAJOR 0)
set(PROJECT_VERSION_MINOR 8) set(PROJECT_VERSION_MINOR 9)
set(PROJECT_VERSION_PATCH 0) set(PROJECT_VERSION_PATCH 0)
set(SIMDJSON_SEMANTIC_VERSION "0.8.0" CACHE STRING "simdjson semantic version") set(SIMDJSON_SEMANTIC_VERSION "0.9.0" CACHE STRING "simdjson semantic version")
set(SIMDJSON_LIB_VERSION "7.0.0" CACHE STRING "simdjson library version") set(SIMDJSON_LIB_VERSION "8.0.0" CACHE STRING "simdjson library version")
set(SIMDJSON_LIB_SOVERSION "7" CACHE STRING "simdjson library soversion") set(SIMDJSON_LIB_SOVERSION "8" CACHE STRING "simdjson library soversion")
set(SIMDJSON_GITHUB_REPOSITORY https://github.com/simdjson/simdjson) set(SIMDJSON_GITHUB_REPOSITORY https://github.com/simdjson/simdjson)
include(GNUInstallDirs) include(GNUInstallDirs)

View File

@ -38,7 +38,7 @@ PROJECT_NAME = simdjson
# could be handy for archiving the generated documentation or if some version # could be handy for archiving the generated documentation or if some version
# control system is used. # control system is used.
PROJECT_NUMBER = "0.8.0" PROJECT_NUMBER = "0.9.0"
# Using the PROJECT_BRIEF tag one can provide an optional one line description # Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a # for a project that appears at the top of each page and should give viewer a

View File

@ -3,7 +3,7 @@
[![Ubuntu 20.04 CI](https://github.com/simdjson/simdjson/workflows/Ubuntu%2020.04%20CI%20(GCC%209)/badge.svg)](https://simdjson.org/plots.html) [![Ubuntu 20.04 CI](https://github.com/simdjson/simdjson/workflows/Ubuntu%2020.04%20CI%20(GCC%209)/badge.svg)](https://simdjson.org/plots.html)
![VS16-CI](https://github.com/simdjson/simdjson/workflows/VS16-CI/badge.svg) ![VS16-CI](https://github.com/simdjson/simdjson/workflows/VS16-CI/badge.svg)
![MinGW64-CI](https://github.com/simdjson/simdjson/workflows/MinGW64-CI/badge.svg) ![MinGW64-CI](https://github.com/simdjson/simdjson/workflows/MinGW64-CI/badge.svg)
[![][license img]][license] [![Doxygen Documentation](https://img.shields.io/badge/docs-doxygen-green.svg)](https://simdjson.org/api/0.8.0/index.html) [![][license img]][license] [![Doxygen Documentation](https://img.shields.io/badge/docs-doxygen-green.svg)](https://simdjson.org/api/0.9.0/index.html)
simdjson : Parsing gigabytes of JSON per second simdjson : Parsing gigabytes of JSON per second
=============================================== ===============================================
@ -79,7 +79,7 @@ Usage documentation is available:
* [Performance](doc/performance.md) shows some more advanced scenarios and how to tune for them. * [Performance](doc/performance.md) shows some more advanced scenarios and how to tune for them.
* [Implementation Selection](doc/implementation-selection.md) describes runtime CPU detection and * [Implementation Selection](doc/implementation-selection.md) describes runtime CPU detection and
how you can work with it. how you can work with it.
* [API](https://simdjson.org/api/0.8.0/annotated.html) contains the automatically generated API documentation. * [API](https://simdjson.org/api/0.9.0/annotated.html) contains the automatically generated API documentation.
Performance results Performance results
------------------- -------------------

View File

@ -4,7 +4,7 @@
#define SIMDJSON_SIMDJSON_VERSION_H #define SIMDJSON_SIMDJSON_VERSION_H
/** The version of simdjson being used (major.minor.revision) */ /** The version of simdjson being used (major.minor.revision) */
#define SIMDJSON_VERSION 0.8.0 #define SIMDJSON_VERSION 0.9.0
namespace simdjson { namespace simdjson {
enum { enum {
@ -15,7 +15,7 @@ enum {
/** /**
* The minor version (major.MINOR.revision) of simdjson being used. * The minor version (major.MINOR.revision) of simdjson being used.
*/ */
SIMDJSON_VERSION_MINOR = 8, SIMDJSON_VERSION_MINOR = 9,
/** /**
* The revision (major.minor.REVISION) of simdjson being used. * The revision (major.minor.REVISION) of simdjson being used.
*/ */

View File

@ -1,4 +1,4 @@
/* auto-generated on 2021-02-10 16:52:04 -0500. Do not edit! */ /* auto-generated on 2021-03-16 17:57:23 -0400. Do not edit! */
/* begin file src/simdjson.cpp */ /* begin file src/simdjson.cpp */
#include "simdjson.h" #include "simdjson.h"
@ -1477,7 +1477,8 @@ namespace internal {
{ INVALID_URI_FRAGMENT, "Invalid URI fragment syntax." }, { INVALID_URI_FRAGMENT, "Invalid URI fragment syntax." },
{ UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson" }, { UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson" },
{ PARSER_IN_USE, "Cannot parse a new document while a document is still in use." }, { PARSER_IN_USE, "Cannot parse a new document while a document is still in use." },
{ OUT_OF_ORDER_ITERATION, "Objects and arrays can only be iterated when they are first encountered." } { OUT_OF_ORDER_ITERATION, "Objects and arrays can only be iterated when they are first encountered." },
{ INSUFFICIENT_PADDING, "simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length." }
}; // error_messages[] }; // error_messages[]
} // namespace internal } // namespace internal
@ -2648,6 +2649,7 @@ simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept {
const implementation * builtin_implementation() { const implementation * builtin_implementation() {
static const implementation * builtin_impl = available_implementations[STRINGIFY(SIMDJSON_BUILTIN_IMPLEMENTATION)]; static const implementation * builtin_impl = available_implementations[STRINGIFY(SIMDJSON_BUILTIN_IMPLEMENTATION)];
assert(builtin_impl);
return builtin_impl; return builtin_impl;
} }
@ -2681,7 +2683,6 @@ simdjson_warn_unused error_code implementation::create_dom_parser_implementation
} // namespace simdjson } // namespace simdjson
/* begin file include/simdjson/arm64/end.h */ /* begin file include/simdjson/arm64/end.h */
#undef arm64
/* end file include/simdjson/arm64/end.h */ /* end file include/simdjson/arm64/end.h */
/* end file src/arm64/implementation.cpp */ /* end file src/arm64/implementation.cpp */
/* begin file src/arm64/dom_parser_implementation.cpp */ /* begin file src/arm64/dom_parser_implementation.cpp */
@ -2702,9 +2703,9 @@ using namespace simd;
struct json_character_block { struct json_character_block {
static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in); static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in);
simdjson_really_inline uint64_t whitespace() const { return _whitespace; } simdjson_really_inline uint64_t whitespace() const noexcept { return _whitespace; }
simdjson_really_inline uint64_t op() const { return _op; } simdjson_really_inline uint64_t op() const noexcept { return _op; }
simdjson_really_inline uint64_t scalar() { return ~(op() | whitespace()); } simdjson_really_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); }
uint64_t _whitespace; uint64_t _whitespace;
uint64_t _op; uint64_t _op;
@ -3017,7 +3018,7 @@ private:
// Routines to print masks and text for debugging bitmask operations // Routines to print masks and text for debugging bitmask operations
simdjson_unused static char * format_input_text_64(const uint8_t *text) { simdjson_unused static char * format_input_text_64(const uint8_t *text) {
static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1)); static char buf[sizeof(simd8x64<uint8_t>) + 1];
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) { for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
} }
@ -3027,7 +3028,7 @@ simdjson_unused static char * format_input_text_64(const uint8_t *text) {
// Routines to print masks and text for debugging bitmask operations // Routines to print masks and text for debugging bitmask operations
simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) { simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1)); static char buf[sizeof(simd8x64<uint8_t>) + 1];
in.store(reinterpret_cast<uint8_t*>(buf)); in.store(reinterpret_cast<uint8_t*>(buf));
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) { for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
if (buf[i] < ' ') { buf[i] = '_'; } if (buf[i] < ' ') { buf[i] = '_'; }
@ -3037,7 +3038,7 @@ simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
} }
simdjson_unused static char * format_mask(uint64_t mask) { simdjson_unused static char * format_mask(uint64_t mask) {
static char *buf = reinterpret_cast<char*>(malloc(64 + 1)); static char buf[sizeof(simd8x64<uint8_t>) + 1];
for (size_t i=0; i<64; i++) { for (size_t i=0; i<64; i++) {
buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
} }
@ -3085,6 +3086,10 @@ namespace {
namespace stage1 { namespace stage1 {
struct json_string_block { struct json_string_block {
// We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017
simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) :
_backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {}
// Escaped characters (characters following an escape() character) // Escaped characters (characters following an escape() character)
simdjson_really_inline uint64_t escaped() const { return _escaped; } simdjson_really_inline uint64_t escaped() const { return _escaped; }
// Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \)
@ -3206,12 +3211,15 @@ simdjson_really_inline json_string_block json_string_scanner::next(const simd::s
prev_in_string = uint64_t(static_cast<int64_t>(in_string) >> 63); prev_in_string = uint64_t(static_cast<int64_t>(in_string) >> 63);
// Use ^ to turn the beginning quote off, and the end quote on. // Use ^ to turn the beginning quote off, and the end quote on.
return {
// We are returning a function-local object so either we get a move constructor
// or we get copy elision.
return json_string_block(
backslash, backslash,
escaped, escaped,
quote, quote,
in_string in_string
}; );
} }
simdjson_really_inline error_code json_string_scanner::finish() { simdjson_really_inline error_code json_string_scanner::finish() {
@ -3251,20 +3259,26 @@ namespace stage1 {
*/ */
struct json_block { struct json_block {
public: public:
// We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017
simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
_string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
_string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
/** /**
* The start of structurals. * The start of structurals.
* In simdjson prior to v0.3, these were called the pseudo-structural characters. * In simdjson prior to v0.3, these were called the pseudo-structural characters.
**/ **/
simdjson_really_inline uint64_t structural_start() { return potential_structural_start() & ~_string.string_tail(); } simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); }
/** All JSON whitespace (i.e. not in a string) */ /** All JSON whitespace (i.e. not in a string) */
simdjson_really_inline uint64_t whitespace() { return non_quote_outside_string(_characters.whitespace()); } simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); }
// Helpers // Helpers
/** Whether the given characters are inside a string (only works on non-quotes) */ /** Whether the given characters are inside a string (only works on non-quotes) */
simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) { return _string.non_quote_inside_string(mask); } simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); }
/** Whether the given characters are outside a string (only works on non-quotes) */ /** Whether the given characters are outside a string (only works on non-quotes) */
simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) { return _string.non_quote_outside_string(mask); } simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); }
// string and escape characters // string and escape characters
json_string_block _string; json_string_block _string;
@ -3279,12 +3293,12 @@ private:
* structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc".
* They may reside inside a string. * They may reside inside a string.
**/ **/
simdjson_really_inline uint64_t potential_structural_start() { return _characters.op() | potential_scalar_start(); } simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); }
/** /**
* The start of non-operator runs, like 123, true and "abc". * The start of non-operator runs, like 123, true and "abc".
* It may reside inside a string. * It may reside inside a string.
**/ **/
simdjson_really_inline uint64_t potential_scalar_start() { simdjson_really_inline uint64_t potential_scalar_start() const noexcept {
// The term "scalar" refers to anything except structural characters and white space // The term "scalar" refers to anything except structural characters and white space
// (so letters, numbers, quotes). // (so letters, numbers, quotes).
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space
@ -3295,7 +3309,7 @@ private:
* Whether the given character is immediately after a non-operator like 123, true. * Whether the given character is immediately after a non-operator like 123, true.
* The characters following a quote are not included. * The characters following a quote are not included.
*/ */
simdjson_really_inline uint64_t follows_potential_scalar() { simdjson_really_inline uint64_t follows_potential_scalar() const noexcept {
// _follows_potential_nonquote_scalar: is defined as marking any character that follows a character // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character
// that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a
// white space. // white space.
@ -3361,11 +3375,13 @@ simdjson_really_inline json_block json_scanner::next(const simd::simd8x64<uint8_
// Performance: there are many ways to skin this cat. // Performance: there are many ways to skin this cat.
const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote();
uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar);
return { // We are returning a function-local object so either we get a move constructor
strings, // or we get copy elision.
return json_block(
strings,// strings is a function-local object so either it moves or the copy is elided.
characters, characters,
follows_nonquote_scalar follows_nonquote_scalar
}; );
} }
simdjson_really_inline error_code json_scanner::finish() { simdjson_really_inline error_code json_scanner::finish() {
@ -3399,13 +3415,13 @@ private:
{} {}
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept; simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept;
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block); simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, const json_block& block);
simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len);
json_scanner scanner{}; json_scanner scanner{};
uint8_t *dst; uint8_t *dst;
}; };
simdjson_really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, json_block block) { simdjson_really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, const json_block& block) {
uint64_t mask = block.whitespace(); uint64_t mask = block.whitespace();
in.compress(mask, dst); in.compress(mask, dst);
dst += 64 - count_ones(mask); dst += 64 - count_ones(mask);
@ -3620,7 +3636,7 @@ private:
simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes);
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
simdjson_really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept; simdjson_really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx); simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, const json_block& block, size_t idx);
simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial); simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial);
json_scanner scanner{}; json_scanner scanner{};
@ -3710,7 +3726,7 @@ simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *blo
reader.advance(); reader.advance();
} }
simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx) { simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>& in, const json_block& block, size_t idx) {
uint64_t unescaped = in.lteq(0x1F); uint64_t unescaped = in.lteq(0x1F);
checker.check_next_input(in); checker.check_next_input(in);
indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser
@ -4518,12 +4534,11 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_
// practice unless you are in the strange scenario where you have many JSON // practice unless you are in the strange scenario where you have many JSON
// documents made of single atoms. // documents made of single atoms.
// //
uint8_t *copy = static_cast<uint8_t *>(malloc(iter.remaining_len() + SIMDJSON_PADDING)); std::unique_ptr<uint8_t[]>copy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]);
if (copy == nullptr) { return MEMALLOC; } if (copy.get() == nullptr) { return MEMALLOC; }
std::memcpy(copy, value, iter.remaining_len()); std::memcpy(copy.get(), value, iter.remaining_len());
std::memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING); std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING);
error_code error = visit_number(iter, copy); error_code error = visit_number(iter, copy.get());
free(copy);
return error; return error;
} }
@ -4675,7 +4690,6 @@ simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *
} // namespace simdjson } // namespace simdjson
/* begin file include/simdjson/arm64/end.h */ /* begin file include/simdjson/arm64/end.h */
#undef arm64
/* end file include/simdjson/arm64/end.h */ /* end file include/simdjson/arm64/end.h */
/* end file src/arm64/dom_parser_implementation.cpp */ /* end file src/arm64/dom_parser_implementation.cpp */
#endif #endif
@ -4705,7 +4719,6 @@ simdjson_warn_unused error_code implementation::create_dom_parser_implementation
} // namespace simdjson } // namespace simdjson
/* begin file include/simdjson/fallback/end.h */ /* begin file include/simdjson/fallback/end.h */
#undef fallback
/* end file include/simdjson/fallback/end.h */ /* end file include/simdjson/fallback/end.h */
/* end file src/fallback/implementation.cpp */ /* end file src/fallback/implementation.cpp */
/* begin file src/fallback/dom_parser_implementation.cpp */ /* begin file src/fallback/dom_parser_implementation.cpp */
@ -5795,12 +5808,11 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_
// practice unless you are in the strange scenario where you have many JSON // practice unless you are in the strange scenario where you have many JSON
// documents made of single atoms. // documents made of single atoms.
// //
uint8_t *copy = static_cast<uint8_t *>(malloc(iter.remaining_len() + SIMDJSON_PADDING)); std::unique_ptr<uint8_t[]>copy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]);
if (copy == nullptr) { return MEMALLOC; } if (copy.get() == nullptr) { return MEMALLOC; }
std::memcpy(copy, value, iter.remaining_len()); std::memcpy(copy.get(), value, iter.remaining_len());
std::memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING); std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING);
error_code error = visit_number(iter, copy); error_code error = visit_number(iter, copy.get());
free(copy);
return error; return error;
} }
@ -5923,7 +5935,6 @@ simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *
} // namespace simdjson } // namespace simdjson
/* begin file include/simdjson/fallback/end.h */ /* begin file include/simdjson/fallback/end.h */
#undef fallback
/* end file include/simdjson/fallback/end.h */ /* end file include/simdjson/fallback/end.h */
/* end file src/fallback/dom_parser_implementation.cpp */ /* end file src/fallback/dom_parser_implementation.cpp */
#endif #endif
@ -5954,8 +5965,7 @@ simdjson_warn_unused error_code implementation::create_dom_parser_implementation
} // namespace simdjson } // namespace simdjson
/* begin file include/simdjson/haswell/end.h */ /* begin file include/simdjson/haswell/end.h */
SIMDJSON_UNTARGET_REGION SIMDJSON_UNTARGET_HASWELL
#undef haswell
/* end file include/simdjson/haswell/end.h */ /* end file include/simdjson/haswell/end.h */
/* end file src/haswell/implementation.cpp */ /* end file src/haswell/implementation.cpp */
@ -5979,19 +5989,19 @@ using namespace simd;
struct json_character_block { struct json_character_block {
static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in); static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in);
// ASCII white-space ('\r','\n','\t',' ') // ASCII white-space ('\r','\n','\t',' ')
simdjson_really_inline uint64_t whitespace() const; simdjson_really_inline uint64_t whitespace() const noexcept;
// non-quote structural characters (comma, colon, braces, brackets) // non-quote structural characters (comma, colon, braces, brackets)
simdjson_really_inline uint64_t op() const; simdjson_really_inline uint64_t op() const noexcept;
// neither a structural character nor a white-space, so letters, numbers and quotes // neither a structural character nor a white-space, so letters, numbers and quotes
simdjson_really_inline uint64_t scalar() const; simdjson_really_inline uint64_t scalar() const noexcept;
uint64_t _whitespace; // ASCII white-space ('\r','\n','\t',' ') uint64_t _whitespace; // ASCII white-space ('\r','\n','\t',' ')
uint64_t _op; // structural characters (comma, colon, braces, brackets but not quotes) uint64_t _op; // structural characters (comma, colon, braces, brackets but not quotes)
}; };
simdjson_really_inline uint64_t json_character_block::whitespace() const { return _whitespace; } simdjson_really_inline uint64_t json_character_block::whitespace() const noexcept { return _whitespace; }
simdjson_really_inline uint64_t json_character_block::op() const { return _op; } simdjson_really_inline uint64_t json_character_block::op() const noexcept { return _op; }
simdjson_really_inline uint64_t json_character_block::scalar() const { return ~(op() | whitespace()); } simdjson_really_inline uint64_t json_character_block::scalar() const noexcept { return ~(op() | whitespace()); }
// This identifies structural characters (comma, colon, braces, brackets), // This identifies structural characters (comma, colon, braces, brackets),
// and ASCII white-space ('\r','\n','\t',' '). // and ASCII white-space ('\r','\n','\t',' ').
@ -6297,7 +6307,7 @@ private:
// Routines to print masks and text for debugging bitmask operations // Routines to print masks and text for debugging bitmask operations
simdjson_unused static char * format_input_text_64(const uint8_t *text) { simdjson_unused static char * format_input_text_64(const uint8_t *text) {
static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1)); static char buf[sizeof(simd8x64<uint8_t>) + 1];
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) { for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
} }
@ -6307,7 +6317,7 @@ simdjson_unused static char * format_input_text_64(const uint8_t *text) {
// Routines to print masks and text for debugging bitmask operations // Routines to print masks and text for debugging bitmask operations
simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) { simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1)); static char buf[sizeof(simd8x64<uint8_t>) + 1];
in.store(reinterpret_cast<uint8_t*>(buf)); in.store(reinterpret_cast<uint8_t*>(buf));
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) { for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
if (buf[i] < ' ') { buf[i] = '_'; } if (buf[i] < ' ') { buf[i] = '_'; }
@ -6317,7 +6327,7 @@ simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
} }
simdjson_unused static char * format_mask(uint64_t mask) { simdjson_unused static char * format_mask(uint64_t mask) {
static char *buf = reinterpret_cast<char*>(malloc(64 + 1)); static char buf[sizeof(simd8x64<uint8_t>) + 1];
for (size_t i=0; i<64; i++) { for (size_t i=0; i<64; i++) {
buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
} }
@ -6365,6 +6375,10 @@ namespace {
namespace stage1 { namespace stage1 {
struct json_string_block { struct json_string_block {
// We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017
simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) :
_backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {}
// Escaped characters (characters following an escape() character) // Escaped characters (characters following an escape() character)
simdjson_really_inline uint64_t escaped() const { return _escaped; } simdjson_really_inline uint64_t escaped() const { return _escaped; }
// Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \)
@ -6486,12 +6500,15 @@ simdjson_really_inline json_string_block json_string_scanner::next(const simd::s
prev_in_string = uint64_t(static_cast<int64_t>(in_string) >> 63); prev_in_string = uint64_t(static_cast<int64_t>(in_string) >> 63);
// Use ^ to turn the beginning quote off, and the end quote on. // Use ^ to turn the beginning quote off, and the end quote on.
return {
// We are returning a function-local object so either we get a move constructor
// or we get copy elision.
return json_string_block(
backslash, backslash,
escaped, escaped,
quote, quote,
in_string in_string
}; );
} }
simdjson_really_inline error_code json_string_scanner::finish() { simdjson_really_inline error_code json_string_scanner::finish() {
@ -6531,20 +6548,26 @@ namespace stage1 {
*/ */
struct json_block { struct json_block {
public: public:
// We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017
simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
_string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
_string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
/** /**
* The start of structurals. * The start of structurals.
* In simdjson prior to v0.3, these were called the pseudo-structural characters. * In simdjson prior to v0.3, these were called the pseudo-structural characters.
**/ **/
simdjson_really_inline uint64_t structural_start() { return potential_structural_start() & ~_string.string_tail(); } simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); }
/** All JSON whitespace (i.e. not in a string) */ /** All JSON whitespace (i.e. not in a string) */
simdjson_really_inline uint64_t whitespace() { return non_quote_outside_string(_characters.whitespace()); } simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); }
// Helpers // Helpers
/** Whether the given characters are inside a string (only works on non-quotes) */ /** Whether the given characters are inside a string (only works on non-quotes) */
simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) { return _string.non_quote_inside_string(mask); } simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); }
/** Whether the given characters are outside a string (only works on non-quotes) */ /** Whether the given characters are outside a string (only works on non-quotes) */
simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) { return _string.non_quote_outside_string(mask); } simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); }
// string and escape characters // string and escape characters
json_string_block _string; json_string_block _string;
@ -6559,12 +6582,12 @@ private:
* structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc".
* They may reside inside a string. * They may reside inside a string.
**/ **/
simdjson_really_inline uint64_t potential_structural_start() { return _characters.op() | potential_scalar_start(); } simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); }
/** /**
* The start of non-operator runs, like 123, true and "abc". * The start of non-operator runs, like 123, true and "abc".
* It may reside inside a string. * It may reside inside a string.
**/ **/
simdjson_really_inline uint64_t potential_scalar_start() { simdjson_really_inline uint64_t potential_scalar_start() const noexcept {
// The term "scalar" refers to anything except structural characters and white space // The term "scalar" refers to anything except structural characters and white space
// (so letters, numbers, quotes). // (so letters, numbers, quotes).
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space
@ -6575,7 +6598,7 @@ private:
* Whether the given character is immediately after a non-operator like 123, true. * Whether the given character is immediately after a non-operator like 123, true.
* The characters following a quote are not included. * The characters following a quote are not included.
*/ */
simdjson_really_inline uint64_t follows_potential_scalar() { simdjson_really_inline uint64_t follows_potential_scalar() const noexcept {
// _follows_potential_nonquote_scalar: is defined as marking any character that follows a character // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character
// that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a
// white space. // white space.
@ -6641,11 +6664,13 @@ simdjson_really_inline json_block json_scanner::next(const simd::simd8x64<uint8_
// Performance: there are many ways to skin this cat. // Performance: there are many ways to skin this cat.
const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote();
uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar);
return { // We are returning a function-local object so either we get a move constructor
strings, // or we get copy elision.
return json_block(
strings,// strings is a function-local object so either it moves or the copy is elided.
characters, characters,
follows_nonquote_scalar follows_nonquote_scalar
}; );
} }
simdjson_really_inline error_code json_scanner::finish() { simdjson_really_inline error_code json_scanner::finish() {
@ -6679,13 +6704,13 @@ private:
{} {}
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept; simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept;
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block); simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, const json_block& block);
simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len);
json_scanner scanner{}; json_scanner scanner{};
uint8_t *dst; uint8_t *dst;
}; };
simdjson_really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, json_block block) { simdjson_really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, const json_block& block) {
uint64_t mask = block.whitespace(); uint64_t mask = block.whitespace();
in.compress(mask, dst); in.compress(mask, dst);
dst += 64 - count_ones(mask); dst += 64 - count_ones(mask);
@ -6900,7 +6925,7 @@ private:
simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes);
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
simdjson_really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept; simdjson_really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx); simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, const json_block& block, size_t idx);
simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial); simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial);
json_scanner scanner{}; json_scanner scanner{};
@ -6990,7 +7015,7 @@ simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *blo
reader.advance(); reader.advance();
} }
simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx) { simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>& in, const json_block& block, size_t idx) {
uint64_t unescaped = in.lteq(0x1F); uint64_t unescaped = in.lteq(0x1F);
checker.check_next_input(in); checker.check_next_input(in);
indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser
@ -7797,12 +7822,11 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_
// practice unless you are in the strange scenario where you have many JSON // practice unless you are in the strange scenario where you have many JSON
// documents made of single atoms. // documents made of single atoms.
// //
uint8_t *copy = static_cast<uint8_t *>(malloc(iter.remaining_len() + SIMDJSON_PADDING)); std::unique_ptr<uint8_t[]>copy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]);
if (copy == nullptr) { return MEMALLOC; } if (copy.get() == nullptr) { return MEMALLOC; }
std::memcpy(copy, value, iter.remaining_len()); std::memcpy(copy.get(), value, iter.remaining_len());
std::memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING); std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING);
error_code error = visit_number(iter, copy); error_code error = visit_number(iter, copy.get());
free(copy);
return error; return error;
} }
@ -7952,8 +7976,7 @@ simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *
} // namespace simdjson } // namespace simdjson
/* begin file include/simdjson/haswell/end.h */ /* begin file include/simdjson/haswell/end.h */
SIMDJSON_UNTARGET_REGION SIMDJSON_UNTARGET_HASWELL
#undef haswell
/* end file include/simdjson/haswell/end.h */ /* end file include/simdjson/haswell/end.h */
/* end file src/haswell/dom_parser_implementation.cpp */ /* end file src/haswell/dom_parser_implementation.cpp */
#endif #endif
@ -7983,7 +8006,6 @@ simdjson_warn_unused error_code implementation::create_dom_parser_implementation
} // namespace simdjson } // namespace simdjson
/* begin file include/simdjson/ppc64/end.h */ /* begin file include/simdjson/ppc64/end.h */
#undef ppc64
/* end file include/simdjson/ppc64/end.h */ /* end file include/simdjson/ppc64/end.h */
/* end file src/ppc64/implementation.cpp */ /* end file src/ppc64/implementation.cpp */
/* begin file src/ppc64/dom_parser_implementation.cpp */ /* begin file src/ppc64/dom_parser_implementation.cpp */
@ -8004,9 +8026,9 @@ using namespace simd;
struct json_character_block { struct json_character_block {
static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in); static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in);
simdjson_really_inline uint64_t whitespace() const { return _whitespace; } simdjson_really_inline uint64_t whitespace() const noexcept { return _whitespace; }
simdjson_really_inline uint64_t op() const { return _op; } simdjson_really_inline uint64_t op() const noexcept { return _op; }
simdjson_really_inline uint64_t scalar() { return ~(op() | whitespace()); } simdjson_really_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); }
uint64_t _whitespace; uint64_t _whitespace;
uint64_t _op; uint64_t _op;
@ -8290,7 +8312,7 @@ private:
// Routines to print masks and text for debugging bitmask operations // Routines to print masks and text for debugging bitmask operations
simdjson_unused static char * format_input_text_64(const uint8_t *text) { simdjson_unused static char * format_input_text_64(const uint8_t *text) {
static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1)); static char buf[sizeof(simd8x64<uint8_t>) + 1];
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) { for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
} }
@ -8300,7 +8322,7 @@ simdjson_unused static char * format_input_text_64(const uint8_t *text) {
// Routines to print masks and text for debugging bitmask operations // Routines to print masks and text for debugging bitmask operations
simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) { simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1)); static char buf[sizeof(simd8x64<uint8_t>) + 1];
in.store(reinterpret_cast<uint8_t*>(buf)); in.store(reinterpret_cast<uint8_t*>(buf));
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) { for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
if (buf[i] < ' ') { buf[i] = '_'; } if (buf[i] < ' ') { buf[i] = '_'; }
@ -8310,7 +8332,7 @@ simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
} }
simdjson_unused static char * format_mask(uint64_t mask) { simdjson_unused static char * format_mask(uint64_t mask) {
static char *buf = reinterpret_cast<char*>(malloc(64 + 1)); static char buf[sizeof(simd8x64<uint8_t>) + 1];
for (size_t i=0; i<64; i++) { for (size_t i=0; i<64; i++) {
buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
} }
@ -8358,6 +8380,10 @@ namespace {
namespace stage1 { namespace stage1 {
struct json_string_block { struct json_string_block {
// We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017
simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) :
_backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {}
// Escaped characters (characters following an escape() character) // Escaped characters (characters following an escape() character)
simdjson_really_inline uint64_t escaped() const { return _escaped; } simdjson_really_inline uint64_t escaped() const { return _escaped; }
// Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \)
@ -8479,12 +8505,15 @@ simdjson_really_inline json_string_block json_string_scanner::next(const simd::s
prev_in_string = uint64_t(static_cast<int64_t>(in_string) >> 63); prev_in_string = uint64_t(static_cast<int64_t>(in_string) >> 63);
// Use ^ to turn the beginning quote off, and the end quote on. // Use ^ to turn the beginning quote off, and the end quote on.
return {
// We are returning a function-local object so either we get a move constructor
// or we get copy elision.
return json_string_block(
backslash, backslash,
escaped, escaped,
quote, quote,
in_string in_string
}; );
} }
simdjson_really_inline error_code json_string_scanner::finish() { simdjson_really_inline error_code json_string_scanner::finish() {
@ -8524,20 +8553,26 @@ namespace stage1 {
*/ */
struct json_block { struct json_block {
public: public:
// We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017
simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
_string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
_string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
/** /**
* The start of structurals. * The start of structurals.
* In simdjson prior to v0.3, these were called the pseudo-structural characters. * In simdjson prior to v0.3, these were called the pseudo-structural characters.
**/ **/
simdjson_really_inline uint64_t structural_start() { return potential_structural_start() & ~_string.string_tail(); } simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); }
/** All JSON whitespace (i.e. not in a string) */ /** All JSON whitespace (i.e. not in a string) */
simdjson_really_inline uint64_t whitespace() { return non_quote_outside_string(_characters.whitespace()); } simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); }
// Helpers // Helpers
/** Whether the given characters are inside a string (only works on non-quotes) */ /** Whether the given characters are inside a string (only works on non-quotes) */
simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) { return _string.non_quote_inside_string(mask); } simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); }
/** Whether the given characters are outside a string (only works on non-quotes) */ /** Whether the given characters are outside a string (only works on non-quotes) */
simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) { return _string.non_quote_outside_string(mask); } simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); }
// string and escape characters // string and escape characters
json_string_block _string; json_string_block _string;
@ -8552,12 +8587,12 @@ private:
* structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc".
* They may reside inside a string. * They may reside inside a string.
**/ **/
simdjson_really_inline uint64_t potential_structural_start() { return _characters.op() | potential_scalar_start(); } simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); }
/** /**
* The start of non-operator runs, like 123, true and "abc". * The start of non-operator runs, like 123, true and "abc".
* It may reside inside a string. * It may reside inside a string.
**/ **/
simdjson_really_inline uint64_t potential_scalar_start() { simdjson_really_inline uint64_t potential_scalar_start() const noexcept {
// The term "scalar" refers to anything except structural characters and white space // The term "scalar" refers to anything except structural characters and white space
// (so letters, numbers, quotes). // (so letters, numbers, quotes).
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space
@ -8568,7 +8603,7 @@ private:
* Whether the given character is immediately after a non-operator like 123, true. * Whether the given character is immediately after a non-operator like 123, true.
* The characters following a quote are not included. * The characters following a quote are not included.
*/ */
simdjson_really_inline uint64_t follows_potential_scalar() { simdjson_really_inline uint64_t follows_potential_scalar() const noexcept {
// _follows_potential_nonquote_scalar: is defined as marking any character that follows a character // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character
// that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a
// white space. // white space.
@ -8634,11 +8669,13 @@ simdjson_really_inline json_block json_scanner::next(const simd::simd8x64<uint8_
// Performance: there are many ways to skin this cat. // Performance: there are many ways to skin this cat.
const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote();
uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar);
return { // We are returning a function-local object so either we get a move constructor
strings, // or we get copy elision.
return json_block(
strings,// strings is a function-local object so either it moves or the copy is elided.
characters, characters,
follows_nonquote_scalar follows_nonquote_scalar
}; );
} }
simdjson_really_inline error_code json_scanner::finish() { simdjson_really_inline error_code json_scanner::finish() {
@ -8672,13 +8709,13 @@ private:
{} {}
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept; simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept;
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block); simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, const json_block& block);
simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len);
json_scanner scanner{}; json_scanner scanner{};
uint8_t *dst; uint8_t *dst;
}; };
simdjson_really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, json_block block) { simdjson_really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, const json_block& block) {
uint64_t mask = block.whitespace(); uint64_t mask = block.whitespace();
in.compress(mask, dst); in.compress(mask, dst);
dst += 64 - count_ones(mask); dst += 64 - count_ones(mask);
@ -8893,7 +8930,7 @@ private:
simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes);
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
simdjson_really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept; simdjson_really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx); simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, const json_block& block, size_t idx);
simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial); simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial);
json_scanner scanner{}; json_scanner scanner{};
@ -8983,7 +9020,7 @@ simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *blo
reader.advance(); reader.advance();
} }
simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx) { simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>& in, const json_block& block, size_t idx) {
uint64_t unescaped = in.lteq(0x1F); uint64_t unescaped = in.lteq(0x1F);
checker.check_next_input(in); checker.check_next_input(in);
indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser
@ -9791,12 +9828,11 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_
// practice unless you are in the strange scenario where you have many JSON // practice unless you are in the strange scenario where you have many JSON
// documents made of single atoms. // documents made of single atoms.
// //
uint8_t *copy = static_cast<uint8_t *>(malloc(iter.remaining_len() + SIMDJSON_PADDING)); std::unique_ptr<uint8_t[]>copy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]);
if (copy == nullptr) { return MEMALLOC; } if (copy.get() == nullptr) { return MEMALLOC; }
std::memcpy(copy, value, iter.remaining_len()); std::memcpy(copy.get(), value, iter.remaining_len());
std::memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING); std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING);
error_code error = visit_number(iter, copy); error_code error = visit_number(iter, copy.get());
free(copy);
return error; return error;
} }
@ -9948,7 +9984,6 @@ simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *
} // namespace simdjson } // namespace simdjson
/* begin file include/simdjson/ppc64/end.h */ /* begin file include/simdjson/ppc64/end.h */
#undef ppc64
/* end file include/simdjson/ppc64/end.h */ /* end file include/simdjson/ppc64/end.h */
/* end file src/ppc64/dom_parser_implementation.cpp */ /* end file src/ppc64/dom_parser_implementation.cpp */
#endif #endif
@ -9979,8 +10014,7 @@ simdjson_warn_unused error_code implementation::create_dom_parser_implementation
} // namespace simdjson } // namespace simdjson
/* begin file include/simdjson/westmere/end.h */ /* begin file include/simdjson/westmere/end.h */
SIMDJSON_UNTARGET_REGION SIMDJSON_UNTARGET_WESTMERE
#undef westmere
/* end file include/simdjson/westmere/end.h */ /* end file include/simdjson/westmere/end.h */
/* end file src/westmere/implementation.cpp */ /* end file src/westmere/implementation.cpp */
/* begin file src/westmere/dom_parser_implementation.cpp */ /* begin file src/westmere/dom_parser_implementation.cpp */
@ -10003,9 +10037,9 @@ using namespace simd;
struct json_character_block { struct json_character_block {
static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in); static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in);
simdjson_really_inline uint64_t whitespace() const { return _whitespace; } simdjson_really_inline uint64_t whitespace() const noexcept { return _whitespace; }
simdjson_really_inline uint64_t op() const { return _op; } simdjson_really_inline uint64_t op() const noexcept { return _op; }
simdjson_really_inline uint64_t scalar() { return ~(op() | whitespace()); } simdjson_really_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); }
uint64_t _whitespace; uint64_t _whitespace;
uint64_t _op; uint64_t _op;
@ -10319,7 +10353,7 @@ private:
// Routines to print masks and text for debugging bitmask operations // Routines to print masks and text for debugging bitmask operations
simdjson_unused static char * format_input_text_64(const uint8_t *text) { simdjson_unused static char * format_input_text_64(const uint8_t *text) {
static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1)); static char buf[sizeof(simd8x64<uint8_t>) + 1];
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) { for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
} }
@ -10329,7 +10363,7 @@ simdjson_unused static char * format_input_text_64(const uint8_t *text) {
// Routines to print masks and text for debugging bitmask operations // Routines to print masks and text for debugging bitmask operations
simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) { simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1)); static char buf[sizeof(simd8x64<uint8_t>) + 1];
in.store(reinterpret_cast<uint8_t*>(buf)); in.store(reinterpret_cast<uint8_t*>(buf));
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) { for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
if (buf[i] < ' ') { buf[i] = '_'; } if (buf[i] < ' ') { buf[i] = '_'; }
@ -10339,7 +10373,7 @@ simdjson_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
} }
simdjson_unused static char * format_mask(uint64_t mask) { simdjson_unused static char * format_mask(uint64_t mask) {
static char *buf = reinterpret_cast<char*>(malloc(64 + 1)); static char buf[sizeof(simd8x64<uint8_t>) + 1];
for (size_t i=0; i<64; i++) { for (size_t i=0; i<64; i++) {
buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
} }
@ -10387,6 +10421,10 @@ namespace {
namespace stage1 { namespace stage1 {
struct json_string_block { struct json_string_block {
// We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017
simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) :
_backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {}
// Escaped characters (characters following an escape() character) // Escaped characters (characters following an escape() character)
simdjson_really_inline uint64_t escaped() const { return _escaped; } simdjson_really_inline uint64_t escaped() const { return _escaped; }
// Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \)
@ -10508,12 +10546,15 @@ simdjson_really_inline json_string_block json_string_scanner::next(const simd::s
prev_in_string = uint64_t(static_cast<int64_t>(in_string) >> 63); prev_in_string = uint64_t(static_cast<int64_t>(in_string) >> 63);
// Use ^ to turn the beginning quote off, and the end quote on. // Use ^ to turn the beginning quote off, and the end quote on.
return {
// We are returning a function-local object so either we get a move constructor
// or we get copy elision.
return json_string_block(
backslash, backslash,
escaped, escaped,
quote, quote,
in_string in_string
}; );
} }
simdjson_really_inline error_code json_string_scanner::finish() { simdjson_really_inline error_code json_string_scanner::finish() {
@ -10553,20 +10594,26 @@ namespace stage1 {
*/ */
struct json_block { struct json_block {
public: public:
// We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017
simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
_string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
_string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
/** /**
* The start of structurals. * The start of structurals.
* In simdjson prior to v0.3, these were called the pseudo-structural characters. * In simdjson prior to v0.3, these were called the pseudo-structural characters.
**/ **/
simdjson_really_inline uint64_t structural_start() { return potential_structural_start() & ~_string.string_tail(); } simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); }
/** All JSON whitespace (i.e. not in a string) */ /** All JSON whitespace (i.e. not in a string) */
simdjson_really_inline uint64_t whitespace() { return non_quote_outside_string(_characters.whitespace()); } simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); }
// Helpers // Helpers
/** Whether the given characters are inside a string (only works on non-quotes) */ /** Whether the given characters are inside a string (only works on non-quotes) */
simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) { return _string.non_quote_inside_string(mask); } simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); }
/** Whether the given characters are outside a string (only works on non-quotes) */ /** Whether the given characters are outside a string (only works on non-quotes) */
simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) { return _string.non_quote_outside_string(mask); } simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); }
// string and escape characters // string and escape characters
json_string_block _string; json_string_block _string;
@ -10581,12 +10628,12 @@ private:
* structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc".
* They may reside inside a string. * They may reside inside a string.
**/ **/
simdjson_really_inline uint64_t potential_structural_start() { return _characters.op() | potential_scalar_start(); } simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); }
/** /**
* The start of non-operator runs, like 123, true and "abc". * The start of non-operator runs, like 123, true and "abc".
* It may reside inside a string. * It may reside inside a string.
**/ **/
simdjson_really_inline uint64_t potential_scalar_start() { simdjson_really_inline uint64_t potential_scalar_start() const noexcept {
// The term "scalar" refers to anything except structural characters and white space // The term "scalar" refers to anything except structural characters and white space
// (so letters, numbers, quotes). // (so letters, numbers, quotes).
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space
@ -10597,7 +10644,7 @@ private:
* Whether the given character is immediately after a non-operator like 123, true. * Whether the given character is immediately after a non-operator like 123, true.
* The characters following a quote are not included. * The characters following a quote are not included.
*/ */
simdjson_really_inline uint64_t follows_potential_scalar() { simdjson_really_inline uint64_t follows_potential_scalar() const noexcept {
// _follows_potential_nonquote_scalar: is defined as marking any character that follows a character // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character
// that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a
// white space. // white space.
@ -10663,11 +10710,13 @@ simdjson_really_inline json_block json_scanner::next(const simd::simd8x64<uint8_
// Performance: there are many ways to skin this cat. // Performance: there are many ways to skin this cat.
const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote();
uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar);
return { // We are returning a function-local object so either we get a move constructor
strings, // or we get copy elision.
return json_block(
strings,// strings is a function-local object so either it moves or the copy is elided.
characters, characters,
follows_nonquote_scalar follows_nonquote_scalar
}; );
} }
simdjson_really_inline error_code json_scanner::finish() { simdjson_really_inline error_code json_scanner::finish() {
@ -10701,13 +10750,13 @@ private:
{} {}
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept; simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept;
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block); simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, const json_block& block);
simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len);
json_scanner scanner{}; json_scanner scanner{};
uint8_t *dst; uint8_t *dst;
}; };
simdjson_really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, json_block block) { simdjson_really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, const json_block& block) {
uint64_t mask = block.whitespace(); uint64_t mask = block.whitespace();
in.compress(mask, dst); in.compress(mask, dst);
dst += 64 - count_ones(mask); dst += 64 - count_ones(mask);
@ -10922,7 +10971,7 @@ private:
simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes);
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
simdjson_really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept; simdjson_really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx); simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, const json_block& block, size_t idx);
simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial); simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial);
json_scanner scanner{}; json_scanner scanner{};
@ -11012,7 +11061,7 @@ simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *blo
reader.advance(); reader.advance();
} }
simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>& in, json_block block, size_t idx) { simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>& in, const json_block& block, size_t idx) {
uint64_t unescaped = in.lteq(0x1F); uint64_t unescaped = in.lteq(0x1F);
checker.check_next_input(in); checker.check_next_input(in);
indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser
@ -11819,12 +11868,11 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_
// practice unless you are in the strange scenario where you have many JSON // practice unless you are in the strange scenario where you have many JSON
// documents made of single atoms. // documents made of single atoms.
// //
uint8_t *copy = static_cast<uint8_t *>(malloc(iter.remaining_len() + SIMDJSON_PADDING)); std::unique_ptr<uint8_t[]>copy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]);
if (copy == nullptr) { return MEMALLOC; } if (copy.get() == nullptr) { return MEMALLOC; }
std::memcpy(copy, value, iter.remaining_len()); std::memcpy(copy.get(), value, iter.remaining_len());
std::memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING); std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING);
error_code error = visit_number(iter, copy); error_code error = visit_number(iter, copy.get());
free(copy);
return error; return error;
} }
@ -11975,8 +12023,7 @@ simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *
} // namespace simdjson } // namespace simdjson
/* begin file include/simdjson/westmere/end.h */ /* begin file include/simdjson/westmere/end.h */
SIMDJSON_UNTARGET_REGION SIMDJSON_UNTARGET_WESTMERE
#undef westmere
/* end file include/simdjson/westmere/end.h */ /* end file include/simdjson/westmere/end.h */
/* end file src/westmere/dom_parser_implementation.cpp */ /* end file src/westmere/dom_parser_implementation.cpp */
#endif #endif

File diff suppressed because it is too large Load Diff