This updates the minifier. (#446)

This commit is contained in:
Daniel Lemire 2020-01-15 13:45:32 -05:00 committed by GitHub
parent 2dc61fbdc4
commit f611b65bc0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 3522 additions and 4294 deletions

View File

@ -1,4 +1,4 @@
/* auto-generated on Wed Dec 18 14:39:04 UTC 2019. Do not edit! */ /* auto-generated on Wed Jan 15 13:09:01 EST 2020. Do not edit! */
#include <iostream> #include <iostream>
#include "simdjson.h" #include "simdjson.h"

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
/* auto-generated on Wed Dec 18 14:39:04 UTC 2019. Do not edit! */ /* auto-generated on Wed Jan 15 13:09:01 EST 2020. Do not edit! */
/* begin file include/simdjson/simdjson_version.h */ /* begin file include/simdjson/simdjson_version.h */
// /include/simdjson/simdjson_version.h automatically generated by release.py, // /include/simdjson/simdjson_version.h automatically generated by release.py,
// do not change by hand // do not change by hand
@ -18,10 +18,10 @@ enum {
#ifndef SIMDJSON_PORTABILITY_H #ifndef SIMDJSON_PORTABILITY_H
#define SIMDJSON_PORTABILITY_H #define SIMDJSON_PORTABILITY_H
#include <cstdint>
#include <cstddef> #include <cstddef>
#include <cstdlib> #include <cstdint>
#include <cstdio> #include <cstdio>
#include <cstdlib>
#ifdef _MSC_VER #ifdef _MSC_VER
#include <iso646.h> #include <iso646.h>
#endif #endif
@ -34,7 +34,10 @@ enum {
#endif #endif
// this is almost standard? // this is almost standard?
#define STRINGIFY(a) #a #undef STRINGIFY_IMPLEMENTATION_
#undef STRINGIFY
#define STRINGIFY_IMPLEMENTATION_(a) #a
#define STRINGIFY(a) STRINGIFY_IMPLEMENTATION_(a)
// we are going to use runtime dispatch // we are going to use runtime dispatch
#ifdef IS_X86_64 #ifdef IS_X86_64
@ -54,7 +57,7 @@ enum {
#define UNTARGET_REGION _Pragma("GCC pop_options") #define UNTARGET_REGION _Pragma("GCC pop_options")
#endif // clang then gcc #endif // clang then gcc
#endif // x86 #endif // x86
// Default target region macros don't do anything. // Default target region macros don't do anything.
#ifndef TARGET_REGION #ifndef TARGET_REGION
@ -67,9 +70,11 @@ enum {
#define TARGET_WESTMERE TARGET_REGION("sse4.2,pclmul") #define TARGET_WESTMERE TARGET_REGION("sse4.2,pclmul")
#define TARGET_ARM64 #define TARGET_ARM64
// Threading is disabled
#undef SIMDJSON_THREADS_ENABLED
// Is threading enabled? // Is threading enabled?
#if defined(BOOST_HAS_THREADS) || defined(_REENTRANT) || defined(_MT) #if defined(BOOST_HAS_THREADS) || defined(_REENTRANT) || defined(_MT)
#define SIMDJSON_THREADS_ENABLED 1 #define SIMDJSON_THREADS_ENABLED
#endif #endif
#if defined(__clang__) #if defined(__clang__)
@ -84,7 +89,6 @@ enum {
#include <intrin.h> // visual studio #include <intrin.h> // visual studio
#endif #endif
#ifdef _MSC_VER #ifdef _MSC_VER
#define simdjson_strcasecmp _stricmp #define simdjson_strcasecmp _stricmp
#else #else
@ -493,6 +497,22 @@ static inline void print_with_escapes(const char *src, std::ostream &os,
#ifndef SIMDJSON_SIMDJSON_H #ifndef SIMDJSON_SIMDJSON_H
#define SIMDJSON_SIMDJSON_H #define SIMDJSON_SIMDJSON_H
#ifndef __cplusplus
#error simdjson requires a C++ compiler
#endif
#ifndef SIMDJSON_CPLUSPLUS
#if defined(_MSVC_LANG) && !defined(__clang__)
#define SIMDJSON_CPLUSPLUS (_MSC_VER == 1900 ? 201103L : _MSVC_LANG)
#else
#define SIMDJSON_CPLUSPLUS __cplusplus
#endif
#endif
#if (SIMDJSON_CPLUSPLUS < 201703L)
#error simdjson requires a compiler compliant with the C++17 standard
#endif
#include <string> #include <string>
namespace simdjson { namespace simdjson {
@ -529,8 +549,8 @@ enum ErrorValues {
N_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'n' N_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'n'
NUMBER_ERROR, // Problem while parsing a number NUMBER_ERROR, // Problem while parsing a number
UTF8_ERROR, // the input is not valid UTF-8 UTF8_ERROR, // the input is not valid UTF-8
UNITIALIZED, // unknown error, or uninitialized document UNINITIALIZED, // unknown error, or uninitialized document
EMPTY, // no structural document found EMPTY, // no structural element found
UNESCAPED_CHARS, // found unescaped characters in a string. UNESCAPED_CHARS, // found unescaped characters in a string.
UNCLOSED_STRING, // missing quote at the end UNCLOSED_STRING, // missing quote at the end
UNEXPECTED_ERROR // indicative of a bug in simdjson UNEXPECTED_ERROR // indicative of a bug in simdjson
@ -623,39 +643,68 @@ const std::string &error_message(const int);
#include <memory> #include <memory>
#include <string> #include <string>
namespace simdjson { namespace simdjson {
// low-level function to allocate memory with padding so we can read passed the // low-level function to allocate memory with padding so we can read passed the
// "length" bytes safely. if you must provide a pointer to some data, create it // "length" bytes safely. if you must provide a pointer to some data, create it
// with this function: length is the max. size in bytes of the string caller is // with this function: length is the max. size in bytes of the string caller is
// responsible to free the memory (free(...)) // responsible to free the memory (free(...))
char *allocate_padded_buffer(size_t length); inline char *allocate_padded_buffer(size_t length) noexcept {
// we could do a simple malloc
// return (char *) malloc(length + SIMDJSON_PADDING);
// However, we might as well align to cache lines...
size_t totalpaddedlength = length + SIMDJSON_PADDING;
char *padded_buffer = aligned_malloc_char(64, totalpaddedlength);
#ifndef NDEBUG
if (padded_buffer == nullptr) {
errno = EINVAL;
perror("simdjson::allocate_padded_buffer() aligned_malloc_char() failed");
return nullptr;
}
#endif // NDEBUG
memset(padded_buffer + length, 0, totalpaddedlength - length);
return padded_buffer;
} // allocate_padded_buffer
// Simple string with padded allocation. // Simple string with padded allocation.
// We deliberately forbid copies, users should rely on swap or move // We deliberately forbid copies, users should rely on swap or move
// constructors. // constructors.
class padded_string { struct padded_string final {
public:
explicit padded_string() noexcept : viable_size(0), data_ptr(nullptr) {} explicit padded_string() noexcept : viable_size(0), data_ptr(nullptr) {}
explicit padded_string(size_t length) noexcept explicit padded_string(size_t length) noexcept
: viable_size(length), data_ptr(allocate_padded_buffer(length)) { : viable_size(length), data_ptr(allocate_padded_buffer(length)) {
if (data_ptr != nullptr) if (data_ptr != nullptr)
data_ptr[length] = '\0'; // easier when you need a c_str data_ptr[length] = '\0'; // easier when you need a c_str
} }
explicit padded_string(char *data, size_t length) noexcept explicit padded_string(char *data, size_t length) noexcept
: viable_size(length), data_ptr(allocate_padded_buffer(length)) { : viable_size(length), data_ptr(allocate_padded_buffer(length)) {
if (data_ptr != nullptr) { if ((data != nullptr) and (data_ptr != nullptr)) {
memcpy(data_ptr, data, length); memcpy(data_ptr, data, length);
data_ptr[length] = '\0'; // easier when you need a c_str data_ptr[length] = '\0'; // easier when you need a c_str
} }
} }
padded_string(std::string s) noexcept
: viable_size(s.size()), data_ptr(allocate_padded_buffer(s.size())) { // note: do not pass std::string arguments by value
padded_string(const std::string & str_ ) noexcept
: viable_size(str_.size()), data_ptr(allocate_padded_buffer(str_.size())) {
if (data_ptr != nullptr) { if (data_ptr != nullptr) {
memcpy(data_ptr, s.data(), s.size()); memcpy(data_ptr, str_.data(), str_.size());
data_ptr[s.size()] = '\0'; // easier when you need a c_str data_ptr[str_.size()] = '\0'; // easier when you need a c_str
} }
} }
// note: do pass std::string_view arguments by value
padded_string(std::string_view sv_) noexcept
: viable_size(sv_.size()), data_ptr(allocate_padded_buffer(sv_.size())) {
if (data_ptr != nullptr) {
memcpy(data_ptr, sv_.data(), sv_.size());
data_ptr[sv_.size()] = '\0'; // easier when you need a c_str
}
}
padded_string(padded_string &&o) noexcept padded_string(padded_string &&o) noexcept
: viable_size(o.viable_size), data_ptr(o.data_ptr) { : viable_size(o.viable_size), data_ptr(o.data_ptr) {
o.data_ptr = nullptr; // we take ownership o.data_ptr = nullptr; // we take ownership
@ -678,21 +727,25 @@ public:
o.viable_size = tmp_viable_size; o.viable_size = tmp_viable_size;
} }
~padded_string() { aligned_free_char(data_ptr); } ~padded_string() {
aligned_free_char(data_ptr);
}
size_t size() const { return viable_size; } size_t size() const { return viable_size; }
size_t length() const { return viable_size; } size_t length() const { return viable_size; }
char *data() const { return data_ptr; } char *data() const { return data_ptr; }
private: private:
padded_string &operator=(const padded_string &o) = delete; padded_string &operator=(const padded_string &o) = delete;
padded_string(const padded_string &o) = delete; padded_string(const padded_string &o) = delete;
size_t viable_size; size_t viable_size;
char *data_ptr; char *data_ptr{nullptr};
};
}; // padded_string
} // namespace simdjson } // namespace simdjson
#endif #endif
@ -766,6 +819,7 @@ static inline size_t json_minify(const padded_string &p, char *out) {
#include <cstring> #include <cstring>
#include <iostream> #include <iostream>
#include <memory>
#define JSON_VALUE_MASK 0xFFFFFFFFFFFFFF #define JSON_VALUE_MASK 0xFFFFFFFFFFFFFF
@ -782,10 +836,14 @@ class ParsedJson {
public: public:
// create a ParsedJson container with zero capacity, call allocate_capacity to // create a ParsedJson container with zero capacity, call allocate_capacity to
// allocate memory // allocate memory
ParsedJson(); ParsedJson()=default;
~ParsedJson(); ~ParsedJson()=default;
ParsedJson(ParsedJson &&p);
ParsedJson &operator=(ParsedJson &&o); // this is a move only class
ParsedJson(ParsedJson &&p) = default;
ParsedJson(const ParsedJson &p) = delete;
ParsedJson &operator=(ParsedJson &&o) = default;
ParsedJson &operator=(const ParsedJson &o) = delete;
// if needed, allocate memory so that the object is able to process JSON // if needed, allocate memory so that the object is able to process JSON
// documents having up to len bytes and max_depth "depth" // documents having up to len bytes and max_depth "depth"
@ -838,7 +896,8 @@ public:
really_inline void write_tape_s64(int64_t i) { really_inline void write_tape_s64(int64_t i) {
write_tape(0, 'l'); write_tape(0, 'l');
tape[current_loc++] = *(reinterpret_cast<uint64_t *>(&i)); std::memcpy(&tape[current_loc], &i, sizeof(i));
++current_loc;
} }
really_inline void write_tape_u64(uint64_t i) { really_inline void write_tape_u64(uint64_t i) {
@ -874,27 +933,22 @@ public:
uint32_t current_loc{0}; uint32_t current_loc{0};
uint32_t n_structural_indexes{0}; uint32_t n_structural_indexes{0};
uint32_t *structural_indexes; std::unique_ptr<uint32_t[]> structural_indexes;
std::unique_ptr<uint64_t[]> tape;
std::unique_ptr<uint32_t[]> containing_scope_offset;
uint64_t *tape;
uint32_t *containing_scope_offset;
#ifdef SIMDJSON_USE_COMPUTED_GOTO #ifdef SIMDJSON_USE_COMPUTED_GOTO
void **ret_address; std::unique_ptr<void*[]> ret_address;
#else #else
char *ret_address; std::unique_ptr<char[]> ret_address;
#endif #endif
uint8_t *string_buf; // should be at least byte_capacity std::unique_ptr<uint8_t[]> string_buf;// should be at least byte_capacity
uint8_t *current_string_buf_loc; uint8_t *current_string_buf_loc;
bool valid{false}; bool valid{false};
int error_code{simdjson::UNITIALIZED}; int error_code{simdjson::UNINITIALIZED};
private:
// we don't want the default constructor to be called
ParsedJson(const ParsedJson &p) =
delete; // we don't want the default constructor to be called
// we don't want the assignment to be called
ParsedJson &operator=(const ParsedJson &o) = delete;
}; };
// dump bits low to high // dump bits low to high
@ -979,14 +1033,14 @@ public:
// within the string: get_string_length determines the true string length. // within the string: get_string_length determines the true string length.
inline const char *get_string() const { inline const char *get_string() const {
return reinterpret_cast<const char *>( return reinterpret_cast<const char *>(
pj->string_buf + (current_val & JSON_VALUE_MASK) + sizeof(uint32_t)); pj->string_buf.get() + (current_val & JSON_VALUE_MASK) + sizeof(uint32_t));
} }
// return the length of the string in bytes // return the length of the string in bytes
inline uint32_t get_string_length() const { inline uint32_t get_string_length() const {
uint32_t answer; uint32_t answer;
memcpy(&answer, memcpy(&answer,
reinterpret_cast<const char *>(pj->string_buf + reinterpret_cast<const char *>(pj->string_buf.get() +
(current_val & JSON_VALUE_MASK)), (current_val & JSON_VALUE_MASK)),
sizeof(uint32_t)); sizeof(uint32_t));
return answer; return answer;
@ -1665,22 +1719,32 @@ bool ParsedJson::BasicIterator<max_depth>::relative_move_to(const char *pointer,
namespace simdjson { namespace simdjson {
// Setting the streaming parameter to true allows find_structural_bits to tolerate unclosed strings.
// The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
// you may want to call a function like trimmed_length_safe_utf8.
// A function like find_last_json_buf_idx may also prove useful.
template <Architecture T = Architecture::NATIVE> template <Architecture T = Architecture::NATIVE>
int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj, bool streaming); int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj, bool streaming);
// Setting the streaming parameter to true allows find_structural_bits to tolerate unclosed strings.
// The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
// you may want to call a function like trimmed_length_safe_utf8.
// A function like find_last_json_buf_idx may also prove useful.
template <Architecture T = Architecture::NATIVE> template <Architecture T = Architecture::NATIVE>
int find_structural_bits(const char *buf, size_t len, simdjson::ParsedJson &pj, bool streaming) { int find_structural_bits(const char *buf, size_t len, simdjson::ParsedJson &pj, bool streaming) {
return find_structural_bits<T>((const uint8_t *)buf, len, pj, streaming); return find_structural_bits<T>((const uint8_t *)buf, len, pj, streaming);
} }
template <Architecture T = Architecture::NATIVE> template <Architecture T = Architecture::NATIVE>
int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj){ int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) {
return find_structural_bits<T>((const uint8_t *)buf, len, pj, false); return find_structural_bits<T>(buf, len, pj, false);
} }
template <Architecture T = Architecture::NATIVE> template <Architecture T = Architecture::NATIVE>
int find_structural_bits(const char *buf, size_t len, simdjson::ParsedJson &pj) { int find_structural_bits(const char *buf, size_t len, simdjson::ParsedJson &pj) {
return find_structural_bits<T>((const uint8_t *)buf, len, pj, false); return find_structural_bits<T>((const uint8_t *)buf, len, pj);
} }
}; // namespace simdjson }; // namespace simdjson
@ -1701,7 +1765,8 @@ WARN_UNUSED int
unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj); unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj);
template <Architecture T = Architecture::NATIVE> template <Architecture T = Architecture::NATIVE>
int unified_machine(const char *buf, size_t len, ParsedJson &pj) { WARN_UNUSED int
unified_machine(const char *buf, size_t len, ParsedJson &pj) {
return unified_machine<T>(reinterpret_cast<const uint8_t *>(buf), len, pj); return unified_machine<T>(reinterpret_cast<const uint8_t *>(buf), len, pj);
} }
@ -2003,6 +2068,8 @@ namespace simdjson {
* */ * */
JsonStream(const std::string &s, size_t batch_size = 1000000) : JsonStream(s.data(), s.size(), batch_size) {}; JsonStream(const std::string &s, size_t batch_size = 1000000) : JsonStream(s.data(), s.size(), batch_size) {};
~JsonStream();
/* Parse the next document found in the buffer previously given to JsonStream. /* Parse the next document found in the buffer previously given to JsonStream.
* The content should be a valid JSON document encoded as UTF-8. If there is a * The content should be a valid JSON document encoded as UTF-8. If there is a
@ -2034,12 +2101,14 @@ namespace simdjson {
/* Sets a new buffer for this JsonStream. Will also reinitialize all the variables, /* Sets a new buffer for this JsonStream. Will also reinitialize all the variables,
* which acts as a reset. A new JsonStream without initializing again. * which acts as a reset. A new JsonStream without initializing again.
* */ * */
void set_new_buffer(const char *buf, size_t len); // todo: implement and test this function, note that _batch_size is mutable
// void set_new_buffer(const char *buf, size_t len);
/* Sets a new buffer for this JsonStream. Will also reinitialize all the variables, /* Sets a new buffer for this JsonStream. Will also reinitialize all the variables,
* which is basically a reset. A new JsonStream without initializing again. * which is basically a reset. A new JsonStream without initializing again.
* */ * */
void set_new_buffer(const std::string &s) { set_new_buffer(s.data(), s.size()); } // todo: implement and test this function, note that _batch_size is mutable
// void set_new_buffer(const std::string &s) { set_new_buffer(s.data(), s.size()); }
/* Returns the location (index) of where the next document should be in the buffer. /* Returns the location (index) of where the next document should be in the buffer.
* Can be used for debugging, it tells the user the position of the end of the last * Can be used for debugging, it tells the user the position of the end of the last
@ -2059,43 +2128,89 @@ namespace simdjson {
size_t _len; size_t _len;
size_t _batch_size; size_t _batch_size;
size_t next_json{0}; size_t next_json{0};
bool error_on_last_attempt{false};
bool load_next_batch{true}; bool load_next_batch{true};
size_t current_buffer_loc{0}; size_t current_buffer_loc{0};
size_t last_json_buffer_loc{0}; size_t last_json_buffer_loc{0};
size_t n_parsed_docs{0}; size_t n_parsed_docs{0};
size_t n_bytes_parsed{0}; size_t n_bytes_parsed{0};
#ifdef SIMDJSON_THREADS_ENABLED
int stage1_is_ok_thread{0};
std::thread stage_1_thread; std::thread stage_1_thread;
simdjson::ParsedJson pj_thread; simdjson::ParsedJson pj_thread;
#ifdef SIMDJSON_THREADS_ENABLED
/* This algorithm is used to quickly identify the buffer position of
* the last JSON document inside the current batch.
*
* It does it's work by finding the last pair of structural characters
* that represent the end followed by the start of a document.
*
* Simply put, we iterate over the structural characters, starting from
* the end. We consider that we found the end of a JSON document when the
* first element of the pair is NOT one of these characters: '{' '[' ';' ','
* and when the second element is NOT one of these characters: '}' '}' ';' ','.
*
* This simple comparison works most of the time, but it does not cover cases
* where the batch's structural indexes contain a perfect amount of documents.
* In such a case, we do not have access to the structural index which follows
* the last document, therefore, we do not have access to the second element in
* the pair, and means that we cannot identify the last document. To fix this
* issue, we keep a count of the open and closed curly/square braces we found
* while searching for the pair. When we find a pair AND the count of open and
* closed curly/square braces is the same, we know that we just passed a complete
* document, therefore the last json buffer location is the end of the batch
* */
size_t find_last_json_buf_loc(const ParsedJson &pj);
#endif #endif
}; };
/* Finds where the last JSON document of the current batch ends.
 *
 * The structural indexes recorded in pj are walked backwards, looking at
 * consecutive pairs (previous character, current character). A pair marks a
 * document boundary when
 *   - the previous character is NOT one of '{' '[' ':' ','  and
 *   - the current character is NOT one of '}' ']' ':' ','.
 *
 * That test alone is not enough when the batch holds an exact number of
 * documents: the structural character following the last document is then not
 * available, so the final boundary can never show up as a pair. To cover this
 * case, the curly and square braces seen during the backward walk are counted.
 * If they are balanced when a boundary pair is found, a complete document has
 * just been walked over and the answer is the end of the batch (last_i + 1).
 *
 * buf  : the bytes that pj.structural_indexes refers to
 * size : compared against the last structural index; an index equal to size
 *        is skipped before the walk starts
 * pj   : supplies n_structural_indexes and structural_indexes
 *
 * Returns 0 when there are no usable structural indexes or no boundary was
 * found; otherwise the position of the boundary pair, or last_i + 1 when the
 * braces are balanced.
 * NOTE(review): the value returned is a position within pj.structural_indexes
 * rather than a byte offset in buf -- confirm against the callers.
 * */
inline size_t find_last_json_buf_idx(const char * buf, size_t size, const ParsedJson &pj) {
  // this function can be generally useful
  if (pj.n_structural_indexes == 0) {
    return 0;
  }
  auto last_i = pj.n_structural_indexes - 1;
  if (pj.structural_indexes[last_i] == size) {
    // the final entry points at the end of the input: start one entry earlier
    if (last_i == 0) {
      return 0;
    }
    last_i = pj.n_structural_indexes - 2;
  }
  // running balance of brackets met while walking backwards
  auto open_arrays = 0;
  auto open_objects = 0;
  for (auto pos = last_i; pos > 0; pos--) {
    // second element of the pair: the character at the current position
    const char second = buf[pj.structural_indexes[pos]];
    if (second == ':' || second == ',') {
      continue;
    }
    if (second == '}') {
      open_objects--;
      continue;
    }
    if (second == ']') {
      open_arrays--;
      continue;
    }
    if (second == '{') {
      open_objects++;
    } else if (second == '[') {
      open_arrays++;
    }
    // first element of the pair: the structural character just before it
    const char first = buf[pj.structural_indexes[pos - 1]];
    if (first == '{' || first == '[' || first == ':' || first == ',') {
      continue;
    }
    // boundary found; balanced brackets mean the batch ends on a full document
    return (open_arrays == 0 && open_objects == 0) ? last_i + 1 : pos;
  }
  return 0;
}
}
#endif //SIMDJSON_JSONSTREAM_H #endif //SIMDJSON_JSONSTREAM_H
/* end file include/simdjson/jsonstream.h */ /* end file include/simdjson/jsonstream.h */

View File

@ -59,8 +59,14 @@ size_t json_minify(const unsigned char *bytes, size_t how_many,
} }
} // namespace simdjson } // namespace simdjson
#else #else
//
// This fast code is disabled in the context of runtime dispatching.
// See issue https://github.com/lemire/simdjson/issues/384
//
#include "simdprune_tables.h" #include "simdprune_tables.h"
#include <cstring> #include <cstring>
#include <x86intrin.h> // currently, there is no runtime dispatch for the minifier
namespace simdjson { namespace simdjson {
@ -363,18 +369,18 @@ size_t oldjson_minify(const uint8_t *buf, size_t len, uint8_t *out) {
int pop2 = hamming((~whitespace) & UINT64_C(0xFFFFFFFF)); int pop2 = hamming((~whitespace) & UINT64_C(0xFFFFFFFF));
int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF)); int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
int pop4 = hamming((~whitespace)); int pop4 = hamming((~whitespace));
__m256i vmask1 = _mm256_loadu2_m128i( __m128i x1 = _mm256_extracti128_si256(input_lo, 0);
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask2 & 0x7FFF), __m128i x2 = _mm256_extracti128_si256(input_lo, 1);
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask1 & 0x7FFF)); __m128i x3 = _mm256_extracti128_si256(input_hi, 0);
__m256i vmask2 = _mm256_loadu2_m128i( __m128i x4 = _mm256_extracti128_si256(input_hi, 1);
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask4 & 0x7FFF), x1 = skinnycleanm128(x1, mask1);
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask3 & 0x7FFF)); x2 = skinnycleanm128(x2, mask2);
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1); x3 = skinnycleanm128(x3, mask3);
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2); x4 = skinnycleanm128(x4, mask4);
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop1), _mm_storeu_si128(reinterpret_cast<__m128i *>(out), x1);
reinterpret_cast<__m128i *>(out), result1); _mm_storeu_si128(reinterpret_cast<__m128i *>(out + pop1), x2);
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop3), _mm_storeu_si128(reinterpret_cast<__m128i *>(out + pop2), x3);
reinterpret_cast<__m128i *>(out + pop2), result2); _mm_storeu_si128(reinterpret_cast<__m128i *>(out + pop3), x4);
out += pop4; out += pop4;
} }
} }
@ -447,23 +453,24 @@ size_t oldjson_minify(const uint8_t *buf, size_t len, uint8_t *out) {
int pop2 = hamming((~whitespace) & UINT64_C(0xFFFFFFFF)); int pop2 = hamming((~whitespace) & UINT64_C(0xFFFFFFFF));
int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF)); int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
int pop4 = hamming((~whitespace)); int pop4 = hamming((~whitespace));
__m256i vmask1 = _mm256_loadu2_m128i( __m128i x1 = _mm256_extracti128_si256(input_lo, 0);
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask2 & 0x7FFF), __m128i x2 = _mm256_extracti128_si256(input_lo, 1);
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask1 & 0x7FFF)); __m128i x3 = _mm256_extracti128_si256(input_hi, 0);
__m256i vmask2 = _mm256_loadu2_m128i( __m128i x4 = _mm256_extracti128_si256(input_hi, 1);
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask4 & 0x7FFF), x1 = skinnycleanm128(x1, mask1);
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask3 & 0x7FFF)); x2 = skinnycleanm128(x2, mask2);
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1); x3 = skinnycleanm128(x3, mask3);
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2); x4 = skinnycleanm128(x4, mask4);
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop1), _mm_storeu_si128(reinterpret_cast<__m128i *>(buffer), x1);
reinterpret_cast<__m128i *>(buffer), result1); _mm_storeu_si128(reinterpret_cast<__m128i *>(buffer + pop1), x2);
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop3), _mm_storeu_si128(reinterpret_cast<__m128i *>(buffer + pop2), x3);
reinterpret_cast<__m128i *>(buffer + pop2), result2); _mm_storeu_si128(reinterpret_cast<__m128i *>(buffer + pop3), x4);
memcpy(out, buffer, pop4); memcpy(out, buffer, pop4);
out += pop4; out += pop4;
} }
*out = '\0'; // NULL termination *out = '\0'; // NULL termination
return out - initout; return out - initout;
} }
} // namespace simdjson } // namespace simdjson
#endif #endif