Use simd8 helpers for find_bs_bits_and_quote_bits
This commit is contained in:
parent
4bc128f07e
commit
d89046d515
|
@ -60,7 +60,7 @@ namespace simdjson::arm64::simd {
|
|||
really_inline simd8<bool>::bitmask_t to_bitmask() const {
|
||||
const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
|
||||
0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
|
||||
uint8x16_t minput = vandq_u8(*this, bit_mask);
|
||||
auto minput = *this & bit_mask;
|
||||
uint8x16_t tmp = vpaddq_u8(minput, minput);
|
||||
tmp = vpaddq_u8(tmp, tmp);
|
||||
tmp = vpaddq_u8(tmp, tmp);
|
||||
|
@ -80,7 +80,7 @@ namespace simdjson::arm64::simd {
|
|||
// Zero constructor
|
||||
really_inline simd8() : simd8(zero()) {}
|
||||
// Array constructor
|
||||
really_inline simd8(const uint8_t* values) : simd8(load(values)) {}
|
||||
really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {}
|
||||
// Splat constructor
|
||||
really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
|
||||
// Member-by-member initialization
|
||||
|
@ -92,6 +92,9 @@ namespace simdjson::arm64::simd {
|
|||
v8, v9, v10,v11,v12,v13,v14,v15
|
||||
}) {}
|
||||
|
||||
// Store to array
|
||||
really_inline void store(uint8_t dst[16]) { return vst1q_u8(dst, *this); }
|
||||
|
||||
// Saturated math
|
||||
really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return vqaddq_u8(*this, other); }
|
||||
really_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return vqsubq_u8(*this, other); }
|
||||
|
@ -159,7 +162,7 @@ namespace simdjson::arm64::simd {
|
|||
|
||||
static really_inline simd8<int8_t> splat(int8_t _value) { return vmovq_n_s8(_value); }
|
||||
static really_inline simd8<int8_t> zero() { return vdupq_n_s8(0); }
|
||||
static really_inline simd8<int8_t> load(const int8_t* values) { return vld1q_s8(values); }
|
||||
static really_inline simd8<int8_t> load(const int8_t values[16]) { return vld1q_s8(values); }
|
||||
|
||||
// Conversion from/to SIMD register
|
||||
really_inline simd8(const int8x16_t _value) : value{_value} {}
|
||||
|
@ -181,6 +184,9 @@ namespace simdjson::arm64::simd {
|
|||
v8, v9, v10,v11,v12,v13,v14,v15
|
||||
}) {}
|
||||
|
||||
// Store to array
|
||||
really_inline void store(int8_t dst[16]) { return vst1q_s8(dst, *this); }
|
||||
|
||||
// Explicit conversion to/from unsigned
|
||||
really_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {}
|
||||
really_inline explicit operator simd8<uint8_t>() const { return vreinterpretq_u8_s8(*this); }
|
||||
|
@ -227,8 +233,15 @@ namespace simdjson::arm64::simd {
|
|||
const simd8<T> chunks[4];
|
||||
|
||||
really_inline simd8x64() : chunks{simd8<T>(), simd8<T>(), simd8<T>(), simd8<T>()} {}
|
||||
really_inline simd8x64(const uint8x16_t chunk0, const uint8x16_t chunk1, const uint8x16_t chunk2, const uint8x16_t chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
|
||||
really_inline simd8x64(const T *ptr) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+16), simd8<T>::load(ptr+32), simd8<T>::load(ptr+48)} {}
|
||||
really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1, const simd8<T> chunk2, const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
|
||||
really_inline simd8x64(const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+16), simd8<T>::load(ptr+32), simd8<T>::load(ptr+48)} {}
|
||||
|
||||
really_inline void store(T ptr[64]) {
|
||||
this->chunks[0].store(ptr);
|
||||
this->chunks[0].store(ptr+16);
|
||||
this->chunks[0].store(ptr+32);
|
||||
this->chunks[0].store(ptr+48);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline void each(F const& each_chunk) const
|
||||
|
@ -268,14 +281,13 @@ namespace simdjson::arm64::simd {
|
|||
}
|
||||
|
||||
really_inline uint64_t to_bitmask() const {
|
||||
const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
|
||||
0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
|
||||
uint8x16_t t0 = vandq_u8(this->chunks[0], bit_mask);
|
||||
uint8x16_t t1 = vandq_u8(this->chunks[1], bit_mask);
|
||||
uint8x16_t t2 = vandq_u8(this->chunks[2], bit_mask);
|
||||
uint8x16_t t3 = vandq_u8(this->chunks[3], bit_mask);
|
||||
uint8x16_t sum0 = vpaddq_u8(t0, t1);
|
||||
uint8x16_t sum1 = vpaddq_u8(t2, t3);
|
||||
const uint8x16_t bit_mask = {
|
||||
0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
|
||||
0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
|
||||
};
|
||||
// Add each of the elements next to each other, successively, to stuff each 8 byte mask into one.
|
||||
uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask);
|
||||
uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask);
|
||||
sum0 = vpaddq_u8(sum0, sum1);
|
||||
sum0 = vpaddq_u8(sum0, sum0);
|
||||
return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0);
|
||||
|
|
|
@ -5,57 +5,38 @@
|
|||
|
||||
#ifdef IS_ARM64
|
||||
|
||||
#include "arm64/simd.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "jsoncharutils.h"
|
||||
|
||||
#ifdef JSON_TEST_STRINGS
|
||||
void found_string(const uint8_t *buf, const uint8_t *parsed_begin,
|
||||
const uint8_t *parsed_end);
|
||||
void found_bad_string(const uint8_t *buf);
|
||||
#endif
|
||||
|
||||
namespace simdjson::arm64 {
|
||||
|
||||
using namespace simd;
|
||||
|
||||
// Holds backslashes and quotes locations.
|
||||
struct parse_string_helper {
|
||||
uint32_t bs_bits;
|
||||
uint32_t quote_bits;
|
||||
really_inline uint32_t bytes_processed() const { return sizeof(uint8x16_t)*2; }
|
||||
static const uint32_t BYTES_PROCESSED = 32;
|
||||
};
|
||||
|
||||
really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) {
|
||||
// this can read up to 31 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
static_assert(2 * sizeof(uint8x16_t) - 1 <= SIMDJSON_PADDING);
|
||||
uint8x16_t v0 = vld1q_u8(src);
|
||||
uint8x16_t v1 = vld1q_u8(src + 16);
|
||||
vst1q_u8(dst, v0);
|
||||
vst1q_u8(dst + 16, v1);
|
||||
static_assert(SIMDJSON_PADDING >= (parse_string_helper::BYTES_PROCESSED - 1));
|
||||
simd8<uint8_t> v0(src);
|
||||
simd8<uint8_t> v1(src + sizeof(v0));
|
||||
v0.store(dst);
|
||||
v1.store(dst + sizeof(v0));
|
||||
|
||||
uint8x16_t bs_mask = vmovq_n_u8('\\');
|
||||
uint8x16_t qt_mask = vmovq_n_u8('"');
|
||||
const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
|
||||
0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
|
||||
uint8x16_t cmp_bs_0 = vceqq_u8(v0, bs_mask);
|
||||
uint8x16_t cmp_bs_1 = vceqq_u8(v1, bs_mask);
|
||||
uint8x16_t cmp_qt_0 = vceqq_u8(v0, qt_mask);
|
||||
uint8x16_t cmp_qt_1 = vceqq_u8(v1, qt_mask);
|
||||
|
||||
cmp_bs_0 = vandq_u8(cmp_bs_0, bit_mask);
|
||||
cmp_bs_1 = vandq_u8(cmp_bs_1, bit_mask);
|
||||
cmp_qt_0 = vandq_u8(cmp_qt_0, bit_mask);
|
||||
cmp_qt_1 = vandq_u8(cmp_qt_1, bit_mask);
|
||||
|
||||
uint8x16_t sum0 = vpaddq_u8(cmp_bs_0, cmp_bs_1);
|
||||
uint8x16_t sum1 = vpaddq_u8(cmp_qt_0, cmp_qt_1);
|
||||
sum0 = vpaddq_u8(sum0, sum1);
|
||||
sum0 = vpaddq_u8(sum0, sum0);
|
||||
// Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; therefore, we
|
||||
// smash them together into a 64-byte mask and get the bitmask from there.
|
||||
uint64_t bs_and_quote = simd8x64<bool>(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask();
|
||||
return {
|
||||
vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 0), // bs_bits
|
||||
vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 1) // quote_bits
|
||||
static_cast<uint32_t>(bs_and_quote), // bs_bits
|
||||
static_cast<uint32_t>(bs_and_quote >> 32) // quote_bits
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#include "generic/stringparsing.h"
|
||||
|
|
|
@ -84,7 +84,7 @@ WARN_UNUSED really_inline bool parse_string(UNUSED const uint8_t *buf,
|
|||
*/
|
||||
|
||||
/* find out where the quote is... */
|
||||
uint32_t quote_dist = trailing_zeroes(helper.quote_bits);
|
||||
auto quote_dist = trailing_zeroes(helper.quote_bits);
|
||||
|
||||
/* NULL termination is still handy if you expect all your strings to
|
||||
* be NULL terminated? */
|
||||
|
@ -92,7 +92,7 @@ WARN_UNUSED really_inline bool parse_string(UNUSED const uint8_t *buf,
|
|||
dst[quote_dist] = 0;
|
||||
|
||||
uint32_t str_length = (dst - start_of_string) + quote_dist;
|
||||
memcpy(pj.current_string_buf_loc, &str_length, sizeof(uint32_t));
|
||||
memcpy(pj.current_string_buf_loc, &str_length, sizeof(str_length));
|
||||
/*****************************
|
||||
* Above, check for overflow in case someone has a crazy string
|
||||
* (>=4GB?) _
|
||||
|
@ -109,7 +109,7 @@ WARN_UNUSED really_inline bool parse_string(UNUSED const uint8_t *buf,
|
|||
}
|
||||
if (((helper.quote_bits - 1) & helper.bs_bits) != 0) {
|
||||
/* find out where the backspace is */
|
||||
uint32_t bs_dist = trailing_zeroes(helper.bs_bits);
|
||||
auto bs_dist = trailing_zeroes(helper.bs_bits);
|
||||
uint8_t escape_char = src[bs_dist + 1];
|
||||
/* we encountered backslash first. Handle backslash */
|
||||
if (escape_char == 'u') {
|
||||
|
@ -136,8 +136,8 @@ WARN_UNUSED really_inline bool parse_string(UNUSED const uint8_t *buf,
|
|||
} else {
|
||||
/* they are the same. Since they can't co-occur, it means we
|
||||
* encountered neither. */
|
||||
src += helper.bytes_processed();
|
||||
dst += helper.bytes_processed();
|
||||
src += parse_string_helper::BYTES_PROCESSED;
|
||||
dst += parse_string_helper::BYTES_PROCESSED;
|
||||
}
|
||||
}
|
||||
/* can't be reached */
|
||||
|
|
|
@ -74,13 +74,16 @@ namespace simdjson::haswell::simd {
|
|||
struct base8_numeric: base8<T> {
|
||||
static really_inline simd8<T> splat(T _value) { return _mm256_set1_epi8(_value); }
|
||||
static really_inline simd8<T> zero() { return _mm256_setzero_si256(); }
|
||||
static really_inline simd8<T> load(const T* values) {
|
||||
static really_inline simd8<T> load(const T values[32]) {
|
||||
return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(values));
|
||||
}
|
||||
|
||||
really_inline base8_numeric() : base8<T>() {}
|
||||
really_inline base8_numeric(const __m256i _value) : base8<T>(_value) {}
|
||||
|
||||
// Store to array
|
||||
really_inline void store(T dst[32]) { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); }
|
||||
|
||||
// Addition/subtraction are the same for signed and unsigned
|
||||
really_inline simd8<T> operator+(const simd8<T> other) const { return _mm256_add_epi8(*this, other); }
|
||||
really_inline simd8<T> operator-(const simd8<T> other) const { return _mm256_sub_epi8(*this, other); }
|
||||
|
@ -131,7 +134,7 @@ namespace simdjson::haswell::simd {
|
|||
// Splat constructor
|
||||
really_inline simd8(int8_t _value) : simd8(splat(_value)) {}
|
||||
// Array constructor
|
||||
really_inline simd8(const int8_t* values) : simd8(load(values)) {}
|
||||
really_inline simd8(const int8_t values[32]) : simd8(load(values)) {}
|
||||
// Member-by-member initialization
|
||||
really_inline simd8(
|
||||
int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
|
||||
|
@ -159,7 +162,7 @@ namespace simdjson::haswell::simd {
|
|||
// Splat constructor
|
||||
really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
|
||||
// Array constructor
|
||||
really_inline simd8(const uint8_t* values) : simd8(load(values)) {}
|
||||
really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {}
|
||||
// Member-by-member initialization
|
||||
really_inline simd8(
|
||||
uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
|
||||
|
@ -184,7 +187,7 @@ namespace simdjson::haswell::simd {
|
|||
|
||||
// Bit-specific operations
|
||||
really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return (*this & bits).any_bits_set(); }
|
||||
really_inline simd8<bool> any_bits_set() const { return ~(*this == u8'\0'); }
|
||||
really_inline simd8<bool> any_bits_set() const { return ~(*this == uint8_t(0)); }
|
||||
really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !_mm256_testz_si256(*this, bits); }
|
||||
really_inline bool any_bits_set_anywhere() const { return !_mm256_testz_si256(*this, *this); }
|
||||
template<int N>
|
||||
|
@ -198,10 +201,13 @@ namespace simdjson::haswell::simd {
|
|||
const simd8<T> chunks[2];
|
||||
|
||||
really_inline simd8x64() : chunks{simd8<T>(), simd8<T>()} {}
|
||||
really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1) : chunks{chunk0, chunk1} {}
|
||||
really_inline simd8x64(const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+32)} {}
|
||||
|
||||
really_inline simd8x64(const __m256i chunk0, const __m256i chunk1) : chunks{chunk0, chunk1} {}
|
||||
|
||||
really_inline simd8x64(const T *ptr) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+32)} {}
|
||||
really_inline void store(T *ptr) {
|
||||
this->chunks[0].store(ptr);
|
||||
this->chunks[0].store(ptr+sizeof(simd8<T>));
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline void each(F const& each_chunk) const
|
||||
|
|
|
@ -5,39 +5,33 @@
|
|||
|
||||
#ifdef IS_X86_64
|
||||
|
||||
#include "haswell/simd.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "jsoncharutils.h"
|
||||
|
||||
#ifdef JSON_TEST_STRINGS
|
||||
void found_string(const uint8_t *buf, const uint8_t *parsed_begin,
|
||||
const uint8_t *parsed_end);
|
||||
void found_bad_string(const uint8_t *buf);
|
||||
#endif
|
||||
|
||||
TARGET_HASWELL
|
||||
namespace simdjson::haswell {
|
||||
|
||||
using namespace simd;
|
||||
|
||||
// Holds backslashes and quotes locations.
|
||||
struct parse_string_helper {
|
||||
uint32_t bs_bits;
|
||||
uint32_t quote_bits;
|
||||
really_inline uint32_t bytes_processed() const { return sizeof(__m256i); }
|
||||
static const uint32_t BYTES_PROCESSED = 32;
|
||||
};
|
||||
|
||||
really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) {
|
||||
// this can read up to 31 bytes beyond the buffer size, but we require
|
||||
// this can read up to 15 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
static_assert(sizeof(__m256i) - 1 <= SIMDJSON_PADDING);
|
||||
__m256i v = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
|
||||
// store to dest unconditionally - we can overwrite the bits we don't like
|
||||
// later
|
||||
_mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), v);
|
||||
auto quote_mask = _mm256_cmpeq_epi8(v, _mm256_set1_epi8('"'));
|
||||
static_assert(SIMDJSON_PADDING >= (parse_string_helper::BYTES_PROCESSED - 1));
|
||||
simd8<uint8_t> v(src);
|
||||
// store to dest unconditionally - we can overwrite the bits we don't like later
|
||||
v.store(dst);
|
||||
return {
|
||||
static_cast<uint32_t>(_mm256_movemask_epi8(
|
||||
_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\')))), // bs_bits
|
||||
static_cast<uint32_t>(_mm256_movemask_epi8(quote_mask)) // quote_bits
|
||||
(v == '\\').to_bitmask(), // bs_bits
|
||||
(v == '"').to_bitmask(), // quote_bits
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -65,6 +65,12 @@ really_inline bool is_valid_null_atom(const uint8_t *loc) {
|
|||
return error == 0;
|
||||
}
|
||||
|
||||
#ifdef JSON_TEST_STRINGS
|
||||
void found_string(const uint8_t *buf, const uint8_t *parsed_begin,
|
||||
const uint8_t *parsed_end);
|
||||
void found_bad_string(const uint8_t *buf);
|
||||
#endif
|
||||
|
||||
#include "arm64/stage2_build_tape.h"
|
||||
#include "haswell/stage2_build_tape.h"
|
||||
#include "westmere/stage2_build_tape.h"
|
||||
|
|
|
@ -75,13 +75,16 @@ namespace simdjson::westmere::simd {
|
|||
struct base8_numeric: base8<T> {
|
||||
static really_inline simd8<T> splat(T _value) { return _mm_set1_epi8(_value); }
|
||||
static really_inline simd8<T> zero() { return _mm_setzero_si128(); }
|
||||
static really_inline simd8<T> load(const T* values) {
|
||||
static really_inline simd8<T> load(const T values[16]) {
|
||||
return _mm_loadu_si128(reinterpret_cast<const __m128i *>(values));
|
||||
}
|
||||
|
||||
really_inline base8_numeric() : base8<T>() {}
|
||||
really_inline base8_numeric(const __m128i _value) : base8<T>(_value) {}
|
||||
|
||||
// Store to array
|
||||
really_inline void store(T dst[16]) { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); }
|
||||
|
||||
// Addition/subtraction are the same for signed and unsigned
|
||||
really_inline simd8<T> operator+(const simd8<T> other) const { return _mm_add_epi8(*this, other); }
|
||||
really_inline simd8<T> operator-(const simd8<T> other) const { return _mm_sub_epi8(*this, other); }
|
||||
|
@ -174,7 +177,7 @@ namespace simdjson::westmere::simd {
|
|||
|
||||
// Bit-specific operations
|
||||
really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return (*this & bits).any_bits_set(); }
|
||||
really_inline simd8<bool> any_bits_set() const { return ~(*this == u8'\0'); }
|
||||
really_inline simd8<bool> any_bits_set() const { return ~(*this == uint8_t(0)); }
|
||||
really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !_mm_testz_si128(*this, bits); }
|
||||
really_inline bool any_bits_set_anywhere() const { return !_mm_testz_si128(*this, *this); }
|
||||
template<int N>
|
||||
|
@ -188,8 +191,15 @@ namespace simdjson::westmere::simd {
|
|||
const simd8<T> chunks[4];
|
||||
|
||||
really_inline simd8x64() : chunks{simd8<T>(), simd8<T>(), simd8<T>(), simd8<T>()} {}
|
||||
really_inline simd8x64(const __m128i chunk0, const __m128i chunk1, const __m128i chunk2, const __m128i chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
|
||||
really_inline simd8x64(const T *ptr) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+16), simd8<T>::load(ptr+32), simd8<T>::load(ptr+48)} {}
|
||||
really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1, const simd8<T> chunk2, const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
|
||||
really_inline simd8x64(const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+16), simd8<T>::load(ptr+32), simd8<T>::load(ptr+48)} {}
|
||||
|
||||
really_inline void store(T ptr[64]) {
|
||||
this->chunks[0].store(ptr);
|
||||
this->chunks[0].store(ptr+16);
|
||||
this->chunks[0].store(ptr+32);
|
||||
this->chunks[0].store(ptr+48);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline void each(F const& each_chunk) const
|
||||
|
|
|
@ -5,38 +5,35 @@
|
|||
|
||||
#ifdef IS_X86_64
|
||||
|
||||
#include "westmere/simd.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "jsoncharutils.h"
|
||||
|
||||
#ifdef JSON_TEST_STRINGS
|
||||
void found_string(const uint8_t *buf, const uint8_t *parsed_begin,
|
||||
const uint8_t *parsed_end);
|
||||
void found_bad_string(const uint8_t *buf);
|
||||
#endif
|
||||
|
||||
TARGET_WESTMERE
|
||||
namespace simdjson::westmere {
|
||||
|
||||
using namespace simd;
|
||||
|
||||
// Holds backslashes and quotes locations.
|
||||
struct parse_string_helper {
|
||||
uint32_t bs_bits;
|
||||
uint32_t quote_bits;
|
||||
really_inline uint32_t bytes_processed() const { return sizeof(__m128i); }
|
||||
static const uint32_t BYTES_PROCESSED = 32;
|
||||
};
|
||||
|
||||
really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) {
|
||||
// this can read up to 31 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
__m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
|
||||
// store to dest unconditionally - we can overwrite the bits we don't like
|
||||
// later
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(dst), v);
|
||||
auto quote_mask = _mm_cmpeq_epi8(v, _mm_set1_epi8('"'));
|
||||
static_assert(SIMDJSON_PADDING >= (parse_string_helper::BYTES_PROCESSED - 1));
|
||||
simd8<uint8_t> v0(src);
|
||||
simd8<uint8_t> v1(src + 16);
|
||||
v0.store(dst);
|
||||
v1.store(dst + 16);
|
||||
uint64_t bs_and_quote = simd8x64<bool>(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask();
|
||||
return {
|
||||
static_cast<uint32_t>(
|
||||
_mm_movemask_epi8(_mm_cmpeq_epi8(v, _mm_set1_epi8('\\')))), // bs_bits
|
||||
static_cast<uint32_t>(_mm_movemask_epi8(quote_mask)) // quote_bits
|
||||
static_cast<uint32_t>(bs_and_quote), // bs_bits
|
||||
static_cast<uint32_t>(bs_and_quote >> 32) // quote_bits
|
||||
};
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue