Use generic simd operators for find_whitespace_and_operators

parent c89d6bf68b
commit e383b7a6ab
@@ -30,7 +30,7 @@ namespace simdjson::arm64::simd {
   really_inline simd8<T> operator&(const simd8<T> other) const { return vandq_u8(*this, other); }
   really_inline simd8<T> operator^(const simd8<T> other) const { return veorq_u8(*this, other); }
   really_inline simd8<T> bit_andnot(const simd8<T> other) const { return vbicq_u8(*this, other); }
-  really_inline simd8<T> operator~() const { return this ^ 0xFFu; }
+  really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
   really_inline simd8<T>& operator|=(const simd8<T> other) { auto this_cast = (simd8<T>*)this; *this_cast = *this_cast | other; return *this_cast; }
   really_inline simd8<T>& operator&=(const simd8<T> other) { auto this_cast = (simd8<T>*)this; *this_cast = *this_cast & other; return *this_cast; }
   really_inline simd8<T>& operator^=(const simd8<T> other) { auto this_cast = (simd8<T>*)this; *this_cast = *this_cast ^ other; return *this_cast; }

@@ -107,16 +107,17 @@ namespace simdjson::arm64::simd {
   really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return vcleq_u8(*this, other); }

   // Bit-specific operations
-  really_inline bool any_bits_set() const { return vmaxvq_u8(*this) != 0; }
-  really_inline bool any_bits_set(simd8<uint8_t> bits) const { return (*this & bits).any_bits_set(); }
+  really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return vtstq_u8(*this, bits); }
+  really_inline bool any_bits_set_anywhere() const { return vmaxvq_u8(*this) != 0; }
+  really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return (*this & bits).any_bits_set_anywhere(); }
   template<int N>
   really_inline simd8<uint8_t> shr() const { return vshrq_n_u8(*this, N); }
   template<int N>
   really_inline simd8<uint8_t> shl() const { return vshlq_n_u8(*this, N); }

-  // Perform a lookup of the lower 4 bits
+  // Perform a lookup assuming no value is larger than 16
   template<typename L>
-  really_inline simd8<L> lookup4(
+  really_inline simd8<L> lookup_16(
     L replace0, L replace1, L replace2, L replace3,
     L replace4, L replace5, L replace6, L replace7,
     L replace8, L replace9, L replace10, L replace11,
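Note (not part of the patch): this hunk splits the old any_bits_set into two distinct operations. any_bits_set(bits) now returns a per-lane mask (a simd8<bool>, via vtstq_u8), while the old whole-register test is renamed any_bits_set_anywhere() and still returns a single bool. A minimal scalar model of the two semantics, purely for illustration:

#include <cstdint>
#include <cstddef>

// Scalar model of the two operations on a 16-byte "register" (illustrative only;
// the real code works on NEON/SSE/AVX2 registers, not arrays).
constexpr size_t LANES = 16;

// any_bits_set(bits): per-lane test, produces a lane-wise boolean mask
// (all-ones where the lane has any of `bits` set, zero otherwise).
void any_bits_set(const uint8_t in[LANES], uint8_t bits, uint8_t mask_out[LANES]) {
  for (size_t i = 0; i < LANES; i++) {
    mask_out[i] = (in[i] & bits) ? 0xFF : 0x00;   // mirrors vtstq_u8 on NEON
  }
}

// any_bits_set_anywhere(bits): whole-register test, collapses to one bool.
bool any_bits_set_anywhere(const uint8_t in[LANES], uint8_t bits) {
  for (size_t i = 0; i < LANES; i++) {
    if (in[i] & bits) { return true; }            // mirrors vmaxvq_u8(...) != 0 after masking
  }
  return false;
}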
@@ -127,10 +128,25 @@ namespace simdjson::arm64::simd {
     replace8, replace9, replace10, replace11,
     replace12, replace13, replace14, replace15
   );
-  return lookup_table.apply_lookup4_to(*this);
+  return lookup_table.apply_lookup_16_to(*this);
  }

- really_inline simd8<uint8_t> apply_lookup4_to(const simd8<uint8_t> original) {
+ // Perform a lookup of the lower 4 bits
+ template<typename L>
+ really_inline simd8<L> lookup_lower_4_bits(
+   L replace0, L replace1, L replace2, L replace3,
+   L replace4, L replace5, L replace6, L replace7,
+   L replace8, L replace9, L replace10, L replace11,
+   L replace12, L replace13, L replace14, L replace15) const {
+   return (*this & 0xF).lookup_16(
+     replace0, replace1, replace2, replace3,
+     replace4, replace5, replace6, replace7,
+     replace8, replace9, replace10, replace11,
+     replace12, replace13, replace14, replace15
+   );
+ }
+
+ really_inline simd8<uint8_t> apply_lookup_16_to(const simd8<uint8_t> original) {
    return vqtbl1q_u8(*this, original);
  }
 };
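Note (not part of the patch): on NEON, vqtbl1q_u8 already yields 0 for any index of 16 or more, so lookup_16 can feed indices straight to the table, while lookup_lower_4_bits first masks with 0xF so only the low nibble selects an entry. A scalar sketch of the distinction, with illustrative helper names:

#include <cstdint>

// lookup_16: mirrors TBL's out-of-range-to-zero rule on ARM.
uint8_t lookup_16_scalar(const uint8_t table[16], uint8_t idx) {
  return (idx < 16) ? table[idx] : 0;
}

// lookup_lower_4_bits: the high nibble is ignored entirely.
uint8_t lookup_lower_4_bits_scalar(const uint8_t table[16], uint8_t idx) {
  return table[idx & 0xF];
}

On x86 the roles are reversed: pshufb natively ignores the upper nibble (and zeroes a lane when bit 7 of the index is set), so there lookup_lower_4_bits wraps the shuffle directly and lookup_16 simply forwards to it, as the Haswell and Westmere hunks below show.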
@@ -187,12 +203,12 @@ namespace simdjson::arm64::simd {

  // Perform a lookup of the lower 4 bits
  template<typename L>
- really_inline simd8<L> lookup4(
+ really_inline simd8<L> lookup_16(
    L replace0, L replace1, L replace2, L replace3,
    L replace4, L replace5, L replace6, L replace7,
    L replace8, L replace9, L replace10, L replace11,
    L replace12, L replace13, L replace14, L replace15) const {
-   return simd8<uint8_t>(*this).lookup4(
+   return simd8<uint8_t>(*this).lookup_16(
      replace0, replace1, replace2, replace3,
      replace4, replace5, replace6, replace7,
      replace8, replace9, replace10, replace11,

@@ -200,7 +216,7 @@ namespace simdjson::arm64::simd {
    );
  }

- really_inline simd8<int8_t> apply_lookup4_to(const simd8<uint8_t> original) {
+ really_inline simd8<int8_t> apply_lookup_16_to(const simd8<uint8_t> original) {
    return vqtbl1q_s8(*this, original);
  }
 };

@@ -222,7 +238,7 @@ namespace simdjson::arm64::simd {
      each_chunk(this->chunks[3]);
    }

-   template <typename F, typename R=bool>
+   template <typename R=bool, typename F>
    really_inline simd8x64<R> map(F const& map_chunk) const {
      return simd8x64<R>(
        map_chunk(this->chunks[0]),

@@ -232,7 +248,7 @@ namespace simdjson::arm64::simd {
      );
    }

-   template <typename F, typename R=bool>
+   template <typename R=bool, typename F>
    really_inline simd8x64<R> map(const simd8x64<T> b, F const& map_chunk) const {
      return simd8x64<R>(
        map_chunk(this->chunks[0], b.chunks[0]),
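Note (not part of the patch): swapping the template parameters of map, from <typename F, typename R=bool> to <typename R=bool, typename F>, lets a caller name the chunk result type explicitly while the functor type is still deduced, which is what the new in.map<uint8_t>(...) call in find_whitespace_and_operators relies on. A self-contained sketch of the ordering issue, with made-up names:

#include <array>
#include <cstdint>

// Standalone illustration of why the result type moved in front of the functor type.
template <typename R = bool, typename F>
std::array<R, 4> map_chunks(const std::array<uint8_t, 4>& chunks, F const& map_chunk) {
  return { map_chunk(chunks[0]), map_chunk(chunks[1]), map_chunk(chunks[2]), map_chunk(chunks[3]) };
}

int main() {
  std::array<uint8_t, 4> chunks = {1, 2, 3, 4};
  // R can be named explicitly while F is deduced from the lambda:
  auto nibbles = map_chunks<uint8_t>(chunks, [](uint8_t c) { return uint8_t(c & 0x0F); });
  // With the old ordering template <typename F, typename R = bool>, the explicit
  // argument in map_chunks<uint8_t>(...) would bind to F (the functor type), so the
  // caller could not choose R without spelling out the unnameable lambda type first.
  return nibbles[0] == 1 ? 0 : 1;
}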
@@ -10,6 +10,8 @@

 namespace simdjson::arm64 {

+using namespace simd;
+
 really_inline uint64_t compute_quote_mask(const uint64_t quote_bits) {

 #ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension

@@ -20,31 +22,19 @@ really_inline uint64_t compute_quote_mask(const uint64_t quote_bits) {
 }

 really_inline void find_whitespace_and_operators(
-  const simd::simd8x64<uint8_t> in,
-  uint64_t &whitespace, uint64_t &op) {
-  const uint8x16_t low_nibble_mask =
-      (uint8x16_t){16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0};
-  const uint8x16_t high_nibble_mask =
-      (uint8x16_t){8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0};
-  const uint8x16_t low_nib_and_mask = vmovq_n_u8(0xf);
+    const simd::simd8x64<uint8_t> in,
+    uint64_t &whitespace, uint64_t &op) {
+
-  auto v = in.map([&](auto chunk) {
-    uint8x16_t nib_lo = vandq_u8(chunk, low_nib_and_mask);
-    uint8x16_t nib_hi = vshrq_n_u8(chunk, 4);
-    uint8x16_t shuf_lo = vqtbl1q_u8(low_nibble_mask, nib_lo);
-    uint8x16_t shuf_hi = vqtbl1q_u8(high_nibble_mask, nib_hi);
-    return vandq_u8(shuf_lo, shuf_hi);
+  auto v = in.map<uint8_t>([&](simd8<uint8_t> chunk) {
+    auto nib_lo = chunk & 0xf;
+    auto nib_hi = chunk.shr<4>();
+    auto shuf_lo = nib_lo.lookup_16<uint8_t>(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
+    auto shuf_hi = nib_hi.lookup_16<uint8_t>(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0);
+    return shuf_lo & shuf_hi;
   });

-  const uint8x16_t operator_shufti_mask = vmovq_n_u8(0x7);
-  op = v.map([&](auto _v) {
-    return vtstq_u8(_v, operator_shufti_mask);
-  }).to_bitmask();
-
-  const uint8x16_t whitespace_shufti_mask = vmovq_n_u8(0x18);
-  whitespace = v.map([&](auto _v) {
-    return vtstq_u8(_v, whitespace_shufti_mask);
-  }).to_bitmask();
+  op = v.map([&](simd8<uint8_t> _v) { return _v.any_bits_set(0x7); }).to_bitmask();
+  whitespace = v.map([&](simd8<uint8_t> _v) { return _v.any_bits_set(0x18); }).to_bitmask();
 }

 #include "generic/simdutf8check.h"
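Note (not part of the patch): the rewrite keeps the classic two-table nibble classification, only the spelling changes. Each byte's low nibble and high nibble index a 16-entry table, the two lookups are ANDed, and the surviving bits classify the byte: bits 0x7 mark structural characters, bits 0x18 mark whitespace. A scalar restatement of the same classification, handy for checking the tables (helper names are illustrative):

#include <cstdint>
#include <cstdio>

static const uint8_t low_nibble_table[16]  = {16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0};
static const uint8_t high_nibble_table[16] = {8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0};

static uint8_t classify(uint8_t c) {
  return low_nibble_table[c & 0xF] & high_nibble_table[c >> 4];
}
static bool is_operator(uint8_t c)   { return (classify(c) & 0x07) != 0; }
static bool is_whitespace(uint8_t c) { return (classify(c) & 0x18) != 0; }

int main() {
  // Structural characters and JSON whitespace land in the expected classes:
  printf("',' operator: %d\n", is_operator(','));            // 1
  printf("'{' operator: %d\n", is_operator('{'));            // 1
  printf("':' operator: %d\n", is_operator(':'));            // 1
  printf("' ' whitespace: %d\n", is_whitespace(' '));        // 1
  printf("'\\n' whitespace: %d\n", is_whitespace('\n'));     // 1
  printf("'a' neither: %d %d\n", is_operator('a'), is_whitespace('a')); // 0 0
  return 0;
}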
@@ -36,7 +36,7 @@ struct utf8_checker {
  }

  really_inline simd8<int8_t> continuation_lengths(simd8<int8_t> high_nibbles) {
-   return high_nibbles.lookup4<int8_t>(
+   return high_nibbles.lookup_16<int8_t>(
      1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII)
      0, 0, 0, 0,             // 10xx (continuation)
      2, 2,                   // 110x

@@ -102,7 +102,7 @@ struct utf8_checker {
    // Two-byte characters must start with at least C2
    // Three-byte characters must start with at least E1
    // Four-byte characters must start with at least F1
-   simd8<int8_t> initial_mins = off1_high_nibbles.lookup4<int8_t>(
+   simd8<int8_t> initial_mins = off1_high_nibbles.lookup_16<int8_t>(
      -128, -128, -128, -128, -128, -128, -128, -128, // 0xxx -> false
      -128, -128, -128, -128, // 10xx -> false
      0xC2, -128, // 1100 -> C2

@@ -114,7 +114,7 @@ struct utf8_checker {
    // Two-byte characters starting with at least C2 are always OK
    // Three-byte characters starting with at least E1 must be followed by at least A0
    // Four-byte characters starting with at least F1 must be followed by at least 90
-   simd8<int8_t> second_mins = off1_high_nibbles.lookup4<int8_t>(
+   simd8<int8_t> second_mins = off1_high_nibbles.lookup_16<int8_t>(
      -128, -128, -128, -128, -128, -128, -128, -128, -128, // 0xxx => false
      -128, -128, -128, // 10xx => false
      127, 127, // 110x => true

@@ -152,7 +152,7 @@ struct utf8_checker {
  }

  really_inline void check_next_input(simd8<uint8_t> in) {
-   if (likely(!in.any_bits_set(0x80u))) {
+   if (likely(!in.any_bits_set_anywhere(0x80u))) {
      this->check_carried_continuations();
    } else {
      this->check_utf8_bytes(in);

@@ -161,7 +161,7 @@ struct utf8_checker {

  really_inline void check_next_input(simd8x64<uint8_t> in) {
    simd8<uint8_t> bits = in.reduce([&](auto a, auto b) { return a | b; });
-   if (likely(!bits.any_bits_set(0x80u))) {
+   if (likely(!bits.any_bits_set_anywhere(0x80u))) {
      // it is ascii, we just check carried continuations.
      this->check_carried_continuations();
    } else {

@@ -171,6 +171,6 @@ struct utf8_checker {
  }

  really_inline ErrorValues errors() {
-   return this->has_error.any_bits_set() ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
+   return this->has_error.any_bits_set_anywhere() ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
  }
 }; // struct utf8_checker
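Note (not part of the patch): the renames in the UTF-8 checker are mechanical, but they make the ASCII fast path read naturally as a whole-register question: OR the input chunks down to one register, then ask whether bit 0x80 is set anywhere. A scalar sketch of that fast path, operating on a plain buffer:

#include <cstdint>
#include <cstddef>

// Scalar sketch of the ASCII fast-path test: OR the block together, then ask
// whether the high bit is set anywhere (the question any_bits_set_anywhere(0x80u)
// answers in the SIMD code).
bool block_is_ascii(const uint8_t *buf, size_t len) {
  uint8_t bits = 0;
  for (size_t i = 0; i < len; i++) {
    bits |= buf[i];                 // mirrors in.reduce([](a, b) { return a | b; })
  }
  return (bits & 0x80u) == 0;       // no byte had its top bit set
}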
@@ -29,7 +29,7 @@ namespace simdjson::haswell::simd {
  really_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); }
  really_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); }
  really_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(*this, other); }
- really_inline Child operator~() const { return this ^ 0xFFu; }
+ really_inline Child operator~() const { return *this ^ 0xFFu; }
  really_inline Child& operator|=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast | other; return *this_cast; }
  really_inline Child& operator&=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast & other; return *this_cast; }
  really_inline Child& operator^=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast ^ other; return *this_cast; }

@@ -88,20 +88,38 @@ namespace simdjson::haswell::simd {

  // Perform a lookup of the lower 4 bits
  template<typename L>
- really_inline simd8<L> lookup4(
+ really_inline simd8<L> lookup_lower_4_bits(
    L replace0, L replace1, L replace2, L replace3,
    L replace4, L replace5, L replace6, L replace7,
    L replace8, L replace9, L replace10, L replace11,
    L replace12, L replace13, L replace14, L replace15) const {
    simd8<L> lookup_table(
-     replace0, replace1, replace2, replace3, replace4, replace5, replace6, replace7,
-     replace8, replace9, replace10, replace11, replace12, replace13, replace14, replace15,
-     replace0, replace1, replace2, replace3, replace4, replace5, replace6, replace7,
-     replace8, replace9, replace10, replace11, replace12, replace13, replace14, replace15
+     replace0, replace1, replace2, replace3,
+     replace4, replace5, replace6, replace7,
+     replace8, replace9, replace10, replace11,
+     replace12, replace13, replace14, replace15,
+     replace0, replace1, replace2, replace3,
+     replace4, replace5, replace6, replace7,
+     replace8, replace9, replace10, replace11,
+     replace12, replace13, replace14, replace15
    );
    return _mm256_shuffle_epi8(lookup_table, *this);
  }

+ // Perform a lookup assuming the value is between 0 and 16
+ template<typename L>
+ really_inline simd8<L> lookup_16(
+   L replace0, L replace1, L replace2, L replace3,
+   L replace4, L replace5, L replace6, L replace7,
+   L replace8, L replace9, L replace10, L replace11,
+   L replace12, L replace13, L replace14, L replace15) const {
+   return lookup_lower_4_bits(
+     replace0, replace1, replace2, replace3,
+     replace4, replace5, replace6, replace7,
+     replace8, replace9, replace10, replace11,
+     replace12, replace13, replace14, replace15
+   );
+ }
 };

 // Signed bytes
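Note (not part of the patch): the AVX2 lookup table repeats the 16 replace values in both 128-bit halves because _mm256_shuffle_epi8 shuffles each half independently; an index byte in the upper lane can only select from the upper 16 table bytes. A scalar model of that per-lane behaviour:

#include <cstdint>
#include <cstddef>

// Scalar model of _mm256_shuffle_epi8 (vpshufb): each output byte is taken from the
// 16-byte lane that the index byte itself lives in; an index with bit 7 set yields 0.
void shuffle_epi8_256_model(const uint8_t table[32], const uint8_t idx[32], uint8_t out[32]) {
  for (size_t i = 0; i < 32; i++) {
    size_t lane_base = (i < 16) ? 0 : 16;             // lanes never cross
    uint8_t b = idx[i];
    out[i] = (b & 0x80) ? 0 : table[lane_base + (b & 0x0F)];
  }
}
// Duplicating the 16 replace values into both halves of lookup_table makes the two
// lanes behave identically, so lookup_lower_4_bits works the same for all 32 bytes.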
@@ -164,8 +182,10 @@ namespace simdjson::haswell::simd {
  really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return this->max(other) == other; }

  // Bit-specific operations
- really_inline bool any_bits_set(simd8<uint8_t> bits) const { return !_mm256_testz_si256(*this, bits); }
- really_inline bool any_bits_set() const { return !_mm256_testz_si256(*this, *this); }
+ really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return (*this & bits).any_bits_set(); }
+ really_inline simd8<bool> any_bits_set() const { return ~(*this == u8'\0'); }
+ really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !_mm256_testz_si256(*this, bits); }
+ really_inline bool any_bits_set_anywhere() const { return !_mm256_testz_si256(*this, *this); }
  template<int N>
  really_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); }
  template<int N>

@@ -189,7 +209,7 @@ namespace simdjson::haswell::simd {
    each_chunk(this->chunks[1]);
  }

- template <typename F, typename R=bool>
+ template <typename R=bool, typename F>
  really_inline simd8x64<R> map(F const& map_chunk) const {
    return simd8x64<R>(
      map_chunk(this->chunks[0]),

@@ -197,7 +217,7 @@ namespace simdjson::haswell::simd {
    );
  }

- template <typename F, typename R=bool>
+ template <typename R=bool, typename F>
  really_inline simd8x64<R> map(const simd8x64<uint8_t> b, F const& map_chunk) const {
    return simd8x64<R>(
      map_chunk(this->chunks[0], b.chunks[0]),

@@ -11,6 +11,8 @@
 TARGET_HASWELL
 namespace simdjson::haswell {

+using namespace simd;
+
 really_inline uint64_t compute_quote_mask(const uint64_t quote_bits) {
   // There should be no such thing with a processing supporting avx2
   // but not clmul.

@@ -23,64 +25,13 @@ really_inline void find_whitespace_and_operators(
     const simd::simd8x64<uint8_t> in,
     uint64_t &whitespace, uint64_t &op) {

-#ifdef SIMDJSON_NAIVE_STRUCTURAL
+  whitespace = in.map([&](simd8<uint8_t> _in) {
+    return _in == _in.lookup_lower_4_bits<uint8_t>(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100);
+  }).to_bitmask();
+
-  // You should never need this naive approach, but it can be useful
-  // for research purposes
-  const __m256i mask_open_brace = _mm256_set1_epi8(0x7b);
-  const __m256i mask_close_brace = _mm256_set1_epi8(0x7d);
-  const __m256i mask_open_bracket = _mm256_set1_epi8(0x5b);
-  const __m256i mask_close_bracket = _mm256_set1_epi8(0x5d);
-  const __m256i mask_column = _mm256_set1_epi8(0x3a);
-  const __m256i mask_comma = _mm256_set1_epi8(0x2c);
-  op = in.map([&](auto in) {
-    __m256i op = _mm256_cmpeq_epi8(in, mask_open_brace);
-    op = _mm256_or_si256(op, _mm256_cmpeq_epi8(in, mask_close_brace));
-    op = _mm256_or_si256(op, _mm256_cmpeq_epi8(in, mask_open_bracket));
-    op = _mm256_or_si256(op, _mm256_cmpeq_epi8(in, mask_close_bracket));
-    op = _mm256_or_si256(op, _mm256_cmpeq_epi8(in, mask_column));
-    op = _mm256_or_si256(op, _mm256_cmpeq_epi8(in, mask_comma));
-    return op;
-  }).to_bitmask();
-
-  const __m256i mask_space = _mm256_set1_epi8(0x20);
-  const __m256i mask_linefeed = _mm256_set1_epi8(0x0a);
-  const __m256i mask_tab = _mm256_set1_epi8(0x09);
-  const __m256i mask_carriage = _mm256_set1_epi8(0x0d);
-  whitespace = in.map([&](auto in) {
-    __m256i space = _mm256_cmpeq_epi8(in, mask_space);
-    space = _mm256_or_si256(space, _mm256_cmpeq_epi8(in, mask_linefeed));
-    space = _mm256_or_si256(space, _mm256_cmpeq_epi8(in, mask_tab));
-    space = _mm256_or_si256(space, _mm256_cmpeq_epi8(in, mask_carriage));
-    return space;
-  }).to_bitmask();
-  // end of naive approach
-
-#else // SIMDJSON_NAIVE_STRUCTURAL
-
-  // clang-format off
-  const __m256i operator_table =
-      _mm256_setr_epi8(',', '}', 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{',
-                       ',', '}', 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{');
-  const __m256i white_table = _mm256_setr_epi8(
-      ' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100,
-      ' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100);
-  // clang-format on
-  const __m256i op_offset = _mm256_set1_epi8(0xd4u);
-  const __m256i op_mask = _mm256_set1_epi8(32);
-
-  whitespace = in.map([&](auto _in) {
-    return _mm256_cmpeq_epi8(_in, _mm256_shuffle_epi8(white_table, _in));
-  }).to_bitmask();
-
-  op = in.map([&](auto _in) {
-    const __m256i r1 = _mm256_add_epi8(op_offset, _in);
-    const __m256i r2 = _in | op_mask;
-    const __m256i r3 = _mm256_shuffle_epi8(operator_table, r1);
-    return _mm256_cmpeq_epi8(r2, r3);
-  }).to_bitmask();
-
-#endif // else SIMDJSON_NAIVE_STRUCTURAL
+  op = in.map([&](simd8<uint8_t> _in) {
+    return (_in | 32) == (_in+0xd4u).lookup_lower_4_bits<uint8_t>(',', '}', 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{');
+  }).to_bitmask();
 }

 #include "generic/simdutf8check.h"
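Note (not part of the patch): with the naive branch gone, each raw-intrinsic sequence collapses into one comparison. The whitespace test compares a byte against a table indexed by its own low nibble (each whitespace character sits at its own low-nibble slot; the filler values can never equal the input). The operator test first adds 0xd4 so that , : [ ] { } land on table slots holding , : { } { }, then compares against _in | 32, which folds [ and ] onto { and }. A scalar check of that arithmetic, with pshufb modeled as in the earlier sketch:

#include <cstdint>
#include <cstdio>

// Scalar check of the operator trick: (_in | 32) == pshufb(operator_table, _in + 0xd4).
static const uint8_t operator_table[16] =
    {',', '}', 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{'};

static uint8_t pshufb_model(const uint8_t table[16], uint8_t idx) {
  return (idx & 0x80) ? 0 : table[idx & 0x0F];   // pshufb: zero if bit 7 set, else low nibble selects
}

static bool is_operator(uint8_t c) {
  return uint8_t(c | 32) == pshufb_model(operator_table, uint8_t(c + 0xd4u));
}

int main() {
  const char structural[] = {',', ':', '[', ']', '{', '}'};
  for (char c : structural) {
    printf("'%c' -> %d\n", c, is_operator(uint8_t(c)));   // all 1
  }
  printf("'a' -> %d\n", is_operator('a'));                // 0
  printf("' ' -> %d\n", is_operator(' '));                // 0
  return 0;
}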
@@ -29,7 +29,7 @@ namespace simdjson::westmere::simd {
  really_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); }
  really_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); }
  really_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(*this, other); }
- really_inline Child operator~() const { return this ^ 0xFFu; }
+ really_inline Child operator~() const { return *this ^ 0xFFu; }
  really_inline Child& operator|=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast | other; return *this_cast; }
  really_inline Child& operator&=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast & other; return *this_cast; }
  really_inline Child& operator^=(const Child other) { auto this_cast = (Child*)this; *this_cast = *this_cast ^ other; return *this_cast; }

@@ -67,7 +67,7 @@ namespace simdjson::westmere::simd {
  really_inline simd8<bool>(bool _value) : base8<bool>(splat(_value)) {}

  really_inline bitmask_t to_bitmask() const { return _mm_movemask_epi8(*this); }
- really_inline bool any() const { return !_mm_testz_si128(*this, *this) == 0; }
+ really_inline bool any() const { return !_mm_testz_si128(*this, *this); }
 };

 template<typename T>

@@ -89,7 +89,7 @@ namespace simdjson::westmere::simd {

  // Perform a lookup of the lower 4 bits
  template<typename L>
- really_inline simd8<L> lookup4(
+ really_inline simd8<L> lookup_lower_4_bits(
    L replace0, L replace1, L replace2, L replace3,
    L replace4, L replace5, L replace6, L replace7,
    L replace8, L replace9, L replace10, L replace11,

@@ -103,6 +103,21 @@ namespace simdjson::westmere::simd {
    );
    return _mm_shuffle_epi8(lookup_table, *this);
  }
+
+ // Perform a lookup assuming the value is between 0 and 16
+ template<typename L>
+ really_inline simd8<L> lookup_16(
+   L replace0, L replace1, L replace2, L replace3,
+   L replace4, L replace5, L replace6, L replace7,
+   L replace8, L replace9, L replace10, L replace11,
+   L replace12, L replace13, L replace14, L replace15) const {
+   return lookup_lower_4_bits(
+     replace0, replace1, replace2, replace3,
+     replace4, replace5, replace6, replace7,
+     replace8, replace9, replace10, replace11,
+     replace12, replace13, replace14, replace15
+   );
+ }
 };

 // Signed bytes

@@ -157,8 +172,10 @@ namespace simdjson::westmere::simd {
  really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return this->max(other) == other; }

  // Bit-specific operations
- really_inline bool any_bits_set(simd8<uint8_t> bits) const { return !_mm_testz_si128(*this, bits); }
- really_inline bool any_bits_set() const { return !_mm_testz_si128(*this, *this); }
+ really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return (*this & bits).any_bits_set(); }
+ really_inline simd8<bool> any_bits_set() const { return ~(*this == u8'\0'); }
+ really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !_mm_testz_si128(*this, bits); }
+ really_inline bool any_bits_set_anywhere() const { return !_mm_testz_si128(*this, *this); }
  template<int N>
  really_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); }
  template<int N>

@@ -11,31 +11,23 @@
 TARGET_WESTMERE
 namespace simdjson::westmere {

+using namespace simd;
+
 really_inline uint64_t compute_quote_mask(const uint64_t quote_bits) {
   return _mm_cvtsi128_si64(_mm_clmulepi64_si128(
       _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFFu), 0));
 }

 really_inline void find_whitespace_and_operators(
-    const simd::simd8x64<uint8_t> in,
+    const simd8x64<uint8_t> in,
     uint64_t &whitespace, uint64_t &op) {

-  const __m128i operator_table =
-      _mm_setr_epi8(44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123);
-  const __m128i white_table = _mm_setr_epi8(32, 100, 100, 100, 17, 100, 113, 2,
-                                            100, 9, 10, 112, 100, 13, 100, 100);
-  const __m128i op_offset = _mm_set1_epi8(0xd4u);
-  const __m128i op_mask = _mm_set1_epi8(32);
-
-  whitespace = in.map([&](auto _in) {
-    return _mm_cmpeq_epi8(_in, _mm_shuffle_epi8(white_table, _in));
+  whitespace = in.map([&](simd8<uint8_t> _in) {
+    return _in == _in.lookup_lower_4_bits<uint8_t>(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100);
   }).to_bitmask();

-  op = in.map([&](auto _in) {
-    const __m128i r1 = _mm_add_epi8(op_offset, _in);
-    const __m128i r2 = _mm_or_si128(_in, op_mask);
-    const __m128i r3 = _mm_shuffle_epi8(operator_table, r1);
-    return _mm_cmpeq_epi8(r2, r3);
+  op = in.map([&](simd8<uint8_t> _in) {
+    return (_in | 32) == (_in+0xd4u).lookup_lower_4_bits<uint8_t>(',', '}', 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{');
   }).to_bitmask();
 }