Merge pull request #285 from lemire/methods

Use methods instead of functions for simd_input
This commit is contained in:
John Keiser 2019-08-16 17:45:42 -07:00 committed by GitHub
commit 08cf140811
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 212 additions and 261 deletions

View File

@ -8,19 +8,14 @@
namespace simdjson { namespace simdjson {
template <Architecture> struct simd_input; template <Architecture>
struct simd_input {
template <Architecture T> simd_input(const uint8_t *ptr);
simd_input<T> fill_input(const uint8_t *ptr); // a straightforward comparison of a mask against input.
uint64_t eq(uint8_t m);
// a straightforward comparison of a mask against input. // find all values less than or equal than the content of maxval (using unsigned arithmetic)
template <Architecture T> uint64_t lteq(uint8_t m);
uint64_t cmp_mask_against_input(simd_input<T> in, uint8_t m); }; // struct simd_input
// find all values less than or equal than the content of maxval (using unsigned
// arithmetic)
template <Architecture T>
uint64_t unsigned_lteq_against_input(simd_input<T> in, uint8_t m);
} // namespace simdjson } // namespace simdjson

View File

@ -6,28 +6,9 @@
#ifdef IS_ARM64 #ifdef IS_ARM64
namespace simdjson { namespace simdjson {
template <>
struct simd_input<Architecture::ARM64> {
uint8x16_t i0;
uint8x16_t i1;
uint8x16_t i2;
uint8x16_t i3;
};
template <>
really_inline simd_input<Architecture::ARM64>
fill_input<Architecture::ARM64>(const uint8_t *ptr) {
struct simd_input<Architecture::ARM64> in;
in.i0 = vld1q_u8(ptr + 0);
in.i1 = vld1q_u8(ptr + 16);
in.i2 = vld1q_u8(ptr + 32);
in.i3 = vld1q_u8(ptr + 48);
return in;
}
really_inline uint16_t neon_movemask(uint8x16_t input) { really_inline uint16_t neon_movemask(uint8x16_t input) {
const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
uint8x16_t minput = vandq_u8(input, bit_mask); uint8x16_t minput = vandq_u8(input, bit_mask);
uint8x16_t tmp = vpaddq_u8(minput, minput); uint8x16_t tmp = vpaddq_u8(minput, minput);
tmp = vpaddq_u8(tmp, tmp); tmp = vpaddq_u8(tmp, tmp);
@ -38,7 +19,7 @@ really_inline uint16_t neon_movemask(uint8x16_t input) {
really_inline uint64_t neon_movemask_bulk(uint8x16_t p0, uint8x16_t p1, really_inline uint64_t neon_movemask_bulk(uint8x16_t p0, uint8x16_t p1,
uint8x16_t p2, uint8x16_t p3) { uint8x16_t p2, uint8x16_t p3) {
const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
uint8x16_t t0 = vandq_u8(p0, bit_mask); uint8x16_t t0 = vandq_u8(p0, bit_mask);
uint8x16_t t1 = vandq_u8(p1, bit_mask); uint8x16_t t1 = vandq_u8(p1, bit_mask);
uint8x16_t t2 = vandq_u8(p2, bit_mask); uint8x16_t t2 = vandq_u8(p2, bit_mask);
@ -51,26 +32,38 @@ really_inline uint64_t neon_movemask_bulk(uint8x16_t p0, uint8x16_t p1,
} }
template <> template <>
really_inline uint64_t cmp_mask_against_input<Architecture::ARM64>( struct simd_input<Architecture::ARM64> {
simd_input<Architecture::ARM64> in, uint8_t m) { uint8x16_t i0;
const uint8x16_t mask = vmovq_n_u8(m); uint8x16_t i1;
uint8x16_t cmp_res_0 = vceqq_u8(in.i0, mask); uint8x16_t i2;
uint8x16_t cmp_res_1 = vceqq_u8(in.i1, mask); uint8x16_t i3;
uint8x16_t cmp_res_2 = vceqq_u8(in.i2, mask);
uint8x16_t cmp_res_3 = vceqq_u8(in.i3, mask);
return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3);
}
template <> really_inline simd_input(const uint8_t *ptr) {
really_inline uint64_t unsigned_lteq_against_input<Architecture::ARM64>( this->i0 = vld1q_u8(ptr + 0);
simd_input<Architecture::ARM64> in, uint8_t m) { this->i1 = vld1q_u8(ptr + 16);
const uint8x16_t mask = vmovq_n_u8(m); this->i2 = vld1q_u8(ptr + 32);
uint8x16_t cmp_res_0 = vcleq_u8(in.i0, mask); this->i3 = vld1q_u8(ptr + 48);
uint8x16_t cmp_res_1 = vcleq_u8(in.i1, mask); }
uint8x16_t cmp_res_2 = vcleq_u8(in.i2, mask);
uint8x16_t cmp_res_3 = vcleq_u8(in.i3, mask); really_inline uint64_t eq(uint8_t m) {
return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3); const uint8x16_t mask = vmovq_n_u8(m);
} uint8x16_t cmp_res_0 = vceqq_u8(this->i0, mask);
uint8x16_t cmp_res_1 = vceqq_u8(this->i1, mask);
uint8x16_t cmp_res_2 = vceqq_u8(this->i2, mask);
uint8x16_t cmp_res_3 = vceqq_u8(this->i3, mask);
return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3);
}
really_inline uint64_t lteq(uint8_t m) {
const uint8x16_t mask = vmovq_n_u8(m);
uint8x16_t cmp_res_0 = vcleq_u8(this->i0, mask);
uint8x16_t cmp_res_1 = vcleq_u8(this->i1, mask);
uint8x16_t cmp_res_2 = vcleq_u8(this->i2, mask);
uint8x16_t cmp_res_3 = vcleq_u8(this->i3, mask);
return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3);
}
}; // struct simd_input
} // namespace simdjson } // namespace simdjson

View File

@ -12,38 +12,31 @@ template <>
struct simd_input<Architecture::HASWELL> { struct simd_input<Architecture::HASWELL> {
__m256i lo; __m256i lo;
__m256i hi; __m256i hi;
};
template <> really_inline simd_input(const uint8_t *ptr) {
really_inline simd_input<Architecture::HASWELL> this->lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 0));
fill_input<Architecture::HASWELL>(const uint8_t *ptr) { this->hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 32));
struct simd_input<Architecture::HASWELL> in; }
in.lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 0));
in.hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 32));
return in;
}
template <> really_inline uint64_t eq(uint8_t m) {
really_inline uint64_t cmp_mask_against_input<Architecture::HASWELL>( const __m256i mask = _mm256_set1_epi8(m);
simd_input<Architecture::HASWELL> in, uint8_t m) { __m256i cmp_res_0 = _mm256_cmpeq_epi8(this->lo, mask);
const __m256i mask = _mm256_set1_epi8(m); uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
__m256i cmp_res_0 = _mm256_cmpeq_epi8(in.lo, mask); __m256i cmp_res_1 = _mm256_cmpeq_epi8(this->hi, mask);
uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0)); uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
__m256i cmp_res_1 = _mm256_cmpeq_epi8(in.hi, mask); return res_0 | (res_1 << 32);
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); }
return res_0 | (res_1 << 32);
}
template <> really_inline uint64_t lteq(uint8_t m) {
really_inline uint64_t unsigned_lteq_against_input<Architecture::HASWELL>( const __m256i maxval = _mm256_set1_epi8(m);
simd_input<Architecture::HASWELL> in, uint8_t m) { __m256i cmp_res_0 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, this->lo), maxval);
const __m256i maxval = _mm256_set1_epi8(m); uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
__m256i cmp_res_0 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, in.lo), maxval); __m256i cmp_res_1 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, this->hi), maxval);
uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0)); uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
__m256i cmp_res_1 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, in.hi), maxval); return res_0 | (res_1 << 32);
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); }
return res_0 | (res_1 << 32);
} }; // struct simd_input
} // namespace simdjson } // namespace simdjson
UNTARGET_REGION UNTARGET_REGION

View File

@ -14,48 +14,41 @@ struct simd_input<Architecture::WESTMERE> {
__m128i v1; __m128i v1;
__m128i v2; __m128i v2;
__m128i v3; __m128i v3;
};
template <> really_inline simd_input(const uint8_t *ptr) {
really_inline simd_input<Architecture::WESTMERE> this->v0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 0));
fill_input<Architecture::WESTMERE>(const uint8_t *ptr) { this->v1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 16));
struct simd_input<Architecture::WESTMERE> in; this->v2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 32));
in.v0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 0)); this->v3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 48));
in.v1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 16)); }
in.v2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 32));
in.v3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 48));
return in;
}
template <> really_inline uint64_t eq(uint8_t m) {
really_inline uint64_t cmp_mask_against_input<Architecture::WESTMERE>( const __m128i mask = _mm_set1_epi8(m);
simd_input<Architecture::WESTMERE> in, uint8_t m) { __m128i cmp_res_0 = _mm_cmpeq_epi8(this->v0, mask);
const __m128i mask = _mm_set1_epi8(m); uint64_t res_0 = _mm_movemask_epi8(cmp_res_0);
__m128i cmp_res_0 = _mm_cmpeq_epi8(in.v0, mask); __m128i cmp_res_1 = _mm_cmpeq_epi8(this->v1, mask);
uint64_t res_0 = _mm_movemask_epi8(cmp_res_0); uint64_t res_1 = _mm_movemask_epi8(cmp_res_1);
__m128i cmp_res_1 = _mm_cmpeq_epi8(in.v1, mask); __m128i cmp_res_2 = _mm_cmpeq_epi8(this->v2, mask);
uint64_t res_1 = _mm_movemask_epi8(cmp_res_1); uint64_t res_2 = _mm_movemask_epi8(cmp_res_2);
__m128i cmp_res_2 = _mm_cmpeq_epi8(in.v2, mask); __m128i cmp_res_3 = _mm_cmpeq_epi8(this->v3, mask);
uint64_t res_2 = _mm_movemask_epi8(cmp_res_2); uint64_t res_3 = _mm_movemask_epi8(cmp_res_3);
__m128i cmp_res_3 = _mm_cmpeq_epi8(in.v3, mask); return res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48);
uint64_t res_3 = _mm_movemask_epi8(cmp_res_3); }
return res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48);
}
template <> really_inline uint64_t lteq(uint8_t m) {
really_inline uint64_t unsigned_lteq_against_input<Architecture::WESTMERE>( const __m128i maxval = _mm_set1_epi8(m);
simd_input<Architecture::WESTMERE> in, uint8_t m) { __m128i cmp_res_0 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, this->v0), maxval);
const __m128i maxval = _mm_set1_epi8(m); uint64_t res_0 = _mm_movemask_epi8(cmp_res_0);
__m128i cmp_res_0 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v0), maxval); __m128i cmp_res_1 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, this->v1), maxval);
uint64_t res_0 = _mm_movemask_epi8(cmp_res_0); uint64_t res_1 = _mm_movemask_epi8(cmp_res_1);
__m128i cmp_res_1 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v1), maxval); __m128i cmp_res_2 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, this->v2), maxval);
uint64_t res_1 = _mm_movemask_epi8(cmp_res_1); uint64_t res_2 = _mm_movemask_epi8(cmp_res_2);
__m128i cmp_res_2 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v2), maxval); __m128i cmp_res_3 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, this->v3), maxval);
uint64_t res_2 = _mm_movemask_epi8(cmp_res_2); uint64_t res_3 = _mm_movemask_epi8(cmp_res_3);
__m128i cmp_res_3 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v3), maxval); return res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48);
uint64_t res_3 = _mm_movemask_epi8(cmp_res_3); }
return res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48);
} }; // struct simd_input
} // namespace simdjson } // namespace simdjson
UNTARGET_REGION UNTARGET_REGION

View File

@ -6,15 +6,14 @@
namespace simdjson { namespace simdjson {
// Holds the state required to perform check_utf8(). // Checks UTF8, chunk by chunk.
template <Architecture> struct utf8_checking_state;
template <Architecture T> template <Architecture T>
void check_utf8(simd_input<T> in, utf8_checking_state<T> &state); struct utf8_checker {
// Process the next chunk of input.
// Checks if the utf8 validation has found any error. void check_next_input(simd_input<T> in);
template <Architecture T> // Find out what (if any) errors have occurred
ErrorValues check_utf8_errors(utf8_checking_state<T> &state); ErrorValues errors();
};
} // namespace simdjson } // namespace simdjson

View File

@ -177,12 +177,6 @@ check_utf8_bytes(int8x16_t current_bytes, struct processed_utf_bytes *previous,
return pb; return pb;
} }
template <>
struct utf8_checking_state<Architecture::ARM64> {
int8x16_t has_error{};
processed_utf_bytes previous{};
};
// Checks that all bytes are ascii // Checks that all bytes are ascii
really_inline bool check_ascii_neon(simd_input<Architecture::ARM64> in) { really_inline bool check_ascii_neon(simd_input<Architecture::ARM64> in) {
// checking if the most significant bit is always equal to 0. // checking if the most significant bit is always equal to 0.
@ -198,41 +192,43 @@ really_inline bool check_ascii_neon(simd_input<Architecture::ARM64> in) {
} }
template <> template <>
really_inline void check_utf8<Architecture::ARM64>( struct utf8_checker<Architecture::ARM64> {
simd_input<Architecture::ARM64> in, int8x16_t has_error{};
utf8_checking_state<Architecture::ARM64> &state) { processed_utf_bytes previous{};
if (check_ascii_neon(in)) {
// All bytes are ascii. Therefore the byte that was just before must be
// ascii too. We only check the byte that was just before simd_input. Nines
// are arbitrary values.
const int8x16_t verror =
(int8x16_t){9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1};
state.has_error =
vorrq_s8(vreinterpretq_s8_u8(
vcgtq_s8(state.previous.carried_continuations, verror)),
state.has_error);
} else {
// it is not ascii so we have to do heavy work
state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i0),
&(state.previous), &(state.has_error));
state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i1),
&(state.previous), &(state.has_error));
state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i2),
&(state.previous), &(state.has_error));
state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i3),
&(state.previous), &(state.has_error));
}
}
template <> really_inline void check_next_input(simd_input<Architecture::ARM64> in) {
really_inline ErrorValues check_utf8_errors<Architecture::ARM64>( if (check_ascii_neon(in)) {
utf8_checking_state<Architecture::ARM64> &state) { // All bytes are ascii. Therefore the byte that was just before must be
uint64x2_t v64 = vreinterpretq_u64_s8(state.has_error); // ascii too. We only check the byte that was just before simd_input. Nines
uint32x2_t v32 = vqmovn_u64(v64); // are arbitrary values.
uint64x1_t result = vreinterpret_u64_u32(v32); const int8x16_t verror =
return vget_lane_u64(result, 0) != 0 ? simdjson::UTF8_ERROR (int8x16_t){9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1};
: simdjson::SUCCESS; this->has_error =
} vorrq_s8(vreinterpretq_s8_u8(
vcgtq_s8(this->previous.carried_continuations, verror)),
this->has_error);
} else {
// it is not ascii so we have to do heavy work
this->previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i0),
&(this->previous), &(this->has_error));
this->previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i1),
&(this->previous), &(this->has_error));
this->previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i2),
&(this->previous), &(this->has_error));
this->previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i3),
&(this->previous), &(this->has_error));
}
}
really_inline ErrorValues errors() {
uint64x2_t v64 = vreinterpretq_u64_s8(this->has_error);
uint32x2_t v32 = vqmovn_u64(v64);
uint64x1_t result = vreinterpret_u64_u32(v32);
return vget_lane_u64(result, 0) != 0 ? simdjson::UTF8_ERROR
: simdjson::SUCCESS;
}
}; // struct utf8_checker
} // namespace simdjson } // namespace simdjson
#endif #endif

View File

@ -192,46 +192,43 @@ avx_check_utf8_bytes(__m256i current_bytes,
return pb; return pb;
} }
template <> struct utf8_checking_state<Architecture::HASWELL> { template <>
struct utf8_checker<Architecture::HASWELL> {
__m256i has_error; __m256i has_error;
avx_processed_utf_bytes previous; avx_processed_utf_bytes previous;
utf8_checking_state() {
utf8_checker() {
has_error = _mm256_setzero_si256(); has_error = _mm256_setzero_si256();
previous.raw_bytes = _mm256_setzero_si256(); previous.raw_bytes = _mm256_setzero_si256();
previous.high_nibbles = _mm256_setzero_si256(); previous.high_nibbles = _mm256_setzero_si256();
previous.carried_continuations = _mm256_setzero_si256(); previous.carried_continuations = _mm256_setzero_si256();
} }
};
template <> really_inline void check_next_input(simd_input<Architecture::HASWELL> in) {
really_inline void check_utf8<Architecture::HASWELL>( __m256i high_bit = _mm256_set1_epi8(0x80u);
simd_input<Architecture::HASWELL> in, if ((_mm256_testz_si256(_mm256_or_si256(in.lo, in.hi), high_bit)) == 1) {
utf8_checking_state<Architecture::HASWELL> &state) { // it is ascii, we just check continuation
__m256i high_bit = _mm256_set1_epi8(0x80u); this->has_error = _mm256_or_si256(
if ((_mm256_testz_si256(_mm256_or_si256(in.lo, in.hi), high_bit)) == 1) { _mm256_cmpgt_epi8(this->previous.carried_continuations,
// it is ascii, we just check continuation _mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
state.has_error = _mm256_or_si256( 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
_mm256_cmpgt_epi8(state.previous.carried_continuations, 9, 9, 9, 9, 9, 9, 9, 1)),
_mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, this->has_error);
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, } else {
9, 9, 9, 9, 9, 9, 9, 1)), // it is not ascii so we have to do heavy work
state.has_error); this->previous =
} else { avx_check_utf8_bytes(in.lo, &(this->previous), &(this->has_error));
// it is not ascii so we have to do heavy work this->previous =
state.previous = avx_check_utf8_bytes(in.hi, &(this->previous), &(this->has_error));
avx_check_utf8_bytes(in.lo, &(state.previous), &(state.has_error)); }
state.previous =
avx_check_utf8_bytes(in.hi, &(state.previous), &(state.has_error));
} }
}
template <> really_inline ErrorValues errors() {
really_inline ErrorValues check_utf8_errors<Architecture::HASWELL>( return _mm256_testz_si256(this->has_error, this->has_error) == 0
utf8_checking_state<Architecture::HASWELL> &state) { ? simdjson::UTF8_ERROR
return _mm256_testz_si256(state.has_error, state.has_error) == 0 : simdjson::SUCCESS;
? simdjson::UTF8_ERROR }
: simdjson::SUCCESS; }; // struct utf8_checker
}
} // namespace simdjson } // namespace simdjson
UNTARGET_REGION // haswell UNTARGET_REGION // haswell

View File

@ -31,6 +31,7 @@
TARGET_WESTMERE TARGET_WESTMERE
namespace simdjson { namespace simdjson {
// all byte values must be no larger than 0xF4 // all byte values must be no larger than 0xF4
static inline void check_smaller_than_0xF4(__m128i current_bytes, static inline void check_smaller_than_0xF4(__m128i current_bytes,
__m128i *has_error) { __m128i *has_error) {
@ -164,58 +165,54 @@ check_utf8_bytes(__m128i current_bytes, struct processed_utf_bytes *previous,
} }
template <> template <>
struct utf8_checking_state<Architecture::WESTMERE> { struct utf8_checker<Architecture::WESTMERE> {
__m128i has_error = _mm_setzero_si128(); __m128i has_error = _mm_setzero_si128();
processed_utf_bytes previous{ processed_utf_bytes previous{
_mm_setzero_si128(), // raw_bytes _mm_setzero_si128(), // raw_bytes
_mm_setzero_si128(), // high_nibbles _mm_setzero_si128(), // high_nibbles
_mm_setzero_si128() // carried_continuations _mm_setzero_si128() // carried_continuations
}; };
};
template <> really_inline void check_next_input(simd_input<Architecture::WESTMERE> in) {
really_inline void check_utf8<Architecture::WESTMERE>( __m128i high_bit = _mm_set1_epi8(0x80u);
simd_input<Architecture::WESTMERE> in, if ((_mm_testz_si128(_mm_or_si128(in.v0, in.v1), high_bit)) == 1) {
utf8_checking_state<Architecture::WESTMERE> &state) { // it is ascii, we just check continuation
__m128i high_bit = _mm_set1_epi8(0x80u); this->has_error =
if ((_mm_testz_si128(_mm_or_si128(in.v0, in.v1), high_bit)) == 1) { _mm_or_si128(_mm_cmpgt_epi8(this->previous.carried_continuations,
// it is ascii, we just check continuation _mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
state.has_error = 9, 9, 9, 9, 9, 1)),
_mm_or_si128(_mm_cmpgt_epi8(state.previous.carried_continuations, this->has_error);
_mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, } else {
9, 9, 9, 9, 9, 1)), // it is not ascii so we have to do heavy work
state.has_error); this->previous =
} else { check_utf8_bytes(in.v0, &(this->previous), &(this->has_error));
// it is not ascii so we have to do heavy work this->previous =
state.previous = check_utf8_bytes(in.v1, &(this->previous), &(this->has_error));
check_utf8_bytes(in.v0, &(state.previous), &(state.has_error)); }
state.previous =
check_utf8_bytes(in.v1, &(state.previous), &(state.has_error)); if ((_mm_testz_si128(_mm_or_si128(in.v2, in.v3), high_bit)) == 1) {
// it is ascii, we just check continuation
this->has_error =
_mm_or_si128(_mm_cmpgt_epi8(this->previous.carried_continuations,
_mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 1)),
this->has_error);
} else {
// it is not ascii so we have to do heavy work
this->previous =
check_utf8_bytes(in.v2, &(this->previous), &(this->has_error));
this->previous =
check_utf8_bytes(in.v3, &(this->previous), &(this->has_error));
}
} }
if ((_mm_testz_si128(_mm_or_si128(in.v2, in.v3), high_bit)) == 1) { really_inline ErrorValues errors() {
// it is ascii, we just check continuation return _mm_testz_si128(this->has_error, this->has_error) == 0
state.has_error = ? simdjson::UTF8_ERROR
_mm_or_si128(_mm_cmpgt_epi8(state.previous.carried_continuations, : simdjson::SUCCESS;
_mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 1)),
state.has_error);
} else {
// it is not ascii so we have to do heavy work
state.previous =
check_utf8_bytes(in.v2, &(state.previous), &(state.has_error));
state.previous =
check_utf8_bytes(in.v3, &(state.previous), &(state.has_error));
} }
}
template <> }; // struct utf8_checker
really_inline ErrorValues check_utf8_errors<Architecture::WESTMERE>(
utf8_checking_state<Architecture::WESTMERE> &state) {
return _mm_testz_si128(state.has_error, state.has_error) == 0
? simdjson::UTF8_ERROR
: simdjson::SUCCESS;
}
} // namespace simdjson } // namespace simdjson
UNTARGET_REGION // westmere UNTARGET_REGION // westmere

View File

@ -25,16 +25,6 @@ namespace {
} }
} // namespace } // namespace
// Holds the state required to perform check_utf8().
template <Architecture> struct utf8_checking_state;
template <Architecture T>
void check_utf8(simd_input<T> in, utf8_checking_state<T> &state);
// Checks if the utf8 validation has found any error.
template <Architecture T>
ErrorValues check_utf8_errors(utf8_checking_state<T> &state);
template <Architecture T> template <Architecture T>
really_inline uint64_t find_odd_backslash_sequences( really_inline uint64_t find_odd_backslash_sequences(
simd_input<T> in, uint64_t &prev_iter_ends_odd_backslash); simd_input<T> in, uint64_t &prev_iter_ends_odd_backslash);

View File

@ -24,7 +24,7 @@ really_inline uint64_t find_odd_backslash_sequences<TARGETED_ARCHITECTURE>(
uint64_t &prev_iter_ends_odd_backslash) { uint64_t &prev_iter_ends_odd_backslash) {
const uint64_t even_bits = 0x5555555555555555ULL; const uint64_t even_bits = 0x5555555555555555ULL;
const uint64_t odd_bits = ~even_bits; const uint64_t odd_bits = ~even_bits;
uint64_t bs_bits = cmp_mask_against_input<TARGETED_ARCHITECTURE>(in, '\\'); uint64_t bs_bits = in.eq('\\');
uint64_t start_edges = bs_bits & ~(bs_bits << 1); uint64_t start_edges = bs_bits & ~(bs_bits << 1);
/* flip lowest if we have an odd-length run at the end of the prior /* flip lowest if we have an odd-length run at the end of the prior
* iteration */ * iteration */
@ -71,7 +71,7 @@ really_inline uint64_t find_quote_mask_and_bits<TARGETED_ARCHITECTURE>(
simd_input<TARGETED_ARCHITECTURE> in, uint64_t odd_ends, simd_input<TARGETED_ARCHITECTURE> in, uint64_t odd_ends,
uint64_t &prev_iter_inside_quote, uint64_t &quote_bits, uint64_t &prev_iter_inside_quote, uint64_t &quote_bits,
uint64_t &error_mask) { uint64_t &error_mask) {
quote_bits = cmp_mask_against_input<TARGETED_ARCHITECTURE>(in, '"'); quote_bits = in.eq('"');
quote_bits = quote_bits & ~odd_ends; quote_bits = quote_bits & ~odd_ends;
uint64_t quote_mask = compute_quote_mask<TARGETED_ARCHITECTURE>(quote_bits); uint64_t quote_mask = compute_quote_mask<TARGETED_ARCHITECTURE>(quote_bits);
quote_mask ^= prev_iter_inside_quote; quote_mask ^= prev_iter_inside_quote;
@ -80,8 +80,7 @@ really_inline uint64_t find_quote_mask_and_bits<TARGETED_ARCHITECTURE>(
* quotation mark, reverse solidus, and the control characters (U+0000 * quotation mark, reverse solidus, and the control characters (U+0000
* through U+001F). * through U+001F).
* https://tools.ietf.org/html/rfc8259 */ * https://tools.ietf.org/html/rfc8259 */
uint64_t unescaped = uint64_t unescaped = in.lteq(0x1F);
unsigned_lteq_against_input<TARGETED_ARCHITECTURE>(in, 0x1F);
error_mask |= quote_mask & unescaped; error_mask |= quote_mask & unescaped;
/* right shift of a signed value expected to be well-defined and standard /* right shift of a signed value expected to be well-defined and standard
* compliant as of C++20, * compliant as of C++20,
@ -97,9 +96,9 @@ really_inline void find_structural_bits_64(
uint64_t &prev_iter_ends_odd_backslash, uint64_t &prev_iter_inside_quote, uint64_t &prev_iter_ends_odd_backslash, uint64_t &prev_iter_inside_quote,
uint64_t &prev_iter_ends_pseudo_pred, uint64_t &structurals, uint64_t &prev_iter_ends_pseudo_pred, uint64_t &structurals,
uint64_t &error_mask, uint64_t &error_mask,
utf8_checking_state<TARGETED_ARCHITECTURE> &utf8_state) { utf8_checker<TARGETED_ARCHITECTURE> &utf8_state) {
simd_input<TARGETED_ARCHITECTURE> in = fill_input<TARGETED_ARCHITECTURE>(buf); simd_input<TARGETED_ARCHITECTURE> in(buf);
check_utf8<TARGETED_ARCHITECTURE>(in, utf8_state); utf8_state.check_next_input(in);
/* detect odd sequences of backslashes */ /* detect odd sequences of backslashes */
uint64_t odd_ends = find_odd_backslash_sequences<TARGETED_ARCHITECTURE>( uint64_t odd_ends = find_odd_backslash_sequences<TARGETED_ARCHITECTURE>(
in, prev_iter_ends_odd_backslash); in, prev_iter_ends_odd_backslash);
@ -136,7 +135,7 @@ int find_structural_bits<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
} }
uint32_t *base_ptr = pj.structural_indexes; uint32_t *base_ptr = pj.structural_indexes;
uint32_t base = 0; uint32_t base = 0;
utf8_checking_state<TARGETED_ARCHITECTURE> utf8_state; utf8_checker<TARGETED_ARCHITECTURE> utf8_state;
/* we have padded the input out to 64 byte multiple with the remainder /* we have padded the input out to 64 byte multiple with the remainder
* being zeros persistent state across loop does the last iteration end * being zeros persistent state across loop does the last iteration end
@ -208,8 +207,7 @@ int find_structural_bits<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
} }
if (len != base_ptr[pj.n_structural_indexes - 1]) { if (len != base_ptr[pj.n_structural_indexes - 1]) {
/* the string might not be NULL terminated, but we add a virtual NULL /* the string might not be NULL terminated, but we add a virtual NULL
* ending * ending character. */
* character. */
base_ptr[pj.n_structural_indexes++] = len; base_ptr[pj.n_structural_indexes++] = len;
} }
/* make it safe to dereference one beyond this array */ /* make it safe to dereference one beyond this array */
@ -217,7 +215,7 @@ int find_structural_bits<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
if (error_mask) { if (error_mask) {
return simdjson::UNESCAPED_CHARS; return simdjson::UNESCAPED_CHARS;
} }
return check_utf8_errors<TARGETED_ARCHITECTURE>(utf8_state); return utf8_state.errors();
} }
} // namespace simdjson } // namespace simdjson