diff --git a/include/simdjson/simd_input.h b/include/simdjson/simd_input.h index f834c442..62948df4 100644 --- a/include/simdjson/simd_input.h +++ b/include/simdjson/simd_input.h @@ -8,19 +8,14 @@ namespace simdjson { -template struct simd_input; - -template -simd_input fill_input(const uint8_t *ptr); - -// a straightforward comparison of a mask against input. -template -uint64_t cmp_mask_against_input(simd_input in, uint8_t m); - -// find all values less than or equal than the content of maxval (using unsigned -// arithmetic) -template -uint64_t unsigned_lteq_against_input(simd_input in, uint8_t m); +template +struct simd_input { + simd_input(const uint8_t *ptr); + // a straightforward comparison of a mask against input. + uint64_t eq(uint8_t m); + // find all values less than or equal than the content of maxval (using unsigned arithmetic) + uint64_t lteq(uint8_t m); +}; // struct simd_input } // namespace simdjson diff --git a/include/simdjson/simd_input_arm64.h b/include/simdjson/simd_input_arm64.h index 658194a2..206e669a 100644 --- a/include/simdjson/simd_input_arm64.h +++ b/include/simdjson/simd_input_arm64.h @@ -6,28 +6,9 @@ #ifdef IS_ARM64 namespace simdjson { -template <> -struct simd_input { - uint8x16_t i0; - uint8x16_t i1; - uint8x16_t i2; - uint8x16_t i3; -}; - -template <> -really_inline simd_input -fill_input(const uint8_t *ptr) { - struct simd_input in; - in.i0 = vld1q_u8(ptr + 0); - in.i1 = vld1q_u8(ptr + 16); - in.i2 = vld1q_u8(ptr + 32); - in.i3 = vld1q_u8(ptr + 48); - return in; -} - really_inline uint16_t neon_movemask(uint8x16_t input) { const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; uint8x16_t minput = vandq_u8(input, bit_mask); uint8x16_t tmp = vpaddq_u8(minput, minput); tmp = vpaddq_u8(tmp, tmp); @@ -38,7 +19,7 @@ really_inline uint16_t neon_movemask(uint8x16_t input) { really_inline uint64_t 
neon_movemask_bulk(uint8x16_t p0, uint8x16_t p1, uint8x16_t p2, uint8x16_t p3) { const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; uint8x16_t t0 = vandq_u8(p0, bit_mask); uint8x16_t t1 = vandq_u8(p1, bit_mask); uint8x16_t t2 = vandq_u8(p2, bit_mask); @@ -51,26 +32,38 @@ really_inline uint64_t neon_movemask_bulk(uint8x16_t p0, uint8x16_t p1, } template <> -really_inline uint64_t cmp_mask_against_input( - simd_input in, uint8_t m) { - const uint8x16_t mask = vmovq_n_u8(m); - uint8x16_t cmp_res_0 = vceqq_u8(in.i0, mask); - uint8x16_t cmp_res_1 = vceqq_u8(in.i1, mask); - uint8x16_t cmp_res_2 = vceqq_u8(in.i2, mask); - uint8x16_t cmp_res_3 = vceqq_u8(in.i3, mask); - return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3); -} +struct simd_input { + uint8x16_t i0; + uint8x16_t i1; + uint8x16_t i2; + uint8x16_t i3; -template <> -really_inline uint64_t unsigned_lteq_against_input( - simd_input in, uint8_t m) { - const uint8x16_t mask = vmovq_n_u8(m); - uint8x16_t cmp_res_0 = vcleq_u8(in.i0, mask); - uint8x16_t cmp_res_1 = vcleq_u8(in.i1, mask); - uint8x16_t cmp_res_2 = vcleq_u8(in.i2, mask); - uint8x16_t cmp_res_3 = vcleq_u8(in.i3, mask); - return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3); -} + really_inline simd_input(const uint8_t *ptr) { + this->i0 = vld1q_u8(ptr + 0); + this->i1 = vld1q_u8(ptr + 16); + this->i2 = vld1q_u8(ptr + 32); + this->i3 = vld1q_u8(ptr + 48); + } + + really_inline uint64_t eq(uint8_t m) { + const uint8x16_t mask = vmovq_n_u8(m); + uint8x16_t cmp_res_0 = vceqq_u8(this->i0, mask); + uint8x16_t cmp_res_1 = vceqq_u8(this->i1, mask); + uint8x16_t cmp_res_2 = vceqq_u8(this->i2, mask); + uint8x16_t cmp_res_3 = vceqq_u8(this->i3, mask); + return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3); + } + + really_inline uint64_t lteq(uint8_t m) { + const uint8x16_t mask = vmovq_n_u8(m); + 
uint8x16_t cmp_res_0 = vcleq_u8(this->i0, mask); + uint8x16_t cmp_res_1 = vcleq_u8(this->i1, mask); + uint8x16_t cmp_res_2 = vcleq_u8(this->i2, mask); + uint8x16_t cmp_res_3 = vcleq_u8(this->i3, mask); + return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3); + } + +}; // struct simd_input } // namespace simdjson diff --git a/include/simdjson/simd_input_haswell.h b/include/simdjson/simd_input_haswell.h index 8f5b9973..30fc933c 100644 --- a/include/simdjson/simd_input_haswell.h +++ b/include/simdjson/simd_input_haswell.h @@ -12,38 +12,31 @@ template <> struct simd_input { __m256i lo; __m256i hi; -}; -template <> -really_inline simd_input -fill_input(const uint8_t *ptr) { - struct simd_input in; - in.lo = _mm256_loadu_si256(reinterpret_cast(ptr + 0)); - in.hi = _mm256_loadu_si256(reinterpret_cast(ptr + 32)); - return in; -} + really_inline simd_input(const uint8_t *ptr) { + this->lo = _mm256_loadu_si256(reinterpret_cast(ptr + 0)); + this->hi = _mm256_loadu_si256(reinterpret_cast(ptr + 32)); + } -template <> -really_inline uint64_t cmp_mask_against_input( - simd_input in, uint8_t m) { - const __m256i mask = _mm256_set1_epi8(m); - __m256i cmp_res_0 = _mm256_cmpeq_epi8(in.lo, mask); - uint64_t res_0 = static_cast(_mm256_movemask_epi8(cmp_res_0)); - __m256i cmp_res_1 = _mm256_cmpeq_epi8(in.hi, mask); - uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); - return res_0 | (res_1 << 32); -} + really_inline uint64_t eq(uint8_t m) { + const __m256i mask = _mm256_set1_epi8(m); + __m256i cmp_res_0 = _mm256_cmpeq_epi8(this->lo, mask); + uint64_t res_0 = static_cast(_mm256_movemask_epi8(cmp_res_0)); + __m256i cmp_res_1 = _mm256_cmpeq_epi8(this->hi, mask); + uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); + return res_0 | (res_1 << 32); + } -template <> -really_inline uint64_t unsigned_lteq_against_input( - simd_input in, uint8_t m) { - const __m256i maxval = _mm256_set1_epi8(m); - __m256i cmp_res_0 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, in.lo), maxval); - 
uint64_t res_0 = static_cast(_mm256_movemask_epi8(cmp_res_0)); - __m256i cmp_res_1 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, in.hi), maxval); - uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); - return res_0 | (res_1 << 32); -} + really_inline uint64_t lteq(uint8_t m) { + const __m256i maxval = _mm256_set1_epi8(m); + __m256i cmp_res_0 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, this->lo), maxval); + uint64_t res_0 = static_cast(_mm256_movemask_epi8(cmp_res_0)); + __m256i cmp_res_1 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, this->hi), maxval); + uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); + return res_0 | (res_1 << 32); + } + +}; // struct simd_input } // namespace simdjson UNTARGET_REGION diff --git a/include/simdjson/simd_input_westmere.h b/include/simdjson/simd_input_westmere.h index 3082946c..dfd4b118 100644 --- a/include/simdjson/simd_input_westmere.h +++ b/include/simdjson/simd_input_westmere.h @@ -14,48 +14,41 @@ struct simd_input { __m128i v1; __m128i v2; __m128i v3; -}; -template <> -really_inline simd_input -fill_input(const uint8_t *ptr) { - struct simd_input in; - in.v0 = _mm_loadu_si128(reinterpret_cast(ptr + 0)); - in.v1 = _mm_loadu_si128(reinterpret_cast(ptr + 16)); - in.v2 = _mm_loadu_si128(reinterpret_cast(ptr + 32)); - in.v3 = _mm_loadu_si128(reinterpret_cast(ptr + 48)); - return in; -} + really_inline simd_input(const uint8_t *ptr) { + this->v0 = _mm_loadu_si128(reinterpret_cast(ptr + 0)); + this->v1 = _mm_loadu_si128(reinterpret_cast(ptr + 16)); + this->v2 = _mm_loadu_si128(reinterpret_cast(ptr + 32)); + this->v3 = _mm_loadu_si128(reinterpret_cast(ptr + 48)); + } -template <> -really_inline uint64_t cmp_mask_against_input( - simd_input in, uint8_t m) { - const __m128i mask = _mm_set1_epi8(m); - __m128i cmp_res_0 = _mm_cmpeq_epi8(in.v0, mask); - uint64_t res_0 = _mm_movemask_epi8(cmp_res_0); - __m128i cmp_res_1 = _mm_cmpeq_epi8(in.v1, mask); - uint64_t res_1 = _mm_movemask_epi8(cmp_res_1); - __m128i cmp_res_2 = _mm_cmpeq_epi8(in.v2, 
mask); - uint64_t res_2 = _mm_movemask_epi8(cmp_res_2); - __m128i cmp_res_3 = _mm_cmpeq_epi8(in.v3, mask); - uint64_t res_3 = _mm_movemask_epi8(cmp_res_3); - return res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48); -} + really_inline uint64_t eq(uint8_t m) { + const __m128i mask = _mm_set1_epi8(m); + __m128i cmp_res_0 = _mm_cmpeq_epi8(this->v0, mask); + uint64_t res_0 = _mm_movemask_epi8(cmp_res_0); + __m128i cmp_res_1 = _mm_cmpeq_epi8(this->v1, mask); + uint64_t res_1 = _mm_movemask_epi8(cmp_res_1); + __m128i cmp_res_2 = _mm_cmpeq_epi8(this->v2, mask); + uint64_t res_2 = _mm_movemask_epi8(cmp_res_2); + __m128i cmp_res_3 = _mm_cmpeq_epi8(this->v3, mask); + uint64_t res_3 = _mm_movemask_epi8(cmp_res_3); + return res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48); + } -template <> -really_inline uint64_t unsigned_lteq_against_input( - simd_input in, uint8_t m) { - const __m128i maxval = _mm_set1_epi8(m); - __m128i cmp_res_0 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v0), maxval); - uint64_t res_0 = _mm_movemask_epi8(cmp_res_0); - __m128i cmp_res_1 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v1), maxval); - uint64_t res_1 = _mm_movemask_epi8(cmp_res_1); - __m128i cmp_res_2 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v2), maxval); - uint64_t res_2 = _mm_movemask_epi8(cmp_res_2); - __m128i cmp_res_3 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v3), maxval); - uint64_t res_3 = _mm_movemask_epi8(cmp_res_3); - return res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48); -} + really_inline uint64_t lteq(uint8_t m) { + const __m128i maxval = _mm_set1_epi8(m); + __m128i cmp_res_0 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, this->v0), maxval); + uint64_t res_0 = _mm_movemask_epi8(cmp_res_0); + __m128i cmp_res_1 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, this->v1), maxval); + uint64_t res_1 = _mm_movemask_epi8(cmp_res_1); + __m128i cmp_res_2 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, this->v2), maxval); + uint64_t res_2 = _mm_movemask_epi8(cmp_res_2); + __m128i cmp_res_3 = 
_mm_cmpeq_epi8(_mm_max_epu8(maxval, this->v3), maxval); + uint64_t res_3 = _mm_movemask_epi8(cmp_res_3); + return res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48); + } + +}; // struct simd_input } // namespace simdjson UNTARGET_REGION diff --git a/include/simdjson/simdutf8check.h b/include/simdjson/simdutf8check.h index 6097e28e..2d98703d 100644 --- a/include/simdjson/simdutf8check.h +++ b/include/simdjson/simdutf8check.h @@ -6,15 +6,14 @@ namespace simdjson { -// Holds the state required to perform check_utf8(). -template struct utf8_checking_state; - +// Checks UTF8, chunk by chunk. template -void check_utf8(simd_input in, utf8_checking_state &state); - -// Checks if the utf8 validation has found any error. -template -ErrorValues check_utf8_errors(utf8_checking_state &state); +struct utf8_checker { + // Process the next chunk of input. + void check_next_input(simd_input in); + // Find out what (if any) errors have occurred + ErrorValues errors(); +}; } // namespace simdjson diff --git a/include/simdjson/simdutf8check_arm64.h b/include/simdjson/simdutf8check_arm64.h index 4b0baa30..2d8e3ec3 100644 --- a/include/simdjson/simdutf8check_arm64.h +++ b/include/simdjson/simdutf8check_arm64.h @@ -177,12 +177,6 @@ check_utf8_bytes(int8x16_t current_bytes, struct processed_utf_bytes *previous, return pb; } -template <> -struct utf8_checking_state { - int8x16_t has_error{}; - processed_utf_bytes previous{}; -}; - // Checks that all bytes are ascii really_inline bool check_ascii_neon(simd_input in) { // checking if the most significant bit is always equal to 0. @@ -198,41 +192,43 @@ really_inline bool check_ascii_neon(simd_input in) { } template <> -really_inline void check_utf8( - simd_input in, - utf8_checking_state &state) { - if (check_ascii_neon(in)) { - // All bytes are ascii. Therefore the byte that was just before must be - // ascii too. We only check the byte that was just before simd_input. Nines - // are arbitrary values. 
- const int8x16_t verror = - (int8x16_t){9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1}; - state.has_error = - vorrq_s8(vreinterpretq_s8_u8( - vcgtq_s8(state.previous.carried_continuations, verror)), - state.has_error); - } else { - // it is not ascii so we have to do heavy work - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i0), - &(state.previous), &(state.has_error)); - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i1), - &(state.previous), &(state.has_error)); - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i2), - &(state.previous), &(state.has_error)); - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i3), - &(state.previous), &(state.has_error)); - } -} +struct utf8_checker { + int8x16_t has_error{}; + processed_utf_bytes previous{}; -template <> -really_inline ErrorValues check_utf8_errors( - utf8_checking_state &state) { - uint64x2_t v64 = vreinterpretq_u64_s8(state.has_error); - uint32x2_t v32 = vqmovn_u64(v64); - uint64x1_t result = vreinterpret_u64_u32(v32); - return vget_lane_u64(result, 0) != 0 ? simdjson::UTF8_ERROR - : simdjson::SUCCESS; -} + really_inline void check_next_input(simd_input in) { + if (check_ascii_neon(in)) { + // All bytes are ascii. Therefore the byte that was just before must be + // ascii too. We only check the byte that was just before simd_input. Nines + // are arbitrary values. 
+ const int8x16_t verror = + (int8x16_t){9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1}; + this->has_error = + vorrq_s8(vreinterpretq_s8_u8( + vcgtq_s8(this->previous.carried_continuations, verror)), + this->has_error); + } else { + // it is not ascii so we have to do heavy work + this->previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i0), + &(this->previous), &(this->has_error)); + this->previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i1), + &(this->previous), &(this->has_error)); + this->previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i2), + &(this->previous), &(this->has_error)); + this->previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i3), + &(this->previous), &(this->has_error)); + } + } + + really_inline ErrorValues errors() { + uint64x2_t v64 = vreinterpretq_u64_s8(this->has_error); + uint32x2_t v32 = vqmovn_u64(v64); + uint64x1_t result = vreinterpret_u64_u32(v32); + return vget_lane_u64(result, 0) != 0 ? simdjson::UTF8_ERROR + : simdjson::SUCCESS; + } + +}; // struct utf8_checker } // namespace simdjson #endif diff --git a/include/simdjson/simdutf8check_haswell.h b/include/simdjson/simdutf8check_haswell.h index 355d6247..e99d92dd 100644 --- a/include/simdjson/simdutf8check_haswell.h +++ b/include/simdjson/simdutf8check_haswell.h @@ -192,46 +192,43 @@ avx_check_utf8_bytes(__m256i current_bytes, return pb; } -template <> struct utf8_checking_state { +template <> +struct utf8_checker { __m256i has_error; avx_processed_utf_bytes previous; - utf8_checking_state() { + + utf8_checker() { has_error = _mm256_setzero_si256(); previous.raw_bytes = _mm256_setzero_si256(); previous.high_nibbles = _mm256_setzero_si256(); previous.carried_continuations = _mm256_setzero_si256(); } -}; -template <> -really_inline void check_utf8( - simd_input in, - utf8_checking_state &state) { - __m256i high_bit = _mm256_set1_epi8(0x80u); - if ((_mm256_testz_si256(_mm256_or_si256(in.lo, in.hi), high_bit)) == 1) { - // it is ascii, we just check continuation - state.has_error = 
_mm256_or_si256( - _mm256_cmpgt_epi8(state.previous.carried_continuations, - _mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 1)), - state.has_error); - } else { - // it is not ascii so we have to do heavy work - state.previous = - avx_check_utf8_bytes(in.lo, &(state.previous), &(state.has_error)); - state.previous = - avx_check_utf8_bytes(in.hi, &(state.previous), &(state.has_error)); + really_inline void check_next_input(simd_input in) { + __m256i high_bit = _mm256_set1_epi8(0x80u); + if ((_mm256_testz_si256(_mm256_or_si256(in.lo, in.hi), high_bit)) == 1) { + // it is ascii, we just check continuation + this->has_error = _mm256_or_si256( + _mm256_cmpgt_epi8(this->previous.carried_continuations, + _mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 1)), + this->has_error); + } else { + // it is not ascii so we have to do heavy work + this->previous = + avx_check_utf8_bytes(in.lo, &(this->previous), &(this->has_error)); + this->previous = + avx_check_utf8_bytes(in.hi, &(this->previous), &(this->has_error)); + } } -} -template <> -really_inline ErrorValues check_utf8_errors( - utf8_checking_state &state) { - return _mm256_testz_si256(state.has_error, state.has_error) == 0 - ? simdjson::UTF8_ERROR - : simdjson::SUCCESS; -} + really_inline ErrorValues errors() { + return _mm256_testz_si256(this->has_error, this->has_error) == 0 + ? 
simdjson::UTF8_ERROR + : simdjson::SUCCESS; + } +}; // struct utf8_checker } // namespace simdjson UNTARGET_REGION // haswell diff --git a/include/simdjson/simdutf8check_westmere.h b/include/simdjson/simdutf8check_westmere.h index 46361cdb..b1c1ae91 100644 --- a/include/simdjson/simdutf8check_westmere.h +++ b/include/simdjson/simdutf8check_westmere.h @@ -31,6 +31,7 @@ TARGET_WESTMERE namespace simdjson { + // all byte values must be no larger than 0xF4 static inline void check_smaller_than_0xF4(__m128i current_bytes, __m128i *has_error) { @@ -164,58 +165,54 @@ check_utf8_bytes(__m128i current_bytes, struct processed_utf_bytes *previous, } template <> -struct utf8_checking_state { +struct utf8_checker { __m128i has_error = _mm_setzero_si128(); processed_utf_bytes previous{ _mm_setzero_si128(), // raw_bytes _mm_setzero_si128(), // high_nibbles _mm_setzero_si128() // carried_continuations }; -}; -template <> -really_inline void check_utf8( - simd_input in, - utf8_checking_state &state) { - __m128i high_bit = _mm_set1_epi8(0x80u); - if ((_mm_testz_si128(_mm_or_si128(in.v0, in.v1), high_bit)) == 1) { - // it is ascii, we just check continuation - state.has_error = - _mm_or_si128(_mm_cmpgt_epi8(state.previous.carried_continuations, - _mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 1)), - state.has_error); - } else { - // it is not ascii so we have to do heavy work - state.previous = - check_utf8_bytes(in.v0, &(state.previous), &(state.has_error)); - state.previous = - check_utf8_bytes(in.v1, &(state.previous), &(state.has_error)); + really_inline void check_next_input(simd_input in) { + __m128i high_bit = _mm_set1_epi8(0x80u); + if ((_mm_testz_si128(_mm_or_si128(in.v0, in.v1), high_bit)) == 1) { + // it is ascii, we just check continuation + this->has_error = + _mm_or_si128(_mm_cmpgt_epi8(this->previous.carried_continuations, + _mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 1)), + this->has_error); + } else { + // it is not ascii so we have to 
do heavy work + this->previous = + check_utf8_bytes(in.v0, &(this->previous), &(this->has_error)); + this->previous = + check_utf8_bytes(in.v1, &(this->previous), &(this->has_error)); + } + + if ((_mm_testz_si128(_mm_or_si128(in.v2, in.v3), high_bit)) == 1) { + // it is ascii, we just check continuation + this->has_error = + _mm_or_si128(_mm_cmpgt_epi8(this->previous.carried_continuations, + _mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 1)), + this->has_error); + } else { + // it is not ascii so we have to do heavy work + this->previous = + check_utf8_bytes(in.v2, &(this->previous), &(this->has_error)); + this->previous = + check_utf8_bytes(in.v3, &(this->previous), &(this->has_error)); + } } - if ((_mm_testz_si128(_mm_or_si128(in.v2, in.v3), high_bit)) == 1) { - // it is ascii, we just check continuation - state.has_error = - _mm_or_si128(_mm_cmpgt_epi8(state.previous.carried_continuations, - _mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 1)), - state.has_error); - } else { - // it is not ascii so we have to do heavy work - state.previous = - check_utf8_bytes(in.v2, &(state.previous), &(state.has_error)); - state.previous = - check_utf8_bytes(in.v3, &(state.previous), &(state.has_error)); + really_inline ErrorValues errors() { + return _mm_testz_si128(this->has_error, this->has_error) == 0 + ? simdjson::UTF8_ERROR + : simdjson::SUCCESS; } -} -template <> -really_inline ErrorValues check_utf8_errors( - utf8_checking_state &state) { - return _mm_testz_si128(state.has_error, state.has_error) == 0 - ? simdjson::UTF8_ERROR - : simdjson::SUCCESS; -} +}; // struct utf8_checker } // namespace simdjson UNTARGET_REGION // westmere diff --git a/include/simdjson/stage1_find_marks.h b/include/simdjson/stage1_find_marks.h index f0a644e7..a1863e6d 100644 --- a/include/simdjson/stage1_find_marks.h +++ b/include/simdjson/stage1_find_marks.h @@ -25,16 +25,6 @@ namespace { } } // namespace -// Holds the state required to perform check_utf8(). 
-template struct utf8_checking_state; - -template -void check_utf8(simd_input in, utf8_checking_state &state); - -// Checks if the utf8 validation has found any error. -template -ErrorValues check_utf8_errors(utf8_checking_state &state); - template really_inline uint64_t find_odd_backslash_sequences( simd_input in, uint64_t &prev_iter_ends_odd_backslash); diff --git a/include/simdjson/stage1_find_marks_common.h b/include/simdjson/stage1_find_marks_common.h index 26ef6ad1..eb546491 100644 --- a/include/simdjson/stage1_find_marks_common.h +++ b/include/simdjson/stage1_find_marks_common.h @@ -24,7 +24,7 @@ really_inline uint64_t find_odd_backslash_sequences( uint64_t &prev_iter_ends_odd_backslash) { const uint64_t even_bits = 0x5555555555555555ULL; const uint64_t odd_bits = ~even_bits; - uint64_t bs_bits = cmp_mask_against_input(in, '\\'); + uint64_t bs_bits = in.eq('\\'); uint64_t start_edges = bs_bits & ~(bs_bits << 1); /* flip lowest if we have an odd-length run at the end of the prior * iteration */ @@ -71,7 +71,7 @@ really_inline uint64_t find_quote_mask_and_bits( simd_input in, uint64_t odd_ends, uint64_t &prev_iter_inside_quote, uint64_t &quote_bits, uint64_t &error_mask) { - quote_bits = cmp_mask_against_input(in, '"'); + quote_bits = in.eq('"'); quote_bits = quote_bits & ~odd_ends; uint64_t quote_mask = compute_quote_mask(quote_bits); quote_mask ^= prev_iter_inside_quote; @@ -80,8 +80,7 @@ really_inline uint64_t find_quote_mask_and_bits( * quotation mark, reverse solidus, and the control characters (U+0000 * through U+001F). 
* https://tools.ietf.org/html/rfc8259 */ - uint64_t unescaped = - unsigned_lteq_against_input(in, 0x1F); + uint64_t unescaped = in.lteq(0x1F); error_mask |= quote_mask & unescaped; /* right shift of a signed value expected to be well-defined and standard * compliant as of C++20, @@ -97,9 +96,9 @@ really_inline void find_structural_bits_64( uint64_t &prev_iter_ends_odd_backslash, uint64_t &prev_iter_inside_quote, uint64_t &prev_iter_ends_pseudo_pred, uint64_t &structurals, uint64_t &error_mask, - utf8_checking_state &utf8_state) { - simd_input in = fill_input(buf); - check_utf8(in, utf8_state); + utf8_checker &utf8_state) { + simd_input in(buf); + utf8_state.check_next_input(in); /* detect odd sequences of backslashes */ uint64_t odd_ends = find_odd_backslash_sequences( in, prev_iter_ends_odd_backslash); @@ -136,7 +135,7 @@ int find_structural_bits(const uint8_t *buf, size_t len, } uint32_t *base_ptr = pj.structural_indexes; uint32_t base = 0; - utf8_checking_state utf8_state; + utf8_checker utf8_state; /* we have padded the input out to 64 byte multiple with the remainder * being zeros persistent state across loop does the last iteration end @@ -208,8 +207,7 @@ int find_structural_bits(const uint8_t *buf, size_t len, } if (len != base_ptr[pj.n_structural_indexes - 1]) { /* the string might not be NULL terminated, but we add a virtual NULL - * ending - * character. */ + * ending character. */ base_ptr[pj.n_structural_indexes++] = len; } /* make it safe to dereference one beyond this array */ @@ -217,7 +215,7 @@ int find_structural_bits(const uint8_t *buf, size_t len, if (error_mask) { return simdjson::UNESCAPED_CHARS; } - return check_utf8_errors(utf8_state); + return utf8_state.errors(); } } // namespace simdjson