Use simd_input generic methods for utf8 checking (#301)
* Use generic each/reduce in simdutf8check * Remove macros from generic simd_input uses * Use array instead of members to store simd registers * Default local checkperf to clone from .
This commit is contained in:
parent
5765c81f66
commit
f7e893667d
|
@ -14,6 +14,8 @@ steps:
|
|||
- make amalgamate
|
||||
- name: checkperf
|
||||
image: gcc:8
|
||||
environment:
|
||||
CHECKPERF_REPOSITORY: https://github.com/lemire/simdjson
|
||||
commands:
|
||||
- make checkperf
|
||||
---
|
||||
|
@ -33,6 +35,8 @@ steps:
|
|||
- make amalgamate
|
||||
- name: checkperf
|
||||
image: gcc:8
|
||||
environment:
|
||||
CHECKPERF_REPOSITORY: https://github.com/lemire/simdjson
|
||||
commands:
|
||||
- make checkperf
|
||||
---
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
set -e
|
||||
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
|
||||
|
||||
if [ -z "$CHECKPERF_REPOSITORY"]; then CHECKPERF_REPOSITORY=.; fi
|
||||
|
||||
# Arguments: perfdiff.sh <branch> <test json files>
|
||||
if [ -z "$1" ]; then reference_branch="master"; else reference_branch=$1; shift; fi
|
||||
if [ -z "$*" ]; then perftests="jsonexamples/twitter.json"; else perftests=$*; fi
|
||||
|
@ -13,7 +15,7 @@ current=$SCRIPTPATH/..
|
|||
reference=$current/benchbranch/$reference_branch
|
||||
rm -rf $reference
|
||||
mkdir -p $reference
|
||||
git clone --depth 1 -b $reference_branch https://github.com/lemire/simdjson $reference
|
||||
git clone --depth 1 -b $reference_branch $CHECKPERF_REPOSITORY $reference
|
||||
cd $reference
|
||||
make parse
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
#ifdef IS_ARM64
|
||||
|
||||
namespace simdjson {
|
||||
namespace simdjson::arm64 {
|
||||
|
||||
really_inline uint16_t neon_movemask(uint8x16_t input) {
|
||||
const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
|
||||
|
@ -32,49 +32,68 @@ really_inline uint64_t neon_movemask_bulk(uint8x16_t p0, uint8x16_t p1,
|
|||
return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0);
|
||||
}
|
||||
|
||||
} // namespace simdjson::arm64
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
using namespace simdjson::arm64;
|
||||
|
||||
template <>
|
||||
struct simd_input<Architecture::ARM64> {
|
||||
uint8x16_t i0;
|
||||
uint8x16_t i1;
|
||||
uint8x16_t i2;
|
||||
uint8x16_t i3;
|
||||
uint8x16_t chunks[4];
|
||||
|
||||
really_inline simd_input(const uint8_t *ptr) {
|
||||
this->i0 = vld1q_u8(ptr + 0);
|
||||
this->i1 = vld1q_u8(ptr + 16);
|
||||
this->i2 = vld1q_u8(ptr + 32);
|
||||
this->i3 = vld1q_u8(ptr + 48);
|
||||
this->chunks[0] = vld1q_u8(ptr + 0*16);
|
||||
this->chunks[1] = vld1q_u8(ptr + 1*16);
|
||||
this->chunks[2] = vld1q_u8(ptr + 2*16);
|
||||
this->chunks[3] = vld1q_u8(ptr + 3*16);
|
||||
}
|
||||
|
||||
really_inline simd_input(uint8x16_t a0, uint8x16_t a1, uint8x16_t a2, uint8x16_t a3) {
|
||||
this->i0 = a0;
|
||||
this->i1 = a1;
|
||||
this->i2 = a2;
|
||||
this->i3 = a3;
|
||||
really_inline simd_input(uint8x16_t chunk0, uint8x16_t chunk1, uint8x16_t chunk2, uint8x16_t chunk3) {
|
||||
this->chunks[0] = chunk0;
|
||||
this->chunks[1] = chunk1;
|
||||
this->chunks[2] = chunk2;
|
||||
this->chunks[3] = chunk3;
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline void each(F const& each_chunk)
|
||||
{
|
||||
each_chunk(this->chunks[0]);
|
||||
each_chunk(this->chunks[1]);
|
||||
each_chunk(this->chunks[2]);
|
||||
each_chunk(this->chunks[3]);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline simd_input<Architecture::ARM64> map(F const& map_chunk) {
|
||||
return simd_input<Architecture::ARM64>(
|
||||
map_chunk(this->i0),
|
||||
map_chunk(this->i1),
|
||||
map_chunk(this->i2),
|
||||
map_chunk(this->i3)
|
||||
map_chunk(this->chunks[0]),
|
||||
map_chunk(this->chunks[1]),
|
||||
map_chunk(this->chunks[2]),
|
||||
map_chunk(this->chunks[3])
|
||||
);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline simd_input<Architecture::ARM64> map(simd_input<Architecture::ARM64> b, F const& map_chunk) {
|
||||
return simd_input<Architecture::ARM64>(
|
||||
map_chunk(this->i0, b.i0),
|
||||
map_chunk(this->i1, b.i1),
|
||||
map_chunk(this->i2, b.i2),
|
||||
map_chunk(this->i3, b.i3)
|
||||
map_chunk(this->chunks[0], b.chunks[0]),
|
||||
map_chunk(this->chunks[1], b.chunks[1]),
|
||||
map_chunk(this->chunks[2], b.chunks[2]),
|
||||
map_chunk(this->chunks[3], b.chunks[3])
|
||||
);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline uint8x16_t reduce(F const& reduce_pair) {
|
||||
uint8x16_t r01 = reduce_pair(this->chunks[0], this->chunks[1]);
|
||||
uint8x16_t r23 = reduce_pair(this->chunks[2], this->chunks[3]);
|
||||
return reduce_pair(r01, r23);
|
||||
}
|
||||
|
||||
really_inline uint64_t to_bitmask() {
|
||||
return neon_movemask_bulk(this->i0, this->i1, this->i2, this->i3);
|
||||
return neon_movemask_bulk(this->chunks[0], this->chunks[1], this->chunks[2], this->chunks[3]);
|
||||
}
|
||||
|
||||
really_inline uint64_t eq(uint8_t m) {
|
||||
|
|
|
@ -181,11 +181,11 @@ check_utf8_bytes(int8x16_t current_bytes, struct processed_utf_bytes *previous,
|
|||
really_inline bool check_ascii_neon(simd_input<Architecture::ARM64> in) {
|
||||
// checking if the most significant bit is always equal to 0.
|
||||
uint8x16_t high_bit = vdupq_n_u8(0x80);
|
||||
uint8x16_t t0 = vorrq_u8(in.i0, in.i1);
|
||||
uint8x16_t t1 = vorrq_u8(in.i2, in.i3);
|
||||
uint8x16_t t3 = vorrq_u8(t0, t1);
|
||||
uint8x16_t t4 = vandq_u8(t3, high_bit);
|
||||
uint64x2_t v64 = vreinterpretq_u64_u8(t4);
|
||||
uint8x16_t any_bits_on = in.reduce([&](auto a, auto b) {
|
||||
return vorrq_u8(a, b);
|
||||
});
|
||||
uint8x16_t high_bit_on = vandq_u8(any_bits_on, high_bit);
|
||||
uint64x2_t v64 = vreinterpretq_u64_u8(high_bit_on);
|
||||
uint32x2_t v32 = vqmovn_u64(v64);
|
||||
uint64x1_t result = vreinterpret_u64_u32(v32);
|
||||
return vget_lane_u64(result, 0) == 0;
|
||||
|
@ -215,14 +215,9 @@ struct utf8_checker<Architecture::ARM64> {
|
|||
this->has_error);
|
||||
} else {
|
||||
// it is not ascii so we have to do heavy work
|
||||
this->previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i0),
|
||||
&(this->previous), &(this->has_error));
|
||||
this->previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i1),
|
||||
&(this->previous), &(this->has_error));
|
||||
this->previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i2),
|
||||
&(this->previous), &(this->has_error));
|
||||
this->previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i3),
|
||||
&(this->previous), &(this->has_error));
|
||||
in.each([&](auto _in) {
|
||||
this->previous = check_utf8_bytes(vreinterpretq_s8_u8(_in), &(this->previous), &(this->has_error));
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -39,10 +39,14 @@ really_inline void find_whitespace_and_structurals(
|
|||
});
|
||||
|
||||
const uint8x16_t structural_shufti_mask = vmovq_n_u8(0x7);
|
||||
structurals = MAP_BITMASK( v, vtstq_u8(_v, structural_shufti_mask) );
|
||||
structurals = v.map([&](auto _v) {
|
||||
return vtstq_u8(_v, structural_shufti_mask);
|
||||
}).to_bitmask();
|
||||
|
||||
const uint8x16_t whitespace_shufti_mask = vmovq_n_u8(0x18);
|
||||
whitespace = MAP_BITMASK( v, vtstq_u8(_v, whitespace_shufti_mask) );
|
||||
whitespace = v.map([&](auto _v) {
|
||||
return vtstq_u8(_v, whitespace_shufti_mask);
|
||||
}).to_bitmask();
|
||||
}
|
||||
|
||||
#include "generic/stage1_find_marks_flatten.h"
|
||||
|
|
|
@ -10,38 +10,51 @@ namespace simdjson {
|
|||
|
||||
template <>
|
||||
struct simd_input<Architecture::HASWELL> {
|
||||
__m256i lo;
|
||||
__m256i hi;
|
||||
__m256i chunks[2];
|
||||
|
||||
really_inline simd_input(const uint8_t *ptr) {
|
||||
this->lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 0));
|
||||
this->hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 32));
|
||||
really_inline simd_input(const uint8_t *ptr)
|
||||
{
|
||||
this->chunks[0] = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 0*32));
|
||||
this->chunks[1] = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 1*32));
|
||||
}
|
||||
|
||||
really_inline simd_input(__m256i a_lo, __m256i a_hi) {
|
||||
this->lo = a_lo;
|
||||
this->hi = a_hi;
|
||||
really_inline simd_input(__m256i chunk0, __m256i chunk1)
|
||||
{
|
||||
this->chunks[0] = chunk0;
|
||||
this->chunks[1] = chunk1;
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline void each(F const& each_chunk)
|
||||
{
|
||||
each_chunk(this->chunks[0]);
|
||||
each_chunk(this->chunks[1]);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline simd_input<Architecture::HASWELL> map(F const& map_chunk) {
|
||||
return simd_input<Architecture::HASWELL>(
|
||||
map_chunk(this->lo),
|
||||
map_chunk(this->hi)
|
||||
map_chunk(this->chunks[0]),
|
||||
map_chunk(this->chunks[1])
|
||||
);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline simd_input<Architecture::HASWELL> map(simd_input<Architecture::HASWELL> b, F const& map_chunk) {
|
||||
return simd_input<Architecture::HASWELL>(
|
||||
map_chunk(this->lo, b.lo),
|
||||
map_chunk(this->hi, b.hi)
|
||||
map_chunk(this->chunks[0], b.chunks[0]),
|
||||
map_chunk(this->chunks[1], b.chunks[1])
|
||||
);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline __m256i reduce(F const& reduce_pair) {
|
||||
return reduce_pair(this->chunks[0], this->chunks[1]);
|
||||
}
|
||||
|
||||
really_inline uint64_t to_bitmask() {
|
||||
uint64_t r_lo = static_cast<uint32_t>(_mm256_movemask_epi8(this->lo));
|
||||
uint64_t r_hi = _mm256_movemask_epi8(this->hi);
|
||||
uint64_t r_lo = static_cast<uint32_t>(_mm256_movemask_epi8(this->chunks[0]));
|
||||
uint64_t r_hi = _mm256_movemask_epi8(this->chunks[1]);
|
||||
return r_lo | (r_hi << 32);
|
||||
}
|
||||
|
||||
|
|
|
@ -215,7 +215,10 @@ struct utf8_checker<Architecture::HASWELL> {
|
|||
|
||||
really_inline void check_next_input(simd_input<Architecture::HASWELL> in) {
|
||||
__m256i high_bit = _mm256_set1_epi8(0x80u);
|
||||
if ((_mm256_testz_si256(_mm256_or_si256(in.lo, in.hi), high_bit)) == 1) {
|
||||
__m256i any_bits_on = in.reduce([&](auto a, auto b) {
|
||||
return _mm256_or_si256(a, b);
|
||||
});
|
||||
if ((_mm256_testz_si256(any_bits_on, high_bit)) == 1) {
|
||||
// it is ascii, we just check continuation
|
||||
this->has_error = _mm256_or_si256(
|
||||
_mm256_cmpgt_epi8(this->previous.carried_continuations,
|
||||
|
@ -225,10 +228,9 @@ struct utf8_checker<Architecture::HASWELL> {
|
|||
this->has_error);
|
||||
} else {
|
||||
// it is not ascii so we have to do heavy work
|
||||
this->previous =
|
||||
avx_check_utf8_bytes(in.lo, &(this->previous), &(this->has_error));
|
||||
this->previous =
|
||||
avx_check_utf8_bytes(in.hi, &(this->previous), &(this->has_error));
|
||||
in.each([&](auto _in) {
|
||||
this->previous = avx_check_utf8_bytes(_in, &(this->previous), &(this->has_error));
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -70,7 +70,9 @@ really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
|
|||
const __m256i struct_offset = _mm256_set1_epi8(0xd4u);
|
||||
const __m256i struct_mask = _mm256_set1_epi8(32);
|
||||
|
||||
whitespace = MAP_BITMASK( in, _mm256_cmpeq_epi8(_in, _mm256_shuffle_epi8(white_table, _in)) );
|
||||
whitespace = in.map([&](auto _in) {
|
||||
return _mm256_cmpeq_epi8(_in, _mm256_shuffle_epi8(white_table, _in));
|
||||
}).to_bitmask();
|
||||
|
||||
structurals = in.map([&](auto _in) {
|
||||
const __m256i r1 = _mm256_add_epi8(struct_offset, _in);
|
||||
|
|
|
@ -4,19 +4,24 @@
|
|||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/portability.h"
|
||||
#include "simdjson/simdjson.h"
|
||||
#include <cassert>
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
template <Architecture T>
|
||||
struct simd_input {
|
||||
simd_input(const uint8_t *ptr);
|
||||
// Run an operation on each chunk.
|
||||
template <typename F>
|
||||
really_inline void each(F const& each_chunk);
|
||||
// Map through each simd register in this input, producing another simd_input.
|
||||
template <typename F>
|
||||
really_inline simd_input<T> map(F const& map_chunk);
|
||||
// Map through each simd register across two inputs, producing a single simd_input.
|
||||
template <typename F>
|
||||
really_inline simd_input<T> map(simd_input<T> b, F const& map_chunk);
|
||||
// Run a horizontal operation like "sum" across the whole input
|
||||
// template <typename F>
|
||||
// really_inline simd<T> reduce(F const& map_chunk);
|
||||
// turn this bytemask (usually the result of a simd comparison operation) into a bitmask.
|
||||
uint64_t to_bitmask();
|
||||
// a straightforward comparison of a mask against input.
|
||||
|
@ -25,11 +30,6 @@ struct simd_input {
|
|||
uint64_t lteq(uint8_t m);
|
||||
}; // struct simd_input
|
||||
|
||||
#define MAP_CHUNKS(A, EXPR) A.map([&](auto _##A) { return (EXPR); })
|
||||
#define MAP_BITMASK(A, EXPR) MAP_CHUNKS(A, EXPR).to_bitmask()
|
||||
#define MAP_CHUNKS2(A, B, EXPR) A.map((B), [&](auto _##A, auto _##B) { return (EXPR); })
|
||||
#define MAP_BITMASK2(A, B, EXPR) MAP_CHUNKS2(A, B, EXPR).to_bitmask()
|
||||
|
||||
} // namespace simdjson
|
||||
|
||||
#endif
|
||||
|
|
|
@ -10,51 +10,64 @@ namespace simdjson {
|
|||
|
||||
template <>
|
||||
struct simd_input<Architecture::WESTMERE> {
|
||||
__m128i v0;
|
||||
__m128i v1;
|
||||
__m128i v2;
|
||||
__m128i v3;
|
||||
__m128i chunks[4];
|
||||
|
||||
really_inline simd_input(const uint8_t *ptr) {
|
||||
this->v0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 0));
|
||||
this->v1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 16));
|
||||
this->v2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 32));
|
||||
this->v3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 48));
|
||||
this->chunks[0] = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 0));
|
||||
this->chunks[1] = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 16));
|
||||
this->chunks[2] = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 32));
|
||||
this->chunks[3] = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 48));
|
||||
}
|
||||
|
||||
really_inline simd_input(__m128i i0, __m128i i1, __m128i i2, __m128i i3)
|
||||
{
|
||||
this->v0 = i0;
|
||||
this->v1 = i1;
|
||||
this->v2 = i2;
|
||||
this->v3 = i3;
|
||||
this->chunks[0] = i0;
|
||||
this->chunks[1] = i1;
|
||||
this->chunks[2] = i2;
|
||||
this->chunks[3] = i3;
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline void each(F const& each_chunk)
|
||||
{
|
||||
each_chunk(this->chunks[0]);
|
||||
each_chunk(this->chunks[1]);
|
||||
each_chunk(this->chunks[2]);
|
||||
each_chunk(this->chunks[3]);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline simd_input<Architecture::WESTMERE> map(F const& map_chunk) {
|
||||
return simd_input<Architecture::WESTMERE>(
|
||||
map_chunk(this->v0),
|
||||
map_chunk(this->v1),
|
||||
map_chunk(this->v2),
|
||||
map_chunk(this->v3)
|
||||
map_chunk(this->chunks[0]),
|
||||
map_chunk(this->chunks[1]),
|
||||
map_chunk(this->chunks[2]),
|
||||
map_chunk(this->chunks[3])
|
||||
);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline simd_input<Architecture::WESTMERE> map(simd_input<Architecture::WESTMERE> b, F const& map_chunk) {
|
||||
return simd_input<Architecture::WESTMERE>(
|
||||
map_chunk(this->v0, b.v0),
|
||||
map_chunk(this->v1, b.v1),
|
||||
map_chunk(this->v2, b.v2),
|
||||
map_chunk(this->v3, b.v3)
|
||||
map_chunk(this->chunks[0], b.chunks[0]),
|
||||
map_chunk(this->chunks[1], b.chunks[1]),
|
||||
map_chunk(this->chunks[2], b.chunks[2]),
|
||||
map_chunk(this->chunks[3], b.chunks[3])
|
||||
);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline __m128i reduce(F const& reduce_pair) {
|
||||
__m128i r01 = reduce_pair(this->chunks[0], this->chunks[1]);
|
||||
__m128i r23 = reduce_pair(this->chunks[2], this->chunks[3]);
|
||||
return reduce_pair(r01, r23);
|
||||
}
|
||||
|
||||
really_inline uint64_t to_bitmask() {
|
||||
uint64_t r0 = static_cast<uint32_t>(_mm_movemask_epi8(this->v0));
|
||||
uint64_t r1 = _mm_movemask_epi8(this->v1);
|
||||
uint64_t r2 = _mm_movemask_epi8(this->v2);
|
||||
uint64_t r3 = _mm_movemask_epi8(this->v3);
|
||||
uint64_t r0 = static_cast<uint32_t>(_mm_movemask_epi8(this->chunks[0]));
|
||||
uint64_t r1 = _mm_movemask_epi8(this->chunks[1]);
|
||||
uint64_t r2 = _mm_movemask_epi8(this->chunks[2]);
|
||||
uint64_t r3 = _mm_movemask_epi8(this->chunks[3]);
|
||||
return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
|
||||
}
|
||||
|
||||
|
|
|
@ -164,7 +164,7 @@ check_utf8_bytes(__m128i current_bytes, struct processed_utf_bytes *previous,
|
|||
}
|
||||
|
||||
} // namespace simdjson::westmere
|
||||
UNTARGET_REGION // westmere
|
||||
UNTARGET_REGION
|
||||
|
||||
TARGET_WESTMERE
|
||||
namespace simdjson {
|
||||
|
@ -182,7 +182,10 @@ struct utf8_checker<Architecture::WESTMERE> {
|
|||
|
||||
really_inline void check_next_input(simd_input<Architecture::WESTMERE> in) {
|
||||
__m128i high_bit = _mm_set1_epi8(0x80u);
|
||||
if ((_mm_testz_si128(_mm_or_si128(in.v0, in.v1), high_bit)) == 1) {
|
||||
__m128i any_bits_on = in.reduce([&](auto a, auto b) {
|
||||
return _mm_or_si128(a, b);
|
||||
});
|
||||
if ((_mm_testz_si128( any_bits_on, high_bit)) == 1) {
|
||||
// it is ascii, we just check continuation
|
||||
this->has_error =
|
||||
_mm_or_si128(_mm_cmpgt_epi8(this->previous.carried_continuations,
|
||||
|
@ -191,25 +194,9 @@ struct utf8_checker<Architecture::WESTMERE> {
|
|||
this->has_error);
|
||||
} else {
|
||||
// it is not ascii so we have to do heavy work
|
||||
this->previous =
|
||||
check_utf8_bytes(in.v0, &(this->previous), &(this->has_error));
|
||||
this->previous =
|
||||
check_utf8_bytes(in.v1, &(this->previous), &(this->has_error));
|
||||
}
|
||||
|
||||
if ((_mm_testz_si128(_mm_or_si128(in.v2, in.v3), high_bit)) == 1) {
|
||||
// it is ascii, we just check continuation
|
||||
this->has_error =
|
||||
_mm_or_si128(_mm_cmpgt_epi8(this->previous.carried_continuations,
|
||||
_mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 1)),
|
||||
this->has_error);
|
||||
} else {
|
||||
// it is not ascii so we have to do heavy work
|
||||
this->previous =
|
||||
check_utf8_bytes(in.v2, &(this->previous), &(this->has_error));
|
||||
this->previous =
|
||||
check_utf8_bytes(in.v3, &(this->previous), &(this->has_error));
|
||||
in.each([&](auto _in) {
|
||||
this->previous = check_utf8_bytes(_in, &(this->previous), &(this->has_error));
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -28,7 +28,9 @@ really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
|
|||
const __m128i struct_offset = _mm_set1_epi8(0xd4u);
|
||||
const __m128i struct_mask = _mm_set1_epi8(32);
|
||||
|
||||
whitespace = MAP_BITMASK( in, _mm_cmpeq_epi8(_in, _mm_shuffle_epi8(white_table, _in)) );
|
||||
whitespace = in.map([&](auto _in) {
|
||||
return _mm_cmpeq_epi8(_in, _mm_shuffle_epi8(white_table, _in));
|
||||
}).to_bitmask();
|
||||
|
||||
structurals = in.map([&](auto _in) {
|
||||
const __m128i r1 = _mm_add_epi8(struct_offset, _in);
|
||||
|
|
Loading…
Reference in New Issue