Use map().to_bitmask() instead of build_bitmask()
This commit is contained in:
parent
441963c84c
commit
9cc4ddfc88
|
@ -46,45 +46,39 @@ struct simd_input<Architecture::ARM64> {
|
|||
this->i3 = vld1q_u8(ptr + 48);
|
||||
}
|
||||
|
||||
really_inline simd_input(uint8x16_t i0, uint8x16_t i1, uint8x16_t i2, uint8x16_t i3) {
|
||||
this->i0 = i0;
|
||||
this->i1 = i1;
|
||||
this->i2 = i2;
|
||||
this->i3 = i3;
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline uint64_t build_bitmask(F const& chunk_to_mask) {
|
||||
uint8x16_t r0 = chunk_to_mask(this->i0);
|
||||
uint8x16_t r1 = chunk_to_mask(this->i1);
|
||||
uint8x16_t r2 = chunk_to_mask(this->i2);
|
||||
uint8x16_t r3 = chunk_to_mask(this->i3);
|
||||
return neon_movemask_bulk(r0, r1, r2, r3);
|
||||
// Builds an input directly from four already-loaded 16-byte chunks,
// stored in order as i0..i3.
really_inline simd_input(uint8x16_t a0, uint8x16_t a1, uint8x16_t a2, uint8x16_t a3)
    : i0(a0), i1(a1), i2(a2), i3(a3) {}
|
||||
|
||||
template <typename F>
|
||||
really_inline simd_input<Architecture::ARM64> map(F const& map_chunk) {
|
||||
simd_input<Architecture::ARM64> result = {
|
||||
return simd_input<Architecture::ARM64>(
|
||||
map_chunk(this->i0),
|
||||
map_chunk(this->i1),
|
||||
map_chunk(this->i2),
|
||||
map_chunk(this->i3)
|
||||
};
|
||||
return result;
|
||||
);
|
||||
}
|
||||
|
||||
// Collapses the four stored chunks into a single 64-bit mask by handing them,
// in order, to neon_movemask_bulk (defined elsewhere in the project).
really_inline uint64_t to_bitmask() {
  const uint64_t packed = neon_movemask_bulk(this->i0, this->i1, this->i2, this->i3);
  return packed;
}
|
||||
|
||||
really_inline uint64_t eq(uint8_t m) {
|
||||
const uint8x16_t mask = vmovq_n_u8(m);
|
||||
return this->build_bitmask([&](uint8x16_t chunk) {
|
||||
return this->map([&](uint8x16_t chunk) {
|
||||
return vceqq_u8(chunk, mask);
|
||||
});
|
||||
}).to_bitmask();
|
||||
}
|
||||
|
||||
really_inline uint64_t lteq(uint8_t m) {
|
||||
const uint8x16_t mask = vmovq_n_u8(m);
|
||||
return this->build_bitmask([&](uint8x16_t chunk) {
|
||||
return this->map([&](uint8x16_t chunk) {
|
||||
return vcleq_u8(chunk, mask);
|
||||
});
|
||||
}).to_bitmask();
|
||||
}
|
||||
|
||||
}; // struct simd_input
|
||||
|
|
|
@ -39,14 +39,14 @@ really_inline void find_whitespace_and_structurals(
|
|||
});
|
||||
|
||||
const uint8x16_t structural_shufti_mask = vmovq_n_u8(0x7);
|
||||
structurals = v.build_bitmask([&](auto chunk) {
|
||||
structurals = v.map([&](auto chunk) {
|
||||
return vtstq_u8(chunk, structural_shufti_mask);
|
||||
});
|
||||
}).to_bitmask();
|
||||
|
||||
const uint8x16_t whitespace_shufti_mask = vmovq_n_u8(0x18);
|
||||
whitespace = v.build_bitmask([&](auto chunk) {
|
||||
whitespace = v.map([&](auto chunk) {
|
||||
return vtstq_u8(chunk, whitespace_shufti_mask);
|
||||
});
|
||||
}).to_bitmask();
|
||||
}
|
||||
|
||||
#include "generic/stage1_find_marks_flatten.h"
|
||||
|
|
|
@ -18,25 +18,37 @@ struct simd_input<Architecture::HASWELL> {
|
|||
this->hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 32));
|
||||
}
|
||||
|
||||
// Builds an input directly from two already-loaded 32-byte halves:
// i0 becomes the low half, i1 the high half.
really_inline simd_input(__m256i i0, __m256i i1)
    : lo(i0), hi(i1) {}
|
||||
|
||||
template <typename F>
|
||||
really_inline uint64_t build_bitmask(F const& chunk_to_mask) {
|
||||
uint64_t r0 = static_cast<uint32_t>(_mm256_movemask_epi8(chunk_to_mask(this->lo)));
|
||||
uint64_t r1 = _mm256_movemask_epi8(chunk_to_mask(this->hi));
|
||||
// Applies map_chunk to each half (low first, then high) and wraps the two
// results in a new simd_input.
really_inline simd_input<Architecture::HASWELL> map(F const& map_chunk) {
  const auto mapped_lo = map_chunk(this->lo);
  const auto mapped_hi = map_chunk(this->hi);
  return simd_input<Architecture::HASWELL>(mapped_lo, mapped_hi);
}
|
||||
|
||||
// Packs both 32-byte halves into one 64-bit mask: the movemask of `lo` fills
// bits 0..31 and the movemask of `hi` fills bits 32..63.
really_inline uint64_t to_bitmask() {
  // Go through uint32_t so the int result is not sign-extended into the
  // upper half before the OR.
  const uint64_t low_bits = static_cast<uint32_t>(_mm256_movemask_epi8(this->lo));
  // Any sign-extension bits here are pushed out by the 32-bit shift below.
  const uint64_t high_bits = _mm256_movemask_epi8(this->hi);
  return (high_bits << 32) | low_bits;
}
|
||||
|
||||
really_inline uint64_t eq(uint8_t m) {
|
||||
const __m256i mask = _mm256_set1_epi8(m);
|
||||
return this->build_bitmask([&] (auto chunk) {
|
||||
return this->map([&] (auto chunk) {
|
||||
return _mm256_cmpeq_epi8(chunk, mask);
|
||||
});
|
||||
}).to_bitmask();
|
||||
}
|
||||
|
||||
really_inline uint64_t lteq(uint8_t m) {
|
||||
const __m256i maxval = _mm256_set1_epi8(m);
|
||||
return this->build_bitmask([&] (auto chunk) {
|
||||
return this->map([&] (auto chunk) {
|
||||
return _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, chunk), maxval);
|
||||
});
|
||||
}).to_bitmask();
|
||||
}
|
||||
|
||||
}; // struct simd_input
|
||||
|
|
|
@ -34,7 +34,7 @@ really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
|
|||
const __m256i mask_close_bracket = _mm256_set1_epi8(0x5d);
|
||||
const __m256i mask_column = _mm256_set1_epi8(0x3a);
|
||||
const __m256i mask_comma = _mm256_set1_epi8(0x2c);
|
||||
structurals = in->build_bitmask([&](auto in) {
|
||||
structurals = in.map([&](auto in) {
|
||||
__m256i structurals = _mm256_cmpeq_epi8(in, mask_open_brace);
|
||||
structurals = _mm256_or_si256(structurals, _mm256_cmpeq_epi8(in, mask_close_brace));
|
||||
structurals = _mm256_or_si256(structurals, _mm256_cmpeq_epi8(in, mask_open_bracket));
|
||||
|
@ -42,18 +42,18 @@ really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
|
|||
structurals = _mm256_or_si256(structurals, _mm256_cmpeq_epi8(in, mask_column));
|
||||
structurals = _mm256_or_si256(structurals, _mm256_cmpeq_epi8(in, mask_comma));
|
||||
return structurals;
|
||||
});
|
||||
}).to_bitmask();
|
||||
|
||||
const __m256i mask_space = _mm256_set1_epi8(0x20);
|
||||
const __m256i mask_linefeed = _mm256_set1_epi8(0x0a);
|
||||
const __m256i mask_tab = _mm256_set1_epi8(0x09);
|
||||
const __m256i mask_carriage = _mm256_set1_epi8(0x0d);
|
||||
whitespace = in->build_bitmask([&](auto in) {
|
||||
whitespace = in.map([&](auto in) {
|
||||
__m256i space = _mm256_cmpeq_epi8(in, mask_space);
|
||||
space = _mm256_or_si256(space, _mm256_cmpeq_epi8(in, mask_linefeed));
|
||||
space = _mm256_or_si256(space, _mm256_cmpeq_epi8(in, mask_tab));
|
||||
space = _mm256_or_si256(space, _mm256_cmpeq_epi8(in, mask_carriage));
|
||||
});
|
||||
}).to_bitmask();
|
||||
// end of naive approach
|
||||
|
||||
#else // SIMDJSON_NAIVE_STRUCTURAL
|
||||
|
@ -69,15 +69,15 @@ really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
|
|||
const __m256i struct_offset = _mm256_set1_epi8(0xd4u);
|
||||
const __m256i struct_mask = _mm256_set1_epi8(32);
|
||||
|
||||
whitespace = in.build_bitmask([&](auto chunk) {
|
||||
whitespace = in.map([&](auto chunk) {
|
||||
return _mm256_cmpeq_epi8(chunk, _mm256_shuffle_epi8(white_table, chunk));
|
||||
});
|
||||
structurals = in.build_bitmask([&](auto chunk) {
|
||||
}).to_bitmask();
|
||||
structurals = in.map([&](auto chunk) {
|
||||
__m256i struct_r1 = _mm256_add_epi8(struct_offset, chunk);
|
||||
__m256i struct_r2 = _mm256_or_si256(chunk, struct_mask);
|
||||
__m256i struct_r3 = _mm256_shuffle_epi8(structural_table, struct_r1);
|
||||
return _mm256_cmpeq_epi8(struct_r2, struct_r3);
|
||||
});
|
||||
}).to_bitmask();
|
||||
|
||||
#endif // else SIMDJSON_NAIVE_STRUCTURAL
|
||||
}
|
||||
|
|
|
@ -22,27 +22,44 @@ struct simd_input<Architecture::WESTMERE> {
|
|||
this->v3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 48));
|
||||
}
|
||||
|
||||
// Builds an input directly from four already-loaded 16-byte chunks,
// stored in order as v0..v3.
really_inline simd_input(__m128i i0, __m128i i1, __m128i i2, __m128i i3)
    : v0(i0), v1(i1), v2(i2), v3(i3) {}
|
||||
|
||||
template <typename F>
|
||||
really_inline uint64_t build_bitmask(F const& chunk_to_mask) {
|
||||
uint64_t r0 = static_cast<uint32_t>(_mm_movemask_epi8(chunk_to_mask(this->v0)));
|
||||
uint64_t r1 = _mm_movemask_epi8(chunk_to_mask(this->v1));
|
||||
uint64_t r2 = _mm_movemask_epi8(chunk_to_mask(this->v2));
|
||||
uint64_t r3 = _mm_movemask_epi8(chunk_to_mask(this->v3));
|
||||
// Applies map_chunk to each of the four chunks (v0 through v3, in order) and
// wraps the results in a new simd_input.
really_inline simd_input<Architecture::WESTMERE> map(F const& map_chunk) {
  const auto mapped0 = map_chunk(this->v0);
  const auto mapped1 = map_chunk(this->v1);
  const auto mapped2 = map_chunk(this->v2);
  const auto mapped3 = map_chunk(this->v3);
  return simd_input<Architecture::WESTMERE>(mapped0, mapped1, mapped2, mapped3);
}
|
||||
|
||||
// Packs the four 16-byte chunks into one 64-bit mask. Each chunk's movemask
// is shifted into its own 16-bit slot: v0 -> bits 0..15, v1 -> bits 16..31,
// v2 -> bits 32..47, v3 -> bits 48..63.
really_inline uint64_t to_bitmask() {
  uint64_t r0 = static_cast<uint32_t>(_mm_movemask_epi8(this->v0));
  // Fix: the second slot must come from v1. It previously re-read v0, which
  // copied chunk 0's bits into bits 16..31 and ignored chunk 1 entirely.
  uint64_t r1 = _mm_movemask_epi8(this->v1);
  uint64_t r2 = _mm_movemask_epi8(this->v2);
  uint64_t r3 = _mm_movemask_epi8(this->v3);
  return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
}
|
||||
|
||||
really_inline uint64_t eq(uint8_t m) {
|
||||
const __m128i mask = _mm_set1_epi8(m);
|
||||
return this->build_bitmask([&](auto chunk) {
|
||||
return this->map([&](auto chunk) {
|
||||
return _mm_cmpeq_epi8(chunk, mask);
|
||||
});
|
||||
}).to_bitmask();
|
||||
}
|
||||
|
||||
really_inline uint64_t lteq(uint8_t m) {
|
||||
const __m128i maxval = _mm_set1_epi8(m);
|
||||
return this->build_bitmask([&](auto chunk) {
|
||||
return this->map([&](auto chunk) {
|
||||
return _mm_cmpeq_epi8(_mm_max_epu8(maxval, chunk), maxval);
|
||||
});
|
||||
}).to_bitmask();
|
||||
}
|
||||
|
||||
}; // struct simd_input
|
||||
|
|
|
@ -28,16 +28,16 @@ really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
|
|||
const __m128i struct_offset = _mm_set1_epi8(0xd4u);
|
||||
const __m128i struct_mask = _mm_set1_epi8(32);
|
||||
|
||||
whitespace = in.build_bitmask([&](auto chunk) {
|
||||
whitespace = in.map([&](auto chunk) {
|
||||
return _mm_cmpeq_epi8(chunk, _mm_shuffle_epi8(white_table, chunk));
|
||||
});
|
||||
}).to_bitmask();
|
||||
|
||||
structurals = in.build_bitmask([&](auto chunk) {
|
||||
structurals = in.map([&](auto chunk) {
|
||||
__m128i struct_r1 = _mm_add_epi8(struct_offset, chunk);
|
||||
__m128i struct_r2 = _mm_or_si128(chunk, struct_mask);
|
||||
__m128i struct_r3 = _mm_shuffle_epi8(structural_table, struct_r1);
|
||||
return _mm_cmpeq_epi8(struct_r2, struct_r3);
|
||||
});
|
||||
}).to_bitmask();
|
||||
}
|
||||
|
||||
#include "generic/stage1_find_marks_flatten.h"
|
||||
|
|
Loading…
Reference in New Issue