Use map().to_bitmask() instead of build_bitmask()

This commit is contained in:
John Keiser 2019-08-23 11:48:36 -07:00
parent 441963c84c
commit 9cc4ddfc88
6 changed files with 76 additions and 53 deletions

View File

@ -46,45 +46,39 @@ struct simd_input<Architecture::ARM64> {
this->i3 = vld1q_u8(ptr + 48);
}
really_inline simd_input(uint8x16_t i0, uint8x16_t i1, uint8x16_t i2, uint8x16_t i3) {
this->i0 = i0;
this->i1 = i1;
this->i2 = i2;
this->i3 = i3;
}
template <typename F>
really_inline uint64_t build_bitmask(F const& chunk_to_mask) {
uint8x16_t r0 = chunk_to_mask(this->i0);
uint8x16_t r1 = chunk_to_mask(this->i1);
uint8x16_t r2 = chunk_to_mask(this->i2);
uint8x16_t r3 = chunk_to_mask(this->i3);
return neon_movemask_bulk(r0, r1, r2, r3);
// Wraps four already-populated NEON registers as one simd_input.
// a0..a3 are stored, in order, as the chunks i0..i3.
really_inline simd_input(uint8x16_t a0, uint8x16_t a1, uint8x16_t a2, uint8x16_t a3)
  : i0(a0), i1(a1), i2(a2), i3(a3) {}
template <typename F>
really_inline simd_input<Architecture::ARM64> map(F const& map_chunk) {
simd_input<Architecture::ARM64> result = {
return simd_input<Architecture::ARM64>(
map_chunk(this->i0),
map_chunk(this->i1),
map_chunk(this->i2),
map_chunk(this->i3)
};
return result;
);
}
// Collapses the four stored chunks into a single 64-bit value by handing
// them, in order i0..i3, to neon_movemask_bulk.
// NOTE(review): presumably one result bit per input byte - confirm against
// neon_movemask_bulk, which is defined outside this view.
really_inline uint64_t to_bitmask() {
  const uint64_t bits = neon_movemask_bulk(this->i0, this->i1, this->i2, this->i3);
  return bits;
}
really_inline uint64_t eq(uint8_t m) {
const uint8x16_t mask = vmovq_n_u8(m);
return this->build_bitmask([&](uint8x16_t chunk) {
return this->map([&](uint8x16_t chunk) {
return vceqq_u8(chunk, mask);
});
}).to_bitmask();
}
really_inline uint64_t lteq(uint8_t m) {
const uint8x16_t mask = vmovq_n_u8(m);
return this->build_bitmask([&](uint8x16_t chunk) {
return this->map([&](uint8x16_t chunk) {
return vcleq_u8(chunk, mask);
});
}).to_bitmask();
}
}; // struct simd_input

View File

@ -39,14 +39,14 @@ really_inline void find_whitespace_and_structurals(
});
const uint8x16_t structural_shufti_mask = vmovq_n_u8(0x7);
structurals = v.build_bitmask([&](auto chunk) {
structurals = v.map([&](auto chunk) {
return vtstq_u8(chunk, structural_shufti_mask);
});
}).to_bitmask();
const uint8x16_t whitespace_shufti_mask = vmovq_n_u8(0x18);
whitespace = v.build_bitmask([&](auto chunk) {
whitespace = v.map([&](auto chunk) {
return vtstq_u8(chunk, whitespace_shufti_mask);
});
}).to_bitmask();
}
#include "generic/stage1_find_marks_flatten.h"

View File

@ -18,25 +18,37 @@ struct simd_input<Architecture::HASWELL> {
this->hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 32));
}
// Wraps two already-populated 256-bit registers as one simd_input:
// i0 becomes the low half (lo), i1 the high half (hi).
really_inline simd_input(__m256i i0, __m256i i1)
  : lo(i0), hi(i1) {}
template <typename F>
really_inline uint64_t build_bitmask(F const& chunk_to_mask) {
uint64_t r0 = static_cast<uint32_t>(_mm256_movemask_epi8(chunk_to_mask(this->lo)));
uint64_t r1 = _mm256_movemask_epi8(chunk_to_mask(this->hi));
// Applies map_chunk to each 256-bit half and returns a new simd_input
// holding the two results (lo first, then hi). The receiver is not modified.
really_inline simd_input<Architecture::HASWELL> map(F const& map_chunk) {
  const __m256i mapped_lo = map_chunk(this->lo);
  const __m256i mapped_hi = map_chunk(this->hi);
  return simd_input<Architecture::HASWELL>(mapped_lo, mapped_hi);
}
// Packs the two halves into one 64-bit mask: the movemask of lo fills
// bits 0-31 and the movemask of hi fills bits 32-63.
really_inline uint64_t to_bitmask() {
  // _mm256_movemask_epi8 returns an int; going through uint32_t keeps a set
  // top bit from sign-extending into the upper half of the 64-bit value.
  const uint64_t low_bits = static_cast<uint32_t>(_mm256_movemask_epi8(this->lo));
  // No cast needed here: the shift by 32 discards any sign-extended bits.
  const uint64_t high_bits = _mm256_movemask_epi8(this->hi);
  return low_bits | (high_bits << 32);
}
really_inline uint64_t eq(uint8_t m) {
const __m256i mask = _mm256_set1_epi8(m);
return this->build_bitmask([&] (auto chunk) {
return this->map([&] (auto chunk) {
return _mm256_cmpeq_epi8(chunk, mask);
});
}).to_bitmask();
}
really_inline uint64_t lteq(uint8_t m) {
const __m256i maxval = _mm256_set1_epi8(m);
return this->build_bitmask([&] (auto chunk) {
return this->map([&] (auto chunk) {
return _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, chunk), maxval);
});
}).to_bitmask();
}
}; // struct simd_input

View File

@ -34,7 +34,7 @@ really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
const __m256i mask_close_bracket = _mm256_set1_epi8(0x5d);
const __m256i mask_column = _mm256_set1_epi8(0x3a);
const __m256i mask_comma = _mm256_set1_epi8(0x2c);
structurals = in->build_bitmask([&](auto in) {
structurals = in.map([&](auto in) {
__m256i structurals = _mm256_cmpeq_epi8(in, mask_open_brace);
structurals = _mm256_or_si256(structurals, _mm256_cmpeq_epi8(in, mask_close_brace));
structurals = _mm256_or_si256(structurals, _mm256_cmpeq_epi8(in, mask_open_bracket));
@ -42,18 +42,18 @@ really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
structurals = _mm256_or_si256(structurals, _mm256_cmpeq_epi8(in, mask_column));
structurals = _mm256_or_si256(structurals, _mm256_cmpeq_epi8(in, mask_comma));
return structurals;
});
}).to_bitmask();
const __m256i mask_space = _mm256_set1_epi8(0x20);
const __m256i mask_linefeed = _mm256_set1_epi8(0x0a);
const __m256i mask_tab = _mm256_set1_epi8(0x09);
const __m256i mask_carriage = _mm256_set1_epi8(0x0d);
whitespace = in->build_bitmask([&](auto in) {
whitespace = in.map([&](auto in) {
__m256i space = _mm256_cmpeq_epi8(in, mask_space);
space = _mm256_or_si256(space, _mm256_cmpeq_epi8(in, mask_linefeed));
space = _mm256_or_si256(space, _mm256_cmpeq_epi8(in, mask_tab));
space = _mm256_or_si256(space, _mm256_cmpeq_epi8(in, mask_carriage));
});
}).to_bitmask();
// end of naive approach
#else // SIMDJSON_NAIVE_STRUCTURAL
@ -69,15 +69,15 @@ really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
const __m256i struct_offset = _mm256_set1_epi8(0xd4u);
const __m256i struct_mask = _mm256_set1_epi8(32);
whitespace = in.build_bitmask([&](auto chunk) {
whitespace = in.map([&](auto chunk) {
return _mm256_cmpeq_epi8(chunk, _mm256_shuffle_epi8(white_table, chunk));
});
structurals = in.build_bitmask([&](auto chunk) {
}).to_bitmask();
structurals = in.map([&](auto chunk) {
__m256i struct_r1 = _mm256_add_epi8(struct_offset, chunk);
__m256i struct_r2 = _mm256_or_si256(chunk, struct_mask);
__m256i struct_r3 = _mm256_shuffle_epi8(structural_table, struct_r1);
return _mm256_cmpeq_epi8(struct_r2, struct_r3);
});
}).to_bitmask();
#endif // else SIMDJSON_NAIVE_STRUCTURAL
}

View File

@ -22,27 +22,44 @@ struct simd_input<Architecture::WESTMERE> {
this->v3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 48));
}
// Wraps four already-populated 128-bit registers as one simd_input.
// i0..i3 are stored, in order, as the chunks v0..v3.
really_inline simd_input(__m128i i0, __m128i i1, __m128i i2, __m128i i3)
  : v0(i0), v1(i1), v2(i2), v3(i3) {}
template <typename F>
really_inline uint64_t build_bitmask(F const& chunk_to_mask) {
uint64_t r0 = static_cast<uint32_t>(_mm_movemask_epi8(chunk_to_mask(this->v0)));
uint64_t r1 = _mm_movemask_epi8(chunk_to_mask(this->v1));
uint64_t r2 = _mm_movemask_epi8(chunk_to_mask(this->v2));
uint64_t r3 = _mm_movemask_epi8(chunk_to_mask(this->v3));
// Applies map_chunk to each of the four 128-bit chunks and returns a new
// simd_input holding the results in the same order (v0..v3). The receiver
// is not modified.
really_inline simd_input<Architecture::WESTMERE> map(F const& map_chunk) {
  const __m128i mapped0 = map_chunk(this->v0);
  const __m128i mapped1 = map_chunk(this->v1);
  const __m128i mapped2 = map_chunk(this->v2);
  const __m128i mapped3 = map_chunk(this->v3);
  return simd_input<Architecture::WESTMERE>(mapped0, mapped1, mapped2, mapped3);
}
// Packs the four 128-bit chunks into one 64-bit mask. Each
// _mm_movemask_epi8 call contributes a 16-bit mask for one chunk, and the
// masks are laid out in chunk order: v0 in bits 0-15, v1 in bits 16-31,
// v2 in bits 32-47 and v3 in bits 48-63.
really_inline uint64_t to_bitmask() {
  uint64_t r0 = static_cast<uint32_t>(_mm_movemask_epi8(this->v0));
  // Must read v1: using v0 here would duplicate the first 16 bits and drop
  // bytes 16-31 of the input from the mask.
  uint64_t r1 = _mm_movemask_epi8(this->v1);
  uint64_t r2 = _mm_movemask_epi8(this->v2);
  uint64_t r3 = _mm_movemask_epi8(this->v3);
  return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
}
really_inline uint64_t eq(uint8_t m) {
const __m128i mask = _mm_set1_epi8(m);
return this->build_bitmask([&](auto chunk) {
return this->map([&](auto chunk) {
return _mm_cmpeq_epi8(chunk, mask);
});
}).to_bitmask();
}
really_inline uint64_t lteq(uint8_t m) {
const __m128i maxval = _mm_set1_epi8(m);
return this->build_bitmask([&](auto chunk) {
return this->map([&](auto chunk) {
return _mm_cmpeq_epi8(_mm_max_epu8(maxval, chunk), maxval);
});
}).to_bitmask();
}
}; // struct simd_input

View File

@ -28,16 +28,16 @@ really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
const __m128i struct_offset = _mm_set1_epi8(0xd4u);
const __m128i struct_mask = _mm_set1_epi8(32);
whitespace = in.build_bitmask([&](auto chunk) {
whitespace = in.map([&](auto chunk) {
return _mm_cmpeq_epi8(chunk, _mm_shuffle_epi8(white_table, chunk));
});
}).to_bitmask();
structurals = in.build_bitmask([&](auto chunk) {
structurals = in.map([&](auto chunk) {
__m128i struct_r1 = _mm_add_epi8(struct_offset, chunk);
__m128i struct_r2 = _mm_or_si128(chunk, struct_mask);
__m128i struct_r3 = _mm_shuffle_epi8(structural_table, struct_r1);
return _mm_cmpeq_epi8(struct_r2, struct_r3);
});
}).to_bitmask();
}
#include "generic/stage1_find_marks_flatten.h"