Fix performance issues:
1. Don't recast the "int" result of movemask to uint32_t. 2. Call max_epu8 with the mask first and the bytes second.
This commit is contained in:
parent
d89046d515
commit
3828e1e538
|
@ -58,7 +58,7 @@ namespace simdjson::haswell::simd {
|
||||||
// SIMD byte mask type (returned by things like eq and gt)
|
// SIMD byte mask type (returned by things like eq and gt)
|
||||||
template<>
|
template<>
|
||||||
struct simd8<bool>: base8<bool> {
|
struct simd8<bool>: base8<bool> {
|
||||||
typedef uint32_t bitmask_t;
|
typedef int bitmask_t;
|
||||||
static really_inline simd8<bool> splat(bool _value) { return _mm256_set1_epi8(-(!!_value)); }
|
static really_inline simd8<bool> splat(bool _value) { return _mm256_set1_epi8(-(!!_value)); }
|
||||||
|
|
||||||
really_inline simd8<bool>() : base8() {}
|
really_inline simd8<bool>() : base8() {}
|
||||||
|
@ -183,7 +183,7 @@ namespace simdjson::haswell::simd {
|
||||||
// Order-specific operations
|
// Order-specific operations
|
||||||
really_inline simd8<uint8_t> max(const simd8<uint8_t> other) const { return _mm256_max_epu8(*this, other); }
|
really_inline simd8<uint8_t> max(const simd8<uint8_t> other) const { return _mm256_max_epu8(*this, other); }
|
||||||
really_inline simd8<uint8_t> min(const simd8<uint8_t> other) const { return _mm256_min_epu8(*this, other); }
|
really_inline simd8<uint8_t> min(const simd8<uint8_t> other) const { return _mm256_min_epu8(*this, other); }
|
||||||
really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return this->max(other) == other; }
|
really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return other.max(*this) == other; }
|
||||||
|
|
||||||
// Bit-specific operations
|
// Bit-specific operations
|
||||||
really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return (*this & bits).any_bits_set(); }
|
really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return (*this & bits).any_bits_set(); }
|
||||||
|
|
|
@ -30,8 +30,8 @@ really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src
|
||||||
// store to dest unconditionally - we can overwrite the bits we don't like later
|
// store to dest unconditionally - we can overwrite the bits we don't like later
|
||||||
v.store(dst);
|
v.store(dst);
|
||||||
return {
|
return {
|
||||||
(v == '\\').to_bitmask(), // bs_bits
|
(uint32_t)(v == '\\').to_bitmask(), // bs_bits
|
||||||
(v == '"').to_bitmask(), // quote_bits
|
(uint32_t)(v == '"').to_bitmask(), // quote_bits
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,7 @@ namespace simdjson::westmere::simd {
|
||||||
|
|
||||||
template<typename T, typename Mask=simd8<bool>>
|
template<typename T, typename Mask=simd8<bool>>
|
||||||
struct base8: base<simd8<T>> {
|
struct base8: base<simd8<T>> {
|
||||||
typedef uint32_t bitmask_t;
|
typedef int bitmask_t;
|
||||||
|
|
||||||
really_inline base8() : base<simd8<T>>() {}
|
really_inline base8() : base<simd8<T>>() {}
|
||||||
really_inline base8(const __m128i _value) : base<simd8<T>>(_value) {}
|
really_inline base8(const __m128i _value) : base<simd8<T>>(_value) {}
|
||||||
|
@ -173,7 +173,7 @@ namespace simdjson::westmere::simd {
|
||||||
// Order-specific operations
|
// Order-specific operations
|
||||||
really_inline simd8<uint8_t> max(const simd8<uint8_t> other) const { return _mm_max_epu8(*this, other); }
|
really_inline simd8<uint8_t> max(const simd8<uint8_t> other) const { return _mm_max_epu8(*this, other); }
|
||||||
really_inline simd8<uint8_t> min(const simd8<uint8_t> other) const { return _mm_min_epu8(*this, other); }
|
really_inline simd8<uint8_t> min(const simd8<uint8_t> other) const { return _mm_min_epu8(*this, other); }
|
||||||
really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return this->max(other) == other; }
|
really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return other.max(*this) == other; }
|
||||||
|
|
||||||
// Bit-specific operations
|
// Bit-specific operations
|
||||||
really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return (*this & bits).any_bits_set(); }
|
really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return (*this & bits).any_bits_set(); }
|
||||||
|
|
Loading…
Reference in New Issue