From f0ec26992a5f8f4dc1af97a2a0614ef5ba613b87 Mon Sep 17 00:00:00 2001 From: John Keiser Date: Tue, 1 Sep 2020 08:37:46 -0700 Subject: [PATCH] Remove bit_or (bad perf on Windows) --- src/arm64/simd.h | 10 ---------- src/haswell/dom_parser_implementation.cpp | 6 +++++- src/haswell/simd.h | 2 +- src/westmere/dom_parser_implementation.cpp | 8 +++++++- src/westmere/simd.h | 22 ++++++---------------- 5 files changed, 19 insertions(+), 29 deletions(-) diff --git a/src/arm64/simd.h b/src/arm64/simd.h index 6ece602a..f59cd1ae 100644 --- a/src/arm64/simd.h +++ b/src/arm64/simd.h @@ -462,16 +462,6 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); } - simdjson_really_inline simd8x64 bit_or(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] | mask, - this->chunks[1] | mask, - this->chunks[2] | mask, - this->chunks[3] | mask - ); - } - simdjson_really_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( diff --git a/src/haswell/dom_parser_implementation.cpp b/src/haswell/dom_parser_implementation.cpp index 623969c9..b2104967 100644 --- a/src/haswell/dom_parser_implementation.cpp +++ b/src/haswell/dom_parser_implementation.cpp @@ -70,7 +70,11 @@ simdjson_really_inline json_character_block json_character_block::classify(const _mm256_shuffle_epi8(whitespace_table, in.chunks[0]), _mm256_shuffle_epi8(whitespace_table, in.chunks[1]) }); - const simd8x64 curlified = in.bit_or(0x20); // Turn [ and ] into { and } + // Turn [ and ] into { and } + const simd8x64 curlified{ + in.chunks[0] | 0x20, + in.chunks[1] | 0x20 + }; const uint64_t op = curlified.eq({ _mm256_shuffle_epi8(op_table, in.chunks[0]), _mm256_shuffle_epi8(op_table, in.chunks[1]) diff --git a/src/haswell/simd.h b/src/haswell/simd.h index 93491408..276bd3a1 100644 --- a/src/haswell/simd.h +++ b/src/haswell/simd.h @@ -337,7 +337,7 @@ namespace simd { ).to_bitmask(); } - simdjson_really_inline uint64_t eq(const simd8x64 other) const { + simdjson_really_inline uint64_t eq(const simd8x64 &other) const { return simd8x64( this->chunks[0] == other.chunks[0], this->chunks[1] == other.chunks[1] diff --git a/src/westmere/dom_parser_implementation.cpp b/src/westmere/dom_parser_implementation.cpp index 5a174b00..5518ba52 100644 --- a/src/westmere/dom_parser_implementation.cpp +++ b/src/westmere/dom_parser_implementation.cpp @@ -65,7 +65,13 @@ simdjson_really_inline json_character_block json_character_block::classify(const _mm_shuffle_epi8(whitespace_table, in.chunks[2]), _mm_shuffle_epi8(whitespace_table, in.chunks[3]) }); - const simd8x64 curlified = in.bit_or(0x20); // Turn [ and ] into { and } + // Turn [ and ] into { and } + const simd8x64 curlified{ + in.chunks[0] | 0x20, + in.chunks[1] | 0x20, + in.chunks[2] | 0x20, + in.chunks[3] | 0x20 + }; const uint64_t op = curlified.eq({ _mm_shuffle_epi8(op_table, in.chunks[0]), _mm_shuffle_epi8(op_table, in.chunks[1]), diff --git a/src/westmere/simd.h b/src/westmere/simd.h index 5b92d980..c321586b 100644 --- a/src/westmere/simd.h +++ b/src/westmere/simd.h @@ -288,23 +288,13 @@ namespace simd { } simdjson_really_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r1 = this->chunks[1].to_bitmask(); - uint64_t r2 = this->chunks[2].to_bitmask(); - uint64_t r3 = this->chunks[3].to_bitmask(); + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); } - - simdjson_really_inline simd8x64 bit_or(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] | mask, - this->chunks[1] | mask, - this->chunks[2] | mask, - this->chunks[3] | mask - ); - } - + simdjson_really_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( @@ -315,7 +305,7 @@ namespace simd { ).to_bitmask(); } - simdjson_really_inline uint64_t eq(const simd8x64 other) const { + simdjson_really_inline uint64_t eq(const simd8x64 &other) const { return simd8x64( this->chunks[0] == other.chunks[0], this->chunks[1] == other.chunks[1],