From 62e8332b347f422475132adad8f01d96fb0f04f8 Mon Sep 17 00:00:00 2001 From: John Keiser Date: Tue, 1 Sep 2020 07:46:56 -0700 Subject: [PATCH] Use simd8x64 abstractions in classification --- src/haswell/dom_parser_implementation.cpp | 21 ++++++++-------- src/haswell/simd.h | 7 ++++++ src/westmere/dom_parser_implementation.cpp | 28 +++++++++++----------- src/westmere/simd.h | 9 +++++++ 4 files changed, 41 insertions(+), 24 deletions(-) diff --git a/src/haswell/dom_parser_implementation.cpp b/src/haswell/dom_parser_implementation.cpp index 1e231a43..623969c9 100644 --- a/src/haswell/dom_parser_implementation.cpp +++ b/src/haswell/dom_parser_implementation.cpp @@ -33,7 +33,7 @@ simdjson_really_inline uint64_t json_character_block::scalar() const { return ~( simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t>& in) { // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why // we can't use the generic lookup_16. - auto whitespace_table = simd8<uint8_t>::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); + const auto whitespace_table = simd8<uint8_t>::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); // The 6 operators (:,[]{}) have these values: // @@ -54,7 +54,7 @@ simdjson_really_inline json_character_block json_character_block::classify(const // NOTE: Due to the | 0x20, this ALSO treats <FF> and <SUB> (control characters 0C and 1A) like , // and :. This gets caught in stage 2, which checks the actual character to ensure the right // operators are in the right places. - auto op_table = simd8<uint8_t>::repeat_16( + const auto op_table = simd8<uint8_t>::repeat_16( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B @@ -66,15 +66,16 @@ simdjson_really_inline json_character_block json_character_block::classify(const // hope that useless computations will be omitted. This is namely case when // minifying (we only need whitespace). 
- uint64_t whitespace = simd8x64<bool>( - in.chunks[0] == simd8<uint8_t>(_mm256_shuffle_epi8(whitespace_table, in.chunks[0])), - in.chunks[1] == simd8<uint8_t>(_mm256_shuffle_epi8(whitespace_table, in.chunks[1])) - ).to_bitmask(); + const uint64_t whitespace = in.eq({ + _mm256_shuffle_epi8(whitespace_table, in.chunks[0]), + _mm256_shuffle_epi8(whitespace_table, in.chunks[1]) + }); + const simd8x64<uint8_t> curlified = in.bit_or(0x20); // Turn [ and ] into { and } + const uint64_t op = curlified.eq({ + _mm256_shuffle_epi8(op_table, in.chunks[0]), + _mm256_shuffle_epi8(op_table, in.chunks[1]) + }); - uint64_t op = simd8x64<bool>( - (in.chunks[0] | 0x20) == simd8<uint8_t>(_mm256_shuffle_epi8(op_table, in.chunks[0])), - (in.chunks[1] | 0x20) == simd8<uint8_t>(_mm256_shuffle_epi8(op_table, in.chunks[1])) - ).to_bitmask(); return { whitespace, op }; } diff --git a/src/haswell/simd.h b/src/haswell/simd.h index 277b99b8..93491408 100644 --- a/src/haswell/simd.h +++ b/src/haswell/simd.h @@ -337,6 +337,13 @@ namespace simd { ).to_bitmask(); } + simdjson_really_inline uint64_t eq(const simd8x64 other) const { + return simd8x64<bool>( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } + simdjson_really_inline uint64_t lteq(const T m) const { const simd8<T> mask = simd8<T>::splat(m); return simd8x64<bool>( diff --git a/src/westmere/dom_parser_implementation.cpp b/src/westmere/dom_parser_implementation.cpp index f9b7316e..5a174b00 100644 --- a/src/westmere/dom_parser_implementation.cpp +++ b/src/westmere/dom_parser_implementation.cpp @@ -58,21 +58,21 @@ simdjson_really_inline json_character_block json_character_block::classify(const // hope that useless computations will be omitted. This is namely case when // minifying (we only need whitespace). 
- uint64_t whitespace = simd8x64<bool>( - in.chunks[0] == simd8<uint8_t>(_mm_shuffle_epi8(whitespace_table, in.chunks[0])), - in.chunks[1] == simd8<uint8_t>(_mm_shuffle_epi8(whitespace_table, in.chunks[1])), - in.chunks[2] == simd8<uint8_t>(_mm_shuffle_epi8(whitespace_table, in.chunks[2])), - in.chunks[3] == simd8<uint8_t>(_mm_shuffle_epi8(whitespace_table, in.chunks[3])) - ).to_bitmask(); - // | 32 handles the fact that { } and [ ] are exactly 32 bytes apart - uint64_t op = simd8x64<bool>( - (in.chunks[0] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[0])), - (in.chunks[1] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[1])), - (in.chunks[2] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[2])), - (in.chunks[3] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[3])) - ).to_bitmask(); - return { whitespace, op }; + const uint64_t whitespace = in.eq({ + _mm_shuffle_epi8(whitespace_table, in.chunks[0]), + _mm_shuffle_epi8(whitespace_table, in.chunks[1]), + _mm_shuffle_epi8(whitespace_table, in.chunks[2]), + _mm_shuffle_epi8(whitespace_table, in.chunks[3]) + }); + const simd8x64<uint8_t> curlified = in.bit_or(0x20); // Turn [ and ] into { and } + const uint64_t op = curlified.eq({ + _mm_shuffle_epi8(op_table, in.chunks[0]), + _mm_shuffle_epi8(op_table, in.chunks[1]), + _mm_shuffle_epi8(op_table, in.chunks[2]), + _mm_shuffle_epi8(op_table, in.chunks[3]) + }); + return { whitespace, op }; } simdjson_really_inline bool is_ascii(const simd8x64<uint8_t>& input) { diff --git a/src/westmere/simd.h b/src/westmere/simd.h index 9da56a42..5b92d980 100644 --- a/src/westmere/simd.h +++ b/src/westmere/simd.h @@ -315,6 +315,15 @@ namespace simd { ).to_bitmask(); } + simdjson_really_inline uint64_t eq(const simd8x64 other) const { + return simd8x64<bool>( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + simdjson_really_inline uint64_t lteq(const T m) const { const simd8<T> mask = simd8<T>::splat(m); return simd8x64<bool>(