From 0925f71987008b9550895d854d6fb72980939a27 Mon Sep 17 00:00:00 2001 From: John Keiser Date: Tue, 1 Sep 2020 06:14:12 -0700 Subject: [PATCH] Simplify operator classification lookup on Intel --- src/haswell/dom_parser_implementation.cpp | 43 ++++++++++++++++++---- src/westmere/dom_parser_implementation.cpp | 35 +++++++++++++++--- 2 files changed, 66 insertions(+), 12 deletions(-) diff --git a/src/haswell/dom_parser_implementation.cpp b/src/haswell/dom_parser_implementation.cpp index de436d9c..1e231a43 100644 --- a/src/haswell/dom_parser_implementation.cpp +++ b/src/haswell/dom_parser_implementation.cpp @@ -14,25 +14,54 @@ using namespace simd; struct json_character_block { static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); // ASCII white-space ('\r','\n','\t',' ') - simdjson_really_inline uint64_t whitespace() const { return _whitespace; } + simdjson_really_inline uint64_t whitespace() const; // non-quote structural characters (comma, colon, braces, brackets) - simdjson_really_inline uint64_t op() const { return _op; } + simdjson_really_inline uint64_t op() const; // neither a structural character nor a white-space, so letters, numbers and quotes - simdjson_really_inline uint64_t scalar() { return ~(op() | whitespace()); } + simdjson_really_inline uint64_t scalar() const; uint64_t _whitespace; // ASCII white-space ('\r','\n','\t',' ') uint64_t _op; // structural characters (comma, colon, braces, brackets but not quotes) }; +simdjson_really_inline uint64_t json_character_block::whitespace() const { return _whitespace; } +simdjson_really_inline uint64_t json_character_block::op() const { return _op; } +simdjson_really_inline uint64_t json_character_block::scalar() const { return ~(op() | whitespace()); } + // This identifies structural characters (comma, colon, braces, brackets), // and ASCII white-space ('\r','\n','\t',' '). simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why // we can't use the generic lookup_16. auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); - auto op_table = simd8::repeat_16(',', '}', 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{'); - // We compute whitespace and op separately. If the code later only use one or the + // The 6 operators (:,[]{}) have these values: + // + // , 2C + // : 3A + // [ 5B + // { 7B + // ] 5D + // } 7D + // + // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. + // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then + // match it (against | 0x20). + // + // To prevent recognizing other characters, everything else gets compared with 0, which cannot + // match due to the | 0x20. + // + // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , + // and :. This gets caught in stage 2, which checks the actual character to ensure the right + // operators are in the right places. + auto op_table = simd8::repeat_16( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B + ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D + ); + + // We compute whitespace and op separately. If later code only uses one or the // other, given the fact that all functions are aggressively inlined, we can // hope that useless computations will be omitted. This is namely case when // minifying (we only need whitespace). @@ -43,8 +72,8 @@ simdjson_really_inline json_character_block json_character_block::classify(const ).to_bitmask(); uint64_t op = simd8x64( - (in.chunks[0] | 32) == simd8(_mm256_shuffle_epi8(op_table, in.chunks[0]-',')), - (in.chunks[1] | 32) == simd8(_mm256_shuffle_epi8(op_table, in.chunks[1]-',')) + (in.chunks[0] | 0x20) == simd8(_mm256_shuffle_epi8(op_table, in.chunks[0])), + (in.chunks[1] | 0x20) == simd8(_mm256_shuffle_epi8(op_table, in.chunks[1])) ).to_bitmask(); return { whitespace, op }; } diff --git a/src/westmere/dom_parser_implementation.cpp b/src/westmere/dom_parser_implementation.cpp index 55c61b8d..f9b7316e 100644 --- a/src/westmere/dom_parser_implementation.cpp +++ b/src/westmere/dom_parser_implementation.cpp @@ -26,7 +26,32 @@ simdjson_really_inline json_character_block json_character_block::classify(const // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why // we can't use the generic lookup_16. auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); - auto op_table = simd8::repeat_16(',', '}', 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{'); + + // The 6 operators (:,[]{}) have these values: + // + // , 2C + // : 3A + // [ 5B + // { 7B + // ] 5D + // } 7D + // + // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. + // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then + // match it (against | 0x20). + // + // To prevent recognizing other characters, everything else gets compared with 0, which cannot + // match due to the | 0x20. + // + // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , + // and :. This gets caught in stage 2, which checks the actual character to ensure the right + // operators are in the right places. + const auto op_table = simd8::repeat_16( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B + ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D + ); // We compute whitespace and op separately. If the code later only use one or the // other, given the fact that all functions are aggressively inlined, we can @@ -42,10 +67,10 @@ simdjson_really_inline json_character_block json_character_block::classify(const // | 32 handles the fact that { } and [ ] are exactly 32 bytes apart uint64_t op = simd8x64( - (in.chunks[0] | 32) == simd8(_mm_shuffle_epi8(op_table, in.chunks[0]-',')), - (in.chunks[1] | 32) == simd8(_mm_shuffle_epi8(op_table, in.chunks[1]-',')), - (in.chunks[2] | 32) == simd8(_mm_shuffle_epi8(op_table, in.chunks[2]-',')), - (in.chunks[3] | 32) == simd8(_mm_shuffle_epi8(op_table, in.chunks[3]-',')) + (in.chunks[0] | 32) == simd8(_mm_shuffle_epi8(op_table, in.chunks[0])), + (in.chunks[1] | 32) == simd8(_mm_shuffle_epi8(op_table, in.chunks[1])), + (in.chunks[2] | 32) == simd8(_mm_shuffle_epi8(op_table, in.chunks[2])), + (in.chunks[3] | 32) == simd8(_mm_shuffle_epi8(op_table, in.chunks[3])) ).to_bitmask(); return { whitespace, op }; }