Adding a new compile-time flag (SIMDJSON_NAIVE_STRUCTURAL) for research purposes.

This commit is contained in:
Daniel Lemire 2019-06-03 16:41:50 -04:00
parent e27a46973c
commit b32c72f1fc
1 changed file with 45 additions and 0 deletions

View File

@ -295,6 +295,50 @@ really_inline void find_whitespace_and_structurals(simd_input in,
// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
// these go into the next 2 buckets of the comparison (8/16)
#ifdef __AVX2__
#ifdef SIMDJSON_NAIVE_STRUCTURAL
// Naive classification: instead of a table lookup, compare every input byte
// against each character of interest, one character at a time, and OR the
// per-character results together.
// You should never need this naive approach, but it can be useful
// for research purposes (e.g., as a baseline to compare against the
// table-based code in the #else branch).
//
// Two 64-bit bitmaps are produced:
//   structurals: bit set where the byte is one of { } [ ] : ,
//   whitespace:  bit set where the byte is space, LF, TAB or CR
// In both bitmaps, in.lo fills bits 0..31 and in.hi fills bits 32..63.
// NOTE(review): presumably in.lo / in.hi are the first and second 32 bytes of
// a 64-byte input block -- confirm against the definition of simd_input.

// --- structural characters ---
// '{' (0x7b)
const __m256i mask_open_brace = _mm256_set1_epi8(0x7b);
__m256i struct_lo = _mm256_cmpeq_epi8(in.lo, mask_open_brace);
__m256i struct_hi = _mm256_cmpeq_epi8(in.hi, mask_open_brace);
// '}' (0x7d)
const __m256i mask_close_brace = _mm256_set1_epi8(0x7d);
struct_lo = _mm256_or_si256(struct_lo,_mm256_cmpeq_epi8(in.lo, mask_close_brace));
struct_hi = _mm256_or_si256(struct_hi,_mm256_cmpeq_epi8(in.hi, mask_close_brace));
// '[' (0x5b)
const __m256i mask_open_bracket = _mm256_set1_epi8(0x5b);
struct_lo = _mm256_or_si256(struct_lo,_mm256_cmpeq_epi8(in.lo, mask_open_bracket));
struct_hi = _mm256_or_si256(struct_hi,_mm256_cmpeq_epi8(in.hi, mask_open_bracket));
// ']' (0x5d)
const __m256i mask_close_bracket = _mm256_set1_epi8(0x5d);
struct_lo = _mm256_or_si256(struct_lo,_mm256_cmpeq_epi8(in.lo, mask_close_bracket));
struct_hi = _mm256_or_si256(struct_hi,_mm256_cmpeq_epi8(in.hi, mask_close_bracket));
// ':' (0x3a) -- the variable is named "column", but the character is a colon
const __m256i mask_column = _mm256_set1_epi8(0x3a);
struct_lo = _mm256_or_si256(struct_lo,_mm256_cmpeq_epi8(in.lo, mask_column));
struct_hi = _mm256_or_si256(struct_hi,_mm256_cmpeq_epi8(in.hi, mask_column));
// ',' (0x2c)
const __m256i mask_comma = _mm256_set1_epi8(0x2c);
struct_lo = _mm256_or_si256(struct_lo,_mm256_cmpeq_epi8(in.lo, mask_comma));
struct_hi = _mm256_or_si256(struct_hi,_mm256_cmpeq_epi8(in.hi, mask_comma));
// Collapse each 32-byte comparison result into a 32-bit mask (one bit per byte).
// _mm256_movemask_epi8 returns a signed int: the low half goes through
// uint32_t so that sign extension cannot set bits 32..63 when widening.
uint64_t structural_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(struct_lo));
// The high half needs no such cast: any sign-extension bits are discarded by
// the left shift by 32 below.
uint64_t structural_res_1 = _mm256_movemask_epi8(struct_hi);
structurals = (structural_res_0 | (structural_res_1 << 32));

// --- whitespace characters ---
// space (0x20)
const __m256i mask_space = _mm256_set1_epi8(0x20);
__m256i space_lo = _mm256_cmpeq_epi8(in.lo, mask_space);
__m256i space_hi = _mm256_cmpeq_epi8(in.hi, mask_space);
// linefeed (0x0a)
const __m256i mask_linefeed = _mm256_set1_epi8(0x0a);
space_lo = _mm256_or_si256(space_lo,_mm256_cmpeq_epi8(in.lo, mask_linefeed));
space_hi = _mm256_or_si256(space_hi,_mm256_cmpeq_epi8(in.hi, mask_linefeed));
// horizontal tab (0x09)
const __m256i mask_tab = _mm256_set1_epi8(0x09);
space_lo = _mm256_or_si256(space_lo,_mm256_cmpeq_epi8(in.lo, mask_tab));
space_hi = _mm256_or_si256(space_hi,_mm256_cmpeq_epi8(in.hi, mask_tab));
// carriage return (0x0d)
const __m256i mask_carriage = _mm256_set1_epi8(0x0d);
space_lo = _mm256_or_si256(space_lo,_mm256_cmpeq_epi8(in.lo, mask_carriage));
space_hi = _mm256_or_si256(space_hi,_mm256_cmpeq_epi8(in.hi, mask_carriage));
// Same 32+32 -> 64-bit packing as for the structural bitmap above.
// The equality comparisons already yield a set bit for whitespace bytes, so
// the combined mask is stored as-is (no bitwise complement).
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(space_lo));
uint64_t ws_res_1 = _mm256_movemask_epi8(space_hi);
whitespace = (ws_res_0 | (ws_res_1 << 32));
// end of naive approach
#else // SIMDJSON_NAIVE_STRUCTURAL
const __m256i low_nibble_mask = _mm256_setr_epi8(
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0,
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
@ -334,6 +378,7 @@ really_inline void find_whitespace_and_structurals(simd_input in,
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
whitespace = ~(ws_res_0 | (ws_res_1 << 32));
#endif // SIMDJSON_NAIVE_STRUCTURAL
#elif defined(__ARM_NEON)
#ifndef FUNKY_BAD_TABLE
const uint8x16_t low_nibble_mask = (uint8x16_t){