From 8f01cece3ac42dc42e2cd27b5a27f2c9132e9782 Mon Sep 17 00:00:00 2001 From: John Keiser Date: Tue, 13 Aug 2019 17:44:06 -0700 Subject: [PATCH 1/3] Move simd_input and associated functions to their own header --- include/simdjson/simd_input.h | 26 +++++++ include/simdjson/simd_input_arm64.h | 78 +++++++++++++++++++ include/simdjson/simd_input_haswell.h | 52 +++++++++++++ include/simdjson/simd_input_westmere.h | 64 +++++++++++++++ include/simdjson/stage1_find_marks.h | 14 +--- include/simdjson/stage1_find_marks_arm64.h | 68 +--------------- include/simdjson/stage1_find_marks_haswell.h | 36 +-------- include/simdjson/stage1_find_marks_westmere.h | 48 +----------- 8 files changed, 226 insertions(+), 160 deletions(-) create mode 100644 include/simdjson/simd_input.h create mode 100644 include/simdjson/simd_input_arm64.h create mode 100644 include/simdjson/simd_input_haswell.h create mode 100644 include/simdjson/simd_input_westmere.h diff --git a/include/simdjson/simd_input.h b/include/simdjson/simd_input.h new file mode 100644 index 00000000..085d89b4 --- /dev/null +++ b/include/simdjson/simd_input.h @@ -0,0 +1,26 @@ +#ifndef SIMDJSON_SIMD_INPUT_H +#define SIMDJSON_SIMD_INPUT_H + +#include "simdjson/common_defs.h" +#include "simdjson/portability.h" +#include "simdjson/simdjson.h" +#include + +namespace simdjson { + +template struct simd_input; + +// a straightforward comparison of a mask against input. 
+template +uint64_t cmp_mask_against_input(simd_input in, uint8_t m); + +template simd_input fill_input(const uint8_t *ptr); + +// find all values less than or equal than the content of maxval (using unsigned +// arithmetic) +template +uint64_t unsigned_lteq_against_input(simd_input in, uint8_t m); + +} // namespace simdjson + +#endif diff --git a/include/simdjson/simd_input_arm64.h b/include/simdjson/simd_input_arm64.h new file mode 100644 index 00000000..658194a2 --- /dev/null +++ b/include/simdjson/simd_input_arm64.h @@ -0,0 +1,78 @@ +#ifndef SIMDJSON_SIMD_INPUT_ARM64_H +#define SIMDJSON_SIMD_INPUT_ARM64_H + +#include "simdjson/simd_input.h" + +#ifdef IS_ARM64 +namespace simdjson { + +template <> +struct simd_input { + uint8x16_t i0; + uint8x16_t i1; + uint8x16_t i2; + uint8x16_t i3; +}; + +template <> +really_inline simd_input +fill_input(const uint8_t *ptr) { + struct simd_input in; + in.i0 = vld1q_u8(ptr + 0); + in.i1 = vld1q_u8(ptr + 16); + in.i2 = vld1q_u8(ptr + 32); + in.i3 = vld1q_u8(ptr + 48); + return in; +} + +really_inline uint16_t neon_movemask(uint8x16_t input) { + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; + uint8x16_t minput = vandq_u8(input, bit_mask); + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); +} + +really_inline uint64_t neon_movemask_bulk(uint8x16_t p0, uint8x16_t p1, + uint8x16_t p2, uint8x16_t p3) { + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; + uint8x16_t t0 = vandq_u8(p0, bit_mask); + uint8x16_t t1 = vandq_u8(p1, bit_mask); + uint8x16_t t2 = vandq_u8(p2, bit_mask); + uint8x16_t t3 = vandq_u8(p3, bit_mask); + uint8x16_t sum0 = vpaddq_u8(t0, t1); + uint8x16_t sum1 = vpaddq_u8(t2, t3); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return 
vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); +} + +template <> +really_inline uint64_t cmp_mask_against_input( + simd_input in, uint8_t m) { + const uint8x16_t mask = vmovq_n_u8(m); + uint8x16_t cmp_res_0 = vceqq_u8(in.i0, mask); + uint8x16_t cmp_res_1 = vceqq_u8(in.i1, mask); + uint8x16_t cmp_res_2 = vceqq_u8(in.i2, mask); + uint8x16_t cmp_res_3 = vceqq_u8(in.i3, mask); + return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3); +} + +template <> +really_inline uint64_t unsigned_lteq_against_input( + simd_input in, uint8_t m) { + const uint8x16_t mask = vmovq_n_u8(m); + uint8x16_t cmp_res_0 = vcleq_u8(in.i0, mask); + uint8x16_t cmp_res_1 = vcleq_u8(in.i1, mask); + uint8x16_t cmp_res_2 = vcleq_u8(in.i2, mask); + uint8x16_t cmp_res_3 = vcleq_u8(in.i3, mask); + return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3); +} + +} // namespace simdjson + +#endif // IS_ARM64 +#endif // SIMDJSON_SIMD_INPUT_ARM64_H diff --git a/include/simdjson/simd_input_haswell.h b/include/simdjson/simd_input_haswell.h new file mode 100644 index 00000000..8f5b9973 --- /dev/null +++ b/include/simdjson/simd_input_haswell.h @@ -0,0 +1,52 @@ +#ifndef SIMDJSON_SIMD_INPUT_HASWELL_H +#define SIMDJSON_SIMD_INPUT_HASWELL_H + +#include "simdjson/simd_input.h" + +#ifdef IS_X86_64 + +TARGET_HASWELL +namespace simdjson { + +template <> +struct simd_input { + __m256i lo; + __m256i hi; +}; + +template <> +really_inline simd_input +fill_input(const uint8_t *ptr) { + struct simd_input in; + in.lo = _mm256_loadu_si256(reinterpret_cast(ptr + 0)); + in.hi = _mm256_loadu_si256(reinterpret_cast(ptr + 32)); + return in; +} + +template <> +really_inline uint64_t cmp_mask_against_input( + simd_input in, uint8_t m) { + const __m256i mask = _mm256_set1_epi8(m); + __m256i cmp_res_0 = _mm256_cmpeq_epi8(in.lo, mask); + uint64_t res_0 = static_cast(_mm256_movemask_epi8(cmp_res_0)); + __m256i cmp_res_1 = _mm256_cmpeq_epi8(in.hi, mask); + uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); + 
return res_0 | (res_1 << 32); +} + +template <> +really_inline uint64_t unsigned_lteq_against_input( + simd_input in, uint8_t m) { + const __m256i maxval = _mm256_set1_epi8(m); + __m256i cmp_res_0 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, in.lo), maxval); + uint64_t res_0 = static_cast(_mm256_movemask_epi8(cmp_res_0)); + __m256i cmp_res_1 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, in.hi), maxval); + uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); + return res_0 | (res_1 << 32); +} + +} // namespace simdjson +UNTARGET_REGION + +#endif // IS_X86_64 +#endif // SIMDJSON_SIMD_INPUT_HASWELL_H diff --git a/include/simdjson/simd_input_westmere.h b/include/simdjson/simd_input_westmere.h new file mode 100644 index 00000000..3082946c --- /dev/null +++ b/include/simdjson/simd_input_westmere.h @@ -0,0 +1,64 @@ +#ifndef SIMDJSON_SIMD_INPUT_WESTMERE_H +#define SIMDJSON_SIMD_INPUT_WESTMERE_H + +#include "simdjson/simd_input.h" + +#ifdef IS_X86_64 + +TARGET_WESTMERE +namespace simdjson { + +template <> +struct simd_input { + __m128i v0; + __m128i v1; + __m128i v2; + __m128i v3; +}; + +template <> +really_inline simd_input +fill_input(const uint8_t *ptr) { + struct simd_input in; + in.v0 = _mm_loadu_si128(reinterpret_cast(ptr + 0)); + in.v1 = _mm_loadu_si128(reinterpret_cast(ptr + 16)); + in.v2 = _mm_loadu_si128(reinterpret_cast(ptr + 32)); + in.v3 = _mm_loadu_si128(reinterpret_cast(ptr + 48)); + return in; +} + +template <> +really_inline uint64_t cmp_mask_against_input( + simd_input in, uint8_t m) { + const __m128i mask = _mm_set1_epi8(m); + __m128i cmp_res_0 = _mm_cmpeq_epi8(in.v0, mask); + uint64_t res_0 = _mm_movemask_epi8(cmp_res_0); + __m128i cmp_res_1 = _mm_cmpeq_epi8(in.v1, mask); + uint64_t res_1 = _mm_movemask_epi8(cmp_res_1); + __m128i cmp_res_2 = _mm_cmpeq_epi8(in.v2, mask); + uint64_t res_2 = _mm_movemask_epi8(cmp_res_2); + __m128i cmp_res_3 = _mm_cmpeq_epi8(in.v3, mask); + uint64_t res_3 = _mm_movemask_epi8(cmp_res_3); + return res_0 | (res_1 << 16) | (res_2 << 
32) | (res_3 << 48); +} + +template <> +really_inline uint64_t unsigned_lteq_against_input( + simd_input in, uint8_t m) { + const __m128i maxval = _mm_set1_epi8(m); + __m128i cmp_res_0 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v0), maxval); + uint64_t res_0 = _mm_movemask_epi8(cmp_res_0); + __m128i cmp_res_1 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v1), maxval); + uint64_t res_1 = _mm_movemask_epi8(cmp_res_1); + __m128i cmp_res_2 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v2), maxval); + uint64_t res_2 = _mm_movemask_epi8(cmp_res_2); + __m128i cmp_res_3 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v3), maxval); + uint64_t res_3 = _mm_movemask_epi8(cmp_res_3); + return res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48); +} + +} // namespace simdjson +UNTARGET_REGION + +#endif // IS_X86_64 +#endif // SIMDJSON_SIMD_INPUT_WESTMERE_H diff --git a/include/simdjson/stage1_find_marks.h b/include/simdjson/stage1_find_marks.h index 24a0bd15..f0a644e7 100644 --- a/include/simdjson/stage1_find_marks.h +++ b/include/simdjson/stage1_find_marks.h @@ -5,12 +5,11 @@ #include "simdjson/parsedjson.h" #include "simdjson/portability.h" #include "simdjson/simdjson.h" +#include "simdjson/simd_input.h" #include namespace simdjson { -template struct simd_input; - template uint64_t compute_quote_mask(uint64_t quote_bits); namespace { @@ -36,17 +35,6 @@ void check_utf8(simd_input in, utf8_checking_state &state); template ErrorValues check_utf8_errors(utf8_checking_state &state); -// a straightforward comparison of a mask against input. 
-template -uint64_t cmp_mask_against_input(simd_input in, uint8_t m); - -template simd_input fill_input(const uint8_t *ptr); - -// find all values less than or equal than the content of maxval (using unsigned -// arithmetic) -template -uint64_t unsigned_lteq_against_input(simd_input in, uint8_t m); - template really_inline uint64_t find_odd_backslash_sequences( simd_input in, uint64_t &prev_iter_ends_odd_backslash); diff --git a/include/simdjson/stage1_find_marks_arm64.h b/include/simdjson/stage1_find_marks_arm64.h index 51a77879..412ef849 100644 --- a/include/simdjson/stage1_find_marks_arm64.h +++ b/include/simdjson/stage1_find_marks_arm64.h @@ -1,53 +1,12 @@ #ifndef SIMDJSON_STAGE1_FIND_MARKS_ARM64_H #define SIMDJSON_STAGE1_FIND_MARKS_ARM64_H +#include "simdjson/simd_input_arm64.h" #include "simdjson/simdutf8check_arm64.h" #include "simdjson/stage1_find_marks.h" #ifdef IS_ARM64 namespace simdjson { -template <> struct simd_input { - uint8x16_t i0; - uint8x16_t i1; - uint8x16_t i2; - uint8x16_t i3; -}; - -template <> -really_inline simd_input -fill_input(const uint8_t *ptr) { - struct simd_input in; - in.i0 = vld1q_u8(ptr + 0); - in.i1 = vld1q_u8(ptr + 16); - in.i2 = vld1q_u8(ptr + 32); - in.i3 = vld1q_u8(ptr + 48); - return in; -} - -really_inline uint16_t neon_movemask(uint8x16_t input) { - const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; - uint8x16_t minput = vandq_u8(input, bit_mask); - uint8x16_t tmp = vpaddq_u8(minput, minput); - tmp = vpaddq_u8(tmp, tmp); - tmp = vpaddq_u8(tmp, tmp); - return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); -} - -really_inline uint64_t neon_movemask_bulk(uint8x16_t p0, uint8x16_t p1, - uint8x16_t p2, uint8x16_t p3) { - const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; - uint8x16_t t0 = vandq_u8(p0, bit_mask); - uint8x16_t t1 = vandq_u8(p1, bit_mask); - uint8x16_t t2 = vandq_u8(p2, 
bit_mask); - uint8x16_t t3 = vandq_u8(p3, bit_mask); - uint8x16_t sum0 = vpaddq_u8(t0, t1); - uint8x16_t sum1 = vpaddq_u8(t2, t3); - sum0 = vpaddq_u8(sum0, sum1); - sum0 = vpaddq_u8(sum0, sum0); - return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); -} template <> really_inline uint64_t @@ -59,7 +18,8 @@ compute_quote_mask(uint64_t quote_bits) { #endif } -template <> struct utf8_checking_state { +template <> +struct utf8_checking_state { int8x16_t has_error{}; processed_utf_bytes previous{}; }; @@ -115,28 +75,6 @@ really_inline ErrorValues check_utf8_errors( : simdjson::SUCCESS; } -template <> -really_inline uint64_t cmp_mask_against_input( - simd_input in, uint8_t m) { - const uint8x16_t mask = vmovq_n_u8(m); - uint8x16_t cmp_res_0 = vceqq_u8(in.i0, mask); - uint8x16_t cmp_res_1 = vceqq_u8(in.i1, mask); - uint8x16_t cmp_res_2 = vceqq_u8(in.i2, mask); - uint8x16_t cmp_res_3 = vceqq_u8(in.i3, mask); - return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3); -} - -template <> -really_inline uint64_t unsigned_lteq_against_input( - simd_input in, uint8_t m) { - const uint8x16_t mask = vmovq_n_u8(m); - uint8x16_t cmp_res_0 = vcleq_u8(in.i0, mask); - uint8x16_t cmp_res_1 = vcleq_u8(in.i1, mask); - uint8x16_t cmp_res_2 = vcleq_u8(in.i2, mask); - uint8x16_t cmp_res_3 = vcleq_u8(in.i3, mask); - return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3); -} - template <> really_inline void find_whitespace_and_structurals( simd_input in, uint64_t &whitespace, diff --git a/include/simdjson/stage1_find_marks_haswell.h b/include/simdjson/stage1_find_marks_haswell.h index d6ff7113..ae589e66 100644 --- a/include/simdjson/stage1_find_marks_haswell.h +++ b/include/simdjson/stage1_find_marks_haswell.h @@ -1,6 +1,7 @@ #ifndef SIMDJSON_STAGE1_FIND_MARKS_HASWELL_H #define SIMDJSON_STAGE1_FIND_MARKS_HASWELL_H +#include "simdjson/simd_input_haswell.h" #include "simdjson/simdutf8check_haswell.h" #include "simdjson/stage1_find_marks.h" @@ -8,19 +9,6 @@ 
TARGET_HASWELL namespace simdjson { -template <> struct simd_input { - __m256i lo; - __m256i hi; -}; - -template <> -really_inline simd_input -fill_input(const uint8_t *ptr) { - struct simd_input in; - in.lo = _mm256_loadu_si256(reinterpret_cast(ptr + 0)); - in.hi = _mm256_loadu_si256(reinterpret_cast(ptr + 32)); - return in; -} template <> really_inline uint64_t @@ -73,28 +61,6 @@ really_inline ErrorValues check_utf8_errors( : simdjson::SUCCESS; } -template <> -really_inline uint64_t cmp_mask_against_input( - simd_input in, uint8_t m) { - const __m256i mask = _mm256_set1_epi8(m); - __m256i cmp_res_0 = _mm256_cmpeq_epi8(in.lo, mask); - uint64_t res_0 = static_cast(_mm256_movemask_epi8(cmp_res_0)); - __m256i cmp_res_1 = _mm256_cmpeq_epi8(in.hi, mask); - uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); - return res_0 | (res_1 << 32); -} - -template <> -really_inline uint64_t unsigned_lteq_against_input( - simd_input in, uint8_t m) { - const __m256i maxval = _mm256_set1_epi8(m); - __m256i cmp_res_0 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, in.lo), maxval); - uint64_t res_0 = static_cast(_mm256_movemask_epi8(cmp_res_0)); - __m256i cmp_res_1 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, in.hi), maxval); - uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); - return res_0 | (res_1 << 32); -} - template <> really_inline void find_whitespace_and_structurals( simd_input in, uint64_t &whitespace, diff --git a/include/simdjson/stage1_find_marks_westmere.h b/include/simdjson/stage1_find_marks_westmere.h index f39b8a96..7336a2a7 100644 --- a/include/simdjson/stage1_find_marks_westmere.h +++ b/include/simdjson/stage1_find_marks_westmere.h @@ -1,6 +1,7 @@ #ifndef SIMDJSON_STAGE1_FIND_MARKS_WESTMERE_H #define SIMDJSON_STAGE1_FIND_MARKS_WESTMERE_H +#include "simdjson/simd_input_westmere.h" #include "simdjson/simdutf8check_westmere.h" #include "simdjson/stage1_find_marks.h" @@ -8,23 +9,6 @@ TARGET_WESTMERE namespace simdjson { -template <> struct simd_input { - __m128i v0; - __m128i 
v1; - __m128i v2; - __m128i v3; -}; - -template <> -really_inline simd_input -fill_input(const uint8_t *ptr) { - struct simd_input in; - in.v0 = _mm_loadu_si128(reinterpret_cast(ptr + 0)); - in.v1 = _mm_loadu_si128(reinterpret_cast(ptr + 16)); - in.v2 = _mm_loadu_si128(reinterpret_cast(ptr + 32)); - in.v3 = _mm_loadu_si128(reinterpret_cast(ptr + 48)); - return in; -} template <> really_inline uint64_t @@ -86,36 +70,6 @@ really_inline ErrorValues check_utf8_errors( : simdjson::SUCCESS; } -template <> -really_inline uint64_t cmp_mask_against_input( - simd_input in, uint8_t m) { - const __m128i mask = _mm_set1_epi8(m); - __m128i cmp_res_0 = _mm_cmpeq_epi8(in.v0, mask); - uint64_t res_0 = _mm_movemask_epi8(cmp_res_0); - __m128i cmp_res_1 = _mm_cmpeq_epi8(in.v1, mask); - uint64_t res_1 = _mm_movemask_epi8(cmp_res_1); - __m128i cmp_res_2 = _mm_cmpeq_epi8(in.v2, mask); - uint64_t res_2 = _mm_movemask_epi8(cmp_res_2); - __m128i cmp_res_3 = _mm_cmpeq_epi8(in.v3, mask); - uint64_t res_3 = _mm_movemask_epi8(cmp_res_3); - return res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48); -} - -template <> -really_inline uint64_t unsigned_lteq_against_input( - simd_input in, uint8_t m) { - const __m128i maxval = _mm_set1_epi8(m); - __m128i cmp_res_0 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v0), maxval); - uint64_t res_0 = _mm_movemask_epi8(cmp_res_0); - __m128i cmp_res_1 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v1), maxval); - uint64_t res_1 = _mm_movemask_epi8(cmp_res_1); - __m128i cmp_res_2 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v2), maxval); - uint64_t res_2 = _mm_movemask_epi8(cmp_res_2); - __m128i cmp_res_3 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v3), maxval); - uint64_t res_3 = _mm_movemask_epi8(cmp_res_3); - return res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48); -} - template <> really_inline void find_whitespace_and_structurals( simd_input in, uint64_t &whitespace, From 237b8865f533ea70904053d2ddcb35d091366602 Mon Sep 17 00:00:00 2001 From: John Keiser Date: Tue, 
13 Aug 2019 17:44:26 -0700 Subject: [PATCH 2/3] Correct header #define --- include/simdjson/simdjson.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/simdjson/simdjson.h b/include/simdjson/simdjson.h index ffb0c717..4f08f89e 100644 --- a/include/simdjson/simdjson.h +++ b/include/simdjson/simdjson.h @@ -1,5 +1,5 @@ -#ifndef SIMDJSON_ERR_H -#define SIMDJSON_ERR_H +#ifndef SIMDJSON_SIMDJSON_H +#define SIMDJSON_SIMDJSON_H #include @@ -41,4 +41,4 @@ enum ErrorValues { }; const std::string &error_message(const int); } // namespace simdjson -#endif +#endif // SIMDJSON_SIMDJSON_H From 0042d9b406fc9dc006e455bcb95f21d888cee528 Mon Sep 17 00:00:00 2001 From: John Keiser Date: Wed, 14 Aug 2019 09:45:33 -0700 Subject: [PATCH 3/3] Move UTF8 checking functions into their own file --- amalgamation.sh | 5 + include/simdjson/simd_input.h | 5 +- include/simdjson/simdutf8check.h | 21 + include/simdjson/simdutf8check_arm64.h | 59 + include/simdjson/simdutf8check_haswell.h | 43 + include/simdjson/simdutf8check_westmere.h | 56 + include/simdjson/stage1_find_marks_arm64.h | 57 - include/simdjson/stage1_find_marks_haswell.h | 41 - include/simdjson/stage1_find_marks_westmere.h | 53 - singleheader/amalgamation_demo.cpp | 2 +- singleheader/simdjson.cpp | 3142 ++++++++++++++--- singleheader/simdjson.h | 1749 ++++----- 12 files changed, 3547 insertions(+), 1686 deletions(-) create mode 100644 include/simdjson/simdutf8check.h diff --git a/amalgamation.sh b/amalgamation.sh index 3ae78910..34aa48f9 100755 --- a/amalgamation.sh +++ b/amalgamation.sh @@ -36,6 +36,11 @@ $SCRIPTPATH/include/simdjson/jsoncharutils.h $SCRIPTPATH/include/simdjson/jsonformatutils.h $SCRIPTPATH/include/simdjson/jsonioutil.h $SCRIPTPATH/include/simdjson/simdprune_tables.h +$SCRIPTPATH/include/simdjson/simd_input.h +$SCRIPTPATH/include/simdjson/simd_input_haswell.h +$SCRIPTPATH/include/simdjson/simd_input_westmere.h +$SCRIPTPATH/include/simdjson/simd_input_arm64.h 
+$SCRIPTPATH/include/simdjson/simdutf8check.h $SCRIPTPATH/include/simdjson/simdutf8check_haswell.h $SCRIPTPATH/include/simdjson/simdutf8check_westmere.h $SCRIPTPATH/include/simdjson/simdutf8check_arm64.h diff --git a/include/simdjson/simd_input.h b/include/simdjson/simd_input.h index 085d89b4..f834c442 100644 --- a/include/simdjson/simd_input.h +++ b/include/simdjson/simd_input.h @@ -10,12 +10,13 @@ namespace simdjson { template struct simd_input; +template +simd_input fill_input(const uint8_t *ptr); + // a straightforward comparison of a mask against input. template uint64_t cmp_mask_against_input(simd_input in, uint8_t m); -template simd_input fill_input(const uint8_t *ptr); - // find all values less than or equal than the content of maxval (using unsigned // arithmetic) template diff --git a/include/simdjson/simdutf8check.h b/include/simdjson/simdutf8check.h new file mode 100644 index 00000000..6097e28e --- /dev/null +++ b/include/simdjson/simdutf8check.h @@ -0,0 +1,21 @@ +#ifndef SIMDJSON_SIMDUTF8CHECK_H +#define SIMDJSON_SIMDUTF8CHECK_H + +#include "simdjson/simdjson.h" +#include "simdjson/simd_input.h" + +namespace simdjson { + +// Holds the state required to perform check_utf8(). +template struct utf8_checking_state; + +template +void check_utf8(simd_input in, utf8_checking_state &state); + +// Checks if the utf8 validation has found any error. 
+template +ErrorValues check_utf8_errors(utf8_checking_state &state); + +} // namespace simdjson + +#endif // SIMDJSON_SIMDUTF8CHECK_H diff --git a/include/simdjson/simdutf8check_arm64.h b/include/simdjson/simdutf8check_arm64.h index 6360b012..4b0baa30 100644 --- a/include/simdjson/simdutf8check_arm64.h +++ b/include/simdjson/simdutf8check_arm64.h @@ -7,6 +7,7 @@ #if defined(_ARM_NEON) || defined(__aarch64__) || \ (defined(_MSC_VER) && defined(_M_ARM64)) +#include "simdjson/simdutf8check.h" #include #include #include @@ -175,6 +176,64 @@ check_utf8_bytes(int8x16_t current_bytes, struct processed_utf_bytes *previous, previous->high_nibbles, has_error); return pb; } + +template <> +struct utf8_checking_state { + int8x16_t has_error{}; + processed_utf_bytes previous{}; +}; + +// Checks that all bytes are ascii +really_inline bool check_ascii_neon(simd_input in) { + // checking if the most significant bit is always equal to 0. + uint8x16_t high_bit = vdupq_n_u8(0x80); + uint8x16_t t0 = vorrq_u8(in.i0, in.i1); + uint8x16_t t1 = vorrq_u8(in.i2, in.i3); + uint8x16_t t3 = vorrq_u8(t0, t1); + uint8x16_t t4 = vandq_u8(t3, high_bit); + uint64x2_t v64 = vreinterpretq_u64_u8(t4); + uint32x2_t v32 = vqmovn_u64(v64); + uint64x1_t result = vreinterpret_u64_u32(v32); + return vget_lane_u64(result, 0) == 0; +} + +template <> +really_inline void check_utf8( + simd_input in, + utf8_checking_state &state) { + if (check_ascii_neon(in)) { + // All bytes are ascii. Therefore the byte that was just before must be + // ascii too. We only check the byte that was just before simd_input. Nines + // are arbitrary values. 
+ const int8x16_t verror = + (int8x16_t){9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1}; + state.has_error = + vorrq_s8(vreinterpretq_s8_u8( + vcgtq_s8(state.previous.carried_continuations, verror)), + state.has_error); + } else { + // it is not ascii so we have to do heavy work + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i0), + &(state.previous), &(state.has_error)); + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i1), + &(state.previous), &(state.has_error)); + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i2), + &(state.previous), &(state.has_error)); + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i3), + &(state.previous), &(state.has_error)); + } +} + +template <> +really_inline ErrorValues check_utf8_errors( + utf8_checking_state &state) { + uint64x2_t v64 = vreinterpretq_u64_s8(state.has_error); + uint32x2_t v32 = vqmovn_u64(v64); + uint64x1_t result = vreinterpret_u64_u32(v32); + return vget_lane_u64(result, 0) != 0 ? simdjson::UTF8_ERROR + : simdjson::SUCCESS; +} + } // namespace simdjson #endif #endif diff --git a/include/simdjson/simdutf8check_haswell.h b/include/simdjson/simdutf8check_haswell.h index 6097af0c..355d6247 100644 --- a/include/simdjson/simdutf8check_haswell.h +++ b/include/simdjson/simdutf8check_haswell.h @@ -2,6 +2,7 @@ #define SIMDJSON_SIMDUTF8CHECK_HASWELL_H #include "simdjson/portability.h" +#include "simdjson/simdutf8check.h" #include #include #include @@ -190,6 +191,48 @@ avx_check_utf8_bytes(__m256i current_bytes, previous->high_nibbles, has_error); return pb; } + +template <> struct utf8_checking_state { + __m256i has_error; + avx_processed_utf_bytes previous; + utf8_checking_state() { + has_error = _mm256_setzero_si256(); + previous.raw_bytes = _mm256_setzero_si256(); + previous.high_nibbles = _mm256_setzero_si256(); + previous.carried_continuations = _mm256_setzero_si256(); + } +}; + +template <> +really_inline void check_utf8( + simd_input in, + utf8_checking_state &state) { + 
__m256i high_bit = _mm256_set1_epi8(0x80u); + if ((_mm256_testz_si256(_mm256_or_si256(in.lo, in.hi), high_bit)) == 1) { + // it is ascii, we just check continuation + state.has_error = _mm256_or_si256( + _mm256_cmpgt_epi8(state.previous.carried_continuations, + _mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 1)), + state.has_error); + } else { + // it is not ascii so we have to do heavy work + state.previous = + avx_check_utf8_bytes(in.lo, &(state.previous), &(state.has_error)); + state.previous = + avx_check_utf8_bytes(in.hi, &(state.previous), &(state.has_error)); + } +} + +template <> +really_inline ErrorValues check_utf8_errors( + utf8_checking_state &state) { + return _mm256_testz_si256(state.has_error, state.has_error) == 0 + ? simdjson::UTF8_ERROR + : simdjson::SUCCESS; +} + } // namespace simdjson UNTARGET_REGION // haswell diff --git a/include/simdjson/simdutf8check_westmere.h b/include/simdjson/simdutf8check_westmere.h index cf57fec9..46361cdb 100644 --- a/include/simdjson/simdutf8check_westmere.h +++ b/include/simdjson/simdutf8check_westmere.h @@ -2,6 +2,7 @@ #define SIMDJSON_SIMDUTF8CHECK_WESTMERE_H #include "simdjson/portability.h" +#include "simdjson/simdutf8check.h" #include #include #include @@ -161,6 +162,61 @@ check_utf8_bytes(__m128i current_bytes, struct processed_utf_bytes *previous, previous->high_nibbles, has_error); return pb; } + +template <> +struct utf8_checking_state { + __m128i has_error = _mm_setzero_si128(); + processed_utf_bytes previous{ + _mm_setzero_si128(), // raw_bytes + _mm_setzero_si128(), // high_nibbles + _mm_setzero_si128() // carried_continuations + }; +}; + +template <> +really_inline void check_utf8( + simd_input in, + utf8_checking_state &state) { + __m128i high_bit = _mm_set1_epi8(0x80u); + if ((_mm_testz_si128(_mm_or_si128(in.v0, in.v1), high_bit)) == 1) { + // it is ascii, we just check continuation + state.has_error = + 
_mm_or_si128(_mm_cmpgt_epi8(state.previous.carried_continuations, + _mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 1)), + state.has_error); + } else { + // it is not ascii so we have to do heavy work + state.previous = + check_utf8_bytes(in.v0, &(state.previous), &(state.has_error)); + state.previous = + check_utf8_bytes(in.v1, &(state.previous), &(state.has_error)); + } + + if ((_mm_testz_si128(_mm_or_si128(in.v2, in.v3), high_bit)) == 1) { + // it is ascii, we just check continuation + state.has_error = + _mm_or_si128(_mm_cmpgt_epi8(state.previous.carried_continuations, + _mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 1)), + state.has_error); + } else { + // it is not ascii so we have to do heavy work + state.previous = + check_utf8_bytes(in.v2, &(state.previous), &(state.has_error)); + state.previous = + check_utf8_bytes(in.v3, &(state.previous), &(state.has_error)); + } +} + +template <> +really_inline ErrorValues check_utf8_errors( + utf8_checking_state &state) { + return _mm_testz_si128(state.has_error, state.has_error) == 0 + ? simdjson::UTF8_ERROR + : simdjson::SUCCESS; +} + } // namespace simdjson UNTARGET_REGION // westmere diff --git a/include/simdjson/stage1_find_marks_arm64.h b/include/simdjson/stage1_find_marks_arm64.h index 412ef849..3edeaaa2 100644 --- a/include/simdjson/stage1_find_marks_arm64.h +++ b/include/simdjson/stage1_find_marks_arm64.h @@ -18,63 +18,6 @@ compute_quote_mask(uint64_t quote_bits) { #endif } -template <> -struct utf8_checking_state { - int8x16_t has_error{}; - processed_utf_bytes previous{}; -}; - -// Checks that all bytes are ascii -really_inline bool check_ascii_neon(simd_input in) { - // checking if the most significant bit is always equal to 0. 
- uint8x16_t high_bit = vdupq_n_u8(0x80); - uint8x16_t t0 = vorrq_u8(in.i0, in.i1); - uint8x16_t t1 = vorrq_u8(in.i2, in.i3); - uint8x16_t t3 = vorrq_u8(t0, t1); - uint8x16_t t4 = vandq_u8(t3, high_bit); - uint64x2_t v64 = vreinterpretq_u64_u8(t4); - uint32x2_t v32 = vqmovn_u64(v64); - uint64x1_t result = vreinterpret_u64_u32(v32); - return vget_lane_u64(result, 0) == 0; -} - -template <> -really_inline void check_utf8( - simd_input in, - utf8_checking_state &state) { - if (check_ascii_neon(in)) { - // All bytes are ascii. Therefore the byte that was just before must be - // ascii too. We only check the byte that was just before simd_input. Nines - // are arbitrary values. - const int8x16_t verror = - (int8x16_t){9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1}; - state.has_error = - vorrq_s8(vreinterpretq_s8_u8( - vcgtq_s8(state.previous.carried_continuations, verror)), - state.has_error); - } else { - // it is not ascii so we have to do heavy work - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i0), - &(state.previous), &(state.has_error)); - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i1), - &(state.previous), &(state.has_error)); - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i2), - &(state.previous), &(state.has_error)); - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i3), - &(state.previous), &(state.has_error)); - } -} - -template <> -really_inline ErrorValues check_utf8_errors( - utf8_checking_state &state) { - uint64x2_t v64 = vreinterpretq_u64_s8(state.has_error); - uint32x2_t v32 = vqmovn_u64(v64); - uint64x1_t result = vreinterpret_u64_u32(v32); - return vget_lane_u64(result, 0) != 0 ? 
simdjson::UTF8_ERROR - : simdjson::SUCCESS; -} - template <> really_inline void find_whitespace_and_structurals( simd_input in, uint64_t &whitespace, diff --git a/include/simdjson/stage1_find_marks_haswell.h b/include/simdjson/stage1_find_marks_haswell.h index ae589e66..c43f33c3 100644 --- a/include/simdjson/stage1_find_marks_haswell.h +++ b/include/simdjson/stage1_find_marks_haswell.h @@ -20,47 +20,6 @@ compute_quote_mask(uint64_t quote_bits) { return quote_mask; } -template <> struct utf8_checking_state { - __m256i has_error; - avx_processed_utf_bytes previous; - utf8_checking_state() { - has_error = _mm256_setzero_si256(); - previous.raw_bytes = _mm256_setzero_si256(); - previous.high_nibbles = _mm256_setzero_si256(); - previous.carried_continuations = _mm256_setzero_si256(); - } -}; - -template <> -really_inline void check_utf8( - simd_input in, - utf8_checking_state &state) { - __m256i high_bit = _mm256_set1_epi8(0x80u); - if ((_mm256_testz_si256(_mm256_or_si256(in.lo, in.hi), high_bit)) == 1) { - // it is ascii, we just check continuation - state.has_error = _mm256_or_si256( - _mm256_cmpgt_epi8(state.previous.carried_continuations, - _mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 1)), - state.has_error); - } else { - // it is not ascii so we have to do heavy work - state.previous = - avx_check_utf8_bytes(in.lo, &(state.previous), &(state.has_error)); - state.previous = - avx_check_utf8_bytes(in.hi, &(state.previous), &(state.has_error)); - } -} - -template <> -really_inline ErrorValues check_utf8_errors( - utf8_checking_state &state) { - return _mm256_testz_si256(state.has_error, state.has_error) == 0 - ? 
simdjson::UTF8_ERROR - : simdjson::SUCCESS; -} - template <> really_inline void find_whitespace_and_structurals( simd_input in, uint64_t &whitespace, diff --git a/include/simdjson/stage1_find_marks_westmere.h b/include/simdjson/stage1_find_marks_westmere.h index 7336a2a7..082c8db2 100644 --- a/include/simdjson/stage1_find_marks_westmere.h +++ b/include/simdjson/stage1_find_marks_westmere.h @@ -17,59 +17,6 @@ compute_quote_mask(uint64_t quote_bits) { _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFFu), 0)); } -template <> struct utf8_checking_state { - __m128i has_error = _mm_setzero_si128(); - processed_utf_bytes previous{ - _mm_setzero_si128(), // raw_bytes - _mm_setzero_si128(), // high_nibbles - _mm_setzero_si128() // carried_continuations - }; -}; - -template <> -really_inline void check_utf8( - simd_input in, - utf8_checking_state &state) { - __m128i high_bit = _mm_set1_epi8(0x80u); - if ((_mm_testz_si128(_mm_or_si128(in.v0, in.v1), high_bit)) == 1) { - // it is ascii, we just check continuation - state.has_error = - _mm_or_si128(_mm_cmpgt_epi8(state.previous.carried_continuations, - _mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 1)), - state.has_error); - } else { - // it is not ascii so we have to do heavy work - state.previous = - check_utf8_bytes(in.v0, &(state.previous), &(state.has_error)); - state.previous = - check_utf8_bytes(in.v1, &(state.previous), &(state.has_error)); - } - - if ((_mm_testz_si128(_mm_or_si128(in.v2, in.v3), high_bit)) == 1) { - // it is ascii, we just check continuation - state.has_error = - _mm_or_si128(_mm_cmpgt_epi8(state.previous.carried_continuations, - _mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 1)), - state.has_error); - } else { - // it is not ascii so we have to do heavy work - state.previous = - check_utf8_bytes(in.v2, &(state.previous), &(state.has_error)); - state.previous = - check_utf8_bytes(in.v3, &(state.previous), &(state.has_error)); - } -} - -template <> -really_inline 
ErrorValues check_utf8_errors( - utf8_checking_state &state) { - return _mm_testz_si128(state.has_error, state.has_error) == 0 - ? simdjson::UTF8_ERROR - : simdjson::SUCCESS; -} - template <> really_inline void find_whitespace_and_structurals( simd_input in, uint64_t &whitespace, diff --git a/singleheader/amalgamation_demo.cpp b/singleheader/amalgamation_demo.cpp index 789e0b52..71b1e9c9 100644 --- a/singleheader/amalgamation_demo.cpp +++ b/singleheader/amalgamation_demo.cpp @@ -1,4 +1,4 @@ -/* auto-generated on Sun Aug 4 15:43:41 EDT 2019. Do not edit! */ +/* auto-generated on Wed Aug 14 10:31:26 DST 2019. Do not edit! */ #include #include "simdjson.h" diff --git a/singleheader/simdjson.cpp b/singleheader/simdjson.cpp index 63a7349a..ed000f63 100644 --- a/singleheader/simdjson.cpp +++ b/singleheader/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on Sun Aug 4 15:43:41 EDT 2019. Do not edit! */ +/* auto-generated on Wed Aug 14 10:31:26 DST 2019. Do not edit! */ #include "simdjson.h" /* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */ @@ -359,6 +359,7 @@ size_t json_minify(const uint8_t *buf, size_t len, uint8_t *out) { #endif /* end file src/jsonminifier.cpp */ /* begin file src/jsonparser.cpp */ +#include namespace simdjson { @@ -368,21 +369,21 @@ namespace simdjson { // function pointer type for json_parse using json_parse_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj, - bool realloc_if_needed); + bool realloc); // Pointer that holds the json_parse implementation corresponding to the // available SIMD instruction set -extern json_parse_functype *json_parse_ptr; +extern std::atomic json_parse_ptr; int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, - bool realloc_if_needed) { - return json_parse_ptr(buf, len, pj, realloc_if_needed); + bool realloc) { + return json_parse_ptr.load(std::memory_order_relaxed)(buf, len, pj, realloc); } int json_parse(const char *buf, size_t len, ParsedJson &pj, - bool realloc_if_needed) { - 
return json_parse_ptr(reinterpret_cast(buf), len, pj, - realloc_if_needed); + bool realloc) { + return json_parse_ptr.load(std::memory_order_relaxed)(reinterpret_cast(buf), len, pj, + realloc); } Architecture find_best_supported_implementation() { @@ -406,21 +407,21 @@ Architecture find_best_supported_implementation() { // Responsible to select the best json_parse implementation int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj, - bool realloc_if_needed) { + bool realloc) { Architecture best_implementation = find_best_supported_implementation(); // Selecting the best implementation switch (best_implementation) { #ifdef IS_X86_64 case Architecture::HASWELL: - json_parse_ptr = &json_parse_implementation; + json_parse_ptr.store(&json_parse_implementation, std::memory_order_relaxed); break; case Architecture::WESTMERE: - json_parse_ptr = &json_parse_implementation; + json_parse_ptr.store(&json_parse_implementation, std::memory_order_relaxed); break; #endif #ifdef IS_ARM64 case Architecture::ARM64: - json_parse_ptr = &json_parse_implementation; + json_parse_ptr.store(&json_parse_implementation, std::memory_order_relaxed); break; #endif default: @@ -428,18 +429,18 @@ int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj, return simdjson::UNEXPECTED_ERROR; } - return json_parse_ptr(buf, len, pj, realloc_if_needed); + return json_parse_ptr.load(std::memory_order_relaxed)(buf, len, pj, realloc); } -json_parse_functype *json_parse_ptr = &json_parse_dispatch; +std::atomic json_parse_ptr = &json_parse_dispatch; WARN_UNUSED ParsedJson build_parsed_json(const uint8_t *buf, size_t len, - bool realloc_if_needed) { + bool realloc) { ParsedJson pj; bool ok = pj.allocate_capacity(len); if (ok) { - json_parse(buf, len, pj, realloc_if_needed); + json_parse(buf, len, pj, realloc); } else { std::cerr << "failure during memory allocation " << std::endl; } @@ -447,47 +448,1044 @@ ParsedJson build_parsed_json(const uint8_t *buf, size_t len, } } // 
namespace simdjson /* end file src/jsonparser.cpp */ -/* begin file src/stage1_find_marks.cpp */ +/* begin file include/simdjson/stage1_find_marks_flatten_haswell.h */ +// This file provides the same function as +// stage1_find_marks_flatten_common.h, but uses Intel intrinsics. +// This should provide better performance on Visual Studio +// and other compilers that do a conservative optimization. + +// Specifically, on x64 processors with BMI, +// x & (x - 1) should be mapped to +// the blsr instruction. By using the +// _blsr_u64 intrinsic, we +// ensure that this will happen. +///////// + #ifdef IS_X86_64 TARGET_HASWELL namespace simdjson { -template <> -int find_structural_bits(const uint8_t *buf, size_t len, - ParsedJson &pj) { - FIND_STRUCTURAL_BITS(Architecture::HASWELL, buf, len, pj, - simdjson::haswell::flatten_bits); + +// flatten out values in 'bits' assuming that they are are to have values of idx +// plus their position in the bitvector, and store these indexes at +// base_ptr[base] incrementing base as we go +// will potentially store extra values beyond end of valid bits, so base_ptr +// needs to be large enough to handle this +template<> +really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, + uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. 
+ if (bits == 0) + return; + uint32_t cnt = _mm_popcnt_u64(bits); + uint32_t next_base = base + cnt; + idx -= 64; + base_ptr += base; + { + base_ptr[0] = idx + trailing_zeroes(bits); + bits = _blsr_u64(bits); + base_ptr[1] = idx + trailing_zeroes(bits); + bits = _blsr_u64(bits); + base_ptr[2] = idx + trailing_zeroes(bits); + bits = _blsr_u64(bits); + base_ptr[3] = idx + trailing_zeroes(bits); + bits = _blsr_u64(bits); + base_ptr[4] = idx + trailing_zeroes(bits); + bits = _blsr_u64(bits); + base_ptr[5] = idx + trailing_zeroes(bits); + bits = _blsr_u64(bits); + base_ptr[6] = idx + trailing_zeroes(bits); + bits = _blsr_u64(bits); + base_ptr[7] = idx + trailing_zeroes(bits); + bits = _blsr_u64(bits); + base_ptr += 8; + } + // We hope that the next branch is easily predicted. + if (cnt > 8) { + base_ptr[0] = idx + trailing_zeroes(bits); + bits = _blsr_u64(bits); + base_ptr[1] = idx + trailing_zeroes(bits); + bits = _blsr_u64(bits); + base_ptr[2] = idx + trailing_zeroes(bits); + bits = _blsr_u64(bits); + base_ptr[3] = idx + trailing_zeroes(bits); + bits = _blsr_u64(bits); + base_ptr[4] = idx + trailing_zeroes(bits); + bits = _blsr_u64(bits); + base_ptr[5] = idx + trailing_zeroes(bits); + bits = _blsr_u64(bits); + base_ptr[6] = idx + trailing_zeroes(bits); + bits = _blsr_u64(bits); + base_ptr[7] = idx + trailing_zeroes(bits); + bits = _blsr_u64(bits); + base_ptr += 8; + } + if (cnt > 16) { // unluckly: we rarely get here + // since it means having one structural or pseudo-structral element + // every 4 characters (possible with inputs like "","","",...). 
+ do { + base_ptr[0] = idx + trailing_zeroes(bits); + bits = _blsr_u64(bits); + base_ptr++; + } while (bits != 0); + } + base = next_base; } } // namespace simdjson UNTARGET_REGION +#endif // IS_X86_64 +/* end file include/simdjson/stage1_find_marks_flatten_haswell.h */ +/* begin file src/stage1_find_marks.cpp */ -TARGET_WESTMERE +#ifdef IS_X86_64 + +#define TARGETED_ARCHITECTURE Architecture::HASWELL +#define TARGETED_REGION TARGET_HASWELL +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is include already includes +// "simdjson/stage1_find_marks.h" (this simplifies amalgation) + +#ifdef TARGETED_ARCHITECTURE +#ifdef TARGETED_REGION + +TARGETED_REGION namespace simdjson { + +// return a bitvector indicating where we have characters that end an odd-length +// sequence of backslashes (and thus change the behavior of the next character +// to follow). A even-length sequence of backslashes, and, for that matter, the +// largest even-length prefix of our odd-length sequence of backslashes, simply +// modify the behavior of the backslashes themselves. +// We also update the prev_iter_ends_odd_backslash reference parameter to +// indicate whether we end an iteration on an odd-length sequence of +// backslashes, which modifies our subsequent search for odd-length +// sequences of backslashes in an obvious way. 
template <> -int find_structural_bits(const uint8_t *buf, size_t len, - ParsedJson &pj) { - FIND_STRUCTURAL_BITS(Architecture::WESTMERE, buf, len, pj, - simdjson::flatten_bits); +really_inline uint64_t find_odd_backslash_sequences( + simd_input in, + uint64_t &prev_iter_ends_odd_backslash) { + const uint64_t even_bits = 0x5555555555555555ULL; + const uint64_t odd_bits = ~even_bits; + uint64_t bs_bits = cmp_mask_against_input(in, '\\'); + uint64_t start_edges = bs_bits & ~(bs_bits << 1); + /* flip lowest if we have an odd-length run at the end of the prior + * iteration */ + uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash; + uint64_t even_starts = start_edges & even_start_mask; + uint64_t odd_starts = start_edges & ~even_start_mask; + uint64_t even_carries = bs_bits + even_starts; + + uint64_t odd_carries; + /* must record the carry-out of our odd-carries out of bit 63; this + * indicates whether the sense of any edge going to the next iteration + * should be flipped */ + bool iter_ends_odd_backslash = + add_overflow(bs_bits, odd_starts, &odd_carries); + + odd_carries |= prev_iter_ends_odd_backslash; /* push in bit zero as a + * potential end if we had an + * odd-numbered run at the + * end of the previous + * iteration */ + prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL; + uint64_t even_carry_ends = even_carries & ~bs_bits; + uint64_t odd_carry_ends = odd_carries & ~bs_bits; + uint64_t even_start_odd_end = even_carry_ends & odd_bits; + uint64_t odd_start_even_end = odd_carry_ends & even_bits; + uint64_t odd_ends = even_start_odd_end | odd_start_even_end; + return odd_ends; } + +// return both the quote mask (which is a half-open mask that covers the first +// quote +// in an unescaped quote pair and everything in the quote pair) and the quote +// bits, which are the simple +// unescaped quoted bits. 
We also update the prev_iter_inside_quote value to +// tell the next iteration +// whether we finished the final iteration inside a quote pair; if so, this +// inverts our behavior of +// whether we're inside quotes for the next iteration. +// Note that we don't do any error checking to see if we have backslash +// sequences outside quotes; these +// backslash sequences (of any length) will be detected elsewhere. +template <> +really_inline uint64_t find_quote_mask_and_bits( + simd_input in, uint64_t odd_ends, + uint64_t &prev_iter_inside_quote, uint64_t "e_bits, + uint64_t &error_mask) { + quote_bits = cmp_mask_against_input(in, '"'); + quote_bits = quote_bits & ~odd_ends; + uint64_t quote_mask = compute_quote_mask(quote_bits); + quote_mask ^= prev_iter_inside_quote; + /* All Unicode characters may be placed within the + * quotation marks, except for the characters that MUST be escaped: + * quotation mark, reverse solidus, and the control characters (U+0000 + * through U+001F). + * https://tools.ietf.org/html/rfc8259 */ + uint64_t unescaped = + unsigned_lteq_against_input(in, 0x1F); + error_mask |= quote_mask & unescaped; + /* right shift of a signed value expected to be well-defined and standard + * compliant as of C++20, + * John Regher from Utah U. says this is fine code */ + prev_iter_inside_quote = + static_cast(static_cast(quote_mask) >> 63); + return quote_mask; +} + +// Find structural bits in a 64-byte chunk. 
+really_inline void find_structural_bits_64( + const uint8_t *buf, size_t idx, uint32_t *base_ptr, uint32_t &base, + uint64_t &prev_iter_ends_odd_backslash, uint64_t &prev_iter_inside_quote, + uint64_t &prev_iter_ends_pseudo_pred, uint64_t &structurals, + uint64_t &error_mask, + utf8_checking_state &utf8_state) { + simd_input in = fill_input(buf); + check_utf8(in, utf8_state); + /* detect odd sequences of backslashes */ + uint64_t odd_ends = find_odd_backslash_sequences( + in, prev_iter_ends_odd_backslash); + + /* detect insides of quote pairs ("quote_mask") and also our quote_bits + * themselves */ + uint64_t quote_bits; + uint64_t quote_mask = find_quote_mask_and_bits( + in, odd_ends, prev_iter_inside_quote, quote_bits, error_mask); + + /* take the previous iterations structural bits, not our current + * iteration, + * and flatten */ + flatten_bits(base_ptr, base, idx, structurals); + + uint64_t whitespace; + find_whitespace_and_structurals(in, whitespace, + structurals); + + /* fixup structurals to reflect quotes and add pseudo-structural + * characters */ + structurals = finalize_structurals(structurals, whitespace, quote_mask, + quote_bits, prev_iter_ends_pseudo_pred); +} + +template <> +int find_structural_bits(const uint8_t *buf, size_t len, + ParsedJson &pj) { + if (len > pj.byte_capacity) { + std::cerr << "Your ParsedJson object only supports documents up to " + << pj.byte_capacity << " bytes but you are trying to process " + << len << " bytes" << std::endl; + return simdjson::CAPACITY; + } + uint32_t *base_ptr = pj.structural_indexes; + uint32_t base = 0; + utf8_checking_state utf8_state; + + /* we have padded the input out to 64 byte multiple with the remainder + * being zeros persistent state across loop does the last iteration end + * with an odd-length sequence of backslashes? */ + + /* either 0 or 1, but a 64-bit value */ + uint64_t prev_iter_ends_odd_backslash = 0ULL; + /* does the previous iteration end inside a double-quote pair? 
*/ + uint64_t prev_iter_inside_quote = + 0ULL; /* either all zeros or all ones + * does the previous iteration end on something that is a + * predecessor of a pseudo-structural character - i.e. + * whitespace or a structural character effectively the very + * first char is considered to follow "whitespace" for the + * purposes of pseudo-structural character detection so we + * initialize to 1 */ + uint64_t prev_iter_ends_pseudo_pred = 1ULL; + + /* structurals are persistent state across loop as we flatten them on the + * subsequent iteration into our array pointed to be base_ptr. + * This is harmless on the first iteration as structurals==0 + * and is done for performance reasons; we can hide some of the latency of + * the + * expensive carryless multiply in the previous step with this work */ + uint64_t structurals = 0; + + size_t lenminus64 = len < 64 ? 0 : len - 64; + size_t idx = 0; + uint64_t error_mask = 0; /* for unescaped characters within strings (ASCII + code points < 0x20) */ + + for (; idx < lenminus64; idx += 64) { + find_structural_bits_64(&buf[idx], idx, base_ptr, base, + prev_iter_ends_odd_backslash, + prev_iter_inside_quote, prev_iter_ends_pseudo_pred, + structurals, error_mask, utf8_state); + } + /* If we have a final chunk of less than 64 bytes, pad it to 64 with + * spaces before processing it (otherwise, we risk invalidating the UTF-8 + * checks). */ + if (idx < len) { + uint8_t tmp_buf[64]; + memset(tmp_buf, 0x20, 64); + memcpy(tmp_buf, buf + idx, len - idx); + find_structural_bits_64(&tmp_buf[0], idx, base_ptr, base, + prev_iter_ends_odd_backslash, + prev_iter_inside_quote, prev_iter_ends_pseudo_pred, + structurals, error_mask, utf8_state); + idx += 64; + } + + /* is last string quote closed? 
*/ + if (prev_iter_inside_quote) { + return simdjson::UNCLOSED_STRING; + } + + /* finally, flatten out the remaining structurals from the last iteration + */ + flatten_bits(base_ptr, base, idx, structurals); + + pj.n_structural_indexes = base; + /* a valid JSON file cannot have zero structural indexes - we should have + * found something */ + if (pj.n_structural_indexes == 0u) { + return simdjson::EMPTY; + } + if (base_ptr[pj.n_structural_indexes - 1] > len) { + return simdjson::UNEXPECTED_ERROR; + } + if (len != base_ptr[pj.n_structural_indexes - 1]) { + /* the string might not be NULL terminated, but we add a virtual NULL + * ending + * character. */ + base_ptr[pj.n_structural_indexes++] = len; + } + /* make it safe to dereference one beyond this array */ + base_ptr[pj.n_structural_indexes] = 0; + if (error_mask) { + return simdjson::UNESCAPED_CHARS; + } + return check_utf8_errors(utf8_state); +} + } // namespace simdjson UNTARGET_REGION -#endif +#else +#error TARGETED_REGION must be specified before including. +#endif // TARGETED_REGION +#else +#error TARGETED_ARCHITECTURE must be specified before including. +#endif // TARGETED_ARCHITECTURE +#undef TARGETED_ARCHITECTURE +#undef TARGETED_REGION + +#define TARGETED_ARCHITECTURE Architecture::WESTMERE +#define TARGETED_REGION TARGET_WESTMERE +// This file contains a non-architecture-specific version of "flatten" used in stage1. +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is include already includes +// "simdjson/stage1_find_marks.h" (this simplifies amalgation) + +#ifdef TARGETED_ARCHITECTURE +#ifdef TARGETED_REGION + +TARGETED_REGION +namespace simdjson { + +#ifdef SIMDJSON_NAIVE_FLATTEN // useful for benchmarking +// +// This is just a naive implementation. It should be normally +// disable, but can be used for research purposes to compare +// again our optimized version. 
+template <> +really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, + uint32_t idx, uint64_t bits) { + uint32_t *out_ptr = base_ptr + base; + idx -= 64; + while (bits != 0) { + out_ptr[0] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + out_ptr++; + } + base = (out_ptr - base_ptr); +} + +#else +// flatten out values in 'bits' assuming that they are are to have values of idx +// plus their position in the bitvector, and store these indexes at +// base_ptr[base] incrementing base as we go +// will potentially store extra values beyond end of valid bits, so base_ptr +// needs to be large enough to handle this +template<> +really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, + uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; + uint32_t cnt = hamming(bits); + uint32_t next_base = base + cnt; + idx -= 64; + base_ptr += base; + { + base_ptr[0] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[1] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[2] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[3] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[4] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[5] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[6] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[7] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr += 8; + } + // We hope that the next branch is easily predicted. 
+ if (cnt > 8) { + base_ptr[0] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[1] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[2] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[3] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[4] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[5] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[6] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[7] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr += 8; + } + if (cnt > 16) { // unluckly: we rarely get here + // since it means having one structural or pseudo-structral element + // every 4 characters (possible with inputs like "","","",...). + do { + base_ptr[0] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr++; + } while (bits != 0); + } + base = next_base; +} +#endif // SIMDJSON_NAIVE_FLATTEN + +} // namespace simdjson +UNTARGET_REGION + +#else +#error TARGETED_REGION must be specified before including. +#endif // TARGETED_REGION +#else +#error TARGETED_ARCHITECTURE must be specified before including. +#endif // TARGETED_ARCHITECTURE +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is include already includes +// "simdjson/stage1_find_marks.h" (this simplifies amalgation) + +#ifdef TARGETED_ARCHITECTURE +#ifdef TARGETED_REGION + +TARGETED_REGION +namespace simdjson { + +// return a bitvector indicating where we have characters that end an odd-length +// sequence of backslashes (and thus change the behavior of the next character +// to follow). A even-length sequence of backslashes, and, for that matter, the +// largest even-length prefix of our odd-length sequence of backslashes, simply +// modify the behavior of the backslashes themselves. 
+// We also update the prev_iter_ends_odd_backslash reference parameter to +// indicate whether we end an iteration on an odd-length sequence of +// backslashes, which modifies our subsequent search for odd-length +// sequences of backslashes in an obvious way. +template <> +really_inline uint64_t find_odd_backslash_sequences( + simd_input in, + uint64_t &prev_iter_ends_odd_backslash) { + const uint64_t even_bits = 0x5555555555555555ULL; + const uint64_t odd_bits = ~even_bits; + uint64_t bs_bits = cmp_mask_against_input(in, '\\'); + uint64_t start_edges = bs_bits & ~(bs_bits << 1); + /* flip lowest if we have an odd-length run at the end of the prior + * iteration */ + uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash; + uint64_t even_starts = start_edges & even_start_mask; + uint64_t odd_starts = start_edges & ~even_start_mask; + uint64_t even_carries = bs_bits + even_starts; + + uint64_t odd_carries; + /* must record the carry-out of our odd-carries out of bit 63; this + * indicates whether the sense of any edge going to the next iteration + * should be flipped */ + bool iter_ends_odd_backslash = + add_overflow(bs_bits, odd_starts, &odd_carries); + + odd_carries |= prev_iter_ends_odd_backslash; /* push in bit zero as a + * potential end if we had an + * odd-numbered run at the + * end of the previous + * iteration */ + prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL; + uint64_t even_carry_ends = even_carries & ~bs_bits; + uint64_t odd_carry_ends = odd_carries & ~bs_bits; + uint64_t even_start_odd_end = even_carry_ends & odd_bits; + uint64_t odd_start_even_end = odd_carry_ends & even_bits; + uint64_t odd_ends = even_start_odd_end | odd_start_even_end; + return odd_ends; +} + +// return both the quote mask (which is a half-open mask that covers the first +// quote +// in an unescaped quote pair and everything in the quote pair) and the quote +// bits, which are the simple +// unescaped quoted bits. 
We also update the prev_iter_inside_quote value to +// tell the next iteration +// whether we finished the final iteration inside a quote pair; if so, this +// inverts our behavior of +// whether we're inside quotes for the next iteration. +// Note that we don't do any error checking to see if we have backslash +// sequences outside quotes; these +// backslash sequences (of any length) will be detected elsewhere. +template <> +really_inline uint64_t find_quote_mask_and_bits( + simd_input in, uint64_t odd_ends, + uint64_t &prev_iter_inside_quote, uint64_t "e_bits, + uint64_t &error_mask) { + quote_bits = cmp_mask_against_input(in, '"'); + quote_bits = quote_bits & ~odd_ends; + uint64_t quote_mask = compute_quote_mask(quote_bits); + quote_mask ^= prev_iter_inside_quote; + /* All Unicode characters may be placed within the + * quotation marks, except for the characters that MUST be escaped: + * quotation mark, reverse solidus, and the control characters (U+0000 + * through U+001F). + * https://tools.ietf.org/html/rfc8259 */ + uint64_t unescaped = + unsigned_lteq_against_input(in, 0x1F); + error_mask |= quote_mask & unescaped; + /* right shift of a signed value expected to be well-defined and standard + * compliant as of C++20, + * John Regher from Utah U. says this is fine code */ + prev_iter_inside_quote = + static_cast(static_cast(quote_mask) >> 63); + return quote_mask; +} + +// Find structural bits in a 64-byte chunk. 
+really_inline void find_structural_bits_64( + const uint8_t *buf, size_t idx, uint32_t *base_ptr, uint32_t &base, + uint64_t &prev_iter_ends_odd_backslash, uint64_t &prev_iter_inside_quote, + uint64_t &prev_iter_ends_pseudo_pred, uint64_t &structurals, + uint64_t &error_mask, + utf8_checking_state &utf8_state) { + simd_input in = fill_input(buf); + check_utf8(in, utf8_state); + /* detect odd sequences of backslashes */ + uint64_t odd_ends = find_odd_backslash_sequences( + in, prev_iter_ends_odd_backslash); + + /* detect insides of quote pairs ("quote_mask") and also our quote_bits + * themselves */ + uint64_t quote_bits; + uint64_t quote_mask = find_quote_mask_and_bits( + in, odd_ends, prev_iter_inside_quote, quote_bits, error_mask); + + /* take the previous iterations structural bits, not our current + * iteration, + * and flatten */ + flatten_bits(base_ptr, base, idx, structurals); + + uint64_t whitespace; + find_whitespace_and_structurals(in, whitespace, + structurals); + + /* fixup structurals to reflect quotes and add pseudo-structural + * characters */ + structurals = finalize_structurals(structurals, whitespace, quote_mask, + quote_bits, prev_iter_ends_pseudo_pred); +} + +template <> +int find_structural_bits(const uint8_t *buf, size_t len, + ParsedJson &pj) { + if (len > pj.byte_capacity) { + std::cerr << "Your ParsedJson object only supports documents up to " + << pj.byte_capacity << " bytes but you are trying to process " + << len << " bytes" << std::endl; + return simdjson::CAPACITY; + } + uint32_t *base_ptr = pj.structural_indexes; + uint32_t base = 0; + utf8_checking_state utf8_state; + + /* we have padded the input out to 64 byte multiple with the remainder + * being zeros persistent state across loop does the last iteration end + * with an odd-length sequence of backslashes? */ + + /* either 0 or 1, but a 64-bit value */ + uint64_t prev_iter_ends_odd_backslash = 0ULL; + /* does the previous iteration end inside a double-quote pair? 
*/ + uint64_t prev_iter_inside_quote = + 0ULL; /* either all zeros or all ones + * does the previous iteration end on something that is a + * predecessor of a pseudo-structural character - i.e. + * whitespace or a structural character effectively the very + * first char is considered to follow "whitespace" for the + * purposes of pseudo-structural character detection so we + * initialize to 1 */ + uint64_t prev_iter_ends_pseudo_pred = 1ULL; + + /* structurals are persistent state across loop as we flatten them on the + * subsequent iteration into our array pointed to be base_ptr. + * This is harmless on the first iteration as structurals==0 + * and is done for performance reasons; we can hide some of the latency of + * the + * expensive carryless multiply in the previous step with this work */ + uint64_t structurals = 0; + + size_t lenminus64 = len < 64 ? 0 : len - 64; + size_t idx = 0; + uint64_t error_mask = 0; /* for unescaped characters within strings (ASCII + code points < 0x20) */ + + for (; idx < lenminus64; idx += 64) { + find_structural_bits_64(&buf[idx], idx, base_ptr, base, + prev_iter_ends_odd_backslash, + prev_iter_inside_quote, prev_iter_ends_pseudo_pred, + structurals, error_mask, utf8_state); + } + /* If we have a final chunk of less than 64 bytes, pad it to 64 with + * spaces before processing it (otherwise, we risk invalidating the UTF-8 + * checks). */ + if (idx < len) { + uint8_t tmp_buf[64]; + memset(tmp_buf, 0x20, 64); + memcpy(tmp_buf, buf + idx, len - idx); + find_structural_bits_64(&tmp_buf[0], idx, base_ptr, base, + prev_iter_ends_odd_backslash, + prev_iter_inside_quote, prev_iter_ends_pseudo_pred, + structurals, error_mask, utf8_state); + idx += 64; + } + + /* is last string quote closed? 
*/ + if (prev_iter_inside_quote) { + return simdjson::UNCLOSED_STRING; + } + + /* finally, flatten out the remaining structurals from the last iteration + */ + flatten_bits(base_ptr, base, idx, structurals); + + pj.n_structural_indexes = base; + /* a valid JSON file cannot have zero structural indexes - we should have + * found something */ + if (pj.n_structural_indexes == 0u) { + return simdjson::EMPTY; + } + if (base_ptr[pj.n_structural_indexes - 1] > len) { + return simdjson::UNEXPECTED_ERROR; + } + if (len != base_ptr[pj.n_structural_indexes - 1]) { + /* the string might not be NULL terminated, but we add a virtual NULL + * ending + * character. */ + base_ptr[pj.n_structural_indexes++] = len; + } + /* make it safe to dereference one beyond this array */ + base_ptr[pj.n_structural_indexes] = 0; + if (error_mask) { + return simdjson::UNESCAPED_CHARS; + } + return check_utf8_errors(utf8_state); +} + +} // namespace simdjson +UNTARGET_REGION + +#else +#error TARGETED_REGION must be specified before including. +#endif // TARGETED_REGION +#else +#error TARGETED_ARCHITECTURE must be specified before including. +#endif // TARGETED_ARCHITECTURE +#undef TARGETED_ARCHITECTURE +#undef TARGETED_REGION + +#endif // IS_X86_64 #ifdef IS_ARM64 + +#define TARGETED_ARCHITECTURE Architecture::ARM64 +#define TARGETED_REGION TARGET_ARM64 +// This file contains a non-architecture-specific version of "flatten" used in stage1. +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is include already includes +// "simdjson/stage1_find_marks.h" (this simplifies amalgation) + +#ifdef TARGETED_ARCHITECTURE +#ifdef TARGETED_REGION + +TARGETED_REGION namespace simdjson { + +#ifdef SIMDJSON_NAIVE_FLATTEN // useful for benchmarking +// +// This is just a naive implementation. It should be normally +// disable, but can be used for research purposes to compare +// again our optimized version. 
template <> -int find_structural_bits(const uint8_t *buf, size_t len, - ParsedJson &pj) { - FIND_STRUCTURAL_BITS(Architecture::ARM64, buf, len, pj, - simdjson::flatten_bits); +really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, + uint32_t idx, uint64_t bits) { + uint32_t *out_ptr = base_ptr + base; + idx -= 64; + while (bits != 0) { + out_ptr[0] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + out_ptr++; + } + base = (out_ptr - base_ptr); } + +#else +// flatten out values in 'bits' assuming that they are are to have values of idx +// plus their position in the bitvector, and store these indexes at +// base_ptr[base] incrementing base as we go +// will potentially store extra values beyond end of valid bits, so base_ptr +// needs to be large enough to handle this +template<> +really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, + uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; + uint32_t cnt = hamming(bits); + uint32_t next_base = base + cnt; + idx -= 64; + base_ptr += base; + { + base_ptr[0] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[1] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[2] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[3] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[4] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[5] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[6] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[7] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr += 8; + } + // We hope that the next branch is easily predicted. 
+ if (cnt > 8) { + base_ptr[0] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[1] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[2] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[3] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[4] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[5] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[6] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr[7] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr += 8; + } + if (cnt > 16) { // unluckly: we rarely get here + // since it means having one structural or pseudo-structral element + // every 4 characters (possible with inputs like "","","",...). + do { + base_ptr[0] = idx + trailing_zeroes(bits); + bits = bits & (bits - 1); + base_ptr++; + } while (bits != 0); + } + base = next_base; +} +#endif // SIMDJSON_NAIVE_FLATTEN + } // namespace simdjson -#endif +UNTARGET_REGION + +#else +#error TARGETED_REGION must be specified before including. +#endif // TARGETED_REGION +#else +#error TARGETED_ARCHITECTURE must be specified before including. +#endif // TARGETED_ARCHITECTURE +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is include already includes +// "simdjson/stage1_find_marks.h" (this simplifies amalgation) + +#ifdef TARGETED_ARCHITECTURE +#ifdef TARGETED_REGION + +TARGETED_REGION +namespace simdjson { + +// return a bitvector indicating where we have characters that end an odd-length +// sequence of backslashes (and thus change the behavior of the next character +// to follow). A even-length sequence of backslashes, and, for that matter, the +// largest even-length prefix of our odd-length sequence of backslashes, simply +// modify the behavior of the backslashes themselves. 
+// We also update the prev_iter_ends_odd_backslash reference parameter to +// indicate whether we end an iteration on an odd-length sequence of +// backslashes, which modifies our subsequent search for odd-length +// sequences of backslashes in an obvious way. +template <> +really_inline uint64_t find_odd_backslash_sequences( + simd_input in, + uint64_t &prev_iter_ends_odd_backslash) { + const uint64_t even_bits = 0x5555555555555555ULL; + const uint64_t odd_bits = ~even_bits; + uint64_t bs_bits = cmp_mask_against_input(in, '\\'); + uint64_t start_edges = bs_bits & ~(bs_bits << 1); + /* flip lowest if we have an odd-length run at the end of the prior + * iteration */ + uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash; + uint64_t even_starts = start_edges & even_start_mask; + uint64_t odd_starts = start_edges & ~even_start_mask; + uint64_t even_carries = bs_bits + even_starts; + + uint64_t odd_carries; + /* must record the carry-out of our odd-carries out of bit 63; this + * indicates whether the sense of any edge going to the next iteration + * should be flipped */ + bool iter_ends_odd_backslash = + add_overflow(bs_bits, odd_starts, &odd_carries); + + odd_carries |= prev_iter_ends_odd_backslash; /* push in bit zero as a + * potential end if we had an + * odd-numbered run at the + * end of the previous + * iteration */ + prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL; + uint64_t even_carry_ends = even_carries & ~bs_bits; + uint64_t odd_carry_ends = odd_carries & ~bs_bits; + uint64_t even_start_odd_end = even_carry_ends & odd_bits; + uint64_t odd_start_even_end = odd_carry_ends & even_bits; + uint64_t odd_ends = even_start_odd_end | odd_start_even_end; + return odd_ends; +} + +// return both the quote mask (which is a half-open mask that covers the first +// quote +// in an unescaped quote pair and everything in the quote pair) and the quote +// bits, which are the simple +// unescaped quoted bits. 
We also update the prev_iter_inside_quote value to +tell the next iteration +whether we finished the final iteration inside a quote pair; if so, this +inverts our behavior of +whether we're inside quotes for the next iteration. +Note that we don't do any error checking to see if we have backslash +sequences outside quotes; these +backslash sequences (of any length) will be detected elsewhere. +template <> +really_inline uint64_t find_quote_mask_and_bits( + simd_input in, uint64_t odd_ends, + uint64_t &prev_iter_inside_quote, uint64_t &quote_bits, + uint64_t &error_mask) { + quote_bits = cmp_mask_against_input(in, '"'); + quote_bits = quote_bits & ~odd_ends; + uint64_t quote_mask = compute_quote_mask(quote_bits); + quote_mask ^= prev_iter_inside_quote; + /* All Unicode characters may be placed within the + * quotation marks, except for the characters that MUST be escaped: + * quotation mark, reverse solidus, and the control characters (U+0000 + * through U+001F). + * https://tools.ietf.org/html/rfc8259 */ + uint64_t unescaped = + unsigned_lteq_against_input(in, 0x1F); + error_mask |= quote_mask & unescaped; + /* right shift of a signed value expected to be well-defined and standard + * compliant as of C++20, + * John Regher from Utah U. says this is fine code */ + prev_iter_inside_quote = + static_cast<uint64_t>(static_cast<int64_t>(quote_mask) >> 63); + return quote_mask; +} + +// Find structural bits in a 64-byte chunk. 
+really_inline void find_structural_bits_64( + const uint8_t *buf, size_t idx, uint32_t *base_ptr, uint32_t &base, + uint64_t &prev_iter_ends_odd_backslash, uint64_t &prev_iter_inside_quote, + uint64_t &prev_iter_ends_pseudo_pred, uint64_t &structurals, + uint64_t &error_mask, + utf8_checking_state &utf8_state) { + simd_input in = fill_input(buf); + check_utf8(in, utf8_state); + /* detect odd sequences of backslashes */ + uint64_t odd_ends = find_odd_backslash_sequences( + in, prev_iter_ends_odd_backslash); + + /* detect insides of quote pairs ("quote_mask") and also our quote_bits + * themselves */ + uint64_t quote_bits; + uint64_t quote_mask = find_quote_mask_and_bits( + in, odd_ends, prev_iter_inside_quote, quote_bits, error_mask); + + /* take the previous iterations structural bits, not our current + * iteration, + * and flatten */ + flatten_bits(base_ptr, base, idx, structurals); + + uint64_t whitespace; + find_whitespace_and_structurals(in, whitespace, + structurals); + + /* fixup structurals to reflect quotes and add pseudo-structural + * characters */ + structurals = finalize_structurals(structurals, whitespace, quote_mask, + quote_bits, prev_iter_ends_pseudo_pred); +} + +template <> +int find_structural_bits(const uint8_t *buf, size_t len, + ParsedJson &pj) { + if (len > pj.byte_capacity) { + std::cerr << "Your ParsedJson object only supports documents up to " + << pj.byte_capacity << " bytes but you are trying to process " + << len << " bytes" << std::endl; + return simdjson::CAPACITY; + } + uint32_t *base_ptr = pj.structural_indexes; + uint32_t base = 0; + utf8_checking_state utf8_state; + + /* we have padded the input out to 64 byte multiple with the remainder + * being zeros persistent state across loop does the last iteration end + * with an odd-length sequence of backslashes? */ + + /* either 0 or 1, but a 64-bit value */ + uint64_t prev_iter_ends_odd_backslash = 0ULL; + /* does the previous iteration end inside a double-quote pair? 
*/ + uint64_t prev_iter_inside_quote = + 0ULL; /* either all zeros or all ones + * does the previous iteration end on something that is a + * predecessor of a pseudo-structural character - i.e. + * whitespace or a structural character effectively the very + * first char is considered to follow "whitespace" for the + * purposes of pseudo-structural character detection so we + * initialize to 1 */ + uint64_t prev_iter_ends_pseudo_pred = 1ULL; + + /* structurals are persistent state across loop as we flatten them on the + * subsequent iteration into our array pointed to be base_ptr. + * This is harmless on the first iteration as structurals==0 + * and is done for performance reasons; we can hide some of the latency of + * the + * expensive carryless multiply in the previous step with this work */ + uint64_t structurals = 0; + + size_t lenminus64 = len < 64 ? 0 : len - 64; + size_t idx = 0; + uint64_t error_mask = 0; /* for unescaped characters within strings (ASCII + code points < 0x20) */ + + for (; idx < lenminus64; idx += 64) { + find_structural_bits_64(&buf[idx], idx, base_ptr, base, + prev_iter_ends_odd_backslash, + prev_iter_inside_quote, prev_iter_ends_pseudo_pred, + structurals, error_mask, utf8_state); + } + /* If we have a final chunk of less than 64 bytes, pad it to 64 with + * spaces before processing it (otherwise, we risk invalidating the UTF-8 + * checks). */ + if (idx < len) { + uint8_t tmp_buf[64]; + memset(tmp_buf, 0x20, 64); + memcpy(tmp_buf, buf + idx, len - idx); + find_structural_bits_64(&tmp_buf[0], idx, base_ptr, base, + prev_iter_ends_odd_backslash, + prev_iter_inside_quote, prev_iter_ends_pseudo_pred, + structurals, error_mask, utf8_state); + idx += 64; + } + + /* is last string quote closed? 
*/ + if (prev_iter_inside_quote) { + return simdjson::UNCLOSED_STRING; + } + + /* finally, flatten out the remaining structurals from the last iteration + */ + flatten_bits(base_ptr, base, idx, structurals); + + pj.n_structural_indexes = base; + /* a valid JSON file cannot have zero structural indexes - we should have + * found something */ + if (pj.n_structural_indexes == 0u) { + return simdjson::EMPTY; + } + if (base_ptr[pj.n_structural_indexes - 1] > len) { + return simdjson::UNEXPECTED_ERROR; + } + if (len != base_ptr[pj.n_structural_indexes - 1]) { + /* the string might not be NULL terminated, but we add a virtual NULL + * ending + * character. */ + base_ptr[pj.n_structural_indexes++] = len; + } + /* make it safe to dereference one beyond this array */ + base_ptr[pj.n_structural_indexes] = 0; + if (error_mask) { + return simdjson::UNESCAPED_CHARS; + } + return check_utf8_errors(utf8_state); +} + +} // namespace simdjson +UNTARGET_REGION + +#else +#error TARGETED_REGION must be specified before including. +#endif // TARGETED_REGION +#else +#error TARGETED_ARCHITECTURE must be specified before including. +#endif // TARGETED_ARCHITECTURE +#undef TARGETED_ARCHITECTURE +#undef TARGETED_REGION + +#endif // IS_ARM64 /* end file src/stage1_find_marks.cpp */ /* begin file src/stage2_build_tape.cpp */ +#ifdef IS_X86_64 +#define TARGETED_ARCHITECTURE Architecture::HASWELL +#define TARGETED_REGION TARGET_HASWELL +// This file contains the common code every implementation uses for stage2 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is include already includes +// "simdjson/stage2_build_tape.h" (this simplifies amalgation) + +#ifdef TARGETED_ARCHITECTURE +#ifdef TARGETED_REGION + +TARGETED_REGION namespace simdjson { // this macro reads the next structural character, updating idx, i and c. 
@@ -522,537 +1520,1583 @@ namespace simdjson { * The JSON is parsed to a tape, see the accompanying tape.md file * for documentation. ***********/ -// We need to compile that code for multiple architectures. However, target -// attributes can be used only once by function definition. Huge macro seemed -// better than huge code duplication. int UNIFIED_MACHINE(const uint8_t *buf, -// size_t len, ParsedJson &pj) -#define UNIFIED_MACHINE(T, buf, len, pj) \ - { \ - if (ALLOW_SAME_PAGE_BUFFER_OVERRUN) { \ - memset((uint8_t *)buf + len, 0, \ - SIMDJSON_PADDING); /* to please valgrind */ \ - } \ - uint32_t i = 0; /* index of the structural character (0,1,2,3...) */ \ - uint32_t \ - idx; /* location of the structural character in the input (buf) */ \ - uint8_t c; /* used to track the (structural) character we are looking at, \ - updated */ \ - /* by UPDATE_CHAR macro */ \ - uint32_t depth = 0; /* could have an arbitrary starting depth */ \ - pj.init(); /* sets is_valid to false */ \ - if (pj.byte_capacity < len) { \ - pj.error_code = simdjson::CAPACITY; \ - return pj.error_code; \ - } \ - \ - /*//////////////////////////// START STATE ///////////////////////////// \ - */ \ - SET_GOTO_START_CONTINUE() \ - pj.containing_scope_offset[depth] = pj.get_current_loc(); \ - pj.write_tape(0, 'r'); /* r for root, 0 is going to get overwritten */ \ - /* the root is used, if nothing else, to capture the size of the tape */ \ - depth++; /* everything starts at depth = 1, depth = 0 is just for the \ - root, the root may contain an object, an array or something \ - else. 
*/ \ - if (depth >= pj.depth_capacity) { \ - goto fail; \ - } \ - \ - UPDATE_CHAR(); \ - switch (c) { \ - case '{': \ - pj.containing_scope_offset[depth] = pj.get_current_loc(); \ - SET_GOTO_START_CONTINUE(); \ - depth++; \ - if (depth >= pj.depth_capacity) { \ - goto fail; \ - } \ - pj.write_tape( \ - 0, \ - c); /* strangely, moving this to object_begin slows things down */ \ - goto object_begin; \ - case '[': \ - pj.containing_scope_offset[depth] = pj.get_current_loc(); \ - SET_GOTO_START_CONTINUE(); \ - depth++; \ - if (depth >= pj.depth_capacity) { \ - goto fail; \ - } \ - pj.write_tape(0, c); \ - goto array_begin; \ - /* #define SIMDJSON_ALLOWANYTHINGINROOT \ - * A JSON text is a serialized value. Note that certain previous \ - * specifications of JSON constrained a JSON text to be an object or an \ - * array. Implementations that generate only objects or arrays where a \ - * JSON text is called for will be interoperable in the sense that all \ - * implementations will accept these as conforming JSON texts. \ - * https://tools.ietf.org/html/rfc8259 \ - * #ifdef SIMDJSON_ALLOWANYTHINGINROOT */ \ - case '"': { \ - if (!parse_string(buf, len, pj, depth, idx)) { \ - goto fail; \ - } \ - break; \ - } \ - case 't': { \ - /* we need to make a copy to make sure that the string is space \ - * terminated. \ - * this only applies to the JSON document made solely of the true value. \ - * this will almost never be called in practice */ \ - char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); \ - if (copy == nullptr) { \ - goto fail; \ - } \ - memcpy(copy, buf, len); \ - copy[len] = ' '; \ - if (!is_valid_true_atom(reinterpret_cast(copy) + \ - idx)) { \ - free(copy); \ - goto fail; \ - } \ - free(copy); \ - pj.write_tape(0, c); \ - break; \ - } \ - case 'f': { \ - /* we need to make a copy to make sure that the string is space \ - * terminated. \ - * this only applies to the JSON document made solely of the false \ - * value. 
\ - * this will almost never be called in practice */ \ - char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); \ - if (copy == nullptr) { \ - goto fail; \ - } \ - memcpy(copy, buf, len); \ - copy[len] = ' '; \ - if (!is_valid_false_atom(reinterpret_cast(copy) + \ - idx)) { \ - free(copy); \ - goto fail; \ - } \ - free(copy); \ - pj.write_tape(0, c); \ - break; \ - } \ - case 'n': { \ - /* we need to make a copy to make sure that the string is space \ - * terminated. \ - * this only applies to the JSON document made solely of the null value. \ - * this will almost never be called in practice */ \ - char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); \ - if (copy == nullptr) { \ - goto fail; \ - } \ - memcpy(copy, buf, len); \ - copy[len] = ' '; \ - if (!is_valid_null_atom(reinterpret_cast(copy) + \ - idx)) { \ - free(copy); \ - goto fail; \ - } \ - free(copy); \ - pj.write_tape(0, c); \ - break; \ - } \ - case '0': \ - case '1': \ - case '2': \ - case '3': \ - case '4': \ - case '5': \ - case '6': \ - case '7': \ - case '8': \ - case '9': { \ - /* we need to make a copy to make sure that the string is space \ - * terminated. \ - * this is done only for JSON documents made of a sole number \ - * this will almost never be called in practice. We terminate with a \ - * space \ - * because we do not want to allow NULLs in the middle of a number \ - * (whereas a \ - * space in the middle of a number would be identified in stage 1). */ \ - char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); \ - if (copy == nullptr) { \ - goto fail; \ - } \ - memcpy(copy, buf, len); \ - copy[len] = ' '; \ - if (!parse_number(reinterpret_cast(copy), pj, idx, \ - false)) { \ - free(copy); \ - goto fail; \ - } \ - free(copy); \ - break; \ - } \ - case '-': { \ - /* we need to make a copy to make sure that the string is NULL \ - * terminated. 
\ - * this is done only for JSON documents made of a sole number \ - * this will almost never be called in practice */ \ - char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); \ - if (copy == nullptr) { \ - goto fail; \ - } \ - memcpy(copy, buf, len); \ - copy[len] = ' '; \ - if (!parse_number(reinterpret_cast(copy), pj, idx, \ - true)) { \ - free(copy); \ - goto fail; \ - } \ - free(copy); \ - break; \ - } \ - default: \ - goto fail; \ - } \ - start_continue: \ - /* the string might not be NULL terminated. */ \ - if (i + 1 == pj.n_structural_indexes) { \ - goto succeed; \ - } else { \ - goto fail; \ - } \ - /*//////////////////////////// OBJECT STATES ///////////////////////////*/ \ - \ - object_begin: \ - UPDATE_CHAR(); \ - switch (c) { \ - case '"': { \ - if (!parse_string(buf, len, pj, depth, idx)) { \ - goto fail; \ - } \ - goto object_key_state; \ - } \ - case '}': \ - goto scope_end; /* could also go to object_continue */ \ - default: \ - goto fail; \ - } \ - \ - object_key_state: \ - UPDATE_CHAR(); \ - if (c != ':') { \ - goto fail; \ - } \ - UPDATE_CHAR(); \ - switch (c) { \ - case '"': { \ - if (!parse_string(buf, len, pj, depth, idx)) { \ - goto fail; \ - } \ - break; \ - } \ - case 't': \ - if (!is_valid_true_atom(buf + idx)) { \ - goto fail; \ - } \ - pj.write_tape(0, c); \ - break; \ - case 'f': \ - if (!is_valid_false_atom(buf + idx)) { \ - goto fail; \ - } \ - pj.write_tape(0, c); \ - break; \ - case 'n': \ - if (!is_valid_null_atom(buf + idx)) { \ - goto fail; \ - } \ - pj.write_tape(0, c); \ - break; \ - case '0': \ - case '1': \ - case '2': \ - case '3': \ - case '4': \ - case '5': \ - case '6': \ - case '7': \ - case '8': \ - case '9': { \ - if (!parse_number(buf, pj, idx, false)) { \ - goto fail; \ - } \ - break; \ - } \ - case '-': { \ - if (!parse_number(buf, pj, idx, true)) { \ - goto fail; \ - } \ - break; \ - } \ - case '{': { \ - pj.containing_scope_offset[depth] = pj.get_current_loc(); \ - pj.write_tape(0, c); /* here the compilers 
knows what c is so this gets \ - optimized */ \ - /* we have not yet encountered } so we need to come back for it */ \ - SET_GOTO_OBJECT_CONTINUE() \ - /* we found an object inside an object, so we need to increment the \ - * depth */ \ - depth++; \ - if (depth >= pj.depth_capacity) { \ - goto fail; \ - } \ - \ - goto object_begin; \ - } \ - case '[': { \ - pj.containing_scope_offset[depth] = pj.get_current_loc(); \ - pj.write_tape(0, c); /* here the compilers knows what c is so this gets \ - optimized */ \ - /* we have not yet encountered } so we need to come back for it */ \ - SET_GOTO_OBJECT_CONTINUE() \ - /* we found an array inside an object, so we need to increment the depth \ - */ \ - depth++; \ - if (depth >= pj.depth_capacity) { \ - goto fail; \ - } \ - goto array_begin; \ - } \ - default: \ - goto fail; \ - } \ - \ - object_continue: \ - UPDATE_CHAR(); \ - switch (c) { \ - case ',': \ - UPDATE_CHAR(); \ - if (c != '"') { \ - goto fail; \ - } else { \ - if (!parse_string(buf, len, pj, depth, idx)) { \ - goto fail; \ - } \ - goto object_key_state; \ - } \ - case '}': \ - goto scope_end; \ - default: \ - goto fail; \ - } \ - \ - /*//////////////////////////// COMMON STATE ///////////////////////////*/ \ - \ - scope_end: \ - /* write our tape location to the header scope */ \ - depth--; \ - pj.write_tape(pj.containing_scope_offset[depth], c); \ - pj.annotate_previous_loc(pj.containing_scope_offset[depth], \ - pj.get_current_loc()); \ - /* goto saved_state */ \ - GOTO_CONTINUE() \ - \ - /*//////////////////////////// ARRAY STATES ///////////////////////////*/ \ - array_begin: \ - UPDATE_CHAR(); \ - if (c == ']') { \ - goto scope_end; /* could also go to array_continue */ \ - } \ - \ - main_array_switch: \ - /* we call update char on all paths in, so we can peek at c on the \ - * on paths that can accept a close square brace (post-, and at start) */ \ - switch (c) { \ - case '"': { \ - if (!parse_string(buf, len, pj, depth, idx)) { \ - goto fail; \ - } \ - 
break; \ - } \ - case 't': \ - if (!is_valid_true_atom(buf + idx)) { \ - goto fail; \ - } \ - pj.write_tape(0, c); \ - break; \ - case 'f': \ - if (!is_valid_false_atom(buf + idx)) { \ - goto fail; \ - } \ - pj.write_tape(0, c); \ - break; \ - case 'n': \ - if (!is_valid_null_atom(buf + idx)) { \ - goto fail; \ - } \ - pj.write_tape(0, c); \ - break; /* goto array_continue; */ \ - \ - case '0': \ - case '1': \ - case '2': \ - case '3': \ - case '4': \ - case '5': \ - case '6': \ - case '7': \ - case '8': \ - case '9': { \ - if (!parse_number(buf, pj, idx, false)) { \ - goto fail; \ - } \ - break; /* goto array_continue; */ \ - } \ - case '-': { \ - if (!parse_number(buf, pj, idx, true)) { \ - goto fail; \ - } \ - break; /* goto array_continue; */ \ - } \ - case '{': { \ - /* we have not yet encountered ] so we need to come back for it */ \ - pj.containing_scope_offset[depth] = pj.get_current_loc(); \ - pj.write_tape(0, c); /* here the compilers knows what c is so this gets \ - optimized */ \ - SET_GOTO_ARRAY_CONTINUE() \ - /* we found an object inside an array, so we need to increment the depth \ - */ \ - depth++; \ - if (depth >= pj.depth_capacity) { \ - goto fail; \ - } \ - \ - goto object_begin; \ - } \ - case '[': { \ - /* we have not yet encountered ] so we need to come back for it */ \ - pj.containing_scope_offset[depth] = pj.get_current_loc(); \ - pj.write_tape(0, c); /* here the compilers knows what c is so this gets \ - optimized */ \ - SET_GOTO_ARRAY_CONTINUE() \ - /* we found an array inside an array, so we need to increment the depth \ - */ \ - depth++; \ - if (depth >= pj.depth_capacity) { \ - goto fail; \ - } \ - goto array_begin; \ - } \ - default: \ - goto fail; \ - } \ - \ - array_continue: \ - UPDATE_CHAR(); \ - switch (c) { \ - case ',': \ - UPDATE_CHAR(); \ - goto main_array_switch; \ - case ']': \ - goto scope_end; \ - default: \ - goto fail; \ - } \ - \ - /*//////////////////////////// FINAL STATES ///////////////////////////*/ \ - \ - 
succeed: \ - depth--; \ - if (depth != 0) { \ - fprintf(stderr, "internal bug\n"); \ - abort(); \ - } \ - if (pj.containing_scope_offset[depth] != 0) { \ - fprintf(stderr, "internal bug\n"); \ - abort(); \ - } \ - pj.annotate_previous_loc(pj.containing_scope_offset[depth], \ - pj.get_current_loc()); \ - pj.write_tape(pj.containing_scope_offset[depth], 'r'); /* r is root */ \ - \ - pj.valid = true; \ - pj.error_code = simdjson::SUCCESS; \ - return pj.error_code; \ - fail: \ - /* we do not need the next line because this is done by pj.init(), \ - * pessimistically. \ - * pj.is_valid = false; \ - * At this point in the code, we have all the time in the world. \ - * Note that we know exactly where we are in the document so we could, \ - * without any overhead on the processing code, report a specific \ - * location. \ - * We could even trigger special code paths to assess what happened \ - * carefully, \ - * all without any added cost. */ \ - if (depth >= pj.depth_capacity) { \ - pj.error_code = simdjson::DEPTH_ERROR; \ - return pj.error_code; \ - } \ - switch (c) { \ - case '"': \ - pj.error_code = simdjson::STRING_ERROR; \ - return pj.error_code; \ - case '0': \ - case '1': \ - case '2': \ - case '3': \ - case '4': \ - case '5': \ - case '6': \ - case '7': \ - case '8': \ - case '9': \ - case '-': \ - pj.error_code = simdjson::NUMBER_ERROR; \ - return pj.error_code; \ - case 't': \ - pj.error_code = simdjson::T_ATOM_ERROR; \ - return pj.error_code; \ - case 'n': \ - pj.error_code = simdjson::N_ATOM_ERROR; \ - return pj.error_code; \ - case 'f': \ - pj.error_code = simdjson::F_ATOM_ERROR; \ - return pj.error_code; \ - default: \ - break; \ - } \ - pj.error_code = simdjson::TAPE_ERROR; \ - return pj.error_code; \ +template <> +WARN_UNUSED int +unified_machine(const uint8_t *buf, size_t len, + ParsedJson &pj) { + uint32_t i = 0; /* index of the structural character (0,1,2,3...) 
*/ + uint32_t idx; /* location of the structural character in the input (buf) */ + uint8_t c; /* used to track the (structural) character we are looking at, + updated */ + /* by UPDATE_CHAR macro */ + uint32_t depth = 0; /* could have an arbitrary starting depth */ + pj.init(); /* sets is_valid to false */ + if (pj.byte_capacity < len) { + pj.error_code = simdjson::CAPACITY; + return pj.error_code; } -} // namespace simdjson + /*//////////////////////////// START STATE ///////////////////////////// + */ + SET_GOTO_START_CONTINUE() + pj.containing_scope_offset[depth] = pj.get_current_loc(); + pj.write_tape(0, 'r'); /* r for root, 0 is going to get overwritten */ + /* the root is used, if nothing else, to capture the size of the tape */ + depth++; /* everything starts at depth = 1, depth = 0 is just for the + root, the root may contain an object, an array or something + else. */ + if (depth >= pj.depth_capacity) { + goto fail; + } -#ifdef IS_X86_64 -TARGET_HASWELL + UPDATE_CHAR(); + switch (c) { + case '{': + pj.containing_scope_offset[depth] = pj.get_current_loc(); + SET_GOTO_START_CONTINUE(); + depth++; + if (depth >= pj.depth_capacity) { + goto fail; + } + pj.write_tape( + 0, c); /* strangely, moving this to object_begin slows things down */ + goto object_begin; + case '[': + pj.containing_scope_offset[depth] = pj.get_current_loc(); + SET_GOTO_START_CONTINUE(); + depth++; + if (depth >= pj.depth_capacity) { + goto fail; + } + pj.write_tape(0, c); + goto array_begin; + /* #define SIMDJSON_ALLOWANYTHINGINROOT + * A JSON text is a serialized value. Note that certain previous + * specifications of JSON constrained a JSON text to be an object or an + * array. Implementations that generate only objects or arrays where a + * JSON text is called for will be interoperable in the sense that all + * implementations will accept these as conforming JSON texts. 
+ * https://tools.ietf.org/html/rfc8259 + * #ifdef SIMDJSON_ALLOWANYTHINGINROOT */ + case '"': { + if (!parse_string(buf, len, pj, depth, idx)) { + goto fail; + } + break; + } + case 't': { + /* we need to make a copy to make sure that the string is space + * terminated. + * this only applies to the JSON document made solely of the true value. + * this will almost never be called in practice */ + char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); + if (copy == nullptr) { + goto fail; + } + memcpy(copy, buf, len); + copy[len] = ' '; + if (!is_valid_true_atom(reinterpret_cast(copy) + idx)) { + free(copy); + goto fail; + } + free(copy); + pj.write_tape(0, c); + break; + } + case 'f': { + /* we need to make a copy to make sure that the string is space + * terminated. + * this only applies to the JSON document made solely of the false + * value. + * this will almost never be called in practice */ + char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); + if (copy == nullptr) { + goto fail; + } + memcpy(copy, buf, len); + copy[len] = ' '; + if (!is_valid_false_atom(reinterpret_cast(copy) + idx)) { + free(copy); + goto fail; + } + free(copy); + pj.write_tape(0, c); + break; + } + case 'n': { + /* we need to make a copy to make sure that the string is space + * terminated. + * this only applies to the JSON document made solely of the null value. + * this will almost never be called in practice */ + char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); + if (copy == nullptr) { + goto fail; + } + memcpy(copy, buf, len); + copy[len] = ' '; + if (!is_valid_null_atom(reinterpret_cast(copy) + idx)) { + free(copy); + goto fail; + } + free(copy); + pj.write_tape(0, c); + break; + } + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + /* we need to make a copy to make sure that the string is space + * terminated. 
+ * this is done only for JSON documents made of a sole number + * this will almost never be called in practice. We terminate with a + * space + * because we do not want to allow NULLs in the middle of a number + * (whereas a + * space in the middle of a number would be identified in stage 1). */ + char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); + if (copy == nullptr) { + goto fail; + } + memcpy(copy, buf, len); + copy[len] = ' '; + if (!parse_number(reinterpret_cast(copy), pj, idx, + false)) { + free(copy); + goto fail; + } + free(copy); + break; + } + case '-': { + /* we need to make a copy to make sure that the string is NULL + * terminated. + * this is done only for JSON documents made of a sole number + * this will almost never be called in practice */ + char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); + if (copy == nullptr) { + goto fail; + } + memcpy(copy, buf, len); + copy[len] = ' '; + if (!parse_number(reinterpret_cast(copy), pj, idx, true)) { + free(copy); + goto fail; + } + free(copy); + break; + } + default: + goto fail; + } +start_continue: + /* the string might not be NULL terminated. 
*/ + if (i + 1 == pj.n_structural_indexes) { + goto succeed; + } else { + goto fail; + } + /*//////////////////////////// OBJECT STATES ///////////////////////////*/ + +object_begin: + UPDATE_CHAR(); + switch (c) { + case '"': { + if (!parse_string(buf, len, pj, depth, idx)) { + goto fail; + } + goto object_key_state; + } + case '}': + goto scope_end; /* could also go to object_continue */ + default: + goto fail; + } + +object_key_state: + UPDATE_CHAR(); + if (c != ':') { + goto fail; + } + UPDATE_CHAR(); + switch (c) { + case '"': { + if (!parse_string(buf, len, pj, depth, idx)) { + goto fail; + } + break; + } + case 't': + if (!is_valid_true_atom(buf + idx)) { + goto fail; + } + pj.write_tape(0, c); + break; + case 'f': + if (!is_valid_false_atom(buf + idx)) { + goto fail; + } + pj.write_tape(0, c); + break; + case 'n': + if (!is_valid_null_atom(buf + idx)) { + goto fail; + } + pj.write_tape(0, c); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + if (!parse_number(buf, pj, idx, false)) { + goto fail; + } + break; + } + case '-': { + if (!parse_number(buf, pj, idx, true)) { + goto fail; + } + break; + } + case '{': { + pj.containing_scope_offset[depth] = pj.get_current_loc(); + pj.write_tape(0, c); /* here the compilers knows what c is so this gets + optimized */ + /* we have not yet encountered } so we need to come back for it */ + SET_GOTO_OBJECT_CONTINUE() + /* we found an object inside an object, so we need to increment the + * depth */ + depth++; + if (depth >= pj.depth_capacity) { + goto fail; + } + + goto object_begin; + } + case '[': { + pj.containing_scope_offset[depth] = pj.get_current_loc(); + pj.write_tape(0, c); /* here the compilers knows what c is so this gets + optimized */ + /* we have not yet encountered } so we need to come back for it */ + SET_GOTO_OBJECT_CONTINUE() + /* we found an array inside an object, so we need to increment the depth + */ + depth++; + if 
(depth >= pj.depth_capacity) { + goto fail; + } + goto array_begin; + } + default: + goto fail; + } + +object_continue: + UPDATE_CHAR(); + switch (c) { + case ',': + UPDATE_CHAR(); + if (c != '"') { + goto fail; + } else { + if (!parse_string(buf, len, pj, depth, idx)) { + goto fail; + } + goto object_key_state; + } + case '}': + goto scope_end; + default: + goto fail; + } + + /*//////////////////////////// COMMON STATE ///////////////////////////*/ + +scope_end: + /* write our tape location to the header scope */ + depth--; + pj.write_tape(pj.containing_scope_offset[depth], c); + pj.annotate_previous_loc(pj.containing_scope_offset[depth], + pj.get_current_loc()); + /* goto saved_state */ + GOTO_CONTINUE() + + /*//////////////////////////// ARRAY STATES ///////////////////////////*/ +array_begin: + UPDATE_CHAR(); + if (c == ']') { + goto scope_end; /* could also go to array_continue */ + } + +main_array_switch: + /* we call update char on all paths in, so we can peek at c on the + * on paths that can accept a close square brace (post-, and at start) */ + switch (c) { + case '"': { + if (!parse_string(buf, len, pj, depth, idx)) { + goto fail; + } + break; + } + case 't': + if (!is_valid_true_atom(buf + idx)) { + goto fail; + } + pj.write_tape(0, c); + break; + case 'f': + if (!is_valid_false_atom(buf + idx)) { + goto fail; + } + pj.write_tape(0, c); + break; + case 'n': + if (!is_valid_null_atom(buf + idx)) { + goto fail; + } + pj.write_tape(0, c); + break; /* goto array_continue; */ + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + if (!parse_number(buf, pj, idx, false)) { + goto fail; + } + break; /* goto array_continue; */ + } + case '-': { + if (!parse_number(buf, pj, idx, true)) { + goto fail; + } + break; /* goto array_continue; */ + } + case '{': { + /* we have not yet encountered ] so we need to come back for it */ + pj.containing_scope_offset[depth] = pj.get_current_loc(); + 
pj.write_tape(0, c); /* here the compilers knows what c is so this gets + optimized */ + SET_GOTO_ARRAY_CONTINUE() + /* we found an object inside an array, so we need to increment the depth + */ + depth++; + if (depth >= pj.depth_capacity) { + goto fail; + } + + goto object_begin; + } + case '[': { + /* we have not yet encountered ] so we need to come back for it */ + pj.containing_scope_offset[depth] = pj.get_current_loc(); + pj.write_tape(0, c); /* here the compilers knows what c is so this gets + optimized */ + SET_GOTO_ARRAY_CONTINUE() + /* we found an array inside an array, so we need to increment the depth + */ + depth++; + if (depth >= pj.depth_capacity) { + goto fail; + } + goto array_begin; + } + default: + goto fail; + } + +array_continue: + UPDATE_CHAR(); + switch (c) { + case ',': + UPDATE_CHAR(); + goto main_array_switch; + case ']': + goto scope_end; + default: + goto fail; + } + + /*//////////////////////////// FINAL STATES ///////////////////////////*/ + +succeed: + depth--; + if (depth != 0) { + fprintf(stderr, "internal bug\n"); + abort(); + } + if (pj.containing_scope_offset[depth] != 0) { + fprintf(stderr, "internal bug\n"); + abort(); + } + pj.annotate_previous_loc(pj.containing_scope_offset[depth], + pj.get_current_loc()); + pj.write_tape(pj.containing_scope_offset[depth], 'r'); /* r is root */ + + pj.valid = true; + pj.error_code = simdjson::SUCCESS; + return pj.error_code; +fail: + /* we do not need the next line because this is done by pj.init(), + * pessimistically. + * pj.is_valid = false; + * At this point in the code, we have all the time in the world. + * Note that we know exactly where we are in the document so we could, + * without any overhead on the processing code, report a specific + * location. + * We could even trigger special code paths to assess what happened + * carefully, + * all without any added cost. 
*/ + if (depth >= pj.depth_capacity) { + pj.error_code = simdjson::DEPTH_ERROR; + return pj.error_code; + } + switch (c) { + case '"': + pj.error_code = simdjson::STRING_ERROR; + return pj.error_code; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + pj.error_code = simdjson::NUMBER_ERROR; + return pj.error_code; + case 't': + pj.error_code = simdjson::T_ATOM_ERROR; + return pj.error_code; + case 'n': + pj.error_code = simdjson::N_ATOM_ERROR; + return pj.error_code; + case 'f': + pj.error_code = simdjson::F_ATOM_ERROR; + return pj.error_code; + default: + break; + } + pj.error_code = simdjson::TAPE_ERROR; + return pj.error_code; +} + +} // namespace simdjson +UNTARGET_REGION + +#else +#error TARGETED_REGION must be specified before including. +#endif // TARGETED_REGION +#else +#error TARGETED_ARCHITECTURE must be specified before including. +#endif // TARGETED_ARCHITECTURE +#undef TARGETED_ARCHITECTURE +#undef TARGETED_REGION + +#define TARGETED_ARCHITECTURE Architecture::WESTMERE +#define TARGETED_REGION TARGET_WESTMERE +// This file contains the common code every implementation uses for stage2 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is include already includes +// "simdjson/stage2_build_tape.h" (this simplifies amalgation) + +#ifdef TARGETED_ARCHITECTURE +#ifdef TARGETED_REGION + +TARGETED_REGION namespace simdjson { + +// this macro reads the next structural character, updating idx, i and c. 
+#define UPDATE_CHAR() \ + { \ + idx = pj.structural_indexes[i++]; \ + c = buf[idx]; \ + } + +#ifdef SIMDJSON_USE_COMPUTED_GOTO +#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = &&array_continue; +#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = &&object_continue; +#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = &&start_continue; +#define GOTO_CONTINUE() goto *pj.ret_address[depth]; +#else +#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = 'a'; +#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = 'o'; +#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = 's'; +#define GOTO_CONTINUE() \ + { \ + if (pj.ret_address[depth] == 'a') { \ + goto array_continue; \ + } else if (pj.ret_address[depth] == 'o') { \ + goto object_continue; \ + } else { \ + goto start_continue; \ + } \ + } +#endif + +/************ + * The JSON is parsed to a tape, see the accompanying tape.md file + * for documentation. + ***********/ template <> -WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER int -unified_machine(const uint8_t *buf, size_t len, +WARN_UNUSED int +unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) { - UNIFIED_MACHINE(Architecture::HASWELL, buf, len, pj); + uint32_t i = 0; /* index of the structural character (0,1,2,3...) 
*/ + uint32_t idx; /* location of the structural character in the input (buf) */ + uint8_t c; /* used to track the (structural) character we are looking at, + updated */ + /* by UPDATE_CHAR macro */ + uint32_t depth = 0; /* could have an arbitrary starting depth */ + pj.init(); /* sets is_valid to false */ + if (pj.byte_capacity < len) { + pj.error_code = simdjson::CAPACITY; + return pj.error_code; + } + + /*//////////////////////////// START STATE ///////////////////////////// + */ + SET_GOTO_START_CONTINUE() + pj.containing_scope_offset[depth] = pj.get_current_loc(); + pj.write_tape(0, 'r'); /* r for root, 0 is going to get overwritten */ + /* the root is used, if nothing else, to capture the size of the tape */ + depth++; /* everything starts at depth = 1, depth = 0 is just for the + root, the root may contain an object, an array or something + else. */ + if (depth >= pj.depth_capacity) { + goto fail; + } + + UPDATE_CHAR(); + switch (c) { + case '{': + pj.containing_scope_offset[depth] = pj.get_current_loc(); + SET_GOTO_START_CONTINUE(); + depth++; + if (depth >= pj.depth_capacity) { + goto fail; + } + pj.write_tape( + 0, c); /* strangely, moving this to object_begin slows things down */ + goto object_begin; + case '[': + pj.containing_scope_offset[depth] = pj.get_current_loc(); + SET_GOTO_START_CONTINUE(); + depth++; + if (depth >= pj.depth_capacity) { + goto fail; + } + pj.write_tape(0, c); + goto array_begin; + /* #define SIMDJSON_ALLOWANYTHINGINROOT + * A JSON text is a serialized value. Note that certain previous + * specifications of JSON constrained a JSON text to be an object or an + * array. Implementations that generate only objects or arrays where a + * JSON text is called for will be interoperable in the sense that all + * implementations will accept these as conforming JSON texts. 
+ * https://tools.ietf.org/html/rfc8259 + * #ifdef SIMDJSON_ALLOWANYTHINGINROOT */ + case '"': { + if (!parse_string(buf, len, pj, depth, idx)) { + goto fail; + } + break; + } + case 't': { + /* we need to make a copy to make sure that the string is space + * terminated. + * this only applies to the JSON document made solely of the true value. + * this will almost never be called in practice */ + char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); + if (copy == nullptr) { + goto fail; + } + memcpy(copy, buf, len); + copy[len] = ' '; + if (!is_valid_true_atom(reinterpret_cast(copy) + idx)) { + free(copy); + goto fail; + } + free(copy); + pj.write_tape(0, c); + break; + } + case 'f': { + /* we need to make a copy to make sure that the string is space + * terminated. + * this only applies to the JSON document made solely of the false + * value. + * this will almost never be called in practice */ + char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); + if (copy == nullptr) { + goto fail; + } + memcpy(copy, buf, len); + copy[len] = ' '; + if (!is_valid_false_atom(reinterpret_cast(copy) + idx)) { + free(copy); + goto fail; + } + free(copy); + pj.write_tape(0, c); + break; + } + case 'n': { + /* we need to make a copy to make sure that the string is space + * terminated. + * this only applies to the JSON document made solely of the null value. + * this will almost never be called in practice */ + char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); + if (copy == nullptr) { + goto fail; + } + memcpy(copy, buf, len); + copy[len] = ' '; + if (!is_valid_null_atom(reinterpret_cast(copy) + idx)) { + free(copy); + goto fail; + } + free(copy); + pj.write_tape(0, c); + break; + } + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + /* we need to make a copy to make sure that the string is space + * terminated. 
+ * this is done only for JSON documents made of a sole number + * this will almost never be called in practice. We terminate with a + * space + * because we do not want to allow NULLs in the middle of a number + * (whereas a + * space in the middle of a number would be identified in stage 1). */ + char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); + if (copy == nullptr) { + goto fail; + } + memcpy(copy, buf, len); + copy[len] = ' '; + if (!parse_number(reinterpret_cast(copy), pj, idx, + false)) { + free(copy); + goto fail; + } + free(copy); + break; + } + case '-': { + /* we need to make a copy to make sure that the string is NULL + * terminated. + * this is done only for JSON documents made of a sole number + * this will almost never be called in practice */ + char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); + if (copy == nullptr) { + goto fail; + } + memcpy(copy, buf, len); + copy[len] = ' '; + if (!parse_number(reinterpret_cast(copy), pj, idx, true)) { + free(copy); + goto fail; + } + free(copy); + break; + } + default: + goto fail; + } +start_continue: + /* the string might not be NULL terminated. 
*/ + if (i + 1 == pj.n_structural_indexes) { + goto succeed; + } else { + goto fail; + } + /*//////////////////////////// OBJECT STATES ///////////////////////////*/ + +object_begin: + UPDATE_CHAR(); + switch (c) { + case '"': { + if (!parse_string(buf, len, pj, depth, idx)) { + goto fail; + } + goto object_key_state; + } + case '}': + goto scope_end; /* could also go to object_continue */ + default: + goto fail; + } + +object_key_state: + UPDATE_CHAR(); + if (c != ':') { + goto fail; + } + UPDATE_CHAR(); + switch (c) { + case '"': { + if (!parse_string(buf, len, pj, depth, idx)) { + goto fail; + } + break; + } + case 't': + if (!is_valid_true_atom(buf + idx)) { + goto fail; + } + pj.write_tape(0, c); + break; + case 'f': + if (!is_valid_false_atom(buf + idx)) { + goto fail; + } + pj.write_tape(0, c); + break; + case 'n': + if (!is_valid_null_atom(buf + idx)) { + goto fail; + } + pj.write_tape(0, c); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + if (!parse_number(buf, pj, idx, false)) { + goto fail; + } + break; + } + case '-': { + if (!parse_number(buf, pj, idx, true)) { + goto fail; + } + break; + } + case '{': { + pj.containing_scope_offset[depth] = pj.get_current_loc(); + pj.write_tape(0, c); /* here the compilers knows what c is so this gets + optimized */ + /* we have not yet encountered } so we need to come back for it */ + SET_GOTO_OBJECT_CONTINUE() + /* we found an object inside an object, so we need to increment the + * depth */ + depth++; + if (depth >= pj.depth_capacity) { + goto fail; + } + + goto object_begin; + } + case '[': { + pj.containing_scope_offset[depth] = pj.get_current_loc(); + pj.write_tape(0, c); /* here the compilers knows what c is so this gets + optimized */ + /* we have not yet encountered } so we need to come back for it */ + SET_GOTO_OBJECT_CONTINUE() + /* we found an array inside an object, so we need to increment the depth + */ + depth++; + if 
(depth >= pj.depth_capacity) { + goto fail; + } + goto array_begin; + } + default: + goto fail; + } + +object_continue: + UPDATE_CHAR(); + switch (c) { + case ',': + UPDATE_CHAR(); + if (c != '"') { + goto fail; + } else { + if (!parse_string(buf, len, pj, depth, idx)) { + goto fail; + } + goto object_key_state; + } + case '}': + goto scope_end; + default: + goto fail; + } + + /*//////////////////////////// COMMON STATE ///////////////////////////*/ + +scope_end: + /* write our tape location to the header scope */ + depth--; + pj.write_tape(pj.containing_scope_offset[depth], c); + pj.annotate_previous_loc(pj.containing_scope_offset[depth], + pj.get_current_loc()); + /* goto saved_state */ + GOTO_CONTINUE() + + /*//////////////////////////// ARRAY STATES ///////////////////////////*/ +array_begin: + UPDATE_CHAR(); + if (c == ']') { + goto scope_end; /* could also go to array_continue */ + } + +main_array_switch: + /* we call update char on all paths in, so we can peek at c on the + * on paths that can accept a close square brace (post-, and at start) */ + switch (c) { + case '"': { + if (!parse_string(buf, len, pj, depth, idx)) { + goto fail; + } + break; + } + case 't': + if (!is_valid_true_atom(buf + idx)) { + goto fail; + } + pj.write_tape(0, c); + break; + case 'f': + if (!is_valid_false_atom(buf + idx)) { + goto fail; + } + pj.write_tape(0, c); + break; + case 'n': + if (!is_valid_null_atom(buf + idx)) { + goto fail; + } + pj.write_tape(0, c); + break; /* goto array_continue; */ + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + if (!parse_number(buf, pj, idx, false)) { + goto fail; + } + break; /* goto array_continue; */ + } + case '-': { + if (!parse_number(buf, pj, idx, true)) { + goto fail; + } + break; /* goto array_continue; */ + } + case '{': { + /* we have not yet encountered ] so we need to come back for it */ + pj.containing_scope_offset[depth] = pj.get_current_loc(); + 
pj.write_tape(0, c); /* here the compilers knows what c is so this gets + optimized */ + SET_GOTO_ARRAY_CONTINUE() + /* we found an object inside an array, so we need to increment the depth + */ + depth++; + if (depth >= pj.depth_capacity) { + goto fail; + } + + goto object_begin; + } + case '[': { + /* we have not yet encountered ] so we need to come back for it */ + pj.containing_scope_offset[depth] = pj.get_current_loc(); + pj.write_tape(0, c); /* here the compilers knows what c is so this gets + optimized */ + SET_GOTO_ARRAY_CONTINUE() + /* we found an array inside an array, so we need to increment the depth + */ + depth++; + if (depth >= pj.depth_capacity) { + goto fail; + } + goto array_begin; + } + default: + goto fail; + } + +array_continue: + UPDATE_CHAR(); + switch (c) { + case ',': + UPDATE_CHAR(); + goto main_array_switch; + case ']': + goto scope_end; + default: + goto fail; + } + + /*//////////////////////////// FINAL STATES ///////////////////////////*/ + +succeed: + depth--; + if (depth != 0) { + fprintf(stderr, "internal bug\n"); + abort(); + } + if (pj.containing_scope_offset[depth] != 0) { + fprintf(stderr, "internal bug\n"); + abort(); + } + pj.annotate_previous_loc(pj.containing_scope_offset[depth], + pj.get_current_loc()); + pj.write_tape(pj.containing_scope_offset[depth], 'r'); /* r is root */ + + pj.valid = true; + pj.error_code = simdjson::SUCCESS; + return pj.error_code; +fail: + /* we do not need the next line because this is done by pj.init(), + * pessimistically. + * pj.is_valid = false; + * At this point in the code, we have all the time in the world. + * Note that we know exactly where we are in the document so we could, + * without any overhead on the processing code, report a specific + * location. + * We could even trigger special code paths to assess what happened + * carefully, + * all without any added cost. 
*/ + if (depth >= pj.depth_capacity) { + pj.error_code = simdjson::DEPTH_ERROR; + return pj.error_code; + } + switch (c) { + case '"': + pj.error_code = simdjson::STRING_ERROR; + return pj.error_code; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + pj.error_code = simdjson::NUMBER_ERROR; + return pj.error_code; + case 't': + pj.error_code = simdjson::T_ATOM_ERROR; + return pj.error_code; + case 'n': + pj.error_code = simdjson::N_ATOM_ERROR; + return pj.error_code; + case 'f': + pj.error_code = simdjson::F_ATOM_ERROR; + return pj.error_code; + default: + break; + } + pj.error_code = simdjson::TAPE_ERROR; + return pj.error_code; } + } // namespace simdjson UNTARGET_REGION -TARGET_WESTMERE -namespace simdjson { -template <> -WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER int -unified_machine(const uint8_t *buf, size_t len, - ParsedJson &pj) { - UNIFIED_MACHINE(Architecture::WESTMERE, buf, len, pj); -} -} // namespace simdjson -UNTARGET_REGION +#else +#error TARGETED_REGION must be specified before including. +#endif // TARGETED_REGION +#else +#error TARGETED_ARCHITECTURE must be specified before including. 
+#endif // TARGETED_ARCHITECTURE +#undef TARGETED_ARCHITECTURE +#undef TARGETED_REGION #endif // IS_X86_64 #ifdef IS_ARM64 +#define TARGETED_ARCHITECTURE Architecture::ARM64 +#define TARGETED_REGION TARGET_ARM64 +// This file contains the common code every implementation uses for stage2 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is include already includes +// "simdjson/stage2_build_tape.h" (this simplifies amalgation) + +#ifdef TARGETED_ARCHITECTURE +#ifdef TARGETED_REGION + +TARGETED_REGION namespace simdjson { -template <> -WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER int -unified_machine(const uint8_t *buf, size_t len, - ParsedJson &pj) { - UNIFIED_MACHINE(Architecture::ARM64, buf, len, pj); -} -} // namespace simdjson + +// this macro reads the next structural character, updating idx, i and c. +#define UPDATE_CHAR() \ + { \ + idx = pj.structural_indexes[i++]; \ + c = buf[idx]; \ + } + +#ifdef SIMDJSON_USE_COMPUTED_GOTO +#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = &&array_continue; +#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = &&object_continue; +#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = &&start_continue; +#define GOTO_CONTINUE() goto *pj.ret_address[depth]; +#else +#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = 'a'; +#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = 'o'; +#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = 's'; +#define GOTO_CONTINUE() \ + { \ + if (pj.ret_address[depth] == 'a') { \ + goto array_continue; \ + } else if (pj.ret_address[depth] == 'o') { \ + goto object_continue; \ + } else { \ + goto start_continue; \ + } \ + } #endif + +/************ + * The JSON is parsed to a tape, see the accompanying tape.md file + * for documentation. 
+ ***********/ +template <> +WARN_UNUSED int +unified_machine(const uint8_t *buf, size_t len, + ParsedJson &pj) { + uint32_t i = 0; /* index of the structural character (0,1,2,3...) */ + uint32_t idx; /* location of the structural character in the input (buf) */ + uint8_t c; /* used to track the (structural) character we are looking at, + updated */ + /* by UPDATE_CHAR macro */ + uint32_t depth = 0; /* could have an arbitrary starting depth */ + pj.init(); /* sets is_valid to false */ + if (pj.byte_capacity < len) { + pj.error_code = simdjson::CAPACITY; + return pj.error_code; + } + + /*//////////////////////////// START STATE ///////////////////////////// + */ + SET_GOTO_START_CONTINUE() + pj.containing_scope_offset[depth] = pj.get_current_loc(); + pj.write_tape(0, 'r'); /* r for root, 0 is going to get overwritten */ + /* the root is used, if nothing else, to capture the size of the tape */ + depth++; /* everything starts at depth = 1, depth = 0 is just for the + root, the root may contain an object, an array or something + else. */ + if (depth >= pj.depth_capacity) { + goto fail; + } + + UPDATE_CHAR(); + switch (c) { + case '{': + pj.containing_scope_offset[depth] = pj.get_current_loc(); + SET_GOTO_START_CONTINUE(); + depth++; + if (depth >= pj.depth_capacity) { + goto fail; + } + pj.write_tape( + 0, c); /* strangely, moving this to object_begin slows things down */ + goto object_begin; + case '[': + pj.containing_scope_offset[depth] = pj.get_current_loc(); + SET_GOTO_START_CONTINUE(); + depth++; + if (depth >= pj.depth_capacity) { + goto fail; + } + pj.write_tape(0, c); + goto array_begin; + /* #define SIMDJSON_ALLOWANYTHINGINROOT + * A JSON text is a serialized value. Note that certain previous + * specifications of JSON constrained a JSON text to be an object or an + * array. 
Implementations that generate only objects or arrays where a + * JSON text is called for will be interoperable in the sense that all + * implementations will accept these as conforming JSON texts. + * https://tools.ietf.org/html/rfc8259 + * #ifdef SIMDJSON_ALLOWANYTHINGINROOT */ + case '"': { + if (!parse_string(buf, len, pj, depth, idx)) { + goto fail; + } + break; + } + case 't': { + /* we need to make a copy to make sure that the string is space + * terminated. + * this only applies to the JSON document made solely of the true value. + * this will almost never be called in practice */ + char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); + if (copy == nullptr) { + goto fail; + } + memcpy(copy, buf, len); + copy[len] = ' '; + if (!is_valid_true_atom(reinterpret_cast(copy) + idx)) { + free(copy); + goto fail; + } + free(copy); + pj.write_tape(0, c); + break; + } + case 'f': { + /* we need to make a copy to make sure that the string is space + * terminated. + * this only applies to the JSON document made solely of the false + * value. + * this will almost never be called in practice */ + char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); + if (copy == nullptr) { + goto fail; + } + memcpy(copy, buf, len); + copy[len] = ' '; + if (!is_valid_false_atom(reinterpret_cast(copy) + idx)) { + free(copy); + goto fail; + } + free(copy); + pj.write_tape(0, c); + break; + } + case 'n': { + /* we need to make a copy to make sure that the string is space + * terminated. + * this only applies to the JSON document made solely of the null value. 
+ * this will almost never be called in practice */ + char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); + if (copy == nullptr) { + goto fail; + } + memcpy(copy, buf, len); + copy[len] = ' '; + if (!is_valid_null_atom(reinterpret_cast(copy) + idx)) { + free(copy); + goto fail; + } + free(copy); + pj.write_tape(0, c); + break; + } + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + /* we need to make a copy to make sure that the string is space + * terminated. + * this is done only for JSON documents made of a sole number + * this will almost never be called in practice. We terminate with a + * space + * because we do not want to allow NULLs in the middle of a number + * (whereas a + * space in the middle of a number would be identified in stage 1). */ + char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); + if (copy == nullptr) { + goto fail; + } + memcpy(copy, buf, len); + copy[len] = ' '; + if (!parse_number(reinterpret_cast(copy), pj, idx, + false)) { + free(copy); + goto fail; + } + free(copy); + break; + } + case '-': { + /* we need to make a copy to make sure that the string is NULL + * terminated. + * this is done only for JSON documents made of a sole number + * this will almost never be called in practice */ + char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); + if (copy == nullptr) { + goto fail; + } + memcpy(copy, buf, len); + copy[len] = ' '; + if (!parse_number(reinterpret_cast(copy), pj, idx, true)) { + free(copy); + goto fail; + } + free(copy); + break; + } + default: + goto fail; + } +start_continue: + /* the string might not be NULL terminated. 
*/ + if (i + 1 == pj.n_structural_indexes) { + goto succeed; + } else { + goto fail; + } + /*//////////////////////////// OBJECT STATES ///////////////////////////*/ + +object_begin: + UPDATE_CHAR(); + switch (c) { + case '"': { + if (!parse_string(buf, len, pj, depth, idx)) { + goto fail; + } + goto object_key_state; + } + case '}': + goto scope_end; /* could also go to object_continue */ + default: + goto fail; + } + +object_key_state: + UPDATE_CHAR(); + if (c != ':') { + goto fail; + } + UPDATE_CHAR(); + switch (c) { + case '"': { + if (!parse_string(buf, len, pj, depth, idx)) { + goto fail; + } + break; + } + case 't': + if (!is_valid_true_atom(buf + idx)) { + goto fail; + } + pj.write_tape(0, c); + break; + case 'f': + if (!is_valid_false_atom(buf + idx)) { + goto fail; + } + pj.write_tape(0, c); + break; + case 'n': + if (!is_valid_null_atom(buf + idx)) { + goto fail; + } + pj.write_tape(0, c); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + if (!parse_number(buf, pj, idx, false)) { + goto fail; + } + break; + } + case '-': { + if (!parse_number(buf, pj, idx, true)) { + goto fail; + } + break; + } + case '{': { + pj.containing_scope_offset[depth] = pj.get_current_loc(); + pj.write_tape(0, c); /* here the compilers knows what c is so this gets + optimized */ + /* we have not yet encountered } so we need to come back for it */ + SET_GOTO_OBJECT_CONTINUE() + /* we found an object inside an object, so we need to increment the + * depth */ + depth++; + if (depth >= pj.depth_capacity) { + goto fail; + } + + goto object_begin; + } + case '[': { + pj.containing_scope_offset[depth] = pj.get_current_loc(); + pj.write_tape(0, c); /* here the compilers knows what c is so this gets + optimized */ + /* we have not yet encountered } so we need to come back for it */ + SET_GOTO_OBJECT_CONTINUE() + /* we found an array inside an object, so we need to increment the depth + */ + depth++; + if 
(depth >= pj.depth_capacity) { + goto fail; + } + goto array_begin; + } + default: + goto fail; + } + +object_continue: + UPDATE_CHAR(); + switch (c) { + case ',': + UPDATE_CHAR(); + if (c != '"') { + goto fail; + } else { + if (!parse_string(buf, len, pj, depth, idx)) { + goto fail; + } + goto object_key_state; + } + case '}': + goto scope_end; + default: + goto fail; + } + + /*//////////////////////////// COMMON STATE ///////////////////////////*/ + +scope_end: + /* write our tape location to the header scope */ + depth--; + pj.write_tape(pj.containing_scope_offset[depth], c); + pj.annotate_previous_loc(pj.containing_scope_offset[depth], + pj.get_current_loc()); + /* goto saved_state */ + GOTO_CONTINUE() + + /*//////////////////////////// ARRAY STATES ///////////////////////////*/ +array_begin: + UPDATE_CHAR(); + if (c == ']') { + goto scope_end; /* could also go to array_continue */ + } + +main_array_switch: + /* we call update char on all paths in, so we can peek at c on the + * on paths that can accept a close square brace (post-, and at start) */ + switch (c) { + case '"': { + if (!parse_string(buf, len, pj, depth, idx)) { + goto fail; + } + break; + } + case 't': + if (!is_valid_true_atom(buf + idx)) { + goto fail; + } + pj.write_tape(0, c); + break; + case 'f': + if (!is_valid_false_atom(buf + idx)) { + goto fail; + } + pj.write_tape(0, c); + break; + case 'n': + if (!is_valid_null_atom(buf + idx)) { + goto fail; + } + pj.write_tape(0, c); + break; /* goto array_continue; */ + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + if (!parse_number(buf, pj, idx, false)) { + goto fail; + } + break; /* goto array_continue; */ + } + case '-': { + if (!parse_number(buf, pj, idx, true)) { + goto fail; + } + break; /* goto array_continue; */ + } + case '{': { + /* we have not yet encountered ] so we need to come back for it */ + pj.containing_scope_offset[depth] = pj.get_current_loc(); + 
pj.write_tape(0, c); /* here the compilers knows what c is so this gets + optimized */ + SET_GOTO_ARRAY_CONTINUE() + /* we found an object inside an array, so we need to increment the depth + */ + depth++; + if (depth >= pj.depth_capacity) { + goto fail; + } + + goto object_begin; + } + case '[': { + /* we have not yet encountered ] so we need to come back for it */ + pj.containing_scope_offset[depth] = pj.get_current_loc(); + pj.write_tape(0, c); /* here the compilers knows what c is so this gets + optimized */ + SET_GOTO_ARRAY_CONTINUE() + /* we found an array inside an array, so we need to increment the depth + */ + depth++; + if (depth >= pj.depth_capacity) { + goto fail; + } + goto array_begin; + } + default: + goto fail; + } + +array_continue: + UPDATE_CHAR(); + switch (c) { + case ',': + UPDATE_CHAR(); + goto main_array_switch; + case ']': + goto scope_end; + default: + goto fail; + } + + /*//////////////////////////// FINAL STATES ///////////////////////////*/ + +succeed: + depth--; + if (depth != 0) { + fprintf(stderr, "internal bug\n"); + abort(); + } + if (pj.containing_scope_offset[depth] != 0) { + fprintf(stderr, "internal bug\n"); + abort(); + } + pj.annotate_previous_loc(pj.containing_scope_offset[depth], + pj.get_current_loc()); + pj.write_tape(pj.containing_scope_offset[depth], 'r'); /* r is root */ + + pj.valid = true; + pj.error_code = simdjson::SUCCESS; + return pj.error_code; +fail: + /* we do not need the next line because this is done by pj.init(), + * pessimistically. + * pj.is_valid = false; + * At this point in the code, we have all the time in the world. + * Note that we know exactly where we are in the document so we could, + * without any overhead on the processing code, report a specific + * location. + * We could even trigger special code paths to assess what happened + * carefully, + * all without any added cost. 
*/ + if (depth >= pj.depth_capacity) { + pj.error_code = simdjson::DEPTH_ERROR; + return pj.error_code; + } + switch (c) { + case '"': + pj.error_code = simdjson::STRING_ERROR; + return pj.error_code; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + pj.error_code = simdjson::NUMBER_ERROR; + return pj.error_code; + case 't': + pj.error_code = simdjson::T_ATOM_ERROR; + return pj.error_code; + case 'n': + pj.error_code = simdjson::N_ATOM_ERROR; + return pj.error_code; + case 'f': + pj.error_code = simdjson::F_ATOM_ERROR; + return pj.error_code; + default: + break; + } + pj.error_code = simdjson::TAPE_ERROR; + return pj.error_code; +} + +} // namespace simdjson +UNTARGET_REGION + +#else +#error TARGETED_REGION must be specified before including. +#endif // TARGETED_REGION +#else +#error TARGETED_ARCHITECTURE must be specified before including. +#endif // TARGETED_ARCHITECTURE +#undef TARGETED_ARCHITECTURE +#undef TARGETED_REGION +#endif // IS_ARM64 /* end file src/stage2_build_tape.cpp */ /* begin file src/parsedjson.cpp */ diff --git a/singleheader/simdjson.h b/singleheader/simdjson.h index 2d5ca358..a16fe67b 100644 --- a/singleheader/simdjson.h +++ b/singleheader/simdjson.h @@ -1,4 +1,4 @@ -/* auto-generated on Sun Aug 4 15:43:41 EDT 2019. Do not edit! */ +/* auto-generated on Wed Aug 14 10:31:26 DST 2019. Do not edit! */ /* begin file include/simdjson/simdjson_version.h */ // /include/simdjson/simdjson_version.h automatically generated by release.py, // do not change by hand @@ -44,16 +44,20 @@ enum { #define TARGET_REGION(T) \ _Pragma("GCC push_options") _Pragma(STRINGIFY(GCC target(T))) #define UNTARGET_REGION _Pragma("GCC pop_options") -#else +#endif // clang then gcc + +#endif // x86 + +// Default target region macros don't do anything. 
+#ifndef TARGET_REGION #define TARGET_REGION(T) #define UNTARGET_REGION -#endif // clang then gcc +#endif // under GCC and CLANG, we use these two macros #define TARGET_HASWELL TARGET_REGION("avx2,bmi,pclmul") #define TARGET_WESTMERE TARGET_REGION("sse4.2,pclmul") - -#endif // x86 +#define TARGET_ARM64 #ifdef _MSC_VER #include @@ -65,6 +69,14 @@ enum { #endif #endif +#if defined(__clang__) +#define NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined"))) +#elif defined(__GNUC__) +#define NO_SANITIZE_UNDEFINED __attribute__((no_sanitize_undefined)) +#else +#define NO_SANITIZE_UNDEFINED +#endif + #ifdef _MSC_VER /* Microsoft C/C++-compatible compiler */ #include @@ -119,7 +131,7 @@ static inline bool mul_overflow(uint64_t value1, uint64_t value2, } /* result might be undefined when input_num is zero */ -static inline int trailing_zeroes(uint64_t input_num) { +static inline NO_SANITIZE_UNDEFINED int trailing_zeroes(uint64_t input_num) { #ifdef __BMI__ // tzcnt is BMI1 return _tzcnt_u64(input_num); #else @@ -343,8 +355,8 @@ static inline uint32_t detect_supported_architectures() { #endif /* end file include/simdjson/isadetection.h */ /* begin file include/simdjson/simdjson.h */ -#ifndef SIMDJSON_ERR_H -#define SIMDJSON_ERR_H +#ifndef SIMDJSON_SIMDJSON_H +#define SIMDJSON_SIMDJSON_H #include @@ -386,7 +398,7 @@ enum ErrorValues { }; const std::string &error_message(const int); } // namespace simdjson -#endif +#endif // SIMDJSON_SIMDJSON_H /* end file include/simdjson/simdjson.h */ /* begin file include/simdjson/common_defs.h */ #ifndef SIMDJSON_COMMON_DEFS_H @@ -433,38 +445,9 @@ const std::string &error_message(const int); #define unlikely(x) x #endif -// For Visual Studio compilers, same-page buffer overrun is not fine. -#define ALLOW_SAME_PAGE_BUFFER_OVERRUN false #else -// For non-Visual Studio compilers, we may assume that same-page buffer overrun -// is fine. However, it will make it difficult to be "valgrind clean". 
-//#ifndef ALLOW_SAME_PAGE_BUFFER_OVERRUN -//#define ALLOW_SAME_PAGE_BUFFER_OVERRUN true -//#else -#define ALLOW_SAME_PAGE_BUFFER_OVERRUN false -//#endif - -// The following is likely unnecessarily complex. -#ifdef __SANITIZE_ADDRESS__ -// we have GCC, stuck with https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368 -#define ALLOW_SAME_PAGE_BUFFER_OVERRUN false -#elif defined(__has_feature) -// we have CLANG? -// todo: if we're setting ALLOW_SAME_PAGE_BUFFER_OVERRUN to false, why do we -// have a non-empty qualifier? -#if (__has_feature(address_sanitizer)) -#define ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER \ - __attribute__((no_sanitize("address"))) -#endif -#endif - -#if defined(__has_feature) -#if (__has_feature(memory_sanitizer)) -#define LENIENT_MEM_SANITIZER __attribute__((no_sanitize("memory"))) -#endif -#endif #define really_inline inline __attribute__((always_inline, unused)) #define never_inline inline __attribute__((noinline, unused)) @@ -481,14 +464,6 @@ const std::string &error_message(const int); #endif // MSC_VER -// if it does not apply, make it an empty macro -#ifndef ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER -#define ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER -#endif -#ifndef LENIENT_MEM_SANITIZER -#define LENIENT_MEM_SANITIZER -#endif - #endif // SIMDJSON_COMMON_DEFS_H /* end file include/simdjson/common_defs.h */ /* begin file include/simdjson/padded_string.h */ @@ -1064,7 +1039,6 @@ static inline void print_with_escapes(const char *src, std::ostream &os, } } // namespace simdjson -# #endif /* end file include/simdjson/jsonformatutils.h */ /* begin file include/simdjson/jsonioutil.h */ @@ -36174,6 +36148,250 @@ static const uint32_t mask256_epi32[] = { } #endif /* end file include/simdjson/simdprune_tables.h */ +/* begin file include/simdjson/simd_input.h */ +#ifndef SIMDJSON_SIMD_INPUT_H +#define SIMDJSON_SIMD_INPUT_H + +#include + +namespace simdjson { + +template struct simd_input; + +template +simd_input fill_input(const uint8_t *ptr); + +// a 
straightforward comparison of a mask against input. +template +uint64_t cmp_mask_against_input(simd_input in, uint8_t m); + +// find all values less than or equal than the content of maxval (using unsigned +// arithmetic) +template +uint64_t unsigned_lteq_against_input(simd_input in, uint8_t m); + +} // namespace simdjson + +#endif +/* end file include/simdjson/simd_input.h */ +/* begin file include/simdjson/simd_input_haswell.h */ +#ifndef SIMDJSON_SIMD_INPUT_HASWELL_H +#define SIMDJSON_SIMD_INPUT_HASWELL_H + + +#ifdef IS_X86_64 + +TARGET_HASWELL +namespace simdjson { + +template <> +struct simd_input { + __m256i lo; + __m256i hi; +}; + +template <> +really_inline simd_input +fill_input(const uint8_t *ptr) { + struct simd_input in; + in.lo = _mm256_loadu_si256(reinterpret_cast(ptr + 0)); + in.hi = _mm256_loadu_si256(reinterpret_cast(ptr + 32)); + return in; +} + +template <> +really_inline uint64_t cmp_mask_against_input( + simd_input in, uint8_t m) { + const __m256i mask = _mm256_set1_epi8(m); + __m256i cmp_res_0 = _mm256_cmpeq_epi8(in.lo, mask); + uint64_t res_0 = static_cast(_mm256_movemask_epi8(cmp_res_0)); + __m256i cmp_res_1 = _mm256_cmpeq_epi8(in.hi, mask); + uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); + return res_0 | (res_1 << 32); +} + +template <> +really_inline uint64_t unsigned_lteq_against_input( + simd_input in, uint8_t m) { + const __m256i maxval = _mm256_set1_epi8(m); + __m256i cmp_res_0 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, in.lo), maxval); + uint64_t res_0 = static_cast(_mm256_movemask_epi8(cmp_res_0)); + __m256i cmp_res_1 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, in.hi), maxval); + uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); + return res_0 | (res_1 << 32); +} + +} // namespace simdjson +UNTARGET_REGION + +#endif // IS_X86_64 +#endif // SIMDJSON_SIMD_INPUT_HASWELL_H +/* end file include/simdjson/simd_input_haswell.h */ +/* begin file include/simdjson/simd_input_westmere.h */ +#ifndef SIMDJSON_SIMD_INPUT_WESTMERE_H +#define 
SIMDJSON_SIMD_INPUT_WESTMERE_H + + +#ifdef IS_X86_64 + +TARGET_WESTMERE +namespace simdjson { + +template <> +struct simd_input { + __m128i v0; + __m128i v1; + __m128i v2; + __m128i v3; +}; + +template <> +really_inline simd_input +fill_input(const uint8_t *ptr) { + struct simd_input in; + in.v0 = _mm_loadu_si128(reinterpret_cast(ptr + 0)); + in.v1 = _mm_loadu_si128(reinterpret_cast(ptr + 16)); + in.v2 = _mm_loadu_si128(reinterpret_cast(ptr + 32)); + in.v3 = _mm_loadu_si128(reinterpret_cast(ptr + 48)); + return in; +} + +template <> +really_inline uint64_t cmp_mask_against_input( + simd_input in, uint8_t m) { + const __m128i mask = _mm_set1_epi8(m); + __m128i cmp_res_0 = _mm_cmpeq_epi8(in.v0, mask); + uint64_t res_0 = _mm_movemask_epi8(cmp_res_0); + __m128i cmp_res_1 = _mm_cmpeq_epi8(in.v1, mask); + uint64_t res_1 = _mm_movemask_epi8(cmp_res_1); + __m128i cmp_res_2 = _mm_cmpeq_epi8(in.v2, mask); + uint64_t res_2 = _mm_movemask_epi8(cmp_res_2); + __m128i cmp_res_3 = _mm_cmpeq_epi8(in.v3, mask); + uint64_t res_3 = _mm_movemask_epi8(cmp_res_3); + return res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48); +} + +template <> +really_inline uint64_t unsigned_lteq_against_input( + simd_input in, uint8_t m) { + const __m128i maxval = _mm_set1_epi8(m); + __m128i cmp_res_0 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v0), maxval); + uint64_t res_0 = _mm_movemask_epi8(cmp_res_0); + __m128i cmp_res_1 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v1), maxval); + uint64_t res_1 = _mm_movemask_epi8(cmp_res_1); + __m128i cmp_res_2 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v2), maxval); + uint64_t res_2 = _mm_movemask_epi8(cmp_res_2); + __m128i cmp_res_3 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v3), maxval); + uint64_t res_3 = _mm_movemask_epi8(cmp_res_3); + return res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48); +} + +} // namespace simdjson +UNTARGET_REGION + +#endif // IS_X86_64 +#endif // SIMDJSON_SIMD_INPUT_WESTMERE_H +/* end file include/simdjson/simd_input_westmere.h */ +/* 
begin file include/simdjson/simd_input_arm64.h */ +#ifndef SIMDJSON_SIMD_INPUT_ARM64_H +#define SIMDJSON_SIMD_INPUT_ARM64_H + + +#ifdef IS_ARM64 +namespace simdjson { + +template <> +struct simd_input { + uint8x16_t i0; + uint8x16_t i1; + uint8x16_t i2; + uint8x16_t i3; +}; + +template <> +really_inline simd_input +fill_input(const uint8_t *ptr) { + struct simd_input in; + in.i0 = vld1q_u8(ptr + 0); + in.i1 = vld1q_u8(ptr + 16); + in.i2 = vld1q_u8(ptr + 32); + in.i3 = vld1q_u8(ptr + 48); + return in; +} + +really_inline uint16_t neon_movemask(uint8x16_t input) { + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; + uint8x16_t minput = vandq_u8(input, bit_mask); + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); +} + +really_inline uint64_t neon_movemask_bulk(uint8x16_t p0, uint8x16_t p1, + uint8x16_t p2, uint8x16_t p3) { + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; + uint8x16_t t0 = vandq_u8(p0, bit_mask); + uint8x16_t t1 = vandq_u8(p1, bit_mask); + uint8x16_t t2 = vandq_u8(p2, bit_mask); + uint8x16_t t3 = vandq_u8(p3, bit_mask); + uint8x16_t sum0 = vpaddq_u8(t0, t1); + uint8x16_t sum1 = vpaddq_u8(t2, t3); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); +} + +template <> +really_inline uint64_t cmp_mask_against_input( + simd_input in, uint8_t m) { + const uint8x16_t mask = vmovq_n_u8(m); + uint8x16_t cmp_res_0 = vceqq_u8(in.i0, mask); + uint8x16_t cmp_res_1 = vceqq_u8(in.i1, mask); + uint8x16_t cmp_res_2 = vceqq_u8(in.i2, mask); + uint8x16_t cmp_res_3 = vceqq_u8(in.i3, mask); + return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3); +} + +template <> +really_inline uint64_t unsigned_lteq_against_input( + simd_input in, 
uint8_t m) { + const uint8x16_t mask = vmovq_n_u8(m); + uint8x16_t cmp_res_0 = vcleq_u8(in.i0, mask); + uint8x16_t cmp_res_1 = vcleq_u8(in.i1, mask); + uint8x16_t cmp_res_2 = vcleq_u8(in.i2, mask); + uint8x16_t cmp_res_3 = vcleq_u8(in.i3, mask); + return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3); +} + +} // namespace simdjson + +#endif // IS_ARM64 +#endif // SIMDJSON_SIMD_INPUT_ARM64_H +/* end file include/simdjson/simd_input_arm64.h */ +/* begin file include/simdjson/simdutf8check.h */ +#ifndef SIMDJSON_SIMDUTF8CHECK_H +#define SIMDJSON_SIMDUTF8CHECK_H + + +namespace simdjson { + +// Holds the state required to perform check_utf8(). +template struct utf8_checking_state; + +template +void check_utf8(simd_input in, utf8_checking_state &state); + +// Checks if the utf8 validation has found any error. +template +ErrorValues check_utf8_errors(utf8_checking_state &state); + +} // namespace simdjson + +#endif // SIMDJSON_SIMDUTF8CHECK_H +/* end file include/simdjson/simdutf8check.h */ /* begin file include/simdjson/simdutf8check_haswell.h */ #ifndef SIMDJSON_SIMDUTF8CHECK_HASWELL_H #define SIMDJSON_SIMDUTF8CHECK_HASWELL_H @@ -36366,6 +36584,48 @@ avx_check_utf8_bytes(__m256i current_bytes, previous->high_nibbles, has_error); return pb; } + +template <> struct utf8_checking_state { + __m256i has_error; + avx_processed_utf_bytes previous; + utf8_checking_state() { + has_error = _mm256_setzero_si256(); + previous.raw_bytes = _mm256_setzero_si256(); + previous.high_nibbles = _mm256_setzero_si256(); + previous.carried_continuations = _mm256_setzero_si256(); + } +}; + +template <> +really_inline void check_utf8( + simd_input in, + utf8_checking_state &state) { + __m256i high_bit = _mm256_set1_epi8(0x80u); + if ((_mm256_testz_si256(_mm256_or_si256(in.lo, in.hi), high_bit)) == 1) { + // it is ascii, we just check continuation + state.has_error = _mm256_or_si256( + _mm256_cmpgt_epi8(state.previous.carried_continuations, + _mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 
9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 1)), + state.has_error); + } else { + // it is not ascii so we have to do heavy work + state.previous = + avx_check_utf8_bytes(in.lo, &(state.previous), &(state.has_error)); + state.previous = + avx_check_utf8_bytes(in.hi, &(state.previous), &(state.has_error)); + } +} + +template <> +really_inline ErrorValues check_utf8_errors( + utf8_checking_state &state) { + return _mm256_testz_si256(state.has_error, state.has_error) == 0 + ? simdjson::UTF8_ERROR + : simdjson::SUCCESS; +} + } // namespace simdjson UNTARGET_REGION // haswell @@ -36536,6 +36796,61 @@ check_utf8_bytes(__m128i current_bytes, struct processed_utf_bytes *previous, previous->high_nibbles, has_error); return pb; } + +template <> +struct utf8_checking_state { + __m128i has_error = _mm_setzero_si128(); + processed_utf_bytes previous{ + _mm_setzero_si128(), // raw_bytes + _mm_setzero_si128(), // high_nibbles + _mm_setzero_si128() // carried_continuations + }; +}; + +template <> +really_inline void check_utf8( + simd_input in, + utf8_checking_state &state) { + __m128i high_bit = _mm_set1_epi8(0x80u); + if ((_mm_testz_si128(_mm_or_si128(in.v0, in.v1), high_bit)) == 1) { + // it is ascii, we just check continuation + state.has_error = + _mm_or_si128(_mm_cmpgt_epi8(state.previous.carried_continuations, + _mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 1)), + state.has_error); + } else { + // it is not ascii so we have to do heavy work + state.previous = + check_utf8_bytes(in.v0, &(state.previous), &(state.has_error)); + state.previous = + check_utf8_bytes(in.v1, &(state.previous), &(state.has_error)); + } + + if ((_mm_testz_si128(_mm_or_si128(in.v2, in.v3), high_bit)) == 1) { + // it is ascii, we just check continuation + state.has_error = + _mm_or_si128(_mm_cmpgt_epi8(state.previous.carried_continuations, + _mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 1)), + state.has_error); + } else { + // it is not 
ascii so we have to do heavy work + state.previous = + check_utf8_bytes(in.v2, &(state.previous), &(state.has_error)); + state.previous = + check_utf8_bytes(in.v3, &(state.previous), &(state.has_error)); + } +} + +template <> +really_inline ErrorValues check_utf8_errors( + utf8_checking_state &state) { + return _mm_testz_si128(state.has_error, state.has_error) == 0 + ? simdjson::UTF8_ERROR + : simdjson::SUCCESS; +} + } // namespace simdjson UNTARGET_REGION // westmere @@ -36721,6 +37036,64 @@ check_utf8_bytes(int8x16_t current_bytes, struct processed_utf_bytes *previous, previous->high_nibbles, has_error); return pb; } + +template <> +struct utf8_checking_state { + int8x16_t has_error{}; + processed_utf_bytes previous{}; +}; + +// Checks that all bytes are ascii +really_inline bool check_ascii_neon(simd_input in) { + // checking if the most significant bit is always equal to 0. + uint8x16_t high_bit = vdupq_n_u8(0x80); + uint8x16_t t0 = vorrq_u8(in.i0, in.i1); + uint8x16_t t1 = vorrq_u8(in.i2, in.i3); + uint8x16_t t3 = vorrq_u8(t0, t1); + uint8x16_t t4 = vandq_u8(t3, high_bit); + uint64x2_t v64 = vreinterpretq_u64_u8(t4); + uint32x2_t v32 = vqmovn_u64(v64); + uint64x1_t result = vreinterpret_u64_u32(v32); + return vget_lane_u64(result, 0) == 0; +} + +template <> +really_inline void check_utf8( + simd_input in, + utf8_checking_state &state) { + if (check_ascii_neon(in)) { + // All bytes are ascii. Therefore the byte that was just before must be + // ascii too. We only check the byte that was just before simd_input. Nines + // are arbitrary values. 
+ const int8x16_t verror = + (int8x16_t){9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1}; + state.has_error = + vorrq_s8(vreinterpretq_s8_u8( + vcgtq_s8(state.previous.carried_continuations, verror)), + state.has_error); + } else { + // it is not ascii so we have to do heavy work + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i0), + &(state.previous), &(state.has_error)); + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i1), + &(state.previous), &(state.has_error)); + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i2), + &(state.previous), &(state.has_error)); + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i3), + &(state.previous), &(state.has_error)); + } +} + +template <> +really_inline ErrorValues check_utf8_errors( + utf8_checking_state &state) { + uint64x2_t v64 = vreinterpretq_u64_s8(state.has_error); + uint32x2_t v32 = vqmovn_u64(v64); + uint64x1_t result = vreinterpret_u64_u32(v32); + return vget_lane_u64(result, 0) != 0 ? simdjson::UTF8_ERROR + : simdjson::SUCCESS; +} + } // namespace simdjson #endif #endif @@ -37306,8 +37679,6 @@ bool ParsedJson::Iterator::next() { namespace simdjson { -template struct simd_input; - template uint64_t compute_quote_mask(uint64_t quote_bits); namespace { @@ -37333,17 +37704,6 @@ void check_utf8(simd_input in, utf8_checking_state &state); template ErrorValues check_utf8_errors(utf8_checking_state &state); -// a straightforward comparison of a mask against input. 
-template -uint64_t cmp_mask_against_input(simd_input in, uint8_t m); - -template simd_input fill_input(const uint8_t *ptr); - -// find all values less than or equal than the content of maxval (using unsigned -// arithmetic) -template -uint64_t unsigned_lteq_against_input(simd_input in, uint8_t m); - template really_inline uint64_t find_odd_backslash_sequences( simd_input in, uint64_t &prev_iter_ends_odd_backslash); @@ -37413,439 +37773,19 @@ int find_structural_bits(const char *buf, size_t len, return find_structural_bits((const uint8_t *)buf, len, pj); } +// flatten out values in 'bits' assuming that they are are to have values of idx +// plus their position in the bitvector, and store these indexes at +// base_ptr[base] incrementing base as we go +// will potentially store extra values beyond end of valid bits, so base_ptr +// needs to be large enough to handle this +template +really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, + uint32_t idx, uint64_t bits); + } // namespace simdjson #endif /* end file include/simdjson/stage1_find_marks.h */ -/* begin file include/simdjson/stage1_find_marks_flatten.h */ -#ifndef SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H -#define SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H - -namespace simdjson { - -#ifdef SIMDJSON_NAIVE_FLATTEN // useful for benchmarking -// -// This is just a naive implementation. It should be normally -// disable, but can be used for research purposes to compare -// again our optimized version. 
-really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, - uint32_t idx, uint64_t bits) { - uint32_t *out_ptr = base_ptr + base; - idx -= 64; - while (bits != 0) { - out_ptr[0] = idx + trailing_zeroes(bits); - bits = bits & (bits - 1); - out_ptr++; - } - base = (out_ptr - base_ptr); -} - -#else -// flatten out values in 'bits' assuming that they are are to have values of idx -// plus their position in the bitvector, and store these indexes at -// base_ptr[base] incrementing base as we go -// will potentially store extra values beyond end of valid bits, so base_ptr -// needs to be large enough to handle this -really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, - uint32_t idx, uint64_t bits) { - // In some instances, the next branch is expensive because it is mispredicted. - // Unfortunately, in other cases, - // it helps tremendously. - if (bits == 0) - return; - uint32_t cnt = hamming(bits); - uint32_t next_base = base + cnt; - idx -= 64; - base_ptr += base; - { - base_ptr[0] = idx + trailing_zeroes(bits); - bits = bits & (bits - 1); - base_ptr[1] = idx + trailing_zeroes(bits); - bits = bits & (bits - 1); - base_ptr[2] = idx + trailing_zeroes(bits); - bits = bits & (bits - 1); - base_ptr[3] = idx + trailing_zeroes(bits); - bits = bits & (bits - 1); - base_ptr[4] = idx + trailing_zeroes(bits); - bits = bits & (bits - 1); - base_ptr[5] = idx + trailing_zeroes(bits); - bits = bits & (bits - 1); - base_ptr[6] = idx + trailing_zeroes(bits); - bits = bits & (bits - 1); - base_ptr[7] = idx + trailing_zeroes(bits); - bits = bits & (bits - 1); - base_ptr += 8; - } - // We hope that the next branch is easily predicted. 
- if (cnt > 8) { - base_ptr[0] = idx + trailing_zeroes(bits); - bits = bits & (bits - 1); - base_ptr[1] = idx + trailing_zeroes(bits); - bits = bits & (bits - 1); - base_ptr[2] = idx + trailing_zeroes(bits); - bits = bits & (bits - 1); - base_ptr[3] = idx + trailing_zeroes(bits); - bits = bits & (bits - 1); - base_ptr[4] = idx + trailing_zeroes(bits); - bits = bits & (bits - 1); - base_ptr[5] = idx + trailing_zeroes(bits); - bits = bits & (bits - 1); - base_ptr[6] = idx + trailing_zeroes(bits); - bits = bits & (bits - 1); - base_ptr[7] = idx + trailing_zeroes(bits); - bits = bits & (bits - 1); - base_ptr += 8; - } - if (cnt > 16) { // unluckly: we rarely get here - // since it means having one structural or pseudo-structral element - // every 4 characters (possible with inputs like "","","",...). - do { - base_ptr[0] = idx + trailing_zeroes(bits); - bits = bits & (bits - 1); - base_ptr++; - } while (bits != 0); - } - base = next_base; -} -#endif // SIMDJSON_NAIVE_FLATTEN -} // namespace simdjson - -#endif // SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H/* end file include/simdjson/stage1_find_marks_flatten.h */ -/* begin file include/simdjson/stage1_find_marks_flatten_haswell.h */ -#ifndef SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_HASWELL_H -#define SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_HASWELL_H - -// This file provides the same function as -// stage1_find_marks_flatten.h, but uses Intel intrinsics. -// This should provide better performance on Visual Studio -// and other compilers that do a conservative optimization. - -// Specifically, on x64 processors with BMI, -// x & (x - 1) should be mapped to -// the blsr instruction. By using the -// _blsr_u64 intrinsic, we -// ensure that this will happen. 
-///////// - - -#ifdef IS_X86_64 - -TARGET_HASWELL -namespace simdjson { -namespace haswell { - -// flatten out values in 'bits' assuming that they are are to have values of idx -// plus their position in the bitvector, and store these indexes at -// base_ptr[base] incrementing base as we go -// will potentially store extra values beyond end of valid bits, so base_ptr -// needs to be large enough to handle this -really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, - uint32_t idx, uint64_t bits) { - // In some instances, the next branch is expensive because it is mispredicted. - // Unfortunately, in other cases, - // it helps tremendously. - if (bits == 0) - return; - uint32_t cnt = _mm_popcnt_u64(bits); - uint32_t next_base = base + cnt; - idx -= 64; - base_ptr += base; - { - base_ptr[0] = idx + trailing_zeroes(bits); - bits = _blsr_u64(bits); - base_ptr[1] = idx + trailing_zeroes(bits); - bits = _blsr_u64(bits); - base_ptr[2] = idx + trailing_zeroes(bits); - bits = _blsr_u64(bits); - base_ptr[3] = idx + trailing_zeroes(bits); - bits = _blsr_u64(bits); - base_ptr[4] = idx + trailing_zeroes(bits); - bits = _blsr_u64(bits); - base_ptr[5] = idx + trailing_zeroes(bits); - bits = _blsr_u64(bits); - base_ptr[6] = idx + trailing_zeroes(bits); - bits = _blsr_u64(bits); - base_ptr[7] = idx + trailing_zeroes(bits); - bits = _blsr_u64(bits); - base_ptr += 8; - } - // We hope that the next branch is easily predicted. 
- if (cnt > 8) { - base_ptr[0] = idx + trailing_zeroes(bits); - bits = _blsr_u64(bits); - base_ptr[1] = idx + trailing_zeroes(bits); - bits = _blsr_u64(bits); - base_ptr[2] = idx + trailing_zeroes(bits); - bits = _blsr_u64(bits); - base_ptr[3] = idx + trailing_zeroes(bits); - bits = _blsr_u64(bits); - base_ptr[4] = idx + trailing_zeroes(bits); - bits = _blsr_u64(bits); - base_ptr[5] = idx + trailing_zeroes(bits); - bits = _blsr_u64(bits); - base_ptr[6] = idx + trailing_zeroes(bits); - bits = _blsr_u64(bits); - base_ptr[7] = idx + trailing_zeroes(bits); - bits = _blsr_u64(bits); - base_ptr += 8; - } - if (cnt > 16) { // unluckly: we rarely get here - // since it means having one structural or pseudo-structral element - // every 4 characters (possible with inputs like "","","",...). - do { - base_ptr[0] = idx + trailing_zeroes(bits); - bits = _blsr_u64(bits); - base_ptr++; - } while (bits != 0); - } - base = next_base; -} -} // namespace haswell -} // namespace simdjson -UNTARGET_REGION -#endif // IS_X86_64 -#endif // SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H -/* end file include/simdjson/stage1_find_marks_flatten_haswell.h */ -/* begin file include/simdjson/stage1_find_marks_macros.h */ -#ifndef SIMDJSON_STAGE1_FIND_MARKS_MACROS_H -#define SIMDJSON_STAGE1_FIND_MARKS_MACROS_H - -// return a bitvector indicating where we have characters that end an odd-length -// sequence of backslashes (and thus change the behavior of the next character -// to follow). A even-length sequence of backslashes, and, for that matter, the -// largest even-length prefix of our odd-length sequence of backslashes, simply -// modify the behavior of the backslashes themselves. -// We also update the prev_iter_ends_odd_backslash reference parameter to -// indicate whether we end an iteration on an odd-length sequence of -// backslashes, which modifies our subsequent search for odd-length -// sequences of backslashes in an obvious way. -// We need to compile that code for multiple architectures. 
However, target -// attributes can be used only once by function definition. Huge macro seemed -// better than huge code duplication. uint64_t -// FIND_ODD_BACKSLASH_SEQUENCES(Architecture T, simd_input in, uint64_t -// &prev_iter_ends_odd_backslash) -#define FIND_ODD_BACKSLASH_SEQUENCES(T, in, prev_iter_ends_odd_backslash) \ - { \ - const uint64_t even_bits = 0x5555555555555555ULL; \ - const uint64_t odd_bits = ~even_bits; \ - uint64_t bs_bits = cmp_mask_against_input(in, '\\'); \ - uint64_t start_edges = bs_bits & ~(bs_bits << 1); \ - /* flip lowest if we have an odd-length run at the end of the prior \ - * iteration */ \ - uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash; \ - uint64_t even_starts = start_edges & even_start_mask; \ - uint64_t odd_starts = start_edges & ~even_start_mask; \ - uint64_t even_carries = bs_bits + even_starts; \ - \ - uint64_t odd_carries; \ - /* must record the carry-out of our odd-carries out of bit 63; this \ - * indicates whether the sense of any edge going to the next iteration \ - * should be flipped */ \ - bool iter_ends_odd_backslash = \ - add_overflow(bs_bits, odd_starts, &odd_carries); \ - \ - odd_carries |= prev_iter_ends_odd_backslash; /* push in bit zero as a \ - * potential end if we had an \ - * odd-numbered run at the \ - * end of the previous \ - * iteration */ \ - prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL; \ - uint64_t even_carry_ends = even_carries & ~bs_bits; \ - uint64_t odd_carry_ends = odd_carries & ~bs_bits; \ - uint64_t even_start_odd_end = even_carry_ends & odd_bits; \ - uint64_t odd_start_even_end = odd_carry_ends & even_bits; \ - uint64_t odd_ends = even_start_odd_end | odd_start_even_end; \ - return odd_ends; \ - } - -// return both the quote mask (which is a half-open mask that covers the first -// quote -// in an unescaped quote pair and everything in the quote pair) and the quote -// bits, which are the simple -// unescaped quoted bits. 
We also update the prev_iter_inside_quote value to -// tell the next iteration -// whether we finished the final iteration inside a quote pair; if so, this -// inverts our behavior of -// whether we're inside quotes for the next iteration. -// Note that we don't do any error checking to see if we have backslash -// sequences outside quotes; these -// backslash sequences (of any length) will be detected elsewhere. -// We need to compile that code for multiple architectures. However, target -// attributes can be used only once by function definition. Huge macro seemed -// better than huge code duplication. uint64_t -// FIND_QUOTE_MASK_AND_BITS(Architecture T, simd_input in, uint64_t odd_ends, -// uint64_t &prev_iter_inside_quote, uint64_t "e_bits, uint64_t -// &error_mask) -#define FIND_QUOTE_MASK_AND_BITS(T, in, odd_ends, prev_iter_inside_quote, \ - quote_bits, error_mask) \ - { \ - quote_bits = cmp_mask_against_input(in, '"'); \ - quote_bits = quote_bits & ~odd_ends; \ - uint64_t quote_mask = compute_quote_mask(quote_bits); \ - quote_mask ^= prev_iter_inside_quote; \ - /* All Unicode characters may be placed within the \ - * quotation marks, except for the characters that MUST be escaped: \ - * quotation mark, reverse solidus, and the control characters (U+0000 \ - * through U+001F). \ - * https://tools.ietf.org/html/rfc8259 */ \ - uint64_t unescaped = unsigned_lteq_against_input(in, 0x1F); \ - error_mask |= quote_mask & unescaped; \ - /* right shift of a signed value expected to be well-defined and standard \ - * compliant as of C++20, \ - * John Regher from Utah U. says this is fine code */ \ - prev_iter_inside_quote = \ - static_cast(static_cast(quote_mask) >> 63); \ - return quote_mask; \ - } - -// Find structural bits in a 64-byte chunk. -// We need to compile that code for multiple architectures. However, target -// attributes can be used only once by function definition. Huge macro seemed -// better than huge code duplication. 
void FIND_STRUCTURAL_BITS_64( -// Architecture T, -// const uint8_t *buf, -// size_t idx, -// uint32_t *base_ptr, -// uint32_t &base, -// uint64_t &prev_iter_ends_odd_backslash, -// uint64_t &prev_iter_inside_quote, -// uint64_t &prev_iter_ends_pseudo_pred, -// uint64_t &structurals, -// uint64_t &error_mask, -// utf8_checking_state &utf8_state, flatten -// function) -#define FIND_STRUCTURAL_BITS_64( \ - T, buf, idx, base_ptr, base, prev_iter_ends_odd_backslash, \ - prev_iter_inside_quote, prev_iter_ends_pseudo_pred, structurals, \ - error_mask, utf8_state, flat) \ - { \ - simd_input in = fill_input(buf); \ - check_utf8(in, utf8_state); \ - /* detect odd sequences of backslashes */ \ - uint64_t odd_ends = \ - find_odd_backslash_sequences(in, prev_iter_ends_odd_backslash); \ - \ - /* detect insides of quote pairs ("quote_mask") and also our quote_bits \ - * themselves */ \ - uint64_t quote_bits; \ - uint64_t quote_mask = find_quote_mask_and_bits( \ - in, odd_ends, prev_iter_inside_quote, quote_bits, error_mask); \ - \ - /* take the previous iterations structural bits, not our current \ - * iteration, \ - * and flatten */ \ - flat(base_ptr, base, idx, structurals); \ - \ - uint64_t whitespace; \ - find_whitespace_and_structurals(in, whitespace, structurals); \ - \ - /* fixup structurals to reflect quotes and add pseudo-structural \ - * characters */ \ - structurals = \ - finalize_structurals(structurals, whitespace, quote_mask, quote_bits, \ - prev_iter_ends_pseudo_pred); \ - } - -// We need to compile that code for multiple architectures. However, target -// attributes can be used only once by function definition. Huge macro seemed -// better than huge code duplication. 
ErrorValues -// FIND_STRUCTURAL_BITS(Architecture T, const uint8_t *buf, size_t len, -// ParsedJson &pj, flatten function) -#define FIND_STRUCTURAL_BITS(T, buf, len, pj, flat) \ - { \ - if (len > pj.byte_capacity) { \ - std::cerr << "Your ParsedJson object only supports documents up to " \ - << pj.byte_capacity << " bytes but you are trying to process " \ - << len << " bytes" << std::endl; \ - return simdjson::CAPACITY; \ - } \ - uint32_t *base_ptr = pj.structural_indexes; \ - uint32_t base = 0; \ - utf8_checking_state utf8_state; \ - \ - /* we have padded the input out to 64 byte multiple with the remainder \ - * being zeros persistent state across loop does the last iteration end \ - * with an odd-length sequence of backslashes? */ \ - \ - /* either 0 or 1, but a 64-bit value */ \ - uint64_t prev_iter_ends_odd_backslash = 0ULL; \ - /* does the previous iteration end inside a double-quote pair? */ \ - uint64_t prev_iter_inside_quote = \ - 0ULL; /* either all zeros or all ones \ - * does the previous iteration end on something that is a \ - * predecessor of a pseudo-structural character - i.e. \ - * whitespace or a structural character effectively the very \ - * first char is considered to follow "whitespace" for the \ - * purposes of pseudo-structural character detection so we \ - * initialize to 1 */ \ - uint64_t prev_iter_ends_pseudo_pred = 1ULL; \ - \ - /* structurals are persistent state across loop as we flatten them on the \ - * subsequent iteration into our array pointed to be base_ptr. \ - * This is harmless on the first iteration as structurals==0 \ - * and is done for performance reasons; we can hide some of the latency of \ - * the \ - * expensive carryless multiply in the previous step with this work */ \ - uint64_t structurals = 0; \ - \ - size_t lenminus64 = len < 64 ? 
0 : len - 64; \ - size_t idx = 0; \ - uint64_t error_mask = 0; /* for unescaped characters within strings (ASCII \ - code points < 0x20) */ \ - \ - for (; idx < lenminus64; idx += 64) { \ - FIND_STRUCTURAL_BITS_64( \ - T, &buf[idx], idx, base_ptr, base, prev_iter_ends_odd_backslash, \ - prev_iter_inside_quote, prev_iter_ends_pseudo_pred, structurals, \ - error_mask, utf8_state, flat); \ - } \ - /* If we have a final chunk of less than 64 bytes, pad it to 64 with \ - * spaces before processing it (otherwise, we risk invalidating the UTF-8 \ - * checks). */ \ - if (idx < len) { \ - uint8_t tmp_buf[64]; \ - memset(tmp_buf, 0x20, 64); \ - memcpy(tmp_buf, buf + idx, len - idx); \ - FIND_STRUCTURAL_BITS_64( \ - T, &tmp_buf[0], idx, base_ptr, base, prev_iter_ends_odd_backslash, \ - prev_iter_inside_quote, prev_iter_ends_pseudo_pred, structurals, \ - error_mask, utf8_state, flat); \ - idx += 64; \ - } \ - \ - /* is last string quote closed? */ \ - if (prev_iter_inside_quote) { \ - return simdjson::UNCLOSED_STRING; \ - } \ - \ - /* finally, flatten out the remaining structurals from the last iteration \ - */ \ - flat(base_ptr, base, idx, structurals); \ - \ - pj.n_structural_indexes = base; \ - /* a valid JSON file cannot have zero structural indexes - we should have \ - * found something */ \ - if (pj.n_structural_indexes == 0u) { \ - return simdjson::EMPTY; \ - } \ - if (base_ptr[pj.n_structural_indexes - 1] > len) { \ - return simdjson::UNEXPECTED_ERROR; \ - } \ - if (len != base_ptr[pj.n_structural_indexes - 1]) { \ - /* the string might not be NULL terminated, but we add a virtual NULL \ - * ending \ - * character. 
*/ \ - base_ptr[pj.n_structural_indexes++] = len; \ - } \ - /* make it safe to dereference one beyond this array */ \ - base_ptr[pj.n_structural_indexes] = 0; \ - if (error_mask) { \ - return simdjson::UNESCAPED_CHARS; \ - } \ - return check_utf8_errors(utf8_state); \ - } - -#endif // SIMDJSON_STAGE1_FIND_MARKS_MACROS_H/* end file include/simdjson/stage1_find_marks_macros.h */ /* begin file include/simdjson/stage1_find_marks_westmere.h */ #ifndef SIMDJSON_STAGE1_FIND_MARKS_WESTMERE_H #define SIMDJSON_STAGE1_FIND_MARKS_WESTMERE_H @@ -37855,23 +37795,6 @@ UNTARGET_REGION TARGET_WESTMERE namespace simdjson { -template <> struct simd_input { - __m128i v0; - __m128i v1; - __m128i v2; - __m128i v3; -}; - -template <> -really_inline simd_input -fill_input(const uint8_t *ptr) { - struct simd_input in; - in.v0 = _mm_loadu_si128(reinterpret_cast(ptr + 0)); - in.v1 = _mm_loadu_si128(reinterpret_cast(ptr + 16)); - in.v2 = _mm_loadu_si128(reinterpret_cast(ptr + 32)); - in.v3 = _mm_loadu_si128(reinterpret_cast(ptr + 48)); - return in; -} template <> really_inline uint64_t @@ -37880,106 +37803,6 @@ compute_quote_mask(uint64_t quote_bits) { _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFFu), 0)); } -template <> struct utf8_checking_state { - __m128i has_error = _mm_setzero_si128(); - processed_utf_bytes previous{ - _mm_setzero_si128(), // raw_bytes - _mm_setzero_si128(), // high_nibbles - _mm_setzero_si128() // carried_continuations - }; -}; - -template <> -really_inline void check_utf8( - simd_input in, - utf8_checking_state &state) { - __m128i high_bit = _mm_set1_epi8(0x80u); - if ((_mm_testz_si128(_mm_or_si128(in.v0, in.v1), high_bit)) == 1) { - // it is ascii, we just check continuation - state.has_error = - _mm_or_si128(_mm_cmpgt_epi8(state.previous.carried_continuations, - _mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 1)), - state.has_error); - } else { - // it is not ascii so we have to do heavy work - state.previous = - check_utf8_bytes(in.v0, 
&(state.previous), &(state.has_error)); - state.previous = - check_utf8_bytes(in.v1, &(state.previous), &(state.has_error)); - } - - if ((_mm_testz_si128(_mm_or_si128(in.v2, in.v3), high_bit)) == 1) { - // it is ascii, we just check continuation - state.has_error = - _mm_or_si128(_mm_cmpgt_epi8(state.previous.carried_continuations, - _mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 1)), - state.has_error); - } else { - // it is not ascii so we have to do heavy work - state.previous = - check_utf8_bytes(in.v2, &(state.previous), &(state.has_error)); - state.previous = - check_utf8_bytes(in.v3, &(state.previous), &(state.has_error)); - } -} - -template <> -really_inline ErrorValues check_utf8_errors( - utf8_checking_state &state) { - return _mm_testz_si128(state.has_error, state.has_error) == 0 - ? simdjson::UTF8_ERROR - : simdjson::SUCCESS; -} - -template <> -really_inline uint64_t cmp_mask_against_input( - simd_input in, uint8_t m) { - const __m128i mask = _mm_set1_epi8(m); - __m128i cmp_res_0 = _mm_cmpeq_epi8(in.v0, mask); - uint64_t res_0 = _mm_movemask_epi8(cmp_res_0); - __m128i cmp_res_1 = _mm_cmpeq_epi8(in.v1, mask); - uint64_t res_1 = _mm_movemask_epi8(cmp_res_1); - __m128i cmp_res_2 = _mm_cmpeq_epi8(in.v2, mask); - uint64_t res_2 = _mm_movemask_epi8(cmp_res_2); - __m128i cmp_res_3 = _mm_cmpeq_epi8(in.v3, mask); - uint64_t res_3 = _mm_movemask_epi8(cmp_res_3); - return res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48); -} - -template <> -really_inline uint64_t unsigned_lteq_against_input( - simd_input in, uint8_t m) { - const __m128i maxval = _mm_set1_epi8(m); - __m128i cmp_res_0 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v0), maxval); - uint64_t res_0 = _mm_movemask_epi8(cmp_res_0); - __m128i cmp_res_1 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v1), maxval); - uint64_t res_1 = _mm_movemask_epi8(cmp_res_1); - __m128i cmp_res_2 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v2), maxval); - uint64_t res_2 = _mm_movemask_epi8(cmp_res_2); - __m128i 
cmp_res_3 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v3), maxval); - uint64_t res_3 = _mm_movemask_epi8(cmp_res_3); - return res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48); -} - -template <> -really_inline uint64_t find_odd_backslash_sequences( - simd_input in, - uint64_t &prev_iter_ends_odd_backslash) { - FIND_ODD_BACKSLASH_SEQUENCES(Architecture::WESTMERE, in, - prev_iter_ends_odd_backslash); -} - -template <> -really_inline uint64_t find_quote_mask_and_bits( - simd_input in, uint64_t odd_ends, - uint64_t &prev_iter_inside_quote, uint64_t "e_bits, - uint64_t &error_mask) { - FIND_QUOTE_MASK_AND_BITS(Architecture::WESTMERE, in, odd_ends, - prev_iter_inside_quote, quote_bits, error_mask) -} - template <> really_inline void find_whitespace_and_structurals( simd_input in, uint64_t &whitespace, @@ -38036,7 +37859,8 @@ really_inline void find_whitespace_and_structurals( UNTARGET_REGION #endif // IS_X86_64 -#endif // SIMDJSON_STAGE1_FIND_MARKS_WESTMERE_H/* end file include/simdjson/stage1_find_marks_westmere.h */ +#endif // SIMDJSON_STAGE1_FIND_MARKS_WESTMERE_H +/* end file include/simdjson/stage1_find_marks_westmere.h */ /* begin file include/simdjson/stage1_find_marks_haswell.h */ #ifndef SIMDJSON_STAGE1_FIND_MARKS_HASWELL_H #define SIMDJSON_STAGE1_FIND_MARKS_HASWELL_H @@ -38046,19 +37870,6 @@ UNTARGET_REGION TARGET_HASWELL namespace simdjson { -template <> struct simd_input { - __m256i lo; - __m256i hi; -}; - -template <> -really_inline simd_input -fill_input(const uint8_t *ptr) { - struct simd_input in; - in.lo = _mm256_loadu_si256(reinterpret_cast(ptr + 0)); - in.hi = _mm256_loadu_si256(reinterpret_cast(ptr + 32)); - return in; -} template <> really_inline uint64_t @@ -38070,86 +37881,6 @@ compute_quote_mask(uint64_t quote_bits) { return quote_mask; } -template <> struct utf8_checking_state { - __m256i has_error; - avx_processed_utf_bytes previous; - utf8_checking_state() { - has_error = _mm256_setzero_si256(); - previous.raw_bytes = _mm256_setzero_si256(); - 
previous.high_nibbles = _mm256_setzero_si256(); - previous.carried_continuations = _mm256_setzero_si256(); - } -}; - -template <> -really_inline void check_utf8( - simd_input in, - utf8_checking_state &state) { - __m256i high_bit = _mm256_set1_epi8(0x80u); - if ((_mm256_testz_si256(_mm256_or_si256(in.lo, in.hi), high_bit)) == 1) { - // it is ascii, we just check continuation - state.has_error = _mm256_or_si256( - _mm256_cmpgt_epi8(state.previous.carried_continuations, - _mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 1)), - state.has_error); - } else { - // it is not ascii so we have to do heavy work - state.previous = - avx_check_utf8_bytes(in.lo, &(state.previous), &(state.has_error)); - state.previous = - avx_check_utf8_bytes(in.hi, &(state.previous), &(state.has_error)); - } -} - -template <> -really_inline ErrorValues check_utf8_errors( - utf8_checking_state &state) { - return _mm256_testz_si256(state.has_error, state.has_error) == 0 - ? 
simdjson::UTF8_ERROR - : simdjson::SUCCESS; -} - -template <> -really_inline uint64_t cmp_mask_against_input( - simd_input in, uint8_t m) { - const __m256i mask = _mm256_set1_epi8(m); - __m256i cmp_res_0 = _mm256_cmpeq_epi8(in.lo, mask); - uint64_t res_0 = static_cast(_mm256_movemask_epi8(cmp_res_0)); - __m256i cmp_res_1 = _mm256_cmpeq_epi8(in.hi, mask); - uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); - return res_0 | (res_1 << 32); -} - -template <> -really_inline uint64_t unsigned_lteq_against_input( - simd_input in, uint8_t m) { - const __m256i maxval = _mm256_set1_epi8(m); - __m256i cmp_res_0 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, in.lo), maxval); - uint64_t res_0 = static_cast(_mm256_movemask_epi8(cmp_res_0)); - __m256i cmp_res_1 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, in.hi), maxval); - uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); - return res_0 | (res_1 << 32); -} - -template <> -really_inline uint64_t find_odd_backslash_sequences( - simd_input in, - uint64_t &prev_iter_ends_odd_backslash) { - FIND_ODD_BACKSLASH_SEQUENCES(Architecture::HASWELL, in, - prev_iter_ends_odd_backslash); -} - -template <> -really_inline uint64_t find_quote_mask_and_bits( - simd_input in, uint64_t odd_ends, - uint64_t &prev_iter_inside_quote, uint64_t "e_bits, - uint64_t &error_mask) { - FIND_QUOTE_MASK_AND_BITS(Architecture::HASWELL, in, odd_ends, - prev_iter_inside_quote, quote_bits, error_mask) -} - template <> really_inline void find_whitespace_and_structurals( simd_input in, uint64_t &whitespace, @@ -38243,7 +37974,8 @@ really_inline void find_whitespace_and_structurals( UNTARGET_REGION #endif // IS_X86_64 -#endif // SIMDJSON_STAGE1_FIND_MARKS_HASWELL_H/* end file include/simdjson/stage1_find_marks_haswell.h */ +#endif // SIMDJSON_STAGE1_FIND_MARKS_HASWELL_H +/* end file include/simdjson/stage1_find_marks_haswell.h */ /* begin file include/simdjson/stage1_find_marks_arm64.h */ #ifndef SIMDJSON_STAGE1_FIND_MARKS_ARM64_H #define 
SIMDJSON_STAGE1_FIND_MARKS_ARM64_H @@ -38251,48 +37983,6 @@ UNTARGET_REGION #ifdef IS_ARM64 namespace simdjson { -template <> struct simd_input { - uint8x16_t i0; - uint8x16_t i1; - uint8x16_t i2; - uint8x16_t i3; -}; - -template <> -really_inline simd_input -fill_input(const uint8_t *ptr) { - struct simd_input in; - in.i0 = vld1q_u8(ptr + 0); - in.i1 = vld1q_u8(ptr + 16); - in.i2 = vld1q_u8(ptr + 32); - in.i3 = vld1q_u8(ptr + 48); - return in; -} - -really_inline uint16_t neon_movemask(uint8x16_t input) { - const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; - uint8x16_t minput = vandq_u8(input, bit_mask); - uint8x16_t tmp = vpaddq_u8(minput, minput); - tmp = vpaddq_u8(tmp, tmp); - tmp = vpaddq_u8(tmp, tmp); - return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); -} - -really_inline uint64_t neon_movemask_bulk(uint8x16_t p0, uint8x16_t p1, - uint8x16_t p2, uint8x16_t p3) { - const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; - uint8x16_t t0 = vandq_u8(p0, bit_mask); - uint8x16_t t1 = vandq_u8(p1, bit_mask); - uint8x16_t t2 = vandq_u8(p2, bit_mask); - uint8x16_t t3 = vandq_u8(p3, bit_mask); - uint8x16_t sum0 = vpaddq_u8(t0, t1); - uint8x16_t sum1 = vpaddq_u8(t2, t3); - sum0 = vpaddq_u8(sum0, sum1); - sum0 = vpaddq_u8(sum0, sum0); - return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); -} template <> really_inline uint64_t @@ -38304,101 +37994,6 @@ compute_quote_mask(uint64_t quote_bits) { #endif } -template <> struct utf8_checking_state { - int8x16_t has_error{}; - processed_utf_bytes previous{}; -}; - -// Checks that all bytes are ascii -really_inline bool check_ascii_neon(simd_input in) { - // checking if the most significant bit is always equal to 0. 
- uint8x16_t high_bit = vdupq_n_u8(0x80); - uint8x16_t t0 = vorrq_u8(in.i0, in.i1); - uint8x16_t t1 = vorrq_u8(in.i2, in.i3); - uint8x16_t t3 = vorrq_u8(t0, t1); - uint8x16_t t4 = vandq_u8(t3, high_bit); - uint64x2_t v64 = vreinterpretq_u64_u8(t4); - uint32x2_t v32 = vqmovn_u64(v64); - uint64x1_t result = vreinterpret_u64_u32(v32); - return vget_lane_u64(result, 0) == 0; -} - -template <> -really_inline void check_utf8( - simd_input in, - utf8_checking_state &state) { - if (check_ascii_neon(in)) { - // All bytes are ascii. Therefore the byte that was just before must be - // ascii too. We only check the byte that was just before simd_input. Nines - // are arbitrary values. - const int8x16_t verror = - (int8x16_t){9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1}; - state.has_error = - vorrq_s8(vreinterpretq_s8_u8( - vcgtq_s8(state.previous.carried_continuations, verror)), - state.has_error); - } else { - // it is not ascii so we have to do heavy work - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i0), - &(state.previous), &(state.has_error)); - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i1), - &(state.previous), &(state.has_error)); - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i2), - &(state.previous), &(state.has_error)); - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i3), - &(state.previous), &(state.has_error)); - } -} - -template <> -really_inline ErrorValues check_utf8_errors( - utf8_checking_state &state) { - uint64x2_t v64 = vreinterpretq_u64_s8(state.has_error); - uint32x2_t v32 = vqmovn_u64(v64); - uint64x1_t result = vreinterpret_u64_u32(v32); - return vget_lane_u64(result, 0) != 0 ? 
simdjson::UTF8_ERROR - : simdjson::SUCCESS; -} - -template <> -really_inline uint64_t cmp_mask_against_input( - simd_input in, uint8_t m) { - const uint8x16_t mask = vmovq_n_u8(m); - uint8x16_t cmp_res_0 = vceqq_u8(in.i0, mask); - uint8x16_t cmp_res_1 = vceqq_u8(in.i1, mask); - uint8x16_t cmp_res_2 = vceqq_u8(in.i2, mask); - uint8x16_t cmp_res_3 = vceqq_u8(in.i3, mask); - return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3); -} - -template <> -really_inline uint64_t unsigned_lteq_against_input( - simd_input in, uint8_t m) { - const uint8x16_t mask = vmovq_n_u8(m); - uint8x16_t cmp_res_0 = vcleq_u8(in.i0, mask); - uint8x16_t cmp_res_1 = vcleq_u8(in.i1, mask); - uint8x16_t cmp_res_2 = vcleq_u8(in.i2, mask); - uint8x16_t cmp_res_3 = vcleq_u8(in.i3, mask); - return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3); -} - -template <> -really_inline uint64_t find_odd_backslash_sequences( - simd_input in, - uint64_t &prev_iter_ends_odd_backslash) { - FIND_ODD_BACKSLASH_SEQUENCES(Architecture::ARM64, in, - prev_iter_ends_odd_backslash); -} - -template <> -really_inline uint64_t find_quote_mask_and_bits( - simd_input in, uint64_t odd_ends, - uint64_t &prev_iter_inside_quote, uint64_t "e_bits, - uint64_t &error_mask) { - FIND_QUOTE_MASK_AND_BITS(Architecture::ARM64, in, odd_ends, - prev_iter_inside_quote, quote_bits, error_mask) -} - template <> really_inline void find_whitespace_and_structurals( simd_input in, uint64_t &whitespace, @@ -38450,7 +38045,8 @@ really_inline void find_whitespace_and_structurals( } // namespace simdjson #endif // IS_ARM64 -#endif // SIMDJSON_STAGE1_FIND_MARKS_ARM64_H/* end file include/simdjson/stage1_find_marks_arm64.h */ +#endif // SIMDJSON_STAGE1_FIND_MARKS_ARM64_H +/* end file include/simdjson/stage1_find_marks_arm64.h */ /* begin file include/simdjson/stringparsing.h */ #ifndef SIMDJSON_STRINGPARSING_H #define SIMDJSON_STRINGPARSING_H @@ -38541,7 +38137,7 @@ parse_string_helper find_bs_bits_and_quote_bits(const 
uint8_t *src, uint8_t *dst); template -WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER +WARN_UNUSED really_inline bool parse_string(UNUSED const uint8_t *buf, UNUSED size_t len, ParsedJson &pj, UNUSED const uint32_t depth, UNUSED uint32_t offset); @@ -38552,95 +38148,6 @@ WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER #endif /* end file include/simdjson/stringparsing.h */ -/* begin file include/simdjson/stringparsing_macros.h */ -#ifndef SIMDJSON_STRINGPARSING_MACROS_H -#define SIMDJSON_STRINGPARSING_MACROS_H - -// We need to compile that code for multiple architectures. However, target -// attributes can be used only once by function definition. Huge macro seemed -// better than huge code duplication.รง -// bool PARSE_STRING(Architecture T, const uint8_t *buf, size_t len, ParsedJson -// &pj,const uint32_t depth, uint32_t offset) -#define PARSE_STRING(T, buf, len, pj, depth, offset) \ - { \ - pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"'); \ - const uint8_t *src = \ - &buf[offset + 1]; /* we know that buf at offset is a " */ \ - uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t); \ - const uint8_t *const start_of_string = dst; \ - while (1) { \ - parse_string_helper helper = find_bs_bits_and_quote_bits(src, dst); \ - if (((helper.bs_bits - 1) & helper.quote_bits) != 0) { \ - /* we encountered quotes first. Move dst to point to quotes and exit \ - */ \ - \ - /* find out where the quote is... */ \ - uint32_t quote_dist = trailing_zeroes(helper.quote_bits); \ - \ - /* NULL termination is still handy if you expect all your strings to \ - * be NULL terminated? */ \ - /* It comes at a small cost */ \ - dst[quote_dist] = 0; \ - \ - uint32_t str_length = (dst - start_of_string) + quote_dist; \ - memcpy(pj.current_string_buf_loc, &str_length, sizeof(uint32_t)); \ - /***************************** \ - * Above, check for overflow in case someone has a crazy string \ - * (>=4GB?) 
_ \ - * But only add the overflow check when the document itself exceeds \ - * 4GB \ - * Currently unneeded because we refuse to parse docs larger or equal \ - * to 4GB. \ - ****************************/ \ - \ - /* we advance the point, accounting for the fact that we have a NULL \ - * termination */ \ - pj.current_string_buf_loc = dst + quote_dist + 1; \ - return true; \ - } \ - if (((helper.quote_bits - 1) & helper.bs_bits) != 0) { \ - /* find out where the backspace is */ \ - uint32_t bs_dist = trailing_zeroes(helper.bs_bits); \ - uint8_t escape_char = src[bs_dist + 1]; \ - /* we encountered backslash first. Handle backslash */ \ - if (escape_char == 'u') { \ - /* move src/dst up to the start; they will be further adjusted \ - within the unicode codepoint handling code. */ \ - src += bs_dist; \ - dst += bs_dist; \ - if (!handle_unicode_codepoint(&src, &dst)) { \ - return false; \ - } \ - } else { \ - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and \ - * write bs_dist+1 characters to output \ - * note this may reach beyond the part of the buffer we've actually \ - * seen. I think this is ok */ \ - uint8_t escape_result = escape_map[escape_char]; \ - if (escape_result == 0u) { \ - return false; /* bogus escape value is an error */ \ - } \ - dst[bs_dist] = escape_result; \ - src += bs_dist + 2; \ - dst += bs_dist + 1; \ - } \ - } else { \ - /* they are the same. Since they can't co-occur, it means we \ - * encountered neither. 
*/ \ - if constexpr (T == Architecture::WESTMERE) { \ - src += 16; \ - dst += 16; \ - } else { \ - src += 32; \ - dst += 32; \ - } \ - } \ - } \ - /* can't be reached */ \ - return true; \ - } - -#endif/* end file include/simdjson/stringparsing_macros.h */ /* begin file include/simdjson/stringparsing_westmere.h */ #ifndef SIMDJSON_STRINGPARSING_WESTMERE_H #define SIMDJSON_STRINGPARSING_WESTMERE_H @@ -38666,21 +38173,122 @@ find_bs_bits_and_quote_bits(const uint8_t *src, static_cast(_mm_movemask_epi8(quote_mask)) // quote_bits }; } - -template <> -WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER - really_inline bool - parse_string(UNUSED const uint8_t *buf, - UNUSED size_t len, ParsedJson &pj, - UNUSED const uint32_t depth, - UNUSED uint32_t offset) { - PARSE_STRING(Architecture::WESTMERE, buf, len, pj, depth, offset); -} } // namespace simdjson UNTARGET_REGION -#endif -#endif/* end file include/simdjson/stringparsing_westmere.h */ +#define TARGETED_ARCHITECTURE Architecture::WESTMERE +#define TARGETED_REGION TARGET_WESTMERE +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is include already includes +// "simdjson/stringparsing.h" (this simplifies amalgation) + +#ifdef TARGETED_ARCHITECTURE +#ifdef TARGETED_REGION + +TARGETED_REGION +namespace simdjson { + +template <> +WARN_UNUSED + really_inline bool + parse_string(UNUSED const uint8_t *buf, + UNUSED size_t len, ParsedJson &pj, + UNUSED const uint32_t depth, + UNUSED uint32_t offset) { + pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"'); + const uint8_t *src = &buf[offset + 1]; /* we know that buf at offset is a " */ + uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t); + const uint8_t *const start_of_string = dst; + while (1) { + parse_string_helper helper = + find_bs_bits_and_quote_bits(src, dst); + if (((helper.bs_bits - 1) & 
helper.quote_bits) != 0) { + /* we encountered quotes first. Move dst to point to quotes and exit + */ + + /* find out where the quote is... */ + uint32_t quote_dist = trailing_zeroes(helper.quote_bits); + + /* NULL termination is still handy if you expect all your strings to + * be NULL terminated? */ + /* It comes at a small cost */ + dst[quote_dist] = 0; + + uint32_t str_length = (dst - start_of_string) + quote_dist; + memcpy(pj.current_string_buf_loc, &str_length, sizeof(uint32_t)); + /***************************** + * Above, check for overflow in case someone has a crazy string + * (>=4GB?) _ + * But only add the overflow check when the document itself exceeds + * 4GB + * Currently unneeded because we refuse to parse docs larger or equal + * to 4GB. + ****************************/ + + /* we advance the point, accounting for the fact that we have a NULL + * termination */ + pj.current_string_buf_loc = dst + quote_dist + 1; + return true; + } + if (((helper.quote_bits - 1) & helper.bs_bits) != 0) { + /* find out where the backspace is */ + uint32_t bs_dist = trailing_zeroes(helper.bs_bits); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return false; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return false; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + if constexpr (TARGETED_ARCHITECTURE == Architecture::WESTMERE) { + src += 16; + dst += 16; + } else { + src += 32; + dst += 32; + } + } + } + /* can't be reached */ + return true; +} + +} // namespace simdjson +UNTARGET_REGION + +#else +#error TARGETED_REGION must be specified before including. +#endif // TARGETED_REGION +#else +#error TARGETED_ARCHITECTURE must be specified before including. +#endif // TARGETED_ARCHITECTURE +#undef TARGETED_ARCHITECTURE +#undef TARGETED_REGION + +#endif // IS_X86_64 + +#endif +/* end file include/simdjson/stringparsing_westmere.h */ /* begin file include/simdjson/stringparsing_haswell.h */ #ifndef SIMDJSON_STRINGPARSING_HASWELL_H #define SIMDJSON_STRINGPARSING_HASWELL_H @@ -38707,22 +38315,122 @@ find_bs_bits_and_quote_bits(const uint8_t *src, static_cast(_mm256_movemask_epi8(quote_mask)) // quote_bits }; } +} // namespace simdjson +UNTARGET_REGION + +#define TARGETED_ARCHITECTURE Architecture::HASWELL +#define TARGETED_REGION TARGET_HASWELL +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is include already includes +// "simdjson/stringparsing.h" (this simplifies amalgation) + +#ifdef TARGETED_ARCHITECTURE +#ifdef TARGETED_REGION + +TARGETED_REGION +namespace simdjson { template <> -WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER +WARN_UNUSED really_inline bool - parse_string(UNUSED const uint8_t *buf, + parse_string(UNUSED const uint8_t *buf, UNUSED size_t len, ParsedJson &pj, UNUSED const uint32_t depth, UNUSED uint32_t offset) { - PARSE_STRING(Architecture::HASWELL, buf, len, pj, depth, offset); + pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"'); + const uint8_t *src = &buf[offset + 1]; /* we know that buf at offset is a " */ + uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t); + const uint8_t *const start_of_string = dst; + while (1) { + 
parse_string_helper helper = + find_bs_bits_and_quote_bits(src, dst); + if (((helper.bs_bits - 1) & helper.quote_bits) != 0) { + /* we encountered quotes first. Move dst to point to quotes and exit + */ + + /* find out where the quote is... */ + uint32_t quote_dist = trailing_zeroes(helper.quote_bits); + + /* NULL termination is still handy if you expect all your strings to + * be NULL terminated? */ + /* It comes at a small cost */ + dst[quote_dist] = 0; + + uint32_t str_length = (dst - start_of_string) + quote_dist; + memcpy(pj.current_string_buf_loc, &str_length, sizeof(uint32_t)); + /***************************** + * Above, check for overflow in case someone has a crazy string + * (>=4GB?) _ + * But only add the overflow check when the document itself exceeds + * 4GB + * Currently unneeded because we refuse to parse docs larger or equal + * to 4GB. + ****************************/ + + /* we advance the point, accounting for the fact that we have a NULL + * termination */ + pj.current_string_buf_loc = dst + quote_dist + 1; + return true; + } + if (((helper.quote_bits - 1) & helper.bs_bits) != 0) { + /* find out where the backspace is */ + uint32_t bs_dist = trailing_zeroes(helper.bs_bits); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return false; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. 
I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return false; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + if constexpr (TARGETED_ARCHITECTURE == Architecture::WESTMERE) { + src += 16; + dst += 16; + } else { + src += 32; + dst += 32; + } + } + } + /* can't be reached */ + return true; } } // namespace simdjson UNTARGET_REGION -#endif -#endif/* end file include/simdjson/stringparsing_haswell.h */ +#else +#error TARGETED_REGION must be specified before including. +#endif // TARGETED_REGION +#else +#error TARGETED_ARCHITECTURE must be specified before including. +#endif // TARGETED_ARCHITECTURE +#undef TARGETED_ARCHITECTURE +#undef TARGETED_REGION + +#endif // IS_X86_64 + +#endif +/* end file include/simdjson/stringparsing_haswell.h */ /* begin file include/simdjson/stringparsing_arm64.h */ #ifndef SIMDJSON_STRINGPARSING_ARM64_H #define SIMDJSON_STRINGPARSING_ARM64_H @@ -38766,17 +38474,118 @@ find_bs_bits_and_quote_bits(const uint8_t *src, }; } -template <> -WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER - really_inline bool - parse_string(UNUSED const uint8_t *buf, - UNUSED size_t len, ParsedJson &pj, - UNUSED const uint32_t depth, - UNUSED uint32_t offset) { - PARSE_STRING(Architecture::ARM64, buf, len, pj, depth, offset); -} } // namespace simdjson -#endif + +#define TARGETED_ARCHITECTURE Architecture::ARM64 +#define TARGETED_REGION TARGET_ARM64 +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is include already includes +// "simdjson/stringparsing.h" (this simplifies amalgation) + +#ifdef TARGETED_ARCHITECTURE +#ifdef TARGETED_REGION + +TARGETED_REGION +namespace simdjson { + 
+template <> +WARN_UNUSED + really_inline bool + parse_string(UNUSED const uint8_t *buf, + UNUSED size_t len, ParsedJson &pj, + UNUSED const uint32_t depth, + UNUSED uint32_t offset) { + pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"'); + const uint8_t *src = &buf[offset + 1]; /* we know that buf at offset is a " */ + uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t); + const uint8_t *const start_of_string = dst; + while (1) { + parse_string_helper helper = + find_bs_bits_and_quote_bits(src, dst); + if (((helper.bs_bits - 1) & helper.quote_bits) != 0) { + /* we encountered quotes first. Move dst to point to quotes and exit + */ + + /* find out where the quote is... */ + uint32_t quote_dist = trailing_zeroes(helper.quote_bits); + + /* NULL termination is still handy if you expect all your strings to + * be NULL terminated? */ + /* It comes at a small cost */ + dst[quote_dist] = 0; + + uint32_t str_length = (dst - start_of_string) + quote_dist; + memcpy(pj.current_string_buf_loc, &str_length, sizeof(uint32_t)); + /***************************** + * Above, check for overflow in case someone has a crazy string + * (>=4GB?) _ + * But only add the overflow check when the document itself exceeds + * 4GB + * Currently unneeded because we refuse to parse docs larger or equal + * to 4GB. + ****************************/ + + /* we advance the point, accounting for the fact that we have a NULL + * termination */ + pj.current_string_buf_loc = dst + quote_dist + 1; + return true; + } + if (((helper.quote_bits - 1) & helper.bs_bits) != 0) { + /* find out where the backspace is */ + uint32_t bs_dist = trailing_zeroes(helper.bs_bits); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. 
*/ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return false; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return false; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + if constexpr (TARGETED_ARCHITECTURE == Architecture::WESTMERE) { + src += 16; + dst += 16; + } else { + src += 32; + dst += 32; + } + } + } + /* can't be reached */ + return true; +} + +} // namespace simdjson +UNTARGET_REGION + +#else +#error TARGETED_REGION must be specified before including. +#endif // TARGETED_REGION +#else +#error TARGETED_ARCHITECTURE must be specified before including. 
+#endif // TARGETED_ARCHITECTURE +#undef TARGETED_ARCHITECTURE +#undef TARGETED_REGION + +#endif // IS_ARM64 #endif /* end file include/simdjson/stringparsing_arm64.h */ /* begin file include/simdjson/numberparsing.h */ @@ -39433,7 +39242,7 @@ really_inline bool is_valid_null_atom(const uint8_t *loc) { } template -WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER int +WARN_UNUSED int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj); template @@ -39463,38 +39272,12 @@ int json_parse_implementation(const uint8_t *buf, size_t len, ParsedJson &pj, } bool reallocated = false; if (realloc_if_needed) { -#if ALLOW_SAME_PAGE_BUFFER_OVERRUN -// realloc is needed if the end of the memory crosses a page -#ifdef _MSC_VER - SYSTEM_INFO sysInfo; - GetSystemInfo(&sysInfo); - long page_size = sysInfo.dwPageSize; -#else - long page_size = sysconf(_SC_PAGESIZE); -#endif - ////////////// - // We want to check that buf + len - 1 and buf + len - 1 + SIMDJSON_PADDING - // are in the same page. - // That is, we want to check that - // (buf + len - 1) / page_size == (buf + len - 1 + SIMDJSON_PADDING) / - // page_size That's true if (buf + len - 1) % page_size + SIMDJSON_PADDING < - // page_size. - /////////// - if ((reinterpret_cast(buf + len - 1) % page_size) + - SIMDJSON_PADDING < - static_cast(page_size)) { -#else // SIMDJSON_SAFE_SAME_PAGE_READ_OVERRUN - if (true) { // if not SIMDJSON_SAFE_SAME_PAGE_READ_OVERRUN, we always - // reallocate -#endif const uint8_t *tmp_buf = buf; buf = (uint8_t *)allocate_padded_buffer(len); if (buf == NULL) return simdjson::MEMALLOC; memcpy((void *)buf, tmp_buf, len); reallocated = true; - } // if (true) OR if ( (reinterpret_cast(buf + len - 1) % - // page_size ) + SIMDJSON_PADDING < static_cast(page_size) ) { } // if(realloc_if_needed) { int stage1_is_ok = simdjson::find_structural_bits(buf, len, pj); if (stage1_is_ok != simdjson::SUCCESS) {