diff --git a/benchmark/parse.cpp b/benchmark/parse.cpp index 2f61572f..db42fc35 100644 --- a/benchmark/parse.cpp +++ b/benchmark/parse.cpp @@ -149,7 +149,7 @@ int main(int argc, char *argv[]) { } #ifndef SQUASH_COUNTERS - printf("number of bytes %ld number of structural chars %d ratio %.3f\n", + printf("number of bytes %ld number of structural chars %u ratio %.3f\n", p.second, pj.n_structural_indexes, (double)pj.n_structural_indexes / p.second); unsigned long total = cy1 + cy2 + cy3; diff --git a/include/jsonparser/common_defs.h b/include/jsonparser/common_defs.h index 5faf4be3..cc96781d 100644 --- a/include/jsonparser/common_defs.h +++ b/include/jsonparser/common_defs.h @@ -40,7 +40,7 @@ typedef __m256i m256; #define unlikely(x) __builtin_expect(!!(x), 0) #endif -static inline u32 ctz64(u64 x) { +/*static inline u32 ctz64(u64 x) { assert(x); // behaviour not defined for x == 0 #if defined(_WIN64) unsigned long r; @@ -56,4 +56,4 @@ static inline u32 ctz64(u64 x) { #else return (u32)__builtin_ctzll(x); #endif -} +}*/ diff --git a/include/jsonparser/jsoncharutils.h b/include/jsonparser/jsoncharutils.h index 8512010f..d8725cd0 100644 --- a/include/jsonparser/jsoncharutils.h +++ b/include/jsonparser/jsoncharutils.h @@ -50,7 +50,7 @@ const char digittoval[256] = { -1, -1, -1, -1, -1, -1, -1, -1, -1}; // return true if we have a valid hex between 0000 and FFFF -inline bool hex_to_u32(const u8 *src, u32 *res) { +/*inline bool hex_to_u32(const u8 *src, u32 *res) { u8 v1 = src[0]; u8 v2 = src[1]; u8 v3 = src[2]; @@ -58,7 +58,7 @@ inline bool hex_to_u32(const u8 *src, u32 *res) { *res = digittoval[v1] << 12 | digittoval[v2] << 8 | digittoval[v3] << 4 | digittoval[v4]; return (int32_t)(*res) >= 0; -} +}*/ // returns a value with the highest bit set if it is not valud uint32_t hex_to_u32_nocheck(const u8 *src) { diff --git a/include/jsonparser/numberparsing.h b/include/jsonparser/numberparsing.h index ec58a022..7e15d02f 100644 --- a/include/jsonparser/numberparsing.h +++ b/include/jsonparser/numberparsing.h @@ -147,6 +147,8 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) { // // This function will almost never be called!!! // +// Note: a redesign could avoid this function entirely. +// static never_inline bool parse_highprecision_float(const u8 *const buf, UNUSED size_t len, ParsedJson &pj, UNUSED const u32 depth, const u32 offset, diff --git a/include/jsonparser/simdjson_internal.h b/include/jsonparser/simdjson_internal.h index 60558761..9d47fbbe 100644 --- a/include/jsonparser/simdjson_internal.h +++ b/include/jsonparser/simdjson_internal.h @@ -97,30 +97,32 @@ public: // // this should be considered a private function - void write_tape(u64 val, u8 c) { + inline void write_tape(u64 val, u8 c) { tape[current_loc++] = val | (((u64)c) << 56); //tape[tape_locs[depth]] = val | (((u64)c) << 56); //tape_locs[depth]++; } - void write_tape_s64(s64 i) { - *((s64 *)current_number_buf_loc) = i;// safe because array will be 8-byte aligned, could use memcpy + inline void write_tape_s64(s64 i) { + memcpy(current_number_buf_loc, &i, sizeof(s64)); + //*((s64 *)current_number_buf_loc) = i;// safe because array will be 8-byte aligned, could use memcpy current_number_buf_loc += sizeof(s64); write_tape(current_number_buf_loc - number_buf, 'l'); } - void write_tape_double(double d) { - *((double *)current_number_buf_loc) = d;// safe because array will be 8-byte aligned, could use memcpy + inline void write_tape_double(double d) { + memcpy(current_number_buf_loc, &d, sizeof(double)); + //*((double *)current_number_buf_loc) = d;// safe because array will be 8-byte aligned, could use memcpy current_number_buf_loc += sizeof(double); write_tape(current_number_buf_loc - number_buf, 'd'); } - u32 get_current_loc() { + inline u32 get_current_loc() { return current_loc; } - void annotate_previousloc(u32 saved_loc,u64 val) { + inline void annotate_previousloc(u32 saved_loc,u64 val) { tape[saved_loc] |= val; } @@ -167,7 +169,7 @@ public: #ifdef DEBUG -inline void dump256(m256 d, const std::string msg) { +inline void dump256(m256 d, const std::string& msg) { for (u32 i = 0; i < 32; i++) { std::cout << std::setw(3) << (int)*(((u8 *)(&d)) + i); if (!((i + 1) % 8)) @@ -181,14 +183,14 @@ inline void dump256(m256 d, const std::string msg) { } // dump bits low to high -inline void dumpbits(u64 v, const std::string msg) { +inline void dumpbits(u64 v, const std::string& msg) { for (u32 i = 0; i < 64; i++) { std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_"); } std::cout << " " << msg << "\n"; } -inline void dumpbits32(u32 v, const std::string msg) { +inline void dumpbits32(u32 v, const std::string& msg) { for (u32 i = 0; i < 32; i++) { std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_"); } @@ -201,14 +203,14 @@ inline void dumpbits32(u32 v, const std::string msg) { #endif // dump bits low to high -inline void dumpbits_always(u64 v, const std::string msg) { +inline void dumpbits_always(u64 v, const std::string& msg) { for (u32 i = 0; i < 64; i++) { std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_"); } std::cout << " " << msg << "\n"; } -inline void dumpbits32_always(u32 v, const std::string msg) { +inline void dumpbits32_always(u32 v, const std::string& msg) { for (u32 i = 0; i < 32; i++) { std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_"); } diff --git a/include/jsonparser/simdutf8check.h b/include/jsonparser/simdutf8check.h index 3bf001d6..dd48ef27 100644 --- a/include/jsonparser/simdutf8check.h +++ b/include/jsonparser/simdutf8check.h @@ -24,168 +24,7 @@ */ // all byte values must be no larger than 0xF4 -static inline void checkSmallerThan0xF4(__m128i current_bytes, - __m128i *has_error) { - // unsigned, saturates to 0 below max - *has_error = _mm_or_si128(*has_error, - _mm_subs_epu8(current_bytes, _mm_set1_epi8(0xF4))); -} -static inline __m128i continuationLengths(__m128i high_nibbles) { - return _mm_shuffle_epi8( - _mm_setr_epi8(1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII) - 0, 0, 0, 0, // 10xx (continuation) - 2, 2, // 110x - 3, // 1110 - 4), // 1111, next should be 0 (not checked here) - high_nibbles); -} - -static inline __m128i carryContinuations(__m128i initial_lengths, - __m128i previous_carries) { - - __m128i right1 = - _mm_subs_epu8(_mm_alignr_epi8(initial_lengths, previous_carries, 16 - 1), - _mm_set1_epi8(1)); - __m128i sum = _mm_add_epi8(initial_lengths, right1); - - __m128i right2 = _mm_subs_epu8(_mm_alignr_epi8(sum, previous_carries, 16 - 2), - _mm_set1_epi8(2)); - return _mm_add_epi8(sum, right2); -} - -static inline void checkContinuations(__m128i initial_lengths, __m128i carries, - __m128i *has_error) { - - // overlap || underlap - // carry > length && length > 0 || !(carry > length) && !(length > 0) - // (carries > length) == (lengths > 0) - __m128i overunder = - _mm_cmpeq_epi8(_mm_cmpgt_epi8(carries, initial_lengths), - _mm_cmpgt_epi8(initial_lengths, _mm_setzero_si128())); - - *has_error = _mm_or_si128(*has_error, overunder); -} - -// when 0xED is found, next byte must be no larger than 0x9F -// when 0xF4 is found, next byte must be no larger than 0x8F -// next byte must be continuation, ie sign bit is set, so signed < is ok -static inline void checkFirstContinuationMax(__m128i current_bytes, - __m128i off1_current_bytes, - __m128i *has_error) { - __m128i maskED = _mm_cmpeq_epi8(off1_current_bytes, _mm_set1_epi8(0xED)); - __m128i maskF4 = _mm_cmpeq_epi8(off1_current_bytes, _mm_set1_epi8(0xF4)); - - __m128i badfollowED = - _mm_and_si128(_mm_cmpgt_epi8(current_bytes, _mm_set1_epi8(0x9F)), maskED); - __m128i badfollowF4 = - _mm_and_si128(_mm_cmpgt_epi8(current_bytes, _mm_set1_epi8(0x8F)), maskF4); - - *has_error = _mm_or_si128(*has_error, _mm_or_si128(badfollowED, badfollowF4)); -} - -// map off1_hibits => error condition -// hibits off1 cur -// C => < C2 && true -// E => < E1 && < A0 -// F => < F1 && < 90 -// else false && false -static inline void checkOverlong(__m128i current_bytes, - __m128i off1_current_bytes, __m128i hibits, - __m128i previous_hibits, __m128i *has_error) { - __m128i off1_hibits = _mm_alignr_epi8(hibits, previous_hibits, 16 - 1); - __m128i initial_mins = _mm_shuffle_epi8( - _mm_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, // 10xx => false - 0xC2, -128, // 110x - 0xE1, // 1110 - 0xF1), - off1_hibits); - - __m128i initial_under = _mm_cmpgt_epi8(initial_mins, off1_current_bytes); - - __m128i second_mins = _mm_shuffle_epi8( - _mm_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, // 10xx => false - 127, 127, // 110x => true - 0xA0, // 1110 - 0x90), - off1_hibits); - __m128i second_under = _mm_cmpgt_epi8(second_mins, current_bytes); - *has_error = - _mm_or_si128(*has_error, _mm_and_si128(initial_under, second_under)); -} - -struct processed_utf_bytes { - __m128i rawbytes; - __m128i high_nibbles; - __m128i carried_continuations; -}; - -static inline void count_nibbles(__m128i bytes, - struct processed_utf_bytes *answer) { - answer->rawbytes = bytes; - answer->high_nibbles = - _mm_and_si128(_mm_srli_epi16(bytes, 4), _mm_set1_epi8(0x0F)); -} - -// check whether the current bytes are valid UTF-8 -// at the end of the function, previous gets updated -static struct processed_utf_bytes -checkUTF8Bytes(__m128i current_bytes, struct processed_utf_bytes *previous, - __m128i *has_error) { - struct processed_utf_bytes pb; - count_nibbles(current_bytes, &pb); - - checkSmallerThan0xF4(current_bytes, has_error); - - __m128i initial_lengths = continuationLengths(pb.high_nibbles); - - pb.carried_continuations = - carryContinuations(initial_lengths, previous->carried_continuations); - - checkContinuations(initial_lengths, pb.carried_continuations, has_error); - - __m128i off1_current_bytes = - _mm_alignr_epi8(pb.rawbytes, previous->rawbytes, 16 - 1); - checkFirstContinuationMax(current_bytes, off1_current_bytes, has_error); - - checkOverlong(current_bytes, off1_current_bytes, pb.high_nibbles, - previous->high_nibbles, has_error); - return pb; -} - -static inline bool validate_utf8_fast(const char *src, size_t len) { - size_t i = 0; - __m128i has_error = _mm_setzero_si128(); - struct processed_utf_bytes previous = {.rawbytes = _mm_setzero_si128(), - .high_nibbles = _mm_setzero_si128(), - .carried_continuations = - _mm_setzero_si128()}; - if (len >= 16) { - for (; i <= len - 16; i += 16) { - __m128i current_bytes = _mm_loadu_si128((const __m128i *)(src + i)); - previous = checkUTF8Bytes(current_bytes, &previous, &has_error); - } - } - - // last part - if (i < len) { - char buffer[16]; - memset(buffer, 0, 16); - memcpy(buffer, src + i, len - i); - __m128i current_bytes = _mm_loadu_si128((const __m128i *)(buffer)); - previous = checkUTF8Bytes(current_bytes, &previous, &has_error); - } else { - has_error = - _mm_or_si128(_mm_cmpgt_epi8(previous.carried_continuations, - _mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 1)), - has_error); - } - - return _mm_testz_si128(has_error, has_error); -} #ifdef __AVX2__ @@ -349,109 +188,7 @@ avxcheckUTF8Bytes(__m256i current_bytes, return pb; } -static inline bool validate_utf8_fast_avx(const char *src, size_t len) { - size_t i = 0; - __m256i has_error = _mm256_setzero_si256(); - struct avx_processed_utf_bytes previous = { - .rawbytes = _mm256_setzero_si256(), - .high_nibbles = _mm256_setzero_si256(), - .carried_continuations = _mm256_setzero_si256()}; - if (len >= 32) { - for (; i <= len - 32; i += 32) { - __m256i current_bytes = _mm256_loadu_si256((const __m256i *)(src + i)); - previous = avxcheckUTF8Bytes(current_bytes, &previous, &has_error); - } - } - - // last part - if (i < len) { - char buffer[32]; - memset(buffer, 0, 32); - memcpy(buffer, src + i, len - i); - __m256i current_bytes = _mm256_loadu_si256((const __m256i *)(buffer)); - previous = avxcheckUTF8Bytes(current_bytes, &previous, &has_error); - } else { - has_error = _mm256_or_si256( - _mm256_cmpgt_epi8(previous.carried_continuations, - _mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 1)), - has_error); - } - - return _mm256_testz_si256(has_error, has_error); -} - - -// check whether the current bytes are valid UTF-8 -// at the end of the function, previous gets updated -static struct avx_processed_utf_bytes -avxcheckUTF8Bytes_asciipath(__m256i current_bytes, - struct avx_processed_utf_bytes *previous, - __m256i *has_error) { - if(_mm256_testz_si256(current_bytes,_mm256_set1_epi8(0x80))) { // fast ascii path - *has_error = _mm256_or_si256( - _mm256_cmpgt_epi8(previous->carried_continuations, - _mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 1)),*has_error); - return *previous; - } - struct avx_processed_utf_bytes pb; - avx_count_nibbles(current_bytes, &pb); - - avxcheckSmallerThan0xF4(current_bytes, has_error); - - __m256i initial_lengths = avxcontinuationLengths(pb.high_nibbles); - - pb.carried_continuations = - avxcarryContinuations(initial_lengths, previous->carried_continuations); - - avxcheckContinuations(initial_lengths, pb.carried_continuations, has_error); - - __m256i off1_current_bytes = - push_last_byte_of_a_to_b(previous->rawbytes, pb.rawbytes); - avxcheckFirstContinuationMax(current_bytes, off1_current_bytes, has_error); - - avxcheckOverlong(current_bytes, off1_current_bytes, pb.high_nibbles, - previous->high_nibbles, has_error); - return pb; -} - -static inline bool validate_utf8_fast_avx_asciipath(const char *src, size_t len) { - size_t i = 0; - __m256i has_error = _mm256_setzero_si256(); - struct avx_processed_utf_bytes previous = { - .rawbytes = _mm256_setzero_si256(), - .high_nibbles = _mm256_setzero_si256(), - .carried_continuations = _mm256_setzero_si256()}; - if (len >= 32) { - for (; i <= len - 32; i += 32) { - __m256i current_bytes = _mm256_loadu_si256((const __m256i *)(src + i)); - previous = avxcheckUTF8Bytes_asciipath(current_bytes, &previous, &has_error); - } - } - - // last part - if (i < len) { - char buffer[32]; - memset(buffer, 0, 32); - memcpy(buffer, src + i, len - i); - __m256i current_bytes = _mm256_loadu_si256((const __m256i *)(buffer)); - previous = avxcheckUTF8Bytes(current_bytes, &previous, &has_error); - } else { - has_error = _mm256_or_si256( - _mm256_cmpgt_epi8(previous.carried_continuations, - _mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 1)), - has_error); - } - - return _mm256_testz_si256(has_error, has_error); -} - - - +#else // __AVX2__ +#warning "We require AVX2 support!" #endif // __AVX2__ #endif diff --git a/src/jsonminifier.cpp b/src/jsonminifier.cpp index 8642f9dd..62eef28b 100644 --- a/src/jsonminifier.cpp +++ b/src/jsonminifier.cpp @@ -205,10 +205,10 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) { uint64_t odd_starts = start_edges & ~even_start_mask; uint64_t even_carries = bs_bits + even_starts; uint64_t odd_carries; - bool iter_ends_odd_backslash = __builtin_uaddll_overflow( - bs_bits, odd_starts, (unsigned long long *)&odd_carries); + //bool iter_ends_odd_backslash = + __builtin_uaddll_overflow( bs_bits, odd_starts, (unsigned long long *)&odd_carries); odd_carries |= prev_iter_ends_odd_backslash; - prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL; + //prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL; // we never use it uint64_t even_carry_ends = even_carries & ~bs_bits; uint64_t odd_carry_ends = odd_carries & ~bs_bits; uint64_t even_start_odd_end = even_carry_ends & odd_bits; diff --git a/src/stage2_flatten.cpp b/src/stage2_flatten.cpp index f22dd7b6..02b2268b 100644 --- a/src/stage2_flatten.cpp +++ b/src/stage2_flatten.cpp @@ -54,7 +54,7 @@ bool flatten_indexes(size_t len, ParsedJson &pj) { u32 *base_ptr = pj.structural_indexes; u32 base = 0; #ifdef BUILDHISTOGRAM - uint32_t counters[65]; + uint32_t counters[66]; uint32_t total = 0; for (int k = 0; k < 66; k++) counters[k] = 0;