From 41f33ecbb96b46b6219edcf504dfd2882a51ff4c Mon Sep 17 00:00:00 2001 From: Brendan Knapp Date: Thu, 25 Jun 2020 17:07:17 -0700 Subject: [PATCH 1/8] Permit 32-bit GCC compilation --- include/simdjson/portability.h | 11 +++--- singleheader/amalgamate_demo.cpp | 3 +- singleheader/simdjson.cpp | 15 ++++---- singleheader/simdjson.h | 65 +++++++++++++++++++++++++------- src/haswell/bitmanipulation.h | 2 +- src/jsoncharutils.h | 9 ++--- src/westmere/bitmanipulation.h | 2 +- 7 files changed, 72 insertions(+), 35 deletions(-) diff --git a/include/simdjson/portability.h b/include/simdjson/portability.h index a936fc0d..0210beb2 100644 --- a/include/simdjson/portability.h +++ b/include/simdjson/portability.h @@ -37,13 +37,14 @@ #if defined(__x86_64__) || defined(_M_AMD64) #define SIMDJSON_IS_X86_64 1 -#endif -#if defined(__aarch64__) || defined(_M_ARM64) +#elif defined(__aarch64__) || defined(_M_ARM64) #define SIMDJSON_IS_ARM64 1 +#else +#define SIMDJSON_IS_32BITS 1 #endif -#if (!defined(SIMDJSON_IS_X86_64)) && (!defined(SIMDJSON_IS_ARM64)) -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#ifdef SIMDJSON_IS_32BITS +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(__GNUC__) #pragma message("The simdjson library is designed\ for 64-bit processors and it seems that you are not \ compiling for a known 64-bit platform. All fast kernels \ @@ -54,7 +55,7 @@ use a 64-bit target such as x64 or 64-bit ARM.") for 64-bit processors. It seems that you are not \ compiling for a known 64-bit platform." #endif -#endif // (!defined(SIMDJSON_IS_X86_64)) && (!defined(SIMDJSON_IS_ARM64)) +#endif // SIMDJSON_IS_32BITS // this is almost standard? #undef STRINGIFY_IMPLEMENTATION_ diff --git a/singleheader/amalgamate_demo.cpp b/singleheader/amalgamate_demo.cpp index 54cfc77a..0207b391 100644 --- a/singleheader/amalgamate_demo.cpp +++ b/singleheader/amalgamate_demo.cpp @@ -1,4 +1,4 @@ -/* auto-generated on Tue 23 Jun 2020 20:51:12 EDT. Do not edit! */ +/* auto-generated on Thu Jun 25 16:43:19 PDT 2020. Do not edit! */ #include #include "simdjson.h" @@ -43,3 +43,4 @@ int main(int argc, char *argv[]) { } return EXIT_SUCCESS; } + diff --git a/singleheader/simdjson.cpp b/singleheader/simdjson.cpp index 1506f247..d0c23880 100644 --- a/singleheader/simdjson.cpp +++ b/singleheader/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on Tue 23 Jun 2020 20:51:12 EDT. Do not edit! */ +/* auto-generated on Thu Jun 25 16:43:19 PDT 2020. Do not edit! */ /* begin file src/simdjson.cpp */ #include "simdjson.h" @@ -934,9 +934,8 @@ struct value128 { uint64_t high; }; -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) && \ - !defined(_M_X64) && !defined(_M_ARM64)// _umul128 for x86, arm -// this is a slow emulation routine for 32-bit Windows +#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit // static inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; @@ -955,7 +954,7 @@ static inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { value128 answer; -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); @@ -963,7 +962,7 @@ really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO +#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) __uint128_t r = ((__uint128_t)value1) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); @@ -7826,7 +7825,7 @@ really_inline bool add_overflow(uint64_t value1, uint64_t value2, #endif } -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) #pragma intrinsic(_umul128) #endif really_inline bool mul_overflow(uint64_t value1, uint64_t value2, @@ -11114,7 +11113,7 @@ really_inline bool add_overflow(uint64_t value1, uint64_t value2, #endif } -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) #pragma intrinsic(_umul128) #endif really_inline bool mul_overflow(uint64_t value1, uint64_t value2, diff --git a/singleheader/simdjson.h b/singleheader/simdjson.h index f6f12ed1..e0a5a3d9 100644 --- a/singleheader/simdjson.h +++ b/singleheader/simdjson.h @@ -1,4 +1,4 @@ -/* auto-generated on Tue 23 Jun 2020 20:51:12 EDT. Do not edit! */ +/* auto-generated on Thu Jun 25 16:43:19 PDT 2020. Do not edit! */ /* begin file include/simdjson.h */ #ifndef SIMDJSON_H #define SIMDJSON_H @@ -91,13 +91,14 @@ #if defined(__x86_64__) || defined(_M_AMD64) #define SIMDJSON_IS_X86_64 1 -#endif -#if defined(__aarch64__) || defined(_M_ARM64) +#elif defined(__aarch64__) || defined(_M_ARM64) #define SIMDJSON_IS_ARM64 1 +#else +#define SIMDJSON_IS_32BITS 1 #endif -#if (!defined(SIMDJSON_IS_X86_64)) && (!defined(SIMDJSON_IS_ARM64)) -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#ifdef SIMDJSON_IS_32BITS +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(__GNUC__) #pragma message("The simdjson library is designed\ for 64-bit processors and it seems that you are not \ compiling for a known 64-bit platform. All fast kernels \ @@ -108,7 +109,7 @@ use a 64-bit target such as x64 or 64-bit ARM.") for 64-bit processors. It seems that you are not \ compiling for a known 64-bit platform." #endif -#endif // (!defined(SIMDJSON_IS_X86_64)) && (!defined(SIMDJSON_IS_ARM64)) +#endif // SIMDJSON_IS_32BITS // this is almost standard? #undef STRINGIFY_IMPLEMENTATION_ @@ -2632,7 +2633,6 @@ inline error_code dom_parser_implementation::allocate(size_t capacity, size_t ma #endif // SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H /* end file include/simdjson/internal/dom_parser_implementation.h */ -#include #include #include #include @@ -4008,21 +4008,42 @@ public: */ inline simdjson_result get_object() const noexcept; /** - * Cast this element to a string. + * Cast this element to a null-terminated C string. + * + * The string is guaranteed to be valid UTF-8. * - * Equivalent to get(). + * The get_c_str() function is equivalent to get(). + * + * The length of the string is given by get_string_length(). Because JSON strings + * may contain null characters, it may be incorrect to use strlen to determine the + * string length. * - * @returns An pointer to a null-terminated string. This string is stored in the parser and will + * It is possible to get a single string_view instance which represents both the string + * content and its length: see get_string(). + * + * @returns A pointer to a null-terminated UTF-8 string. This string is stored in the parser and will * be invalidated the next time it parses a document or when it is destroyed. * Returns INCORRECT_TYPE if the JSON element is not a string. */ inline simdjson_result get_c_str() const noexcept; /** - * Cast this element to a string. + * Gives the length in bytes of the string. + * + * It is possible to get a single string_view instance which represents both the string + * content and its length: see get_string(). + * + * @returns A string length in bytes. + * Returns INCORRECT_TYPE if the JSON element is not a string. + */ + inline simdjson_result get_string_length() const noexcept; + /** + * Cast this element to a string. + * + * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). * - * @returns A string. The string is stored in the parser and will be invalidated the next time it + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next time it * parses a document or when it is destroyed. * Returns INCORRECT_TYPE if the JSON element is not a string. */ @@ -4199,7 +4220,9 @@ public: inline operator bool() const noexcept(false); /** - * Read this element as a null-terminated string. + * Read this element as a null-terminated UTF-8 string. + * + * Be mindful that JSON allows strings to contain null characters. * * Does *not* convert other types to a string; requires that the JSON type of the element was * an actual string. @@ -4210,7 +4233,7 @@ public: inline explicit operator const char*() const noexcept(false); /** - * Read this element as a null-terminated string. + * Read this element as a null-terminated UTF-8 string. * * Does *not* convert other types to a string; requires that the JSON type of the element was * an actual string. @@ -4410,6 +4433,7 @@ public: really_inline simdjson_result get_array() const noexcept; really_inline simdjson_result get_object() const noexcept; really_inline simdjson_result get_c_str() const noexcept; + really_inline simdjson_result get_string_length() const noexcept; really_inline simdjson_result get_string() const noexcept; really_inline simdjson_result get_int64() const noexcept; really_inline simdjson_result get_uint64() const noexcept; @@ -5820,6 +5844,10 @@ really_inline simdjson_result simdjson_result::get_c if (error()) { return error(); } return first.get_c_str(); } +really_inline simdjson_result simdjson_result::get_string_length() const noexcept { + if (error()) { return error(); } + return first.get_string_length(); +} really_inline simdjson_result simdjson_result::get_string() const noexcept { if (error()) { return error(); } return first.get_string(); @@ -5960,6 +5988,15 @@ inline simdjson_result element::get_c_str() const noexcept { return INCORRECT_TYPE; } } +inline simdjson_result element::get_string_length() const noexcept { + switch (tape.tape_ref_type()) { + case internal::tape_type::STRING: { + return tape.get_string_length(); + } + default: + return INCORRECT_TYPE; + } +} inline simdjson_result element::get_string() const noexcept { switch (tape.tape_ref_type()) { case internal::tape_type::STRING: diff --git a/src/haswell/bitmanipulation.h b/src/haswell/bitmanipulation.h index ed7ee1d5..f6b8fc72 100644 --- a/src/haswell/bitmanipulation.h +++ b/src/haswell/bitmanipulation.h @@ -58,7 +58,7 @@ really_inline bool add_overflow(uint64_t value1, uint64_t value2, #endif } -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) #pragma intrinsic(_umul128) #endif really_inline bool mul_overflow(uint64_t value1, uint64_t value2, diff --git a/src/jsoncharutils.h b/src/jsoncharutils.h index 2c539e60..4380db8a 100644 --- a/src/jsoncharutils.h +++ b/src/jsoncharutils.h @@ -317,9 +317,8 @@ struct value128 { uint64_t high; }; -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) && \ - !defined(_M_X64) && !defined(_M_ARM64)// _umul128 for x86, arm -// this is a slow emulation routine for 32-bit Windows +#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit // static inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; @@ -338,7 +337,7 @@ static inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { value128 answer; -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); @@ -346,7 +345,7 @@ really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO +#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) __uint128_t r = ((__uint128_t)value1) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); diff --git a/src/westmere/bitmanipulation.h b/src/westmere/bitmanipulation.h index d55d4455..29a8623a 100644 --- a/src/westmere/bitmanipulation.h +++ b/src/westmere/bitmanipulation.h @@ -66,7 +66,7 @@ really_inline bool add_overflow(uint64_t value1, uint64_t value2, #endif } -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) #pragma intrinsic(_umul128) #endif really_inline bool mul_overflow(uint64_t value1, uint64_t value2, From 2956bce047dfc04328751dbda29dc492e7457e03 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 25 Jun 2020 21:12:26 -0400 Subject: [PATCH 2/8] Minor fixes to avoid 32-bit warnings. --- include/simdjson/inline/tape_ref.h | 4 ++-- singleheader/amalgamate_demo.cpp | 2 +- singleheader/simdjson.cpp | 2 +- singleheader/simdjson.h | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/simdjson/inline/tape_ref.h b/include/simdjson/inline/tape_ref.h index bdd0d57e..1d7e5efd 100644 --- a/include/simdjson/inline/tape_ref.h +++ b/include/simdjson/inline/tape_ref.h @@ -83,13 +83,13 @@ really_inline T tape_ref::next_tape_value() const noexcept { really_inline uint32_t internal::tape_ref::get_string_length() const noexcept { uint64_t string_buf_index = size_t(tape_value()); uint32_t len; - memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); + memcpy(&len, &doc->string_buf[size_t(string_buf_index)], sizeof(len)); return len; } really_inline const char * internal::tape_ref::get_c_str() const noexcept { uint64_t string_buf_index = size_t(tape_value()); - return reinterpret_cast(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); + return reinterpret_cast(&doc->string_buf[size_t(string_buf_index) + sizeof(uint32_t)]); } inline std::string_view internal::tape_ref::get_string_view() const noexcept { diff --git a/singleheader/amalgamate_demo.cpp b/singleheader/amalgamate_demo.cpp index 0207b391..04b81412 100644 --- a/singleheader/amalgamate_demo.cpp +++ b/singleheader/amalgamate_demo.cpp @@ -1,4 +1,4 @@ -/* auto-generated on Thu Jun 25 16:43:19 PDT 2020. Do not edit! */ +/* auto-generated on Fri Jun 26 01:04:15 UTC 2020. Do not edit! */ #include #include "simdjson.h" diff --git a/singleheader/simdjson.cpp b/singleheader/simdjson.cpp index d0c23880..f9199c2d 100644 --- a/singleheader/simdjson.cpp +++ b/singleheader/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on Thu Jun 25 16:43:19 PDT 2020. Do not edit! */ +/* auto-generated on Fri Jun 26 01:04:15 UTC 2020. Do not edit! */ /* begin file src/simdjson.cpp */ #include "simdjson.h" diff --git a/singleheader/simdjson.h b/singleheader/simdjson.h index e0a5a3d9..930fdee8 100644 --- a/singleheader/simdjson.h +++ b/singleheader/simdjson.h @@ -1,4 +1,4 @@ -/* auto-generated on Thu Jun 25 16:43:19 PDT 2020. Do not edit! */ +/* auto-generated on Fri Jun 26 01:04:15 UTC 2020. Do not edit! */ /* begin file include/simdjson.h */ #ifndef SIMDJSON_H #define SIMDJSON_H @@ -7649,13 +7649,13 @@ really_inline T tape_ref::next_tape_value() const noexcept { really_inline uint32_t internal::tape_ref::get_string_length() const noexcept { uint64_t string_buf_index = size_t(tape_value()); uint32_t len; - memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); + memcpy(&len, &doc->string_buf[size_t(string_buf_index)], sizeof(len)); return len; } really_inline const char * internal::tape_ref::get_c_str() const noexcept { uint64_t string_buf_index = size_t(tape_value()); - return reinterpret_cast(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); + return reinterpret_cast(&doc->string_buf[size_t(string_buf_index) + sizeof(uint32_t)]); } inline std::string_view internal::tape_ref::get_string_view() const noexcept { From b6997a56df5406333e7d363f6ce07764433e2dab Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 26 Jun 2020 12:15:16 -0400 Subject: [PATCH 3/8] Patching things up and adding tests. --- .drone.yml | 42 ++++++++++++++++++++++++++++++ include/simdjson/inline/tape_ref.h | 6 ++--- singleheader/amalgamate_demo.cpp | 2 +- singleheader/simdjson.cpp | 2 +- singleheader/simdjson.h | 8 +++--- src/simdprune_tables.h | 6 +++++ 6 files changed, 57 insertions(+), 9 deletions(-) diff --git a/.drone.yml b/.drone.yml index 33c3ac7c..2166727a 100644 --- a/.drone.yml +++ b/.drone.yml @@ -1,4 +1,46 @@ kind: pipeline +name: i386-gcc # we do not support 32-bit systems, but we run tests +platform: { os: linux, arch: amd64 } +steps: +- name: Build and Test + image: i386/ubuntu + environment: + CC: gcc + CXX: g++ + BUILD_FLAGS: -- -j + CMAKE_FLAGS: -DSIMDJSON_BUILD_STATIC=ON + CTEST_FLAGS: -j4 --output-on-failure -E checkperf -E numberparsingcheck # A ULP marging 1 is detected on 32-bit GCC + commands: + - apt-get update -qq + - apt-get install -y g++ cmake gcc + - mkdir build + - cd build + - cmake $CMAKE_FLAGS .. + - cmake --build . $BUILD_FLAGS + - ctest $CTEST_FLAGS +--- +kind: pipeline +name: i386-clang # we do not support 32-bit systems, but we run tests +platform: { os: linux, arch: amd64 } +steps: +- name: Build and Test + image: i386/ubuntu + environment: + CC: clang-6.0 + CXX: clang++-6.0 + BUILD_FLAGS: -- -j + CMAKE_FLAGS: -DSIMDJSON_BUILD_STATIC=ON + CTEST_FLAGS: -j4 --output-on-failure -E checkperf + commands: + - apt-get update -qq + - apt-get install -y clang++-6.0 cmake + - mkdir build + - cd build + - cmake $CMAKE_FLAGS .. + - cmake --build . $BUILD_FLAGS + - ctest $CTEST_FLAGS +--- +kind: pipeline name: gcc9 platform: { os: linux, arch: amd64 } steps: diff --git a/include/simdjson/inline/tape_ref.h b/include/simdjson/inline/tape_ref.h index 1d7e5efd..deccbaae 100644 --- a/include/simdjson/inline/tape_ref.h +++ b/include/simdjson/inline/tape_ref.h @@ -81,15 +81,15 @@ really_inline T tape_ref::next_tape_value() const noexcept { } really_inline uint32_t internal::tape_ref::get_string_length() const noexcept { - uint64_t string_buf_index = size_t(tape_value()); + size_t string_buf_index = size_t(tape_value()); uint32_t len; memcpy(&len, &doc->string_buf[size_t(string_buf_index)], sizeof(len)); return len; } really_inline const char * internal::tape_ref::get_c_str() const noexcept { - uint64_t string_buf_index = size_t(tape_value()); - return reinterpret_cast(&doc->string_buf[size_t(string_buf_index) + sizeof(uint32_t)]); + size_t string_buf_index = size_t(tape_value()); + return reinterpret_cast(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); } inline std::string_view internal::tape_ref::get_string_view() const noexcept { diff --git a/singleheader/amalgamate_demo.cpp b/singleheader/amalgamate_demo.cpp index 04b81412..464db2e8 100644 --- a/singleheader/amalgamate_demo.cpp +++ b/singleheader/amalgamate_demo.cpp @@ -1,4 +1,4 @@ -/* auto-generated on Fri Jun 26 01:04:15 UTC 2020. Do not edit! */ +/* auto-generated on Fri Jun 26 15:35:58 UTC 2020. Do not edit! */ #include #include "simdjson.h" diff --git a/singleheader/simdjson.cpp b/singleheader/simdjson.cpp index f9199c2d..47f31cc9 100644 --- a/singleheader/simdjson.cpp +++ b/singleheader/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on Fri Jun 26 01:04:15 UTC 2020. Do not edit! */ +/* auto-generated on Fri Jun 26 15:35:58 UTC 2020. Do not edit! */ /* begin file src/simdjson.cpp */ #include "simdjson.h" diff --git a/singleheader/simdjson.h b/singleheader/simdjson.h index 930fdee8..4b38763d 100644 --- a/singleheader/simdjson.h +++ b/singleheader/simdjson.h @@ -1,4 +1,4 @@ -/* auto-generated on Fri Jun 26 01:04:15 UTC 2020. Do not edit! */ +/* auto-generated on Fri Jun 26 15:35:58 UTC 2020. Do not edit! */ /* begin file include/simdjson.h */ #ifndef SIMDJSON_H #define SIMDJSON_H @@ -7647,15 +7647,15 @@ really_inline T tape_ref::next_tape_value() const noexcept { } really_inline uint32_t internal::tape_ref::get_string_length() const noexcept { - uint64_t string_buf_index = size_t(tape_value()); + size_t string_buf_index = size_t(tape_value()); uint32_t len; memcpy(&len, &doc->string_buf[size_t(string_buf_index)], sizeof(len)); return len; } really_inline const char * internal::tape_ref::get_c_str() const noexcept { - uint64_t string_buf_index = size_t(tape_value()); - return reinterpret_cast(&doc->string_buf[size_t(string_buf_index) + sizeof(uint32_t)]); + size_t string_buf_index = size_t(tape_value()); + return reinterpret_cast(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); } inline std::string_view internal::tape_ref::get_string_view() const noexcept { diff --git a/src/simdprune_tables.h b/src/simdprune_tables.h index 21be30c6..8f27fbda 100644 --- a/src/simdprune_tables.h +++ b/src/simdprune_tables.h @@ -1,5 +1,9 @@ #ifndef SIMDJSON_SIMDPRUNE_TABLES_H #define SIMDJSON_SIMDPRUNE_TABLES_H + + +#if SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE + #include namespace simdjson { // table modified and copied from @@ -127,4 +131,6 @@ static const uint64_t thintable_epi8[256] = { } // namespace simdjson + +#endif // SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE #endif // SIMDJSON_SIMDPRUNE_TABLES_H From 88da62ba0930dbec09b0a41c8bcd02e88bf9f551 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 26 Jun 2020 13:02:12 -0400 Subject: [PATCH 4/8] Better documentation in the code. --- include/simdjson/portability.h | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/include/simdjson/portability.h b/include/simdjson/portability.h index 0210beb2..9b346376 100644 --- a/include/simdjson/portability.h +++ b/include/simdjson/portability.h @@ -41,20 +41,23 @@ #define SIMDJSON_IS_ARM64 1 #else #define SIMDJSON_IS_32BITS 1 + +// We do not support 32-bit platforms, but it can be +// handy to identify them. +#if defined(_M_IX86) || defined(__i386__) +#define SIMDJSON_IS_X86_32BITS 1 +#elif defined(__arm__) || defined(_M_ARM) +#define SIMDJSON_IS_ARM_32BITS 1 #endif +#endif // defined(__x86_64__) || defined(_M_AMD64) + #ifdef SIMDJSON_IS_32BITS -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(__GNUC__) -#pragma message("The simdjson library is designed\ - for 64-bit processors and it seems that you are not \ +#pragma message("The simdjson library is designed \ +for 64-bit processors and it seems that you are not \ compiling for a known 64-bit platform. All fast kernels \ will be disabled and performance may be poor. Please \ use a 64-bit target such as x64 or 64-bit ARM.") -#else -#error "The simdjson library is designed\ - for 64-bit processors. It seems that you are not \ -compiling for a known 64-bit platform." -#endif #endif // SIMDJSON_IS_32BITS // this is almost standard? @@ -75,6 +78,15 @@ compiling for a known 64-bit platform." #define SIMDJSON_IMPLEMENTATION_WESTMERE 0 #endif // SIMDJSON_IS_ARM64 +// Our fast kernels require 64-bit systems. +// +// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. +// Furthermore, the number of SIMD registers is reduced. +// +// On 32-bit ARM, we would have smaller registers. +// +// The simdjson users should still have the fallback kernel. It is +// slower, but it should run everywhere. #if SIMDJSON_IS_X86_64 #ifndef SIMDJSON_IMPLEMENTATION_HASWELL #define SIMDJSON_IMPLEMENTATION_HASWELL 1 @@ -85,7 +97,7 @@ compiling for a known 64-bit platform." #define SIMDJSON_IMPLEMENTATION_ARM64 0 #endif // SIMDJSON_IS_X86_64 -// we are going to use runtime dispatch +// We are going to use runtime dispatch. #ifdef SIMDJSON_IS_X86_64 #ifdef __clang__ // clang does not have GCC push pop From deaa74d378251d71f8480146be3899e1e608f734 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 26 Jun 2020 18:57:34 -0400 Subject: [PATCH 5/8] Re-enabling tests generally. --- .drone.yml | 2 +- src/generic/stage2/numberparsing.h | 4 +++- tests/numberparsingcheck.cpp | 6 ++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.drone.yml b/.drone.yml index 2166727a..df0bfdd5 100644 --- a/.drone.yml +++ b/.drone.yml @@ -9,7 +9,7 @@ steps: CXX: g++ BUILD_FLAGS: -- -j CMAKE_FLAGS: -DSIMDJSON_BUILD_STATIC=ON - CTEST_FLAGS: -j4 --output-on-failure -E checkperf -E numberparsingcheck # A ULP marging 1 is detected on 32-bit GCC + CTEST_FLAGS: -j4 --output-on-failure -E checkperf commands: - apt-get update -qq - apt-get install -y g++ cmake gcc diff --git a/src/generic/stage2/numberparsing.h b/src/generic/stage2/numberparsing.h index ee0b092f..07aaa908 100644 --- a/src/generic/stage2/numberparsing.h +++ b/src/generic/stage2/numberparsing.h @@ -1,6 +1,8 @@ namespace stage2 { namespace numberparsing { - +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) +#warning "Your floating-point rounding default is inadequate and may lead to inexact parsing." +#endif // Attempts to compute i * 10^(power) exactly; and if "negative" is // true, negate the result. // This function will only work in some cases, when it does not work, success is diff --git a/tests/numberparsingcheck.cpp b/tests/numberparsingcheck.cpp index dbcc3047..bcd88f32 100644 --- a/tests/numberparsingcheck.cpp +++ b/tests/numberparsingcheck.cpp @@ -208,6 +208,12 @@ bool validate(const char *dirname) { } int main(int argc, char *argv[]) { +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + std::cout << "Your floating-point rounding default is inadequate and may lead to inexact parsing." << std::endl; + std::cout << "We are not going to check number parsing precision." << std::endl; + std::cout << "We are returning with a success condition nevertheless (to avoid noisy failing tests)." << std::endl; + return EXIT_SUCCESS; +#endif if (argc != 2) { std::cerr << "Usage: " << argv[0] << " " << std::endl; From bb5ce007e6502f0a37b1cd9d948c32def8be6926 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 26 Jun 2020 19:03:28 -0400 Subject: [PATCH 6/8] Something better. --- src/generic/stage2/numberparsing.h | 9 ++++++--- tests/numberparsingcheck.cpp | 6 ------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/generic/stage2/numberparsing.h b/src/generic/stage2/numberparsing.h index 07aaa908..9a9e4bef 100644 --- a/src/generic/stage2/numberparsing.h +++ b/src/generic/stage2/numberparsing.h @@ -1,8 +1,6 @@ namespace stage2 { namespace numberparsing { -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) -#warning "Your floating-point rounding default is inadequate and may lead to inexact parsing." -#endif + // Attempts to compute i * 10^(power) exactly; and if "negative" is // true, negate the result. // This function will only work in some cases, when it does not work, success is @@ -15,7 +13,12 @@ really_inline double compute_float_64(int64_t power, uint64_t i, bool negative, // It was described in // Clinger WD. How to read floating point numbers accurately. // ACM SIGPLAN Notices. 1990 +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) { +#else if (-22 <= power && power <= 22 && i <= 9007199254740991) { +#endif // convert the integer into a double. This is lossless since // 0 <= i <= 2^53 - 1. double d = double(i); diff --git a/tests/numberparsingcheck.cpp b/tests/numberparsingcheck.cpp index bcd88f32..dbcc3047 100644 --- a/tests/numberparsingcheck.cpp +++ b/tests/numberparsingcheck.cpp @@ -208,12 +208,6 @@ bool validate(const char *dirname) { } int main(int argc, char *argv[]) { -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - std::cout << "Your floating-point rounding default is inadequate and may lead to inexact parsing." << std::endl; - std::cout << "We are not going to check number parsing precision." << std::endl; - std::cout << "We are returning with a success condition nevertheless (to avoid noisy failing tests)." << std::endl; - return EXIT_SUCCESS; -#endif if (argc != 2) { std::cerr << "Usage: " << argv[0] << " " << std::endl; From 94e910586d5bce6f608e7747cb30d5e87181d9ed Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 26 Jun 2020 19:06:39 -0400 Subject: [PATCH 7/8] Removing a cast. --- include/simdjson/inline/tape_ref.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/simdjson/inline/tape_ref.h b/include/simdjson/inline/tape_ref.h index deccbaae..a4b2feb4 100644 --- a/include/simdjson/inline/tape_ref.h +++ b/include/simdjson/inline/tape_ref.h @@ -83,7 +83,7 @@ really_inline T tape_ref::next_tape_value() const noexcept { really_inline uint32_t internal::tape_ref::get_string_length() const noexcept { size_t string_buf_index = size_t(tape_value()); uint32_t len; - memcpy(&len, &doc->string_buf[size_t(string_buf_index)], sizeof(len)); + memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); return len; } From 444ec4ad27208471e29ec7b9924483a8a9911417 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 26 Jun 2020 19:29:28 -0400 Subject: [PATCH 8/8] Stupid me --- include/simdjson/portability.h | 1 + src/generic/stage2/numberparsing.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/include/simdjson/portability.h b/include/simdjson/portability.h index 9b346376..41dc3007 100644 --- a/include/simdjson/portability.h +++ b/include/simdjson/portability.h @@ -4,6 +4,7 @@ #include #include #include +#include #ifdef _MSC_VER diff --git a/src/generic/stage2/numberparsing.h b/src/generic/stage2/numberparsing.h index 9a9e4bef..cd59fd45 100644 --- a/src/generic/stage2/numberparsing.h +++ b/src/generic/stage2/numberparsing.h @@ -13,6 +13,9 @@ really_inline double compute_float_64(int64_t power, uint64_t i, bool negative, // It was described in // Clinger WD. How to read floating point numbers accurately. // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif #if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) // We cannot be certain that x/y is rounded to nearest. if (0 <= power && power <= 22 && i <= 9007199254740991) {