diff --git a/.drone.yml b/.drone.yml index 33c3ac7c..df0bfdd5 100644 --- a/.drone.yml +++ b/.drone.yml @@ -1,4 +1,46 @@ kind: pipeline +name: i386-gcc # we do not support 32-bit systems, but we run tests +platform: { os: linux, arch: amd64 } +steps: +- name: Build and Test + image: i386/ubuntu + environment: + CC: gcc + CXX: g++ + BUILD_FLAGS: -- -j + CMAKE_FLAGS: -DSIMDJSON_BUILD_STATIC=ON + CTEST_FLAGS: -j4 --output-on-failure -E checkperf + commands: + - apt-get update -qq + - apt-get install -y g++ cmake gcc + - mkdir build + - cd build + - cmake $CMAKE_FLAGS .. + - cmake --build . $BUILD_FLAGS + - ctest $CTEST_FLAGS +--- +kind: pipeline +name: i386-clang # we do not support 32-bit systems, but we run tests +platform: { os: linux, arch: amd64 } +steps: +- name: Build and Test + image: i386/ubuntu + environment: + CC: clang-6.0 + CXX: clang++-6.0 + BUILD_FLAGS: -- -j + CMAKE_FLAGS: -DSIMDJSON_BUILD_STATIC=ON + CTEST_FLAGS: -j4 --output-on-failure -E checkperf + commands: + - apt-get update -qq + - apt-get install -y clang++-6.0 cmake + - mkdir build + - cd build + - cmake $CMAKE_FLAGS .. + - cmake --build . 
$BUILD_FLAGS + - ctest $CTEST_FLAGS +--- +kind: pipeline name: gcc9 platform: { os: linux, arch: amd64 } steps: diff --git a/include/simdjson/inline/tape_ref.h b/include/simdjson/inline/tape_ref.h index bdd0d57e..a4b2feb4 100644 --- a/include/simdjson/inline/tape_ref.h +++ b/include/simdjson/inline/tape_ref.h @@ -81,14 +81,14 @@ really_inline T tape_ref::next_tape_value() const noexcept { } really_inline uint32_t internal::tape_ref::get_string_length() const noexcept { - uint64_t string_buf_index = size_t(tape_value()); + size_t string_buf_index = size_t(tape_value()); uint32_t len; memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); return len; } really_inline const char * internal::tape_ref::get_c_str() const noexcept { - uint64_t string_buf_index = size_t(tape_value()); + size_t string_buf_index = size_t(tape_value()); return reinterpret_cast(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); } diff --git a/include/simdjson/portability.h b/include/simdjson/portability.h index 8ef0cc28..c683df10 100644 --- a/include/simdjson/portability.h +++ b/include/simdjson/portability.h @@ -4,6 +4,7 @@ #include #include #include +#include #ifdef _MSC_VER @@ -37,24 +38,28 @@ #if defined(__x86_64__) || defined(_M_AMD64) #define SIMDJSON_IS_X86_64 1 -#endif -#if defined(__aarch64__) || defined(_M_ARM64) +#elif defined(__aarch64__) || defined(_M_ARM64) #define SIMDJSON_IS_ARM64 1 +#else +#define SIMDJSON_IS_32BITS 1 + +// We do not support 32-bit platforms, but it can be +// handy to identify them. 
+#if defined(_M_IX86) || defined(__i386__) +#define SIMDJSON_IS_X86_32BITS 1 +#elif defined(__arm__) || defined(_M_ARM) +#define SIMDJSON_IS_ARM_32BITS 1 #endif -#if (!defined(SIMDJSON_IS_X86_64)) && (!defined(SIMDJSON_IS_ARM64)) -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO -#pragma message("The simdjson library is designed\ - for 64-bit processors and it seems that you are not \ +#endif // defined(__x86_64__) || defined(_M_AMD64) + +#ifdef SIMDJSON_IS_32BITS +#pragma message("The simdjson library is designed \ +for 64-bit processors and it seems that you are not \ compiling for a known 64-bit platform. All fast kernels \ will be disabled and performance may be poor. Please \ use a 64-bit target such as x64 or 64-bit ARM.") -#else -#error "The simdjson library is designed\ - for 64-bit processors. It seems that you are not \ -compiling for a known 64-bit platform." -#endif -#endif // (!defined(SIMDJSON_IS_X86_64)) && (!defined(SIMDJSON_IS_ARM64)) +#endif // SIMDJSON_IS_32BITS // this is almost standard? #undef STRINGIFY_IMPLEMENTATION_ @@ -74,6 +79,15 @@ compiling for a known 64-bit platform." #define SIMDJSON_IMPLEMENTATION_WESTMERE 0 #endif // SIMDJSON_IS_ARM64 +// Our fast kernels require 64-bit systems. +// +// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. +// Furthermore, the number of SIMD registers is reduced. +// +// On 32-bit ARM, we would have smaller registers. +// +// The simdjson users should still have the fallback kernel. It is +// slower, but it should run everywhere. #if SIMDJSON_IS_X86_64 #ifndef SIMDJSON_IMPLEMENTATION_HASWELL #define SIMDJSON_IMPLEMENTATION_HASWELL 1 @@ -84,7 +98,7 @@ compiling for a known 64-bit platform." #define SIMDJSON_IMPLEMENTATION_ARM64 0 #endif // SIMDJSON_IS_X86_64 -// we are going to use runtime dispatch +// We are going to use runtime dispatch. 
#ifdef SIMDJSON_IS_X86_64 #ifdef __clang__ // clang does not have GCC push pop diff --git a/singleheader/amalgamate_demo.cpp b/singleheader/amalgamate_demo.cpp index 54cfc77a..464db2e8 100644 --- a/singleheader/amalgamate_demo.cpp +++ b/singleheader/amalgamate_demo.cpp @@ -1,4 +1,4 @@ -/* auto-generated on Tue 23 Jun 2020 20:51:12 EDT. Do not edit! */ +/* auto-generated on Fri Jun 26 15:35:58 UTC 2020. Do not edit! */ #include #include "simdjson.h" @@ -43,3 +43,4 @@ int main(int argc, char *argv[]) { } return EXIT_SUCCESS; } + diff --git a/singleheader/simdjson.cpp b/singleheader/simdjson.cpp index 1506f247..47f31cc9 100644 --- a/singleheader/simdjson.cpp +++ b/singleheader/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on Tue 23 Jun 2020 20:51:12 EDT. Do not edit! */ +/* auto-generated on Fri Jun 26 15:35:58 UTC 2020. Do not edit! */ /* begin file src/simdjson.cpp */ #include "simdjson.h" @@ -934,9 +934,8 @@ struct value128 { uint64_t high; }; -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) && \ - !defined(_M_X64) && !defined(_M_ARM64)// _umul128 for x86, arm -// this is a slow emulation routine for 32-bit Windows +#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit // static inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; @@ -955,7 +954,7 @@ static inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { value128 answer; -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); @@ -963,7 +962,7 @@ really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 -#else // 
SIMDJSON_REGULAR_VISUAL_STUDIO +#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) __uint128_t r = ((__uint128_t)value1) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); @@ -7826,7 +7825,7 @@ really_inline bool add_overflow(uint64_t value1, uint64_t value2, #endif } -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) #pragma intrinsic(_umul128) #endif really_inline bool mul_overflow(uint64_t value1, uint64_t value2, @@ -11114,7 +11113,7 @@ really_inline bool add_overflow(uint64_t value1, uint64_t value2, #endif } -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) #pragma intrinsic(_umul128) #endif really_inline bool mul_overflow(uint64_t value1, uint64_t value2, diff --git a/singleheader/simdjson.h b/singleheader/simdjson.h index f6f12ed1..4b38763d 100644 --- a/singleheader/simdjson.h +++ b/singleheader/simdjson.h @@ -1,4 +1,4 @@ -/* auto-generated on Tue 23 Jun 2020 20:51:12 EDT. Do not edit! */ +/* auto-generated on Fri Jun 26 15:35:58 UTC 2020. Do not edit! */ /* begin file include/simdjson.h */ #ifndef SIMDJSON_H #define SIMDJSON_H @@ -91,13 +91,14 @@ #if defined(__x86_64__) || defined(_M_AMD64) #define SIMDJSON_IS_X86_64 1 -#endif -#if defined(__aarch64__) || defined(_M_ARM64) +#elif defined(__aarch64__) || defined(_M_ARM64) #define SIMDJSON_IS_ARM64 1 +#else +#define SIMDJSON_IS_32BITS 1 #endif -#if (!defined(SIMDJSON_IS_X86_64)) && (!defined(SIMDJSON_IS_ARM64)) -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#ifdef SIMDJSON_IS_32BITS +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(__GNUC__) #pragma message("The simdjson library is designed\ for 64-bit processors and it seems that you are not \ compiling for a known 64-bit platform. All fast kernels \ @@ -108,7 +109,7 @@ use a 64-bit target such as x64 or 64-bit ARM.") for 64-bit processors. 
It seems that you are not \ compiling for a known 64-bit platform." #endif -#endif // (!defined(SIMDJSON_IS_X86_64)) && (!defined(SIMDJSON_IS_ARM64)) +#endif // SIMDJSON_IS_32BITS // this is almost standard? #undef STRINGIFY_IMPLEMENTATION_ @@ -2632,7 +2633,6 @@ inline error_code dom_parser_implementation::allocate(size_t capacity, size_t ma #endif // SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H /* end file include/simdjson/internal/dom_parser_implementation.h */ -#include #include #include #include @@ -4008,21 +4008,42 @@ public: */ inline simdjson_result get_object() const noexcept; /** - * Cast this element to a string. + * Cast this element to a null-terminated C string. + * + * The string is guaranteed to be valid UTF-8. * - * Equivalent to get(). + * The get_c_str() function is equivalent to get(). + * + * The length of the string is given by get_string_length(). Because JSON strings + * may contain null characters, it may be incorrect to use strlen to determine the + * string length. * - * @returns An pointer to a null-terminated string. This string is stored in the parser and will + * It is possible to get a single string_view instance which represents both the string + * content and its length: see get_string(). + * + * @returns A pointer to a null-terminated UTF-8 string. This string is stored in the parser and will * be invalidated the next time it parses a document or when it is destroyed. * Returns INCORRECT_TYPE if the JSON element is not a string. */ inline simdjson_result get_c_str() const noexcept; /** - * Cast this element to a string. + * Gives the length in bytes of the string. + * + * It is possible to get a single string_view instance which represents both the string + * content and its length: see get_string(). + * + * @returns A string length in bytes. + * Returns INCORRECT_TYPE if the JSON element is not a string. + */ + inline simdjson_result get_string_length() const noexcept; + /** + * Cast this element to a string. 
+ * + * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). * - * @returns A string. The string is stored in the parser and will be invalidated the next time it + * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next time it * parses a document or when it is destroyed. * Returns INCORRECT_TYPE if the JSON element is not a string. */ @@ -4199,7 +4220,9 @@ public: inline operator bool() const noexcept(false); /** - * Read this element as a null-terminated string. + * Read this element as a null-terminated UTF-8 string. + * + * Be mindful that JSON allows strings to contain null characters. * * Does *not* convert other types to a string; requires that the JSON type of the element was * an actual string. @@ -4210,7 +4233,7 @@ public: inline explicit operator const char*() const noexcept(false); /** - * Read this element as a null-terminated string. + * Read this element as a null-terminated UTF-8 string. * * Does *not* convert other types to a string; requires that the JSON type of the element was * an actual string. 
@@ -4410,6 +4433,7 @@ public: really_inline simdjson_result get_array() const noexcept; really_inline simdjson_result get_object() const noexcept; really_inline simdjson_result get_c_str() const noexcept; + really_inline simdjson_result get_string_length() const noexcept; really_inline simdjson_result get_string() const noexcept; really_inline simdjson_result get_int64() const noexcept; really_inline simdjson_result get_uint64() const noexcept; @@ -5820,6 +5844,10 @@ really_inline simdjson_result simdjson_result::get_c if (error()) { return error(); } return first.get_c_str(); } +really_inline simdjson_result simdjson_result::get_string_length() const noexcept { + if (error()) { return error(); } + return first.get_string_length(); +} really_inline simdjson_result simdjson_result::get_string() const noexcept { if (error()) { return error(); } return first.get_string(); @@ -5960,6 +5988,15 @@ inline simdjson_result element::get_c_str() const noexcept { return INCORRECT_TYPE; } } +inline simdjson_result element::get_string_length() const noexcept { + switch (tape.tape_ref_type()) { + case internal::tape_type::STRING: { + return tape.get_string_length(); + } + default: + return INCORRECT_TYPE; + } +} inline simdjson_result element::get_string() const noexcept { switch (tape.tape_ref_type()) { case internal::tape_type::STRING: @@ -7610,14 +7647,14 @@ really_inline T tape_ref::next_tape_value() const noexcept { } really_inline uint32_t internal::tape_ref::get_string_length() const noexcept { - uint64_t string_buf_index = size_t(tape_value()); + size_t string_buf_index = size_t(tape_value()); uint32_t len; - memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); + memcpy(&len, &doc->string_buf[size_t(string_buf_index)], sizeof(len)); return len; } really_inline const char * internal::tape_ref::get_c_str() const noexcept { - uint64_t string_buf_index = size_t(tape_value()); + size_t string_buf_index = size_t(tape_value()); return 
reinterpret_cast(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); } diff --git a/src/generic/stage2/numberparsing.h b/src/generic/stage2/numberparsing.h index ee0b092f..cd59fd45 100644 --- a/src/generic/stage2/numberparsing.h +++ b/src/generic/stage2/numberparsing.h @@ -13,7 +13,15 @@ really_inline double compute_float_64(int64_t power, uint64_t i, bool negative, // It was described in // Clinger WD. How to read floating point numbers accurately. // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) { +#else if (-22 <= power && power <= 22 && i <= 9007199254740991) { +#endif // convert the integer into a double. This is lossless since // 0 <= i <= 2^53 - 1. double d = double(i); diff --git a/src/haswell/bitmanipulation.h b/src/haswell/bitmanipulation.h index ed7ee1d5..f6b8fc72 100644 --- a/src/haswell/bitmanipulation.h +++ b/src/haswell/bitmanipulation.h @@ -58,7 +58,7 @@ really_inline bool add_overflow(uint64_t value1, uint64_t value2, #endif } -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) #pragma intrinsic(_umul128) #endif really_inline bool mul_overflow(uint64_t value1, uint64_t value2, diff --git a/src/jsoncharutils.h b/src/jsoncharutils.h index 2c539e60..4380db8a 100644 --- a/src/jsoncharutils.h +++ b/src/jsoncharutils.h @@ -317,9 +317,8 @@ struct value128 { uint64_t high; }; -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) && \ - !defined(_M_X64) && !defined(_M_ARM64)// _umul128 for x86, arm -// this is a slow emulation routine for 32-bit Windows +#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit // static inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; @@ -338,7 +337,7 @@ 
static inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { value128 answer; -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); @@ -346,7 +345,7 @@ really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO +#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) __uint128_t r = ((__uint128_t)value1) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); diff --git a/src/simdprune_tables.h b/src/simdprune_tables.h index 21be30c6..8f27fbda 100644 --- a/src/simdprune_tables.h +++ b/src/simdprune_tables.h @@ -1,5 +1,9 @@ #ifndef SIMDJSON_SIMDPRUNE_TABLES_H #define SIMDJSON_SIMDPRUNE_TABLES_H + + +#if SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE + #include namespace simdjson { // table modified and copied from @@ -127,4 +131,6 @@ static const uint64_t thintable_epi8[256] = { } // namespace simdjson + +#endif // SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE #endif // SIMDJSON_SIMDPRUNE_TABLES_H diff --git a/src/westmere/bitmanipulation.h b/src/westmere/bitmanipulation.h index d55d4455..29a8623a 100644 --- a/src/westmere/bitmanipulation.h +++ b/src/westmere/bitmanipulation.h @@ -66,7 +66,7 @@ really_inline bool add_overflow(uint64_t value1, uint64_t value2, #endif } -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) #pragma intrinsic(_umul128) #endif really_inline bool mul_overflow(uint64_t value1, uint64_t 
value2,