Improving support for Visual Studio (MSVC)

This commit is contained in:
Daniel Lemire 2019-07-04 17:36:26 -04:00
parent 2b2d93b05f
commit 19cdc09928
3 changed files with 20 additions and 21 deletions

View File

@ -6,17 +6,16 @@
#include "simdjson/parsedjson.h" #include "simdjson/parsedjson.h"
#include "simdjson/portability.h" #include "simdjson/portability.h"
#if defined (__AVX2__) || (__SSE4_2__) #if defined (__AVX2__) || defined (__SSE4_2__) || (defined(_MSC_VER) && defined(_M_AMD64))
#ifndef SIMDJSON_SKIPUTF8VALIDATION #ifndef SIMDJSON_SKIPUTF8VALIDATION
#define SIMDJSON_UTF8VALIDATE #define SIMDJSON_UTF8VALIDATE
#endif #endif
#else #else
// currently we don't UTF8 validate for ARM // currently we don't UTF8 validate for ARM
// also we assume that if you're not __AVX2__ // also we assume that if you're not __AVX2__
// you're ARM, which is a bit dumb. TODO: Fix... // you're ARM, which is a bit dumb. TODO: Fix...
#ifdef __ARM_NEON #if defined(__ARM_NEON) || (defined(_MSC_VER) && defined(_M_ARM64))
#include <arm_neon.h> #include <arm_neon.h>
#else #else
#warning It appears that neither ARM NEON nor AVX2 are detected. #warning It appears that neither ARM NEON nor AVX2 are detected.
@ -45,7 +44,7 @@ struct simd_input<instruction_set::avx2>
}; };
#endif #endif
#ifdef __SSE4_2__ #if defined(__SSE4_2__) || (defined(_MSC_VER) && defined(_M_AMD64))
template<> template<>
struct simd_input<instruction_set::sse4_2> struct simd_input<instruction_set::sse4_2>
{ {
@ -56,7 +55,7 @@ struct simd_input<instruction_set::sse4_2>
}; };
#endif #endif
#ifdef __ARM_NEON #if defined(__ARM_NEON) || (defined(_MSC_VER) && defined(_M_ARM64))
template<> struct simd_input<instruction_set::neon> template<> struct simd_input<instruction_set::neon>
{ {
#ifndef TRANSPOSE #ifndef TRANSPOSE
@ -70,7 +69,7 @@ template<> struct simd_input<instruction_set::neon>
}; };
#endif #endif
#ifdef __ARM_NEON #if defined(__ARM_NEON) || (defined(_MSC_VER) && defined(_M_ARM64))
really_inline really_inline
uint16_t neonmovemask(uint8x16_t input) { uint16_t neonmovemask(uint8x16_t input) {
const uint8x16_t bitmask = { 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, const uint8x16_t bitmask = { 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
@ -165,7 +164,7 @@ uint64_t compute_quote_mask<instruction_set::avx2>(uint64_t quote_bits) {
} }
#endif #endif
#ifdef __SSE4_2__ #if defined(__SSE4_2__) || (defined(_MSC_VER) && defined(_M_AMD64))
template<> really_inline template<> really_inline
uint64_t compute_quote_mask<instruction_set::sse4_2>(uint64_t quote_bits) { uint64_t compute_quote_mask<instruction_set::sse4_2>(uint64_t quote_bits) {
// CLMUL is supported on some SSE42 hardware such as Sandy Bridge, // CLMUL is supported on some SSE42 hardware such as Sandy Bridge,
@ -179,7 +178,7 @@ uint64_t compute_quote_mask<instruction_set::sse4_2>(uint64_t quote_bits) {
} }
#endif #endif
#ifdef __ARM_NEON #if defined(__ARM_NEON) || (defined(_MSC_VER) && defined(_M_ARM64))
template<> really_inline template<> really_inline
uint64_t compute_quote_mask<instruction_set::neon>(uint64_t quote_bits) { uint64_t compute_quote_mask<instruction_set::neon>(uint64_t quote_bits) {
#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension #ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension
@ -209,7 +208,7 @@ struct check_utf8_helper<instruction_set::avx2>
}; };
#endif #endif
#ifdef __SSE4_2__ #if defined(__SSE4_2__) || (defined(_MSC_VER) && defined(_M_AMD64))
template<> template<>
struct check_utf8_helper<instruction_set::sse4_2> struct check_utf8_helper<instruction_set::sse4_2>
{ {
@ -246,7 +245,7 @@ void check_utf8<instruction_set::avx2>(simd_input<instruction_set::avx2> in,
} }
#endif //__AVX2__ #endif //__AVX2__
#ifdef __SSE4_2__ #if defined(__SSE4_2__) || (defined(_MSC_VER) && defined(_M_AMD64))
template<> really_inline template<> really_inline
void check_utf8<instruction_set::sse4_2>(simd_input<instruction_set::sse4_2> in, void check_utf8<instruction_set::sse4_2>(simd_input<instruction_set::sse4_2> in,
check_utf8_helper<instruction_set::sse4_2>& helper) { check_utf8_helper<instruction_set::sse4_2>& helper) {
@ -290,7 +289,7 @@ errorValues check_utf8_errors<instruction_set::avx2>(check_utf8_helper<instructi
} }
#endif #endif
#ifdef __SSE4_2__ #if defined(__SSE4_2__) || (defined(_MSC_VER) && defined(_M_AMD64))
template<> really_inline template<> really_inline
errorValues check_utf8_errors<instruction_set::sse4_2>(check_utf8_helper<instruction_set::sse4_2>& helper) { errorValues check_utf8_errors<instruction_set::sse4_2>(check_utf8_helper<instruction_set::sse4_2>& helper) {
return _mm_testz_si128(helper.has_error, helper.has_error) == 0 ? simdjson::UTF8_ERROR : simdjson::SUCCESS; return _mm_testz_si128(helper.has_error, helper.has_error) == 0 ? simdjson::UTF8_ERROR : simdjson::SUCCESS;
@ -311,7 +310,7 @@ simd_input<instruction_set::avx2> fill_input<instruction_set::avx2>(const uint8_
} }
#endif #endif
#ifdef __SSE4_2__ #if defined(__SSE4_2__) || (defined(_MSC_VER) && defined(_M_AMD64))
template<> really_inline template<> really_inline
simd_input<instruction_set::sse4_2> fill_input<instruction_set::sse4_2>(const uint8_t * ptr) { simd_input<instruction_set::sse4_2> fill_input<instruction_set::sse4_2>(const uint8_t * ptr) {
struct simd_input<instruction_set::sse4_2> in; struct simd_input<instruction_set::sse4_2> in;
@ -323,7 +322,7 @@ simd_input<instruction_set::sse4_2> fill_input<instruction_set::sse4_2>(const ui
} }
#endif #endif
#ifdef __ARM_NEON #if defined(__ARM_NEON) || (defined(_MSC_VER) && defined(_M_ARM64))
template<> really_inline template<> really_inline
simd_input<instruction_set::neon> fill_input<instruction_set::neon>(const uint8_t * ptr) { simd_input<instruction_set::neon> fill_input<instruction_set::neon>(const uint8_t * ptr) {
struct simd_input<instruction_set::neon> in; struct simd_input<instruction_set::neon> in;
@ -356,7 +355,7 @@ uint64_t cmp_mask_against_input<instruction_set::avx2>(simd_input<instruction_se
} }
#endif #endif
#ifdef __SSE4_2__ #if defined(__SSE4_2__) || (defined(_MSC_VER) && defined(_M_AMD64))
template<> really_inline template<> really_inline
uint64_t cmp_mask_against_input<instruction_set::sse4_2>(simd_input<instruction_set::sse4_2> in, uint8_t m) { uint64_t cmp_mask_against_input<instruction_set::sse4_2>(simd_input<instruction_set::sse4_2> in, uint8_t m) {
const __m128i mask = _mm_set1_epi8(m); const __m128i mask = _mm_set1_epi8(m);
@ -373,7 +372,7 @@ uint64_t cmp_mask_against_input<instruction_set::sse4_2>(simd_input<instruction_
} }
#endif #endif
#ifdef __ARM_NEON #if defined(__ARM_NEON) || (defined(_MSC_VER) && defined(_M_ARM64))
template<> really_inline template<> really_inline
uint64_t cmp_mask_against_input<instruction_set::neon>(simd_input<instruction_set::neon> in, uint8_t m) { uint64_t cmp_mask_against_input<instruction_set::neon>(simd_input<instruction_set::neon> in, uint8_t m) {
const uint8x16_t mask = vmovq_n_u8(m); const uint8x16_t mask = vmovq_n_u8(m);
@ -401,7 +400,7 @@ uint64_t unsigned_lteq_against_input<instruction_set::avx2>(simd_input<instructi
} }
#endif #endif
#ifdef __SSE4_2__ #if defined(__SSE4_2__) || (defined(_MSC_VER) && defined(_M_AMD64))
template<> really_inline template<> really_inline
uint64_t unsigned_lteq_against_input<instruction_set::sse4_2>(simd_input<instruction_set::sse4_2> in, uint8_t m) { uint64_t unsigned_lteq_against_input<instruction_set::sse4_2>(simd_input<instruction_set::sse4_2> in, uint8_t m) {
const __m128i maxval = _mm_set1_epi8(m); const __m128i maxval = _mm_set1_epi8(m);
@ -417,7 +416,7 @@ uint64_t unsigned_lteq_against_input<instruction_set::sse4_2>(simd_input<instruc
} }
#endif #endif
#ifdef __ARM_NEON #if defined(__ARM_NEON) || (defined(_MSC_VER) && defined(_M_ARM64))
template<> really_inline template<> really_inline
uint64_t unsigned_lteq_against_input<instruction_set::neon>(simd_input<instruction_set::neon> in, uint8_t m) { uint64_t unsigned_lteq_against_input<instruction_set::neon>(simd_input<instruction_set::neon> in, uint8_t m) {
const uint8x16_t mask = vmovq_n_u8(m); const uint8x16_t mask = vmovq_n_u8(m);
@ -609,7 +608,7 @@ void find_whitespace_and_structurals<instruction_set::avx2>(simd_input<instructi
} }
#endif // __AVX2__ #endif // __AVX2__
#ifdef __SSE4_2__ #if defined(__SSE4_2__) || (defined(_MSC_VER) && defined(_M_AMD64))
template<> really_inline template<> really_inline
void find_whitespace_and_structurals<instruction_set::sse4_2>(simd_input<instruction_set::sse4_2> in, void find_whitespace_and_structurals<instruction_set::sse4_2>(simd_input<instruction_set::sse4_2> in,
uint64_t &whitespace, uint64_t &whitespace,
@ -680,7 +679,7 @@ void find_whitespace_and_structurals<instruction_set::sse4_2>(simd_input<instruc
} }
#endif // __SSE4_2__ #endif // __SSE4_2__
#ifdef __ARM_NEON #if defined(__ARM_NEON) || (defined(_MSC_VER) && defined(_M_ARM64))
template<> really_inline template<> really_inline
void find_whitespace_and_structurals<instruction_set::neon>( void find_whitespace_and_structurals<instruction_set::neon>(
simd_input<instruction_set::neon> in, simd_input<instruction_set::neon> in,

View File

@ -109,7 +109,7 @@ parse_string_helper find_bs_bits_and_quote_bits<instruction_set::avx2> (const ui
} }
#endif #endif
#ifdef __SSE4_2__ #if defined(__SSE4_2__) || (defined(_MSC_VER) && defined(_M_AMD64))
template<> really_inline template<> really_inline
parse_string_helper find_bs_bits_and_quote_bits<instruction_set::sse4_2> (const uint8_t *src, uint8_t *dst) { parse_string_helper find_bs_bits_and_quote_bits<instruction_set::sse4_2> (const uint8_t *src, uint8_t *dst) {
// this can read up to 31 bytes beyond the buffer size, but we require // this can read up to 31 bytes beyond the buffer size, but we require

View File

@ -40,7 +40,7 @@ int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj, bool rea
json_parse_ptr = avx_implementation; json_parse_ptr = avx_implementation;
break; break;
#endif #endif
#ifdef __SSE4_2__ #if defined(__SSE4_2__) || (defined(_MSC_VER) && defined(_M_AMD64))
case instruction_set::sse4_2 : case instruction_set::sse4_2 :
json_parse_ptr = sse4_2_implementation; json_parse_ptr = sse4_2_implementation;
break; break;