Fixing clang under visual studio (#1028)
* Lots of fixes * Removing some lambdas * Removing some functional programming. Co-authored-by: Daniel Lemire <lemire@gmai.com>
This commit is contained in:
parent
a19f635a6a
commit
d0ce2f0b5a
|
@ -0,0 +1,25 @@
|
|||
name: VS16-CLANG-CI
|
||||
|
||||
on: push
|
||||
|
||||
jobs:
|
||||
ci:
|
||||
name: windows-vs16
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: 'Run CMake with VS16'
|
||||
uses: lukka/run-cmake@v2
|
||||
with:
|
||||
cmakeListsOrSettingsJson: CMakeListsTxtAdvanced
|
||||
cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt'
|
||||
buildDirectory: "${{ github.workspace }}/../../_temp/windows"
|
||||
cmakeBuildType: Release
|
||||
buildWithCMake: true
|
||||
cmakeGenerator: VS16Win64
|
||||
cmakeAppendedArgs: -T ClangCL -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_BUILD_STATIC=ON
|
||||
buildWithCMakeArgs: --config Release
|
||||
|
||||
- name: 'Run CTest'
|
||||
run: ctest -C Release -E checkperf --output-on-failure
|
||||
working-directory: "${{ github.workspace }}/../../_temp/windows"
|
|
@ -30,7 +30,6 @@
|
|||
event_count allocate_count = collector.end(); \
|
||||
aggregate << allocate_count; \
|
||||
} \
|
||||
uint64_t S = size; \
|
||||
if (collector.has_events()) { \
|
||||
printf("%7.3f", aggregate.best.cycles() / static_cast<double>(size)); \
|
||||
if (verbose) { \
|
||||
|
@ -76,7 +75,6 @@
|
|||
event_count allocate_count = collector.end(); \
|
||||
aggregate << allocate_count; \
|
||||
} \
|
||||
uint64_t S = size; \
|
||||
if (collector.has_events()) { \
|
||||
printf("%7.3f", aggregate.best.cycles() / static_cast<double>(size)); \
|
||||
if (verbose) { \
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* auto-generated on Wed Jul 1 14:00:57 EDT 2020. Do not edit! */
|
||||
/* auto-generated on Mon Jul 6 18:16:52 EDT 2020. Do not edit! */
|
||||
|
||||
#include <iostream>
|
||||
#include "simdjson.h"
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
|||
/* auto-generated on Wed Jul 1 14:00:57 EDT 2020. Do not edit! */
|
||||
/* auto-generated on Mon Jul 6 18:16:52 EDT 2020. Do not edit! */
|
||||
/* begin file include/simdjson.h */
|
||||
#ifndef SIMDJSON_H
|
||||
#define SIMDJSON_H
|
||||
|
|
|
@ -26,13 +26,24 @@ struct json_character_block {
|
|||
};
|
||||
|
||||
really_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t> in) {
|
||||
auto v = in.map<uint8_t>([&](simd8<uint8_t> chunk) {
|
||||
auto nib_lo = chunk & 0xf;
|
||||
auto nib_hi = chunk.shr<4>();
|
||||
auto shuf_lo = nib_lo.lookup_16<uint8_t>(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
|
||||
auto shuf_hi = nib_hi.lookup_16<uint8_t>(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0);
|
||||
return shuf_lo & shuf_hi;
|
||||
});
|
||||
// Functional programming causes trouble with Visual Studio.
|
||||
// Keeping this version in comments since it is much nicer:
|
||||
// auto v = in.map<uint8_t>([&](simd8<uint8_t> chunk) {
|
||||
// auto nib_lo = chunk & 0xf;
|
||||
// auto nib_hi = chunk.shr<4>();
|
||||
// auto shuf_lo = nib_lo.lookup_16<uint8_t>(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
|
||||
// auto shuf_hi = nib_hi.lookup_16<uint8_t>(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0);
|
||||
// return shuf_lo & shuf_hi;
|
||||
// });
|
||||
const simd8<uint8_t> table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
|
||||
const simd8<uint8_t> table2(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0);
|
||||
|
||||
auto v = simd8x64<uint8_t>(
|
||||
(in.chunks[0] & 0xf).lookup_16(table1) & (in.chunks[0].shr<4>()).lookup_16(table2),
|
||||
(in.chunks[1] & 0xf).lookup_16(table1) & (in.chunks[1].shr<4>()).lookup_16(table2),
|
||||
(in.chunks[2] & 0xf).lookup_16(table1) & (in.chunks[2].shr<4>()).lookup_16(table2),
|
||||
(in.chunks[3] & 0xf).lookup_16(table1) & (in.chunks[3].shr<4>()).lookup_16(table2)
|
||||
);
|
||||
|
||||
|
||||
// We compute whitespace and op separately. If the code later only uses one or the
|
||||
|
@ -51,13 +62,25 @@ really_inline json_character_block json_character_block::classify(const simd::si
|
|||
// there is a small untaken optimization opportunity here. We deliberately
|
||||
// do not pick it up.
|
||||
|
||||
uint64_t op = v.map([&](simd8<uint8_t> _v) { return _v.any_bits_set(0x7); }).to_bitmask();
|
||||
uint64_t whitespace = v.map([&](simd8<uint8_t> _v) { return _v.any_bits_set(0x18); }).to_bitmask();
|
||||
uint64_t op = simd8x64<bool>(
|
||||
v.chunks[0].any_bits_set(0x7),
|
||||
v.chunks[1].any_bits_set(0x7),
|
||||
v.chunks[2].any_bits_set(0x7),
|
||||
v.chunks[3].any_bits_set(0x7)
|
||||
).to_bitmask();
|
||||
|
||||
uint64_t whitespace = simd8x64<bool>(
|
||||
v.chunks[0].any_bits_set(0x18),
|
||||
v.chunks[1].any_bits_set(0x18),
|
||||
v.chunks[2].any_bits_set(0x18),
|
||||
v.chunks[3].any_bits_set(0x18)
|
||||
).to_bitmask();
|
||||
|
||||
return { whitespace, op };
|
||||
}
|
||||
|
||||
really_inline bool is_ascii(simd8x64<uint8_t> input) {
|
||||
simd8<uint8_t> bits = input.reduce([&](simd8<uint8_t> a,simd8<uint8_t> b) { return a|b; });
|
||||
simd8<uint8_t> bits = (input.chunks[0] | input.chunks[1]) | (input.chunks[2] | input.chunks[3]);
|
||||
return bits.max() < 0b10000000u;
|
||||
}
|
||||
|
||||
|
|
|
@ -442,43 +442,6 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
|
|||
each(3);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline void each(F const& each_chunk) const
|
||||
{
|
||||
each_chunk(this->chunks[0]);
|
||||
each_chunk(this->chunks[1]);
|
||||
each_chunk(this->chunks[2]);
|
||||
each_chunk(this->chunks[3]);
|
||||
}
|
||||
|
||||
template <typename R=bool, typename F>
|
||||
really_inline simd8x64<R> map(F const& map_chunk) const {
|
||||
return simd8x64<R>(
|
||||
map_chunk(this->chunks[0]),
|
||||
map_chunk(this->chunks[1]),
|
||||
map_chunk(this->chunks[2]),
|
||||
map_chunk(this->chunks[3])
|
||||
);
|
||||
}
|
||||
|
||||
template <typename R=bool, typename F>
|
||||
really_inline simd8x64<R> map(const simd8x64<T> b, F const& map_chunk) const {
|
||||
return simd8x64<R>(
|
||||
map_chunk(this->chunks[0], b.chunks[0]),
|
||||
map_chunk(this->chunks[1], b.chunks[1]),
|
||||
map_chunk(this->chunks[2], b.chunks[2]),
|
||||
map_chunk(this->chunks[3], b.chunks[3])
|
||||
);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline simd8<T> reduce(F const& reduce_pair) const {
|
||||
return reduce_pair(
|
||||
reduce_pair(this->chunks[0], this->chunks[1]),
|
||||
reduce_pair(this->chunks[2], this->chunks[3])
|
||||
);
|
||||
}
|
||||
|
||||
really_inline uint64_t to_bitmask() const {
|
||||
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
|
||||
const uint8x16_t bit_mask = make_uint8x16_t(
|
||||
|
@ -501,17 +464,32 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
|
|||
|
||||
really_inline simd8x64<T> bit_or(const T m) const {
|
||||
const simd8<T> mask = simd8<T>::splat(m);
|
||||
return this->map( [&](simd8<T> a) { return a | mask; } );
|
||||
return simd8x64<T>(
|
||||
this->chunks[0] | mask,
|
||||
this->chunks[1] | mask,
|
||||
this->chunks[2] | mask,
|
||||
this->chunks[3] | mask
|
||||
);
|
||||
}
|
||||
|
||||
really_inline uint64_t eq(const T m) const {
|
||||
const simd8<T> mask = simd8<T>::splat(m);
|
||||
return this->map( [&](simd8<T> a) { return a == mask; } ).to_bitmask();
|
||||
return simd8x64<bool>(
|
||||
this->chunks[0] == mask,
|
||||
this->chunks[1] == mask,
|
||||
this->chunks[2] == mask,
|
||||
this->chunks[3] == mask
|
||||
).to_bitmask();
|
||||
}
|
||||
|
||||
really_inline uint64_t lteq(const T m) const {
|
||||
const simd8<T> mask = simd8<T>::splat(m);
|
||||
return this->map( [&](simd8<T> a) { return a <= mask; } ).to_bitmask();
|
||||
return simd8x64<bool>(
|
||||
this->chunks[0] <= mask,
|
||||
this->chunks[1] <= mask,
|
||||
this->chunks[2] <= mask,
|
||||
this->chunks[3] <= mask
|
||||
).to_bitmask();
|
||||
}
|
||||
}; // struct simd8x64<T>
|
||||
|
||||
|
|
|
@ -31,31 +31,7 @@ public:
|
|||
really_inline size_t remaining_len() {
|
||||
return parser.len - *current_structural;
|
||||
}
|
||||
template<typename F>
|
||||
really_inline bool with_space_terminated_copy(const F& f) {
|
||||
/**
|
||||
* We need to make a copy to make sure that the string is space terminated.
|
||||
* This is not about padding the input, which should already be padded up
|
||||
* to len + SIMDJSON_PADDING. However, we have no control at this stage
|
||||
* on how the padding was done. What if the input string was padded with nulls?
|
||||
* It is quite common for an input string to have an extra null character (C string).
|
||||
* We do not want to allow 9\0 (where \0 is the null character) inside a JSON
|
||||
* document, but the string "9\0" by itself is fine. So we make a copy and
|
||||
* pad the input with spaces when we know that there is just one input element.
|
||||
* This copy is relatively expensive, but it will almost never be called in
|
||||
* practice unless you are in the strange scenario where you have many JSON
|
||||
* documents made of single atoms.
|
||||
*/
|
||||
char *copy = static_cast<char *>(malloc(parser.len + SIMDJSON_PADDING));
|
||||
if (copy == nullptr) {
|
||||
return true;
|
||||
}
|
||||
memcpy(copy, buf, parser.len);
|
||||
memset(copy + parser.len, ' ', SIMDJSON_PADDING);
|
||||
bool result = f(reinterpret_cast<const uint8_t*>(copy), *current_structural);
|
||||
free(copy);
|
||||
return result;
|
||||
}
|
||||
|
||||
really_inline bool past_end(uint32_t n_structural_indexes) {
|
||||
return current_structural >= &parser.structural_indexes[n_structural_indexes];
|
||||
}
|
||||
|
|
|
@ -169,6 +169,31 @@ struct structural_parser : structural_iterator {
|
|||
return parse_number(current(), found_minus);
|
||||
}
|
||||
|
||||
really_inline bool parse_number_with_space_terminated_copy(const bool is_negative) {
|
||||
/**
|
||||
* We need to make a copy to make sure that the string is space terminated.
|
||||
* This is not about padding the input, which should already be padded up
|
||||
* to len + SIMDJSON_PADDING. However, we have no control at this stage
|
||||
* on how the padding was done. What if the input string was padded with nulls?
|
||||
* It is quite common for an input string to have an extra null character (C string).
|
||||
* We do not want to allow 9\0 (where \0 is the null character) inside a JSON
|
||||
* document, but the string "9\0" by itself is fine. So we make a copy and
|
||||
* pad the input with spaces when we know that there is just one input element.
|
||||
* This copy is relatively expensive, but it will almost never be called in
|
||||
* practice unless you are in the strange scenario where you have many JSON
|
||||
* documents made of single atoms.
|
||||
*/
|
||||
uint8_t *copy = static_cast<uint8_t *>(malloc(parser.len + SIMDJSON_PADDING));
|
||||
if (copy == nullptr) {
|
||||
return true;
|
||||
}
|
||||
memcpy(copy, buf, parser.len);
|
||||
memset(copy + parser.len, ' ', SIMDJSON_PADDING);
|
||||
size_t idx = *current_structural;
|
||||
bool result = parse_number(&copy[idx], is_negative); // parse_number does not throw
|
||||
free(copy);
|
||||
return result;
|
||||
}
|
||||
WARN_UNUSED really_inline ret_address_t parse_value(const unified_machine_addresses &addresses, ret_address_t continue_state) {
|
||||
switch (advance_char()) {
|
||||
case '"':
|
||||
|
@ -306,6 +331,7 @@ struct structural_parser : structural_iterator {
|
|||
#undef FAIL_IF
|
||||
#define FAIL_IF(EXPR) { if (EXPR) { goto error; } }
|
||||
|
||||
|
||||
template<bool STREAMING>
|
||||
WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_parser, dom::document &doc) noexcept {
|
||||
dom_parser.doc = &doc;
|
||||
|
@ -351,18 +377,16 @@ WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_p
|
|||
goto finish;
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
FAIL_IF(
|
||||
parser.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) {
|
||||
return parser.parse_number(&copy[idx], false);
|
||||
})
|
||||
);
|
||||
// Next line used to be an interesting functional programming exercise with
|
||||
// a lambda that gets passed to another function via a closure. This would confuse the
|
||||
// clangcl compiler under Visual Studio 2019 (recent release).
|
||||
{ if(parser.parse_number_with_space_terminated_copy(false)) { goto error; }}
|
||||
goto finish;
|
||||
case '-':
|
||||
FAIL_IF(
|
||||
parser.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) {
|
||||
return parser.parse_number(&copy[idx], true);
|
||||
})
|
||||
);
|
||||
// Next line used to be an interesting functional programming exercise with
|
||||
// a lambda that gets passed to another function via a closure. This would confuse the
|
||||
// clangcl compiler under Visual Studio 2019 (recent release).
|
||||
{ if(parser.parse_number_with_space_terminated_copy(true)) { goto error; }}
|
||||
goto finish;
|
||||
default:
|
||||
parser.log_error("Document starts with a non-value character");
|
||||
|
|
|
@ -37,19 +37,20 @@ really_inline json_character_block json_character_block::classify(const simd::si
|
|||
// hope that useless computations will be omitted. This is namely the case when
|
||||
// minifying (we only need whitespace).
|
||||
|
||||
uint64_t whitespace = in.map([&](simd8<uint8_t> _in) {
|
||||
return _in == simd8<uint8_t>(_mm256_shuffle_epi8(whitespace_table, _in));
|
||||
}).to_bitmask();
|
||||
|
||||
uint64_t op = in.map([&](simd8<uint8_t> _in) {
|
||||
// | 32 handles the fact that { } and [ ] are exactly 32 bytes apart
|
||||
return (_in | 32) == simd8<uint8_t>(_mm256_shuffle_epi8(op_table, _in-','));
|
||||
}).to_bitmask();
|
||||
uint64_t whitespace = simd8x64<bool>(
|
||||
in.chunks[0] == simd8<uint8_t>(_mm256_shuffle_epi8(whitespace_table, in.chunks[0])),
|
||||
in.chunks[1] == simd8<uint8_t>(_mm256_shuffle_epi8(whitespace_table, in.chunks[1]))
|
||||
).to_bitmask();
|
||||
|
||||
uint64_t op = simd8x64<bool>(
|
||||
(in.chunks[0] | 32) == simd8<uint8_t>(_mm256_shuffle_epi8(op_table, in.chunks[0]-',')),
|
||||
(in.chunks[1] | 32) == simd8<uint8_t>(_mm256_shuffle_epi8(op_table, in.chunks[1]-','))
|
||||
).to_bitmask();
|
||||
return { whitespace, op };
|
||||
}
|
||||
|
||||
really_inline bool is_ascii(simd8x64<uint8_t> input) {
|
||||
simd8<uint8_t> bits = input.reduce([&](simd8<uint8_t> a,simd8<uint8_t> b) { return a|b; });
|
||||
simd8<uint8_t> bits = (input.chunks[0] | input.chunks[1]);
|
||||
return !bits.any_bits_set_anywhere(0b10000000u);
|
||||
}
|
||||
|
||||
|
|
|
@ -316,36 +316,6 @@ namespace simd {
|
|||
this->chunks[1].store(ptr+sizeof(simd8<T>)*1);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline void each(F const& each_chunk) const
|
||||
{
|
||||
each_chunk(this->chunks[0]);
|
||||
each_chunk(this->chunks[1]);
|
||||
}
|
||||
|
||||
template <typename R=bool, typename F>
|
||||
really_inline simd8x64<R> map(F const& map_chunk) const {
|
||||
return simd8x64<R>(
|
||||
map_chunk(this->chunks[0]),
|
||||
map_chunk(this->chunks[1])
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template <typename R=bool, typename F>
|
||||
really_inline simd8x64<R> map(const simd8x64<uint8_t> b, F const& map_chunk) const {
|
||||
return simd8x64<R>(
|
||||
map_chunk(this->chunks[0], b.chunks[0]),
|
||||
map_chunk(this->chunks[1], b.chunks[1])
|
||||
);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline simd8<T> reduce(F const& reduce_pair) const {
|
||||
return reduce_pair(this->chunks[0], this->chunks[1]);
|
||||
}
|
||||
|
||||
really_inline uint64_t to_bitmask() const {
|
||||
uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask());
|
||||
uint64_t r_hi = this->chunks[1].to_bitmask();
|
||||
|
@ -354,17 +324,26 @@ namespace simd {
|
|||
|
||||
really_inline simd8x64<T> bit_or(const T m) const {
|
||||
const simd8<T> mask = simd8<T>::splat(m);
|
||||
return this->map( [&](simd8<T> a) { return a | mask; } );
|
||||
return simd8x64<T>(
|
||||
this->chunks[0] | mask,
|
||||
this->chunks[1] | mask
|
||||
);
|
||||
}
|
||||
|
||||
really_inline uint64_t eq(const T m) const {
|
||||
const simd8<T> mask = simd8<T>::splat(m);
|
||||
return this->map( [&](simd8<T> a) { return a == mask; } ).to_bitmask();
|
||||
return simd8x64<bool>(
|
||||
this->chunks[0] == mask,
|
||||
this->chunks[1] == mask
|
||||
).to_bitmask();
|
||||
}
|
||||
|
||||
really_inline uint64_t lteq(const T m) const {
|
||||
const simd8<T> mask = simd8<T>::splat(m);
|
||||
return this->map( [&](simd8<T> a) { return a <= mask; } ).to_bitmask();
|
||||
return simd8x64<bool>(
|
||||
this->chunks[0] <= mask,
|
||||
this->chunks[1] <= mask
|
||||
).to_bitmask();
|
||||
}
|
||||
}; // struct simd8x64<T>
|
||||
|
||||
|
|
|
@ -38,19 +38,25 @@ really_inline json_character_block json_character_block::classify(const simd::si
|
|||
// hope that useless computations will be omitted. This is namely the case when
|
||||
// minifying (we only need whitespace).
|
||||
|
||||
uint64_t whitespace = in.map([&](simd8<uint8_t> _in) {
|
||||
return _in == simd8<uint8_t>(_mm_shuffle_epi8(whitespace_table, _in));
|
||||
}).to_bitmask();
|
||||
uint64_t whitespace = simd8x64<bool>(
|
||||
in.chunks[0] == simd8<uint8_t>(_mm_shuffle_epi8(whitespace_table, in.chunks[0])),
|
||||
in.chunks[1] == simd8<uint8_t>(_mm_shuffle_epi8(whitespace_table, in.chunks[1])),
|
||||
in.chunks[2] == simd8<uint8_t>(_mm_shuffle_epi8(whitespace_table, in.chunks[2])),
|
||||
in.chunks[3] == simd8<uint8_t>(_mm_shuffle_epi8(whitespace_table, in.chunks[3]))
|
||||
).to_bitmask();
|
||||
|
||||
uint64_t op = in.map([&](simd8<uint8_t> _in) {
|
||||
// | 32 handles the fact that { } and [ ] are exactly 32 bytes apart
|
||||
return (_in | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, _in-','));
|
||||
}).to_bitmask();
|
||||
// | 32 handles the fact that { } and [ ] are exactly 32 bytes apart
|
||||
uint64_t op = simd8x64<bool>(
|
||||
(in.chunks[0] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[0]-',')),
|
||||
(in.chunks[1] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[1]-',')),
|
||||
(in.chunks[2] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[2]-',')),
|
||||
(in.chunks[3] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[3]-','))
|
||||
).to_bitmask();
|
||||
return { whitespace, op };
|
||||
}
|
||||
|
||||
really_inline bool is_ascii(simd8x64<uint8_t> input) {
|
||||
simd8<uint8_t> bits = input.reduce([&](simd8<uint8_t> a,simd8<uint8_t> b) { return a|b; });
|
||||
simd8<uint8_t> bits = (input.chunks[0] | input.chunks[1]) | (input.chunks[2] | input.chunks[3]);
|
||||
return !bits.any_bits_set_anywhere(0b10000000u);
|
||||
}
|
||||
|
||||
|
|
|
@ -292,43 +292,6 @@ namespace simd {
|
|||
each(3);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline void each(F const& each_chunk) const
|
||||
{
|
||||
each_chunk(this->chunks[0]);
|
||||
each_chunk(this->chunks[1]);
|
||||
each_chunk(this->chunks[2]);
|
||||
each_chunk(this->chunks[3]);
|
||||
}
|
||||
|
||||
template <typename F, typename R=bool>
|
||||
really_inline simd8x64<R> map(F const& map_chunk) const {
|
||||
return simd8x64<R>(
|
||||
map_chunk(this->chunks[0]),
|
||||
map_chunk(this->chunks[1]),
|
||||
map_chunk(this->chunks[2]),
|
||||
map_chunk(this->chunks[3])
|
||||
);
|
||||
}
|
||||
|
||||
template <typename F, typename R=bool>
|
||||
really_inline simd8x64<R> map(const simd8x64<uint8_t> b, F const& map_chunk) const {
|
||||
return simd8x64<R>(
|
||||
map_chunk(this->chunks[0], b.chunks[0]),
|
||||
map_chunk(this->chunks[1], b.chunks[1]),
|
||||
map_chunk(this->chunks[2], b.chunks[2]),
|
||||
map_chunk(this->chunks[3], b.chunks[3])
|
||||
);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
really_inline simd8<T> reduce(F const& reduce_pair) const {
|
||||
return reduce_pair(
|
||||
reduce_pair(this->chunks[0], this->chunks[1]),
|
||||
reduce_pair(this->chunks[2], this->chunks[3])
|
||||
);
|
||||
}
|
||||
|
||||
really_inline uint64_t to_bitmask() const {
|
||||
uint64_t r0 = uint32_t(this->chunks[0].to_bitmask());
|
||||
uint64_t r1 = this->chunks[1].to_bitmask();
|
||||
|
@ -339,17 +302,32 @@ namespace simd {
|
|||
|
||||
really_inline simd8x64<T> bit_or(const T m) const {
|
||||
const simd8<T> mask = simd8<T>::splat(m);
|
||||
return this->map( [&](simd8<T> a) { return a | mask; } );
|
||||
return simd8x64<T>(
|
||||
this->chunks[0] | mask,
|
||||
this->chunks[1] | mask,
|
||||
this->chunks[2] | mask,
|
||||
this->chunks[3] | mask
|
||||
);
|
||||
}
|
||||
|
||||
really_inline uint64_t eq(const T m) const {
|
||||
const simd8<T> mask = simd8<T>::splat(m);
|
||||
return this->map( [&](simd8<T> a) { return a == mask; } ).to_bitmask();
|
||||
return simd8x64<bool>(
|
||||
this->chunks[0] == mask,
|
||||
this->chunks[1] == mask,
|
||||
this->chunks[2] == mask,
|
||||
this->chunks[3] == mask
|
||||
).to_bitmask();
|
||||
}
|
||||
|
||||
really_inline uint64_t lteq(const T m) const {
|
||||
const simd8<T> mask = simd8<T>::splat(m);
|
||||
return this->map( [&](simd8<T> a) { return a <= mask; } ).to_bitmask();
|
||||
return simd8x64<bool>(
|
||||
this->chunks[0] <= mask,
|
||||
this->chunks[1] <= mask,
|
||||
this->chunks[2] <= mask,
|
||||
this->chunks[3] <= mask
|
||||
).to_bitmask();
|
||||
}
|
||||
}; // struct simd8x64<T>
|
||||
|
||||
|
|
Loading…
Reference in New Issue