Fixing clang under visual studio (#1028)
* Lots of fixes * Removing some lambdas * Removing some functional programming. Co-authored-by: Daniel Lemire <lemire@gmai.com>
This commit is contained in:
parent
a19f635a6a
commit
d0ce2f0b5a
|
@ -0,0 +1,25 @@
|
||||||
|
name: VS16-CLANG-CI
|
||||||
|
|
||||||
|
on: push
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
ci:
|
||||||
|
name: windows-vs16
|
||||||
|
runs-on: windows-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- name: 'Run CMake with VS16'
|
||||||
|
uses: lukka/run-cmake@v2
|
||||||
|
with:
|
||||||
|
cmakeListsOrSettingsJson: CMakeListsTxtAdvanced
|
||||||
|
cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt'
|
||||||
|
buildDirectory: "${{ github.workspace }}/../../_temp/windows"
|
||||||
|
cmakeBuildType: Release
|
||||||
|
buildWithCMake: true
|
||||||
|
cmakeGenerator: VS16Win64
|
||||||
|
cmakeAppendedArgs: -T ClangCL -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_BUILD_STATIC=ON
|
||||||
|
buildWithCMakeArgs: --config Release
|
||||||
|
|
||||||
|
- name: 'Run CTest'
|
||||||
|
run: ctest -C Release -E checkperf --output-on-failure
|
||||||
|
working-directory: "${{ github.workspace }}/../../_temp/windows"
|
|
@ -30,7 +30,6 @@
|
||||||
event_count allocate_count = collector.end(); \
|
event_count allocate_count = collector.end(); \
|
||||||
aggregate << allocate_count; \
|
aggregate << allocate_count; \
|
||||||
} \
|
} \
|
||||||
uint64_t S = size; \
|
|
||||||
if (collector.has_events()) { \
|
if (collector.has_events()) { \
|
||||||
printf("%7.3f", aggregate.best.cycles() / static_cast<double>(size)); \
|
printf("%7.3f", aggregate.best.cycles() / static_cast<double>(size)); \
|
||||||
if (verbose) { \
|
if (verbose) { \
|
||||||
|
@ -76,7 +75,6 @@
|
||||||
event_count allocate_count = collector.end(); \
|
event_count allocate_count = collector.end(); \
|
||||||
aggregate << allocate_count; \
|
aggregate << allocate_count; \
|
||||||
} \
|
} \
|
||||||
uint64_t S = size; \
|
|
||||||
if (collector.has_events()) { \
|
if (collector.has_events()) { \
|
||||||
printf("%7.3f", aggregate.best.cycles() / static_cast<double>(size)); \
|
printf("%7.3f", aggregate.best.cycles() / static_cast<double>(size)); \
|
||||||
if (verbose) { \
|
if (verbose) { \
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* auto-generated on Wed Jul 1 14:00:57 EDT 2020. Do not edit! */
|
/* auto-generated on Mon Jul 6 18:16:52 EDT 2020. Do not edit! */
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include "simdjson.h"
|
#include "simdjson.h"
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
||||||
/* auto-generated on Wed Jul 1 14:00:57 EDT 2020. Do not edit! */
|
/* auto-generated on Mon Jul 6 18:16:52 EDT 2020. Do not edit! */
|
||||||
/* begin file include/simdjson.h */
|
/* begin file include/simdjson.h */
|
||||||
#ifndef SIMDJSON_H
|
#ifndef SIMDJSON_H
|
||||||
#define SIMDJSON_H
|
#define SIMDJSON_H
|
||||||
|
|
|
@ -26,13 +26,24 @@ struct json_character_block {
|
||||||
};
|
};
|
||||||
|
|
||||||
really_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t> in) {
|
really_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t> in) {
|
||||||
auto v = in.map<uint8_t>([&](simd8<uint8_t> chunk) {
|
// Functional programming causes trouble with Visual Studio.
|
||||||
auto nib_lo = chunk & 0xf;
|
// Keeping this version in comments since it is much nicer:
|
||||||
auto nib_hi = chunk.shr<4>();
|
// auto v = in.map<uint8_t>([&](simd8<uint8_t> chunk) {
|
||||||
auto shuf_lo = nib_lo.lookup_16<uint8_t>(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
|
// auto nib_lo = chunk & 0xf;
|
||||||
auto shuf_hi = nib_hi.lookup_16<uint8_t>(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0);
|
// auto nib_hi = chunk.shr<4>();
|
||||||
return shuf_lo & shuf_hi;
|
// auto shuf_lo = nib_lo.lookup_16<uint8_t>(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
|
||||||
});
|
// auto shuf_hi = nib_hi.lookup_16<uint8_t>(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0);
|
||||||
|
// return shuf_lo & shuf_hi;
|
||||||
|
// });
|
||||||
|
const simd8<uint8_t> table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
|
||||||
|
const simd8<uint8_t> table2(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0);
|
||||||
|
|
||||||
|
auto v = simd8x64<uint8_t>(
|
||||||
|
(in.chunks[0] & 0xf).lookup_16(table1) & (in.chunks[0].shr<4>()).lookup_16(table2),
|
||||||
|
(in.chunks[1] & 0xf).lookup_16(table1) & (in.chunks[1].shr<4>()).lookup_16(table2),
|
||||||
|
(in.chunks[2] & 0xf).lookup_16(table1) & (in.chunks[2].shr<4>()).lookup_16(table2),
|
||||||
|
(in.chunks[3] & 0xf).lookup_16(table1) & (in.chunks[3].shr<4>()).lookup_16(table2)
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
// We compute whitespace and op separately. If the code later only uses one or the
|
// We compute whitespace and op separately. If the code later only uses one or the
|
||||||
|
@ -51,13 +62,25 @@ really_inline json_character_block json_character_block::classify(const simd::si
|
||||||
// there is a small untaken optimization opportunity here. We deliberately
|
// there is a small untaken optimization opportunity here. We deliberately
|
||||||
// do not pick it up.
|
// do not pick it up.
|
||||||
|
|
||||||
uint64_t op = v.map([&](simd8<uint8_t> _v) { return _v.any_bits_set(0x7); }).to_bitmask();
|
uint64_t op = simd8x64<bool>(
|
||||||
uint64_t whitespace = v.map([&](simd8<uint8_t> _v) { return _v.any_bits_set(0x18); }).to_bitmask();
|
v.chunks[0].any_bits_set(0x7),
|
||||||
|
v.chunks[1].any_bits_set(0x7),
|
||||||
|
v.chunks[2].any_bits_set(0x7),
|
||||||
|
v.chunks[3].any_bits_set(0x7)
|
||||||
|
).to_bitmask();
|
||||||
|
|
||||||
|
uint64_t whitespace = simd8x64<bool>(
|
||||||
|
v.chunks[0].any_bits_set(0x18),
|
||||||
|
v.chunks[1].any_bits_set(0x18),
|
||||||
|
v.chunks[2].any_bits_set(0x18),
|
||||||
|
v.chunks[3].any_bits_set(0x18)
|
||||||
|
).to_bitmask();
|
||||||
|
|
||||||
return { whitespace, op };
|
return { whitespace, op };
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline bool is_ascii(simd8x64<uint8_t> input) {
|
really_inline bool is_ascii(simd8x64<uint8_t> input) {
|
||||||
simd8<uint8_t> bits = input.reduce([&](simd8<uint8_t> a,simd8<uint8_t> b) { return a|b; });
|
simd8<uint8_t> bits = (input.chunks[0] | input.chunks[1]) | (input.chunks[2] | input.chunks[3]);
|
||||||
return bits.max() < 0b10000000u;
|
return bits.max() < 0b10000000u;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -442,43 +442,6 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
|
||||||
each(3);
|
each(3);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename F>
|
|
||||||
really_inline void each(F const& each_chunk) const
|
|
||||||
{
|
|
||||||
each_chunk(this->chunks[0]);
|
|
||||||
each_chunk(this->chunks[1]);
|
|
||||||
each_chunk(this->chunks[2]);
|
|
||||||
each_chunk(this->chunks[3]);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename R=bool, typename F>
|
|
||||||
really_inline simd8x64<R> map(F const& map_chunk) const {
|
|
||||||
return simd8x64<R>(
|
|
||||||
map_chunk(this->chunks[0]),
|
|
||||||
map_chunk(this->chunks[1]),
|
|
||||||
map_chunk(this->chunks[2]),
|
|
||||||
map_chunk(this->chunks[3])
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename R=bool, typename F>
|
|
||||||
really_inline simd8x64<R> map(const simd8x64<T> b, F const& map_chunk) const {
|
|
||||||
return simd8x64<R>(
|
|
||||||
map_chunk(this->chunks[0], b.chunks[0]),
|
|
||||||
map_chunk(this->chunks[1], b.chunks[1]),
|
|
||||||
map_chunk(this->chunks[2], b.chunks[2]),
|
|
||||||
map_chunk(this->chunks[3], b.chunks[3])
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename F>
|
|
||||||
really_inline simd8<T> reduce(F const& reduce_pair) const {
|
|
||||||
return reduce_pair(
|
|
||||||
reduce_pair(this->chunks[0], this->chunks[1]),
|
|
||||||
reduce_pair(this->chunks[2], this->chunks[3])
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
really_inline uint64_t to_bitmask() const {
|
really_inline uint64_t to_bitmask() const {
|
||||||
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
|
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
|
||||||
const uint8x16_t bit_mask = make_uint8x16_t(
|
const uint8x16_t bit_mask = make_uint8x16_t(
|
||||||
|
@ -501,17 +464,32 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
|
||||||
|
|
||||||
really_inline simd8x64<T> bit_or(const T m) const {
|
really_inline simd8x64<T> bit_or(const T m) const {
|
||||||
const simd8<T> mask = simd8<T>::splat(m);
|
const simd8<T> mask = simd8<T>::splat(m);
|
||||||
return this->map( [&](simd8<T> a) { return a | mask; } );
|
return simd8x64<T>(
|
||||||
|
this->chunks[0] | mask,
|
||||||
|
this->chunks[1] | mask,
|
||||||
|
this->chunks[2] | mask,
|
||||||
|
this->chunks[3] | mask
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline uint64_t eq(const T m) const {
|
really_inline uint64_t eq(const T m) const {
|
||||||
const simd8<T> mask = simd8<T>::splat(m);
|
const simd8<T> mask = simd8<T>::splat(m);
|
||||||
return this->map( [&](simd8<T> a) { return a == mask; } ).to_bitmask();
|
return simd8x64<bool>(
|
||||||
|
this->chunks[0] == mask,
|
||||||
|
this->chunks[1] == mask,
|
||||||
|
this->chunks[2] == mask,
|
||||||
|
this->chunks[3] == mask
|
||||||
|
).to_bitmask();
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline uint64_t lteq(const T m) const {
|
really_inline uint64_t lteq(const T m) const {
|
||||||
const simd8<T> mask = simd8<T>::splat(m);
|
const simd8<T> mask = simd8<T>::splat(m);
|
||||||
return this->map( [&](simd8<T> a) { return a <= mask; } ).to_bitmask();
|
return simd8x64<bool>(
|
||||||
|
this->chunks[0] <= mask,
|
||||||
|
this->chunks[1] <= mask,
|
||||||
|
this->chunks[2] <= mask,
|
||||||
|
this->chunks[3] <= mask
|
||||||
|
).to_bitmask();
|
||||||
}
|
}
|
||||||
}; // struct simd8x64<T>
|
}; // struct simd8x64<T>
|
||||||
|
|
||||||
|
|
|
@ -31,31 +31,7 @@ public:
|
||||||
really_inline size_t remaining_len() {
|
really_inline size_t remaining_len() {
|
||||||
return parser.len - *current_structural;
|
return parser.len - *current_structural;
|
||||||
}
|
}
|
||||||
template<typename F>
|
|
||||||
really_inline bool with_space_terminated_copy(const F& f) {
|
|
||||||
/**
|
|
||||||
* We need to make a copy to make sure that the string is space terminated.
|
|
||||||
* This is not about padding the input, which should already be padded up
|
|
||||||
* to len + SIMDJSON_PADDING. However, we have no control at this stage
|
|
||||||
* on how the padding was done. What if the input string was padded with nulls?
|
|
||||||
* It is quite common for an input string to have an extra null character (C string).
|
|
||||||
* We do not want to allow 9\0 (where \0 is the null character) inside a JSON
|
|
||||||
* document, but the string "9\0" by itself is fine. So we make a copy and
|
|
||||||
* pad the input with spaces when we know that there is just one input element.
|
|
||||||
* This copy is relatively expensive, but it will almost never be called in
|
|
||||||
* practice unless you are in the strange scenario where you have many JSON
|
|
||||||
* documents made of single atoms.
|
|
||||||
*/
|
|
||||||
char *copy = static_cast<char *>(malloc(parser.len + SIMDJSON_PADDING));
|
|
||||||
if (copy == nullptr) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
memcpy(copy, buf, parser.len);
|
|
||||||
memset(copy + parser.len, ' ', SIMDJSON_PADDING);
|
|
||||||
bool result = f(reinterpret_cast<const uint8_t*>(copy), *current_structural);
|
|
||||||
free(copy);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
really_inline bool past_end(uint32_t n_structural_indexes) {
|
really_inline bool past_end(uint32_t n_structural_indexes) {
|
||||||
return current_structural >= &parser.structural_indexes[n_structural_indexes];
|
return current_structural >= &parser.structural_indexes[n_structural_indexes];
|
||||||
}
|
}
|
||||||
|
|
|
@ -169,6 +169,31 @@ struct structural_parser : structural_iterator {
|
||||||
return parse_number(current(), found_minus);
|
return parse_number(current(), found_minus);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
really_inline bool parse_number_with_space_terminated_copy(const bool is_negative) {
|
||||||
|
/**
|
||||||
|
* We need to make a copy to make sure that the string is space terminated.
|
||||||
|
* This is not about padding the input, which should already be padded up
|
||||||
|
* to len + SIMDJSON_PADDING. However, we have no control at this stage
|
||||||
|
* on how the padding was done. What if the input string was padded with nulls?
|
||||||
|
* It is quite common for an input string to have an extra null character (C string).
|
||||||
|
* We do not want to allow 9\0 (where \0 is the null character) inside a JSON
|
||||||
|
* document, but the string "9\0" by itself is fine. So we make a copy and
|
||||||
|
* pad the input with spaces when we know that there is just one input element.
|
||||||
|
* This copy is relatively expensive, but it will almost never be called in
|
||||||
|
* practice unless you are in the strange scenario where you have many JSON
|
||||||
|
* documents made of single atoms.
|
||||||
|
*/
|
||||||
|
uint8_t *copy = static_cast<uint8_t *>(malloc(parser.len + SIMDJSON_PADDING));
|
||||||
|
if (copy == nullptr) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
memcpy(copy, buf, parser.len);
|
||||||
|
memset(copy + parser.len, ' ', SIMDJSON_PADDING);
|
||||||
|
size_t idx = *current_structural;
|
||||||
|
bool result = parse_number(&copy[idx], is_negative); // parse_number does not throw
|
||||||
|
free(copy);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
WARN_UNUSED really_inline ret_address_t parse_value(const unified_machine_addresses &addresses, ret_address_t continue_state) {
|
WARN_UNUSED really_inline ret_address_t parse_value(const unified_machine_addresses &addresses, ret_address_t continue_state) {
|
||||||
switch (advance_char()) {
|
switch (advance_char()) {
|
||||||
case '"':
|
case '"':
|
||||||
|
@ -306,6 +331,7 @@ struct structural_parser : structural_iterator {
|
||||||
#undef FAIL_IF
|
#undef FAIL_IF
|
||||||
#define FAIL_IF(EXPR) { if (EXPR) { goto error; } }
|
#define FAIL_IF(EXPR) { if (EXPR) { goto error; } }
|
||||||
|
|
||||||
|
|
||||||
template<bool STREAMING>
|
template<bool STREAMING>
|
||||||
WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_parser, dom::document &doc) noexcept {
|
WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_parser, dom::document &doc) noexcept {
|
||||||
dom_parser.doc = &doc;
|
dom_parser.doc = &doc;
|
||||||
|
@ -351,18 +377,16 @@ WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_p
|
||||||
goto finish;
|
goto finish;
|
||||||
case '0': case '1': case '2': case '3': case '4':
|
case '0': case '1': case '2': case '3': case '4':
|
||||||
case '5': case '6': case '7': case '8': case '9':
|
case '5': case '6': case '7': case '8': case '9':
|
||||||
FAIL_IF(
|
// Next line used to be an interesting functional programming exercise with
|
||||||
parser.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) {
|
// a lambda that gets passed to another function via a closure. This would confuse the
|
||||||
return parser.parse_number(&copy[idx], false);
|
// clangcl compiler under Visual Studio 2019 (recent release).
|
||||||
})
|
{ if(parser.parse_number_with_space_terminated_copy(false)) { goto error; }}
|
||||||
);
|
|
||||||
goto finish;
|
goto finish;
|
||||||
case '-':
|
case '-':
|
||||||
FAIL_IF(
|
// Next line used to be an interesting functional programming exercise with
|
||||||
parser.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) {
|
// a lambda that gets passed to another function via a closure. This would confuse the
|
||||||
return parser.parse_number(&copy[idx], true);
|
// clangcl compiler under Visual Studio 2019 (recent release).
|
||||||
})
|
{ if(parser.parse_number_with_space_terminated_copy(true)) { goto error; }}
|
||||||
);
|
|
||||||
goto finish;
|
goto finish;
|
||||||
default:
|
default:
|
||||||
parser.log_error("Document starts with a non-value character");
|
parser.log_error("Document starts with a non-value character");
|
||||||
|
|
|
@ -37,19 +37,20 @@ really_inline json_character_block json_character_block::classify(const simd::si
|
||||||
// hope that useless computations will be omitted. This is namely the case when
|
// hope that useless computations will be omitted. This is namely the case when
|
||||||
// minifying (we only need whitespace).
|
// minifying (we only need whitespace).
|
||||||
|
|
||||||
uint64_t whitespace = in.map([&](simd8<uint8_t> _in) {
|
uint64_t whitespace = simd8x64<bool>(
|
||||||
return _in == simd8<uint8_t>(_mm256_shuffle_epi8(whitespace_table, _in));
|
in.chunks[0] == simd8<uint8_t>(_mm256_shuffle_epi8(whitespace_table, in.chunks[0])),
|
||||||
}).to_bitmask();
|
in.chunks[1] == simd8<uint8_t>(_mm256_shuffle_epi8(whitespace_table, in.chunks[1]))
|
||||||
|
).to_bitmask();
|
||||||
uint64_t op = in.map([&](simd8<uint8_t> _in) {
|
|
||||||
// | 32 handles the fact that { } and [ ] are exactly 32 bytes apart
|
uint64_t op = simd8x64<bool>(
|
||||||
return (_in | 32) == simd8<uint8_t>(_mm256_shuffle_epi8(op_table, _in-','));
|
(in.chunks[0] | 32) == simd8<uint8_t>(_mm256_shuffle_epi8(op_table, in.chunks[0]-',')),
|
||||||
}).to_bitmask();
|
(in.chunks[1] | 32) == simd8<uint8_t>(_mm256_shuffle_epi8(op_table, in.chunks[1]-','))
|
||||||
|
).to_bitmask();
|
||||||
return { whitespace, op };
|
return { whitespace, op };
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline bool is_ascii(simd8x64<uint8_t> input) {
|
really_inline bool is_ascii(simd8x64<uint8_t> input) {
|
||||||
simd8<uint8_t> bits = input.reduce([&](simd8<uint8_t> a,simd8<uint8_t> b) { return a|b; });
|
simd8<uint8_t> bits = (input.chunks[0] | input.chunks[1]);
|
||||||
return !bits.any_bits_set_anywhere(0b10000000u);
|
return !bits.any_bits_set_anywhere(0b10000000u);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -316,36 +316,6 @@ namespace simd {
|
||||||
this->chunks[1].store(ptr+sizeof(simd8<T>)*1);
|
this->chunks[1].store(ptr+sizeof(simd8<T>)*1);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename F>
|
|
||||||
really_inline void each(F const& each_chunk) const
|
|
||||||
{
|
|
||||||
each_chunk(this->chunks[0]);
|
|
||||||
each_chunk(this->chunks[1]);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename R=bool, typename F>
|
|
||||||
really_inline simd8x64<R> map(F const& map_chunk) const {
|
|
||||||
return simd8x64<R>(
|
|
||||||
map_chunk(this->chunks[0]),
|
|
||||||
map_chunk(this->chunks[1])
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <typename R=bool, typename F>
|
|
||||||
really_inline simd8x64<R> map(const simd8x64<uint8_t> b, F const& map_chunk) const {
|
|
||||||
return simd8x64<R>(
|
|
||||||
map_chunk(this->chunks[0], b.chunks[0]),
|
|
||||||
map_chunk(this->chunks[1], b.chunks[1])
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename F>
|
|
||||||
really_inline simd8<T> reduce(F const& reduce_pair) const {
|
|
||||||
return reduce_pair(this->chunks[0], this->chunks[1]);
|
|
||||||
}
|
|
||||||
|
|
||||||
really_inline uint64_t to_bitmask() const {
|
really_inline uint64_t to_bitmask() const {
|
||||||
uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask());
|
uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask());
|
||||||
uint64_t r_hi = this->chunks[1].to_bitmask();
|
uint64_t r_hi = this->chunks[1].to_bitmask();
|
||||||
|
@ -354,17 +324,26 @@ namespace simd {
|
||||||
|
|
||||||
really_inline simd8x64<T> bit_or(const T m) const {
|
really_inline simd8x64<T> bit_or(const T m) const {
|
||||||
const simd8<T> mask = simd8<T>::splat(m);
|
const simd8<T> mask = simd8<T>::splat(m);
|
||||||
return this->map( [&](simd8<T> a) { return a | mask; } );
|
return simd8x64<T>(
|
||||||
|
this->chunks[0] | mask,
|
||||||
|
this->chunks[1] | mask
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline uint64_t eq(const T m) const {
|
really_inline uint64_t eq(const T m) const {
|
||||||
const simd8<T> mask = simd8<T>::splat(m);
|
const simd8<T> mask = simd8<T>::splat(m);
|
||||||
return this->map( [&](simd8<T> a) { return a == mask; } ).to_bitmask();
|
return simd8x64<bool>(
|
||||||
|
this->chunks[0] == mask,
|
||||||
|
this->chunks[1] == mask
|
||||||
|
).to_bitmask();
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline uint64_t lteq(const T m) const {
|
really_inline uint64_t lteq(const T m) const {
|
||||||
const simd8<T> mask = simd8<T>::splat(m);
|
const simd8<T> mask = simd8<T>::splat(m);
|
||||||
return this->map( [&](simd8<T> a) { return a <= mask; } ).to_bitmask();
|
return simd8x64<bool>(
|
||||||
|
this->chunks[0] <= mask,
|
||||||
|
this->chunks[1] <= mask
|
||||||
|
).to_bitmask();
|
||||||
}
|
}
|
||||||
}; // struct simd8x64<T>
|
}; // struct simd8x64<T>
|
||||||
|
|
||||||
|
|
|
@ -38,19 +38,25 @@ really_inline json_character_block json_character_block::classify(const simd::si
|
||||||
// hope that useless computations will be omitted. This is namely the case when
|
// hope that useless computations will be omitted. This is namely the case when
|
||||||
// minifying (we only need whitespace).
|
// minifying (we only need whitespace).
|
||||||
|
|
||||||
uint64_t whitespace = in.map([&](simd8<uint8_t> _in) {
|
uint64_t whitespace = simd8x64<bool>(
|
||||||
return _in == simd8<uint8_t>(_mm_shuffle_epi8(whitespace_table, _in));
|
in.chunks[0] == simd8<uint8_t>(_mm_shuffle_epi8(whitespace_table, in.chunks[0])),
|
||||||
}).to_bitmask();
|
in.chunks[1] == simd8<uint8_t>(_mm_shuffle_epi8(whitespace_table, in.chunks[1])),
|
||||||
|
in.chunks[2] == simd8<uint8_t>(_mm_shuffle_epi8(whitespace_table, in.chunks[2])),
|
||||||
|
in.chunks[3] == simd8<uint8_t>(_mm_shuffle_epi8(whitespace_table, in.chunks[3]))
|
||||||
|
).to_bitmask();
|
||||||
|
|
||||||
uint64_t op = in.map([&](simd8<uint8_t> _in) {
|
// | 32 handles the fact that { } and [ ] are exactly 32 bytes apart
|
||||||
// | 32 handles the fact that { } and [ ] are exactly 32 bytes apart
|
uint64_t op = simd8x64<bool>(
|
||||||
return (_in | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, _in-','));
|
(in.chunks[0] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[0]-',')),
|
||||||
}).to_bitmask();
|
(in.chunks[1] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[1]-',')),
|
||||||
|
(in.chunks[2] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[2]-',')),
|
||||||
|
(in.chunks[3] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[3]-','))
|
||||||
|
).to_bitmask();
|
||||||
return { whitespace, op };
|
return { whitespace, op };
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline bool is_ascii(simd8x64<uint8_t> input) {
|
really_inline bool is_ascii(simd8x64<uint8_t> input) {
|
||||||
simd8<uint8_t> bits = input.reduce([&](simd8<uint8_t> a,simd8<uint8_t> b) { return a|b; });
|
simd8<uint8_t> bits = (input.chunks[0] | input.chunks[1]) | (input.chunks[2] | input.chunks[3]);
|
||||||
return !bits.any_bits_set_anywhere(0b10000000u);
|
return !bits.any_bits_set_anywhere(0b10000000u);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -292,43 +292,6 @@ namespace simd {
|
||||||
each(3);
|
each(3);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename F>
|
|
||||||
really_inline void each(F const& each_chunk) const
|
|
||||||
{
|
|
||||||
each_chunk(this->chunks[0]);
|
|
||||||
each_chunk(this->chunks[1]);
|
|
||||||
each_chunk(this->chunks[2]);
|
|
||||||
each_chunk(this->chunks[3]);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename F, typename R=bool>
|
|
||||||
really_inline simd8x64<R> map(F const& map_chunk) const {
|
|
||||||
return simd8x64<R>(
|
|
||||||
map_chunk(this->chunks[0]),
|
|
||||||
map_chunk(this->chunks[1]),
|
|
||||||
map_chunk(this->chunks[2]),
|
|
||||||
map_chunk(this->chunks[3])
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename F, typename R=bool>
|
|
||||||
really_inline simd8x64<R> map(const simd8x64<uint8_t> b, F const& map_chunk) const {
|
|
||||||
return simd8x64<R>(
|
|
||||||
map_chunk(this->chunks[0], b.chunks[0]),
|
|
||||||
map_chunk(this->chunks[1], b.chunks[1]),
|
|
||||||
map_chunk(this->chunks[2], b.chunks[2]),
|
|
||||||
map_chunk(this->chunks[3], b.chunks[3])
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename F>
|
|
||||||
really_inline simd8<T> reduce(F const& reduce_pair) const {
|
|
||||||
return reduce_pair(
|
|
||||||
reduce_pair(this->chunks[0], this->chunks[1]),
|
|
||||||
reduce_pair(this->chunks[2], this->chunks[3])
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
really_inline uint64_t to_bitmask() const {
|
really_inline uint64_t to_bitmask() const {
|
||||||
uint64_t r0 = uint32_t(this->chunks[0].to_bitmask());
|
uint64_t r0 = uint32_t(this->chunks[0].to_bitmask());
|
||||||
uint64_t r1 = this->chunks[1].to_bitmask();
|
uint64_t r1 = this->chunks[1].to_bitmask();
|
||||||
|
@ -339,17 +302,32 @@ namespace simd {
|
||||||
|
|
||||||
really_inline simd8x64<T> bit_or(const T m) const {
|
really_inline simd8x64<T> bit_or(const T m) const {
|
||||||
const simd8<T> mask = simd8<T>::splat(m);
|
const simd8<T> mask = simd8<T>::splat(m);
|
||||||
return this->map( [&](simd8<T> a) { return a | mask; } );
|
return simd8x64<T>(
|
||||||
|
this->chunks[0] | mask,
|
||||||
|
this->chunks[1] | mask,
|
||||||
|
this->chunks[2] | mask,
|
||||||
|
this->chunks[3] | mask
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline uint64_t eq(const T m) const {
|
really_inline uint64_t eq(const T m) const {
|
||||||
const simd8<T> mask = simd8<T>::splat(m);
|
const simd8<T> mask = simd8<T>::splat(m);
|
||||||
return this->map( [&](simd8<T> a) { return a == mask; } ).to_bitmask();
|
return simd8x64<bool>(
|
||||||
|
this->chunks[0] == mask,
|
||||||
|
this->chunks[1] == mask,
|
||||||
|
this->chunks[2] == mask,
|
||||||
|
this->chunks[3] == mask
|
||||||
|
).to_bitmask();
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline uint64_t lteq(const T m) const {
|
really_inline uint64_t lteq(const T m) const {
|
||||||
const simd8<T> mask = simd8<T>::splat(m);
|
const simd8<T> mask = simd8<T>::splat(m);
|
||||||
return this->map( [&](simd8<T> a) { return a <= mask; } ).to_bitmask();
|
return simd8x64<bool>(
|
||||||
|
this->chunks[0] <= mask,
|
||||||
|
this->chunks[1] <= mask,
|
||||||
|
this->chunks[2] <= mask,
|
||||||
|
this->chunks[3] <= mask
|
||||||
|
).to_bitmask();
|
||||||
}
|
}
|
||||||
}; // struct simd8x64<T>
|
}; // struct simd8x64<T>
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue