Fix a bug in the new despacer.
This commit is contained in:
parent
f46d0a9f88
commit
460b971473
|
@ -109,8 +109,7 @@ int main(int argc, char *argv[]) {
|
|||
memcpy(buffer, p.first, p.second);
|
||||
|
||||
size_t outlength = copy_without_useless_spaces_avx((const uint8_t *)buffer, p.second,(uint8_t *) buffer);
|
||||
printf("these should match: %zu %zu \n", strlength, outlength);
|
||||
|
||||
std::cout << "despaced length is " << outlength << std::endl;
|
||||
|
||||
uint8_t * cbuffer = (uint8_t *)buffer;
|
||||
BEST_TIME(copy_without_useless_spaces_avx(cbuffer, p.second,cbuffer), outlength,
|
||||
|
|
|
@ -102,12 +102,6 @@ static inline size_t copy_without_useless_spaces_avx(const uint8_t *buf, size_t
|
|||
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
||||
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
|
||||
whitespace &= ~quote_mask;
|
||||
// surprisingly unhelpful:
|
||||
// if(whitespace == 0) {
|
||||
// _mm256_storeu_si256((__m256i *)out, input_lo);
|
||||
// _mm256_storeu_si256((__m256i *)(out + 32), input_hi);
|
||||
// out += 64;
|
||||
// } else {
|
||||
int mask1 = whitespace & 0xFFFF;
|
||||
int mask2 = (whitespace >> 16) & 0xFFFF;
|
||||
int mask3 = (whitespace >> 32) & 0xFFFF;
|
||||
|
@ -117,18 +111,17 @@ static inline size_t copy_without_useless_spaces_avx(const uint8_t *buf, size_t
|
|||
int pop3 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
|
||||
int pop4 = _popcnt64((~whitespace));
|
||||
__m256i vmask1 =
|
||||
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFFF),
|
||||
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFFF));
|
||||
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF),
|
||||
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF));
|
||||
__m256i vmask2 =
|
||||
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFFF),
|
||||
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFFF));
|
||||
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF),
|
||||
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF));
|
||||
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
|
||||
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
|
||||
_mm256_storeu2_m128i((__m128i *)(out + pop1), (__m128i *)out, result1);
|
||||
_mm256_storeu2_m128i((__m128i *)(out + pop3), (__m128i *)(out + pop2),
|
||||
result2);
|
||||
out += pop4;
|
||||
//}
|
||||
}
|
||||
}
|
||||
// we finish off the job... copying and pasting the code is not ideal here,
|
||||
|
@ -183,10 +176,8 @@ static inline size_t copy_without_useless_spaces_avx(const uint8_t *buf, size_t
|
|||
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
|
||||
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
||||
uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32));
|
||||
|
||||
whitespace &= ~quote_mask;
|
||||
|
||||
//
|
||||
if (len - idx < 64) {
|
||||
whitespace |= UINT64_C(0xFFFFFFFFFFFFFFFF) << (len - idx);
|
||||
}
|
||||
|
@ -194,7 +185,6 @@ static inline size_t copy_without_useless_spaces_avx(const uint8_t *buf, size_t
|
|||
int mask2 = (whitespace >> 16) & 0xFFFF;
|
||||
int mask3 = (whitespace >> 32) & 0xFFFF;
|
||||
int mask4 = (whitespace >> 48) & 0xFFFF;
|
||||
// dumpbits(whitespace,"whitespace");
|
||||
int pop1 = _popcnt64((~whitespace) & 0xFFFF);
|
||||
int pop2 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFF));
|
||||
int pop3 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
|
||||
|
|
Loading…
Reference in New Issue