Removing old code.
This commit is contained in:
parent
ef0d14c35c
commit
262a68ba8f
|
@ -1,12 +0,0 @@
|
||||||
# Headers the benchmark is rebuilt for. Only regular files are listed: a bare
# directory prerequisite would force a rebuild every time a file is created or
# removed inside it, because that changes the directory's timestamp.
HEADERS:=include/avxprocessing.h include/benchmark.h include/common_defs.h include/jsonstruct.h include/util.h

# RapidJSON headers, provided by a git submodule.
RAPIDJSON_INCLUDE:=../dependencies/rapidjson/include

# The submodule directory is an order-only prerequisite (after the `|`): it
# must exist before compiling, but its timestamp must not trigger rebuilds.
bench: benchmarks/bench.cpp $(HEADERS) | $(RAPIDJSON_INCLUDE)
	$(CXX) -std=c++11 -O3 -o $@ benchmarks/bench.cpp -I$(RAPIDJSON_INCLUDE) -Iinclude -march=native -lm -Wall -Wextra -Wno-narrowing

# Fetch the submodule on demand when the include directory is missing.
$(RAPIDJSON_INCLUDE):
	git submodule update --init --recursive

# `clean` names an action, not a file; keep it working even if a file called
# "clean" ever appears in this directory.
.PHONY: clean
clean:
	rm -f bench
|
|
|
@ -1,3 +0,0 @@
|
||||||
```
|
|
||||||
./run.sh
|
|
||||||
```
|
|
|
@ -1,129 +0,0 @@
|
||||||
#include "avxprocessing.h"
|
|
||||||
|
|
||||||
#include "avxminifier.h"
|
|
||||||
#include "scalarminifier.h"
|
|
||||||
|
|
||||||
#include "benchmark.h"
|
|
||||||
#include "jsonstruct.h"
|
|
||||||
// #define RAPIDJSON_SSE2 // bad
|
|
||||||
// #define RAPIDJSON_SSE42 // bad
|
|
||||||
#include "rapidjson/document.h"
|
|
||||||
#include "rapidjson/reader.h" // you have to check in the submodule
|
|
||||||
#include "rapidjson/stringbuffer.h"
|
|
||||||
#include "rapidjson/writer.h"
|
|
||||||
#include "util.h"
|
|
||||||
|
|
||||||
// colorfuldisplay(ParsedJson & pj, const u8 * buf)
|
|
||||||
|
|
||||||
using namespace rapidjson;
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
std::string rapidstringmeInsitu(char * json) {
|
|
||||||
Document d;
|
|
||||||
d.ParseInsitu(json);
|
|
||||||
if(d.HasParseError()) {
|
|
||||||
std::cerr << "problem!" << std::endl;
|
|
||||||
return "";// should do something
|
|
||||||
}
|
|
||||||
StringBuffer buffer;
|
|
||||||
Writer<StringBuffer> writer(buffer);
|
|
||||||
d.Accept(writer);
|
|
||||||
return buffer.GetString();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
std::string rapidstringme(char * json) {
|
|
||||||
Document d;
|
|
||||||
d.Parse(json);
|
|
||||||
if(d.HasParseError()) {
|
|
||||||
std::cerr << "problem!" << std::endl;
|
|
||||||
return "";// should do something
|
|
||||||
}
|
|
||||||
StringBuffer buffer;
|
|
||||||
Writer<StringBuffer> writer(buffer);
|
|
||||||
d.Accept(writer);
|
|
||||||
return buffer.GetString();
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
|
||||||
if (argc < 2) {
|
|
||||||
cerr << "Usage: " << argv[0] << " <jsonfile>\n";
|
|
||||||
cerr << "Or " << argv[0] << " -v <jsonfile>\n";
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
bool verbose = false;
|
|
||||||
if (argc > 2) {
|
|
||||||
if (strcmp(argv[1], "-v"))
|
|
||||||
verbose = true;
|
|
||||||
}
|
|
||||||
pair<u8 *, size_t> p = get_corpus(argv[argc - 1]);
|
|
||||||
ParsedJson pj;
|
|
||||||
std::cout << "Input has ";
|
|
||||||
if (p.second > 1024 * 1024)
|
|
||||||
std::cout << p.second / (1024 * 1024) << " MB ";
|
|
||||||
else if (p.second > 1024)
|
|
||||||
std::cout << p.second / 1024 << " KB ";
|
|
||||||
else
|
|
||||||
std::cout << p.second << " B ";
|
|
||||||
std::cout << std::endl;
|
|
||||||
|
|
||||||
if (posix_memalign((void **)&pj.structurals, 8,
|
|
||||||
ROUNDUP_N(p.second, 64) / 8)) {
|
|
||||||
throw "Allocation failed";
|
|
||||||
};
|
|
||||||
|
|
||||||
pj.n_structural_indexes = 0;
|
|
||||||
// we have potentially 1 structure per byte of input
|
|
||||||
// as well as a dummy structure and a root structure
|
|
||||||
// we also potentially write up to 7 iterations beyond
|
|
||||||
// in our 'cheesy flatten', so make some worst-case
|
|
||||||
// sapce for that too
|
|
||||||
u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7;
|
|
||||||
pj.structural_indexes = new u32[max_structures];
|
|
||||||
pj.nodes = new JsonNode[max_structures];
|
|
||||||
|
|
||||||
|
|
||||||
int repeat = 10;
|
|
||||||
int volume = p.second;
|
|
||||||
BEST_TIME(avx_json_parse(p.first, p.second, pj), true , , repeat, volume,
|
|
||||||
true);
|
|
||||||
|
|
||||||
rapidjson::Document d;
|
|
||||||
|
|
||||||
char * buffer = (char *) malloc(p.second + 1);
|
|
||||||
memcpy(buffer, p.first, p.second);
|
|
||||||
buffer[p.second] = '\0';
|
|
||||||
|
|
||||||
BEST_TIME(d.Parse<kParseValidateEncodingFlag>((const char *)buffer).HasParseError(), false,
|
|
||||||
memcpy(buffer, p.first, p.second), repeat, volume, true);
|
|
||||||
BEST_TIME(d.Parse((const char *)buffer).HasParseError(), false,
|
|
||||||
memcpy(buffer, p.first, p.second), repeat, volume, true);
|
|
||||||
BEST_TIME(d.ParseInsitu(buffer).HasParseError(), false,
|
|
||||||
memcpy(buffer, p.first, p.second), repeat, volume, true);
|
|
||||||
size_t strlength = rapidstringme((char*) p.first).size();
|
|
||||||
std::cout << "input length is "<< p.second << " stringified length is " << strlength << std::endl;
|
|
||||||
BEST_TIME_NOCHECK(rapidstringme((char*) p.first), , repeat, volume,
|
|
||||||
true);
|
|
||||||
BEST_TIME_NOCHECK(rapidstringmeInsitu((char*) buffer), memcpy(buffer, p.first, p.second) , repeat, volume,
|
|
||||||
true);
|
|
||||||
memcpy(buffer, p.first, p.second);
|
|
||||||
|
|
||||||
size_t outlength = copy_without_useless_spaces_avx((const uint8_t *)buffer, p.second,(uint8_t *) buffer);
|
|
||||||
std::cout << "despaced length is " << outlength << std::endl;
|
|
||||||
|
|
||||||
uint8_t * cbuffer = (uint8_t *)buffer;
|
|
||||||
BEST_TIME(copy_without_useless_spaces_avx(cbuffer, p.second,cbuffer), outlength,
|
|
||||||
memcpy(buffer, p.first, p.second), repeat, volume, true);
|
|
||||||
|
|
||||||
BEST_TIME(scalar_despace(cbuffer, p.second,cbuffer), outlength,
|
|
||||||
memcpy(buffer, p.first, p.second), repeat, volume, true);
|
|
||||||
printf("parsing with RapidJSON after despacing:\n");
|
|
||||||
BEST_TIME(d.ParseInsitu(buffer).HasParseError(),false, cbuffer[copy_without_useless_spaces_avx((const uint8_t *)p.first, p.second,cbuffer)]='\0' , repeat, volume,
|
|
||||||
true);
|
|
||||||
|
|
||||||
free(buffer);
|
|
||||||
delete[] pj.structural_indexes;
|
|
||||||
delete[] pj.nodes;
|
|
||||||
free(p.first);
|
|
||||||
free(pj.structurals);
|
|
||||||
}
|
|
|
@ -1,206 +0,0 @@
|
||||||
#include <stdint.h>
|
|
||||||
#include <x86intrin.h>
|
|
||||||
|
|
||||||
#include "simdprune_tables.h"
|
|
||||||
|
|
||||||
#ifndef __clang__
|
|
||||||
static inline __m256i _mm256_loadu2_m128i(__m128i const *__addr_hi,
|
|
||||||
__m128i const *__addr_lo) {
|
|
||||||
__m256i __v256 = _mm256_castsi128_si256(_mm_loadu_si128(__addr_lo));
|
|
||||||
return _mm256_insertf128_si256(__v256, _mm_loadu_si128(__addr_hi), 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo,
|
|
||||||
__m256i __a) {
|
|
||||||
__m128i __v128;
|
|
||||||
|
|
||||||
__v128 = _mm256_castsi256_si128(__a);
|
|
||||||
_mm_storeu_si128(__addr_lo, __v128);
|
|
||||||
__v128 = _mm256_extractf128_si256(__a, 1);
|
|
||||||
_mm_storeu_si128(__addr_hi, __v128);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Byte-wise equality test of a 64-byte block against `mask`.
// input_lo holds bytes 0..31 and input_hi bytes 32..63 of the block.
// Returns a 64-bit map in which bit i is set when byte i equals the
// corresponding byte of `mask`.
static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,
                                            __m256i mask) {
  // movemask yields a signed int; go through uint32_t so no sign bits leak
  // into the upper half of the combined result.
  const uint32_t lo_bits =
      (uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(input_lo, mask));
  const uint32_t hi_bits =
      (uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(input_hi, mask));
  return (uint64_t)lo_bits | ((uint64_t)hi_bits << 32);
}
|
|
||||||
|
|
||||||
// take input from buf and remove useless whitespace, input and output can be
|
|
||||||
// the same
|
|
||||||
static inline size_t copy_without_useless_spaces_avx(const uint8_t *buf, size_t len,
|
|
||||||
uint8_t *out) {
|
|
||||||
// Useful constant masks
|
|
||||||
const uint64_t even_bits = 0x5555555555555555ULL;
|
|
||||||
const uint64_t odd_bits = ~even_bits;
|
|
||||||
uint8_t *initout(out);
|
|
||||||
uint64_t prev_iter_ends_odd_backslash =
|
|
||||||
0ULL; // either 0 or 1, but a 64-bit value
|
|
||||||
uint64_t prev_iter_inside_quote = 0ULL; // either all zeros or all ones
|
|
||||||
size_t idx = 0;
|
|
||||||
if (len >= 64) {
|
|
||||||
size_t avxlen = len - 63;
|
|
||||||
|
|
||||||
for (; idx < avxlen; idx += 64) {
|
|
||||||
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buf + idx + 0));
|
|
||||||
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buf + idx + 32));
|
|
||||||
uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi,
|
|
||||||
_mm256_set1_epi8('\\'));
|
|
||||||
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
|
|
||||||
uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
|
|
||||||
uint64_t even_starts = start_edges & even_start_mask;
|
|
||||||
uint64_t odd_starts = start_edges & ~even_start_mask;
|
|
||||||
uint64_t even_carries = bs_bits + even_starts;
|
|
||||||
uint64_t odd_carries;
|
|
||||||
bool iter_ends_odd_backslash = __builtin_uaddll_overflow(
|
|
||||||
bs_bits, odd_starts, (unsigned long long *)&odd_carries);
|
|
||||||
odd_carries |= prev_iter_ends_odd_backslash;
|
|
||||||
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
|
|
||||||
uint64_t even_carry_ends = even_carries & ~bs_bits;
|
|
||||||
uint64_t odd_carry_ends = odd_carries & ~bs_bits;
|
|
||||||
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
|
|
||||||
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
|
|
||||||
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
|
|
||||||
uint64_t quote_bits = cmp_mask_against_input_mini(input_lo, input_hi,
|
|
||||||
_mm256_set1_epi8('"'));
|
|
||||||
quote_bits = quote_bits & ~odd_ends;
|
|
||||||
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
|
||||||
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
|
||||||
quote_mask ^= prev_iter_inside_quote;
|
|
||||||
prev_iter_inside_quote = (uint64_t)((s64)quote_mask >> 63);
|
|
||||||
const __m256i low_nibble_mask = _mm256_setr_epi8(
|
|
||||||
// 0 9 a b c d
|
|
||||||
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
|
|
||||||
const __m256i high_nibble_mask = _mm256_setr_epi8(
|
|
||||||
// 0 2 3 5 7
|
|
||||||
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0,
|
|
||||||
1, 0, 0, 0, 3, 2, 1, 0, 0);
|
|
||||||
__m256i whitespace_shufti_mask = _mm256_set1_epi8(0x18);
|
|
||||||
__m256i v_lo = _mm256_and_si256(
|
|
||||||
_mm256_shuffle_epi8(low_nibble_mask, input_lo),
|
|
||||||
_mm256_shuffle_epi8(high_nibble_mask,
|
|
||||||
_mm256_and_si256(_mm256_srli_epi32(input_lo, 4),
|
|
||||||
_mm256_set1_epi8(0x7f))));
|
|
||||||
|
|
||||||
__m256i v_hi = _mm256_and_si256(
|
|
||||||
_mm256_shuffle_epi8(low_nibble_mask, input_hi),
|
|
||||||
_mm256_shuffle_epi8(high_nibble_mask,
|
|
||||||
_mm256_and_si256(_mm256_srli_epi32(input_hi, 4),
|
|
||||||
_mm256_set1_epi8(0x7f))));
|
|
||||||
__m256i tmp_ws_lo = _mm256_cmpeq_epi8(
|
|
||||||
_mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0));
|
|
||||||
__m256i tmp_ws_hi = _mm256_cmpeq_epi8(
|
|
||||||
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
|
|
||||||
|
|
||||||
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
|
|
||||||
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
|
||||||
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
|
|
||||||
whitespace &= ~quote_mask;
|
|
||||||
int mask1 = whitespace & 0xFFFF;
|
|
||||||
int mask2 = (whitespace >> 16) & 0xFFFF;
|
|
||||||
int mask3 = (whitespace >> 32) & 0xFFFF;
|
|
||||||
int mask4 = (whitespace >> 48) & 0xFFFF;
|
|
||||||
int pop1 = _popcnt64((~whitespace) & 0xFFFF);
|
|
||||||
int pop2 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFF));
|
|
||||||
int pop3 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
|
|
||||||
int pop4 = _popcnt64((~whitespace));
|
|
||||||
__m256i vmask1 =
|
|
||||||
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF),
|
|
||||||
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF));
|
|
||||||
__m256i vmask2 =
|
|
||||||
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF),
|
|
||||||
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF));
|
|
||||||
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
|
|
||||||
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
|
|
||||||
_mm256_storeu2_m128i((__m128i *)(out + pop1), (__m128i *)out, result1);
|
|
||||||
_mm256_storeu2_m128i((__m128i *)(out + pop3), (__m128i *)(out + pop2),
|
|
||||||
result2);
|
|
||||||
out += pop4;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// we finish off the job... copying and pasting the code is not ideal here,
|
|
||||||
// but it gets the job done.
|
|
||||||
if (idx < len) {
|
|
||||||
uint8_t buffer[64];
|
|
||||||
memset(buffer, 0, 64);
|
|
||||||
memcpy(buffer, buf + idx, len - idx);
|
|
||||||
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buffer));
|
|
||||||
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buffer + 32));
|
|
||||||
uint64_t bs_bits =
|
|
||||||
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\'));
|
|
||||||
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
|
|
||||||
uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
|
|
||||||
uint64_t even_starts = start_edges & even_start_mask;
|
|
||||||
uint64_t odd_starts = start_edges & ~even_start_mask;
|
|
||||||
uint64_t even_carries = bs_bits + even_starts;
|
|
||||||
uint64_t odd_carries;
|
|
||||||
bool iter_ends_odd_backslash = __builtin_uaddll_overflow(
|
|
||||||
bs_bits, odd_starts, (unsigned long long *)&odd_carries);
|
|
||||||
odd_carries |= prev_iter_ends_odd_backslash;
|
|
||||||
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
|
|
||||||
uint64_t even_carry_ends = even_carries & ~bs_bits;
|
|
||||||
uint64_t odd_carry_ends = odd_carries & ~bs_bits;
|
|
||||||
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
|
|
||||||
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
|
|
||||||
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
|
|
||||||
uint64_t quote_bits =
|
|
||||||
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('"'));
|
|
||||||
quote_bits = quote_bits & ~odd_ends;
|
|
||||||
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
|
||||||
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
|
||||||
quote_mask ^= prev_iter_inside_quote;
|
|
||||||
prev_iter_inside_quote = (uint64_t)((s64)quote_mask >> 63);
|
|
||||||
|
|
||||||
__m256i mask_20 = _mm256_set1_epi8(0x20); // c==32
|
|
||||||
__m256i mask_70 =
|
|
||||||
_mm256_set1_epi8(0x70); // adding 0x70 does not check low 4-bits
|
|
||||||
// but moves any value >= 16 above 128
|
|
||||||
|
|
||||||
__m256i lut_cntrl = _mm256_setr_epi8(
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00,
|
|
||||||
0x00, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x00, 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00);
|
|
||||||
|
|
||||||
__m256i tmp_ws_lo = _mm256_or_si256(
|
|
||||||
_mm256_cmpeq_epi8(mask_20, input_lo),
|
|
||||||
_mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_lo)));
|
|
||||||
__m256i tmp_ws_hi = _mm256_or_si256(
|
|
||||||
_mm256_cmpeq_epi8(mask_20, input_hi),
|
|
||||||
_mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_hi)));
|
|
||||||
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
|
|
||||||
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
|
||||||
uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32));
|
|
||||||
whitespace &= ~quote_mask;
|
|
||||||
|
|
||||||
if (len - idx < 64) {
|
|
||||||
whitespace |= UINT64_C(0xFFFFFFFFFFFFFFFF) << (len - idx);
|
|
||||||
}
|
|
||||||
int mask1 = whitespace & 0xFFFF;
|
|
||||||
int mask2 = (whitespace >> 16) & 0xFFFF;
|
|
||||||
int mask3 = (whitespace >> 32) & 0xFFFF;
|
|
||||||
int mask4 = (whitespace >> 48) & 0xFFFF;
|
|
||||||
int pop1 = _popcnt64((~whitespace) & 0xFFFF);
|
|
||||||
int pop2 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFF));
|
|
||||||
int pop3 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
|
|
||||||
int pop4 = _popcnt64((~whitespace));
|
|
||||||
__m256i vmask1 = _mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF),
|
|
||||||
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF));
|
|
||||||
__m256i vmask2 = _mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF),
|
|
||||||
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF));
|
|
||||||
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
|
|
||||||
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
|
|
||||||
_mm256_storeu2_m128i((__m128i *)(buffer + pop1), (__m128i *)buffer,
|
|
||||||
result1);
|
|
||||||
_mm256_storeu2_m128i((__m128i *)(buffer + pop3), (__m128i *)(buffer + pop2),
|
|
||||||
result2);
|
|
||||||
memcpy(out, buffer, pop4);
|
|
||||||
out += pop4;
|
|
||||||
}
|
|
||||||
return out - initout;
|
|
||||||
}
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,55 +0,0 @@
|
||||||
#pragma once
|
|
||||||
#include <cassert>
|
|
||||||
typedef unsigned char u8;
|
|
||||||
typedef unsigned short u16;
|
|
||||||
typedef unsigned int u32;
|
|
||||||
typedef unsigned long long u64;
|
|
||||||
typedef signed char s8;
|
|
||||||
typedef signed short s16;
|
|
||||||
typedef signed int s32;
|
|
||||||
typedef signed long long s64;
|
|
||||||
|
|
||||||
#include <x86intrin.h>
|
|
||||||
#include <immintrin.h>
|
|
||||||
|
|
||||||
typedef __m128i m128;
|
|
||||||
typedef __m256i m256;
|
|
||||||
|
|
||||||
// Snippets from Hyperscan
|
|
||||||
|
|
||||||
// Align to N-byte boundary
|
|
||||||
#define ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1))
|
|
||||||
#define ROUNDDOWN_N(a, n) ((a) & ~((n)-1))
|
|
||||||
|
|
||||||
#define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n) - 1)) == 0)
|
|
||||||
|
|
||||||
#define really_inline inline __attribute__ ((always_inline, unused))
|
|
||||||
#define never_inline inline __attribute__ ((noinline, unused))
|
|
||||||
|
|
||||||
#define UNUSED __attribute__ ((unused))
|
|
||||||
|
|
||||||
#ifndef likely
|
|
||||||
#define likely(x) __builtin_expect(!!(x), 1)
|
|
||||||
#endif
|
|
||||||
#ifndef unlikely
|
|
||||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static inline
|
|
||||||
u32 ctz64(u64 x) {
|
|
||||||
assert(x); // behaviour not defined for x == 0
|
|
||||||
#if defined(_WIN64)
|
|
||||||
unsigned long r;
|
|
||||||
_BitScanForward64(&r, x);
|
|
||||||
return r;
|
|
||||||
#elif defined(_WIN32)
|
|
||||||
unsigned long r;
|
|
||||||
if (_BitScanForward(&r, (u32)x)) {
|
|
||||||
return (u32)r;
|
|
||||||
}
|
|
||||||
_BitScanForward(&r, x >> 32);
|
|
||||||
return (u32)(r + 32);
|
|
||||||
#else
|
|
||||||
return (u32)__builtin_ctzll(x);
|
|
||||||
#endif
|
|
||||||
}
|
|
|
@ -1,70 +0,0 @@
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "common_defs.h"
|
|
||||||
|
|
||||||
struct JsonNode {
|
|
||||||
u32 next;
|
|
||||||
u32 next_type;
|
|
||||||
u64 payload; // a freeform 'payload' holding a parsed representation of *something*
|
|
||||||
};
|
|
||||||
|
|
||||||
struct ParsedJson {
|
|
||||||
u8 * structurals;
|
|
||||||
u32 n_structural_indexes;
|
|
||||||
u32 * structural_indexes;
|
|
||||||
JsonNode * nodes;
|
|
||||||
};
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <iostream>
|
|
||||||
#include <iterator>
|
|
||||||
|
|
||||||
// Terminal colouring helpers, adapted from
// https://stackoverflow.com/questions/2616906/how-do-i-output-coloured-text-to-a-linux-terminal
namespace Color {

// Numeric codes that Modifier places between "\033[" and "m".
enum Code {
    // foreground colours
    FG_BLACK = 30,
    FG_RED = 31,
    FG_GREEN = 32,
    FG_YELLOW = 33,
    FG_BLUE = 34,
    FG_MAGENTA = 35,
    FG_CYAN = 36,
    FG_LIGHT_GRAY = 37,
    FG_DEFAULT = 39,
    FG_DARK_GRAY = 90,
    FG_LIGHT_RED = 91,
    FG_LIGHT_GREEN = 92,
    FG_LIGHT_YELLOW = 93,
    FG_LIGHT_BLUE = 94,
    FG_LIGHT_MAGENTA = 95,
    FG_LIGHT_CYAN = 96,
    FG_WHITE = 97,
    // background colours
    BG_RED = 41,
    BG_GREEN = 42,
    BG_BLUE = 44,
    BG_DEFAULT = 49
};

// Stream manipulator: inserting a Modifier into an ostream writes the escape
// sequence "\033[<code>m" for the code it was constructed with.
class Modifier {
public:
    Modifier(Code pCode) : code(pCode) {}

    friend std::ostream& operator<<(std::ostream& os, const Modifier& mod) {
        os << "\033[" << mod.code << "m";
        return os;
    }

private:
    Code code;
};

}
|
|
||||||
|
|
||||||
// Prints the input to stdout with every structural character highlighted:
// brackets and braces in green, any other structural character in yellow,
// and the text between two structural positions in the default colour.
//
// pj:  supplies structural_indexes[0 .. n_structural_indexes), used as
//      offsets into buf.
// buf: the JSON text those offsets refer to.
//
// Declared inline because it is defined in a header: without it, including
// this header from two translation units produces duplicate definitions.
inline void colorfuldisplay(ParsedJson & pj, const u8 * buf) {
    const Color::Modifier greenfg(Color::FG_GREEN);
    const Color::Modifier yellowfg(Color::FG_YELLOW);
    const Color::Modifier deffg(Color::FG_DEFAULT);
    size_t i = 0;
    // skip initial fluff: leading entries that repeat the same offset
    while ((i + 1 < pj.n_structural_indexes) &&
           (pj.structural_indexes[i] == pj.structural_indexes[i + 1])) {
        i++;
    }
    for (; i < pj.n_structural_indexes; i++) {
        const u32 idx = pj.structural_indexes[i];
        const u8 c = buf[idx];
        // Clearing bit 0x20 folds '{' (0x7b) onto '[' (0x5b) and '}' (0x7d)
        // onto ']' (0x5d), so one comparison covers each pair.
        const u8 folded = c & 0xdf;
        const bool is_bracket = (folded == 0x5b) || (folded == 0x5d);
        std::cout << (is_bracket ? greenfg : yellowfg) << c << deffg;
        // Echo everything up to (not including) the next structural position.
        if (i + 1 < pj.n_structural_indexes) {
            const u32 nextidx = pj.structural_indexes[i + 1];
            for (u32 pos = idx + 1; pos < nextidx; pos++) {
                std::cout << buf[pos];
            }
        }
    }
    std::cout << std::endl;
}
|
|
|
@ -1,55 +0,0 @@
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
static uint8_t jump_table[256 * 3] = {
|
|
||||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
|
||||||
1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
||||||
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
||||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0,
|
|
||||||
1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
||||||
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
||||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
|
||||||
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
||||||
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
||||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
|
||||||
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
||||||
1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
||||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
|
||||||
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
||||||
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
||||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
|
||||||
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
||||||
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
||||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
|
||||||
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
||||||
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
||||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
|
||||||
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
||||||
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
||||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
|
||||||
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
||||||
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
||||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
|
||||||
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
||||||
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
||||||
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
||||||
};
|
|
||||||
|
|
||||||
static inline size_t scalar_despace(const unsigned char *bytes, size_t howmany,
|
|
||||||
unsigned char *out) {
|
|
||||||
size_t i = 0, pos = 0;
|
|
||||||
uint8_t quote = 0;
|
|
||||||
uint8_t nonescape = 1;
|
|
||||||
|
|
||||||
while (i < howmany) {
|
|
||||||
unsigned char c = bytes[i];
|
|
||||||
uint8_t *meta = jump_table + 3 * c;
|
|
||||||
|
|
||||||
quote = quote ^ (meta[0] & nonescape);
|
|
||||||
out[pos] = c;
|
|
||||||
pos += meta[2] | quote;
|
|
||||||
|
|
||||||
i += 1;
|
|
||||||
nonescape = (~nonescape) | (meta[1]);
|
|
||||||
}
|
|
||||||
return pos;
|
|
||||||
}
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,21 +0,0 @@
|
||||||
#include "common_defs.h"
|
|
||||||
|
|
||||||
// get a corpus; pad out to cache line so we can always use SIMD
|
|
||||||
static pair<u8 *, size_t> get_corpus(string filename) {
|
|
||||||
ifstream is(filename, ios::binary);
|
|
||||||
if (is) {
|
|
||||||
stringstream buffer;
|
|
||||||
buffer << is.rdbuf();
|
|
||||||
size_t length = buffer.str().size();
|
|
||||||
char * aligned_buffer;
|
|
||||||
if (posix_memalign( (void **)&aligned_buffer, 64, ROUNDUP_N(length, 64))) {
|
|
||||||
throw "Allocation failed";
|
|
||||||
};
|
|
||||||
memset(aligned_buffer, 0x20, ROUNDUP_N(length, 64));
|
|
||||||
memcpy(aligned_buffer, buffer.str().c_str(), length);
|
|
||||||
is.close();
|
|
||||||
return make_pair((u8 *)aligned_buffer, length);
|
|
||||||
}
|
|
||||||
throw "No corpus";
|
|
||||||
return make_pair((u8 *)0, (size_t)0);
|
|
||||||
}
|
|
|
@ -1,336 +0,0 @@
|
||||||
#ifndef VECDECODE_H
|
|
||||||
#define VECDECODE_H
|
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
|
||||||
#define ALIGNED(x) __declspec(align(x))
|
|
||||||
#else
|
|
||||||
#if defined(__GNUC__)
|
|
||||||
#define ALIGNED(x) __attribute__((aligned(x)))
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static uint8_t lengthTable[256] = {
|
|
||||||
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
|
|
||||||
2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
|
||||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
|
|
||||||
2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
|
||||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
|
|
||||||
4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
|
||||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
|
|
||||||
3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
|
||||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
|
|
||||||
4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
|
||||||
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
|
|
||||||
|
|
||||||
/* vecDecodeTable[b] lists, in ascending order, the 1-based positions of the
 * bits set in the byte value b, padded with zeros to eight entries.
 *
 * Each row is 8 * 4 = 32 bytes, i.e. exactly one 256-bit vector. The table is
 * aligned to 32 bytes so that every row starts on a 32-byte boundary and can
 * be read with an aligned 256-bit load; 16-byte alignment is not sufficient
 * for that. */
alignas(32) static uint32_t vecDecodeTable[256][8] = {
    {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */
    {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */
    {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */
    {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */
    {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */
    {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */
    {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */
    {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */
    {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */
    {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */
    {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */
    {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */
    {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */
    {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */
    {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */
    {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */
    {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */
    {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */
    {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */
    {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */
    {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */
    {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */
    {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */
    {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */
    {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */
    {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */
    {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */
    {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */
    {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */
    {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */
    {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */
    {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */
    {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */
    {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */
    {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */
    {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */
    {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */
    {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */
    {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */
    {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */
    {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */
    {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */
    {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */
    {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */
    {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */
    {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */
    {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */
    {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */
    {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */
    {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */
    {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */
    {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */
    {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */
    {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */
    {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */
    {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */
    {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */
    {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */
    {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */
    {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */
    {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */
    {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */
    {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */
    {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */
    {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */
    {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */
    {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */
    {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */
    {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */
    {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */
    {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */
    {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */
    {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */
    {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */
    {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */
    {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */
    {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */
    {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */
    {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */
    {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */
    {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */
    {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */
    {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */
    {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */
    {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */
    {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */
    {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */
    {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */
    {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */
    {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */
    {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */
    {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */
    {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */
    {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */
    {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */
    {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */
    {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */
    {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */
    {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */
    {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */
    {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */
    {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */
    {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */
    {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */
    {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */
    {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */
    {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */
    {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */
    {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */
    {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */
    {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */
    {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */
    {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */
    {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */
    {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */
    {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */
    {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */
    {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */
    {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */
    {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */
    {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */
    {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */
    {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */
    {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */
    {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */
    {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */
    {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */
    {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */
    {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */
    {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */
    {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */
    {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */
    {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */
    {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */
    {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */
    {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */
    {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */
    {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */
    {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */
    {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */
    {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */
    {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */
    {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */
    {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */
    {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */
    {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */
    {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */
    {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */
    {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */
    {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */
    {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */
    {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */
    {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */
    {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */
    {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */
    {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */
    {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */
    {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */
    {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */
    {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */
    {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */
    {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */
    {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */
    {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */
    {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */
    {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */
    {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */
    {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */
    {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */
    {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */
    {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */
    {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */
    {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */
    {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */
    {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */
    {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */
    {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */
    {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */
    {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */
    {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */
    {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */
    {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */
    {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */
    {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */
    {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */
    {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */
    {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */
    {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */
    {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */
    {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */
    {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */
    {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */
    {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */
    {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */
    {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */
    {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */
    {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */
    {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */
    {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */
    {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */
    {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */
    {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */
    {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */
    {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */
    {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */
    {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */
    {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */
    {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */
    {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */
    {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */
    {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */
    {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */
    {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */
    {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */
    {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */
    {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */
    {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */
    {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */
    {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */
    {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */
    {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */
    {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */
    {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */
    {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */
    {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */
    {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */
    {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */
    {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */
    {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */
    {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */
    {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */
    {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */
    {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */
    {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */
    {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */
    {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */
    {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */
    {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */
    {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */
    {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */
    {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */
    {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */
    {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */
    {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */
    {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */
    {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */
    {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */
    {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */
    {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */
    {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */
    {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */
    {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */
    {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */
    {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */
    {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */
    {1, 2, 3, 4, 5, 6, 7, 8}  /* 0xFF (11111111) */
};
|
|
||||||
|
|
||||||
static size_t bitmap_decode_avx2(uint8_t *bitmapInPtr, size_t bitsin, uint32_t *out) {
|
|
||||||
uint32_t *initout = out;
|
|
||||||
__m256i baseVec = _mm256_set1_epi32(-1);
|
|
||||||
__m256i incVec = _mm256_set1_epi32(64);
|
|
||||||
__m256i add8 = _mm256_set1_epi32(8);
|
|
||||||
|
|
||||||
int sizeinwords = bitsin / 64;
|
|
||||||
uint64_t *array = (uint64_t *)bitmapInPtr;
|
|
||||||
|
|
||||||
for (int i = 0; i < sizeinwords; ++i) {
|
|
||||||
uint64_t w = array[i];
|
|
||||||
if (w == 0) {
|
|
||||||
baseVec = _mm256_add_epi32(baseVec, incVec);
|
|
||||||
} else {
|
|
||||||
for (int k = 0; k < 4; ++k) {
|
|
||||||
uint8_t byteA = (uint8_t)w;
|
|
||||||
uint8_t byteB = (uint8_t)(w >> 8);
|
|
||||||
w >>= 16;
|
|
||||||
__m256i vecA =
|
|
||||||
_mm256_load_si256((const __m256i *)vecDecodeTable[byteA]);
|
|
||||||
__m256i vecB =
|
|
||||||
_mm256_load_si256((const __m256i *)vecDecodeTable[byteB]);
|
|
||||||
uint8_t advanceA = lengthTable[byteA];
|
|
||||||
uint8_t advanceB = lengthTable[byteB];
|
|
||||||
vecA = _mm256_add_epi32(baseVec, vecA);
|
|
||||||
baseVec = _mm256_add_epi32(baseVec, add8);
|
|
||||||
vecB = _mm256_add_epi32(baseVec, vecB);
|
|
||||||
baseVec = _mm256_add_epi32(baseVec, add8);
|
|
||||||
_mm256_storeu_si256((__m256i *)out, vecA);
|
|
||||||
out += advanceA;
|
|
||||||
_mm256_storeu_si256((__m256i *)out, vecB);
|
|
||||||
out += advanceB;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ((bitsin % 64) != 0) {
|
|
||||||
// finish off the work the slow way.
|
|
||||||
uint64_t bitset = 0;
|
|
||||||
size_t leftoverbits = bitsin - sizeinwords * 64;
|
|
||||||
size_t leftoverbytes = ( leftoverbits + 7 ) / 8;
|
|
||||||
memcpy(&bitset, bitmapInPtr + sizeinwords * 8, leftoverbytes);
|
|
||||||
while (bitset != 0) {
|
|
||||||
uint64_t t = bitset & -bitset;
|
|
||||||
int r = __builtin_ctzll(bitset);
|
|
||||||
*out = sizeinwords * 64 + r;
|
|
||||||
out++;
|
|
||||||
bitset ^= t;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return out - initout;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,12 +0,0 @@
|
||||||
#!/bin/bash
# Build the benchmark binary and run it on every JSON example file.
echo "Note: the SIMD parser does a bit more work."
# Directory containing this script, so the script can be run from anywhere.
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
cd "$SCRIPTPATH" || exit 1
# Stop here if the build fails rather than running a stale or missing binary.
make bench || exit 1
echo
for i in "$SCRIPTPATH"/../jsonexamples/*.json; do
  # An unmatched glob is left as a literal pattern; skip anything not a file.
  [ -f "$i" ] || continue
  echo "$i"
  "$SCRIPTPATH/bench" "$i"
  echo
done
|
|
Loading…
Reference in New Issue