Removing old code.

This commit is contained in:
Daniel Lemire 2018-08-20 17:42:09 -04:00
parent ef0d14c35c
commit 262a68ba8f
13 changed files with 0 additions and 35133 deletions

View File

@ -1,12 +0,0 @@
# Headers the benchmark depends on; touching any of them rebuilds `bench`.
# NOTE(review): the bare `include/` entry looks like a truncated file name - confirm.
HEADERS:=include/avxprocessing.h include/benchmark.h include/common_defs.h include/jsonstruct.h include/ include/util.h
# Location of the RapidJSON headers (populated by the submodule rule below).
RAPIDJSON_INCLUDE:=../dependencies/rapidjson/include
# Build the benchmark binary from benchmarks/bench.cpp, tuned for the host CPU.
bench: benchmarks/bench.cpp $(RAPIDJSON_INCLUDE) $(HEADERS)
$(CXX) -std=c++11 -O3 -o $@ benchmarks/bench.cpp -I$(RAPIDJSON_INCLUDE) -Iinclude -march=native -lm -Wall -Wextra -Wno-narrowing
# Fetch git submodules when the RapidJSON include directory is missing.
$(RAPIDJSON_INCLUDE):
git submodule update --init --recursive
# Remove the benchmark binary.
clean:
rm -f bench

View File

@ -1,3 +0,0 @@
```
./run.sh
```

View File

View File

@ -1,129 +0,0 @@
#include "avxprocessing.h"
#include "avxminifier.h"
#include "scalarminifier.h"
#include "benchmark.h"
#include "jsonstruct.h"
// #define RAPIDJSON_SSE2 // bad
// #define RAPIDJSON_SSE42 // bad
#include "rapidjson/document.h"
#include "rapidjson/reader.h" // you have to check in the submodule
#include "rapidjson/stringbuffer.h"
#include "rapidjson/writer.h"
#include "util.h"
// colorfuldisplay(ParsedJson & pj, const u8 * buf)
using namespace rapidjson;
using namespace std;
std::string rapidstringmeInsitu(char * json) {
Document d;
d.ParseInsitu(json);
if(d.HasParseError()) {
std::cerr << "problem!" << std::endl;
return "";// should do something
}
StringBuffer buffer;
Writer<StringBuffer> writer(buffer);
d.Accept(writer);
return buffer.GetString();
}
std::string rapidstringme(char * json) {
Document d;
d.Parse(json);
if(d.HasParseError()) {
std::cerr << "problem!" << std::endl;
return "";// should do something
}
StringBuffer buffer;
Writer<StringBuffer> writer(buffer);
d.Accept(writer);
return buffer.GetString();
}
// Benchmark driver.
//
// Usage: bench <jsonfile>
//    or: bench -v <jsonfile>
//
// Loads the JSON file named by the last argument, then times the AVX parser,
// several RapidJSON parsing configurations, RapidJSON round-tripping and the
// AVX / scalar whitespace removers on the same input.
int main(int argc, char *argv[]) {
  if (argc < 2) {
    cerr << "Usage: " << argv[0] << " <jsonfile>\n";
    cerr << "Or " << argv[0] << " -v <jsonfile>\n";
    exit(1);
  }
  bool verbose = false;
  if (argc > 2) {
    // strcmp returns 0 when the strings match, so the comparison must be
    // explicit; the bare call enabled verbose for everything except "-v".
    if (strcmp(argv[1], "-v") == 0) {
      verbose = true;
    }
  }
  (void)verbose; // the flag is parsed but nothing below consumes it yet

  pair<u8 *, size_t> p = get_corpus(argv[argc - 1]);
  ParsedJson pj;

  std::cout << "Input has ";
  if (p.second > 1024 * 1024)
    std::cout << p.second / (1024 * 1024) << " MB ";
  else if (p.second > 1024)
    std::cout << p.second / 1024 << " KB ";
  else
    std::cout << p.second << " B ";
  std::cout << std::endl;

  if (posix_memalign((void **)&pj.structurals, 8,
                     ROUNDUP_N(p.second, 64) / 8)) {
    throw "Allocation failed";
  };
  pj.n_structural_indexes = 0;
  // we have potentially 1 structure per byte of input
  // as well as a dummy structure and a root structure
  // we also potentially write up to 7 iterations beyond
  // in our 'cheesy flatten', so make some worst-case
  // space for that too
  u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7;
  pj.structural_indexes = new u32[max_structures];
  pj.nodes = new JsonNode[max_structures];

  int repeat = 10;
  int volume = p.second;
  BEST_TIME(avx_json_parse(p.first, p.second, pj), true, , repeat, volume,
            true);

  rapidjson::Document d;
  // NUL-terminated working copy of the input for the RapidJSON entry points.
  char *buffer = (char *)malloc(p.second + 1);
  if (buffer == NULL) {
    throw "Allocation failed";
  }
  memcpy(buffer, p.first, p.second);
  buffer[p.second] = '\0';

  BEST_TIME(d.Parse<kParseValidateEncodingFlag>((const char *)buffer).HasParseError(), false,
            memcpy(buffer, p.first, p.second), repeat, volume, true);
  BEST_TIME(d.Parse((const char *)buffer).HasParseError(), false,
            memcpy(buffer, p.first, p.second), repeat, volume, true);
  BEST_TIME(d.ParseInsitu(buffer).HasParseError(), false,
            memcpy(buffer, p.first, p.second), repeat, volume, true);

  // The corpus buffer is not NUL-terminated, so the string-based RapidJSON
  // helper must be given the terminated copy; parsing p.first directly would
  // read past the end of the allocation. ParseInsitu above modified the copy,
  // so restore it first (the terminator at buffer[p.second] is untouched).
  memcpy(buffer, p.first, p.second);
  size_t strlength = rapidstringme(buffer).size();
  std::cout << "input length is "<< p.second << " stringified length is " << strlength << std::endl;
  BEST_TIME_NOCHECK(rapidstringme(buffer), , repeat, volume,
                    true);
  BEST_TIME_NOCHECK(rapidstringmeInsitu((char*) buffer), memcpy(buffer, p.first, p.second) , repeat, volume,
                    true);

  memcpy(buffer, p.first, p.second);
  size_t outlength = copy_without_useless_spaces_avx((const uint8_t *)buffer, p.second,(uint8_t *) buffer);
  std::cout << "despaced length is " << outlength << std::endl;

  uint8_t * cbuffer = (uint8_t *)buffer;
  BEST_TIME(copy_without_useless_spaces_avx(cbuffer, p.second,cbuffer), outlength,
            memcpy(buffer, p.first, p.second), repeat, volume, true);
  BEST_TIME(scalar_despace(cbuffer, p.second,cbuffer), outlength,
            memcpy(buffer, p.first, p.second), repeat, volume, true);

  printf("parsing with RapidJSON after despacing:\n");
  // The setup step despaces the original input into cbuffer and terminates it
  // at the despaced length before each in-situ parse.
  BEST_TIME(d.ParseInsitu(buffer).HasParseError(),false, cbuffer[copy_without_useless_spaces_avx((const uint8_t *)p.first, p.second,cbuffer)]='\0' , repeat, volume,
            true);

  free(buffer);
  delete[] pj.structural_indexes;
  delete[] pj.nodes;
  free(p.first);
  free(pj.structurals);
}

View File

@ -1,206 +0,0 @@
#include <stdint.h>
#include <x86intrin.h>
#include "simdprune_tables.h"
#ifndef __clang__
// Fallback for compilers that do not ship this intrinsic: build a 256-bit
// vector from two unaligned 128-bit loads, with *__addr_lo in the low lane
// and *__addr_hi in the high lane.
static inline __m256i _mm256_loadu2_m128i(__m128i const *__addr_hi,
                                          __m128i const *__addr_lo) {
  const __m128i low_lane = _mm_loadu_si128(__addr_lo);
  const __m128i high_lane = _mm_loadu_si128(__addr_hi);
  return _mm256_insertf128_si256(_mm256_castsi128_si256(low_lane), high_lane, 1);
}
// Fallback for compilers that do not ship this intrinsic: write the two
// 128-bit lanes of __a to two independent unaligned addresses. The low lane
// is stored first and the high lane second, so when the destinations overlap
// the high lane wins.
static inline void _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo,
                                        __m256i __a) {
  _mm_storeu_si128(__addr_lo, _mm256_castsi256_si128(__a));
  _mm_storeu_si128(__addr_hi, _mm256_extractf128_si256(__a, 1));
}
#endif
// Compare all 64 input bytes (two 32-byte halves) against `mask` and return a
// 64-bit bitmap: bit i is set when byte i equals the corresponding mask byte.
// Bits 0..31 come from input_lo, bits 32..63 from input_hi.
static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,
                                            __m256i mask) {
  const uint32_t low_bits =
      (uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(input_lo, mask));
  const uint32_t high_bits =
      (uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(input_hi, mask));
  return (uint64_t)low_bits | ((uint64_t)high_bits << 32);
}
// take input from buf and remove useless whitespace, input and output can be
// the same
//
// Whitespace (space, tab, LF, CR) that lies outside of double-quoted strings
// is dropped; everything else is copied through in order. The input is
// processed in 64-byte blocks with AVX2, and a final partial block is handled
// through a zero-padded stack buffer.
//
// buf: input bytes
// len: number of input bytes
// out: destination; the main loop stores full 16-byte chunks, so it may write
//      a few bytes past the last kept byte of a block (still within the span
//      of the block just read when out == buf)
// returns the number of bytes kept, i.e. the length of the despaced output
static inline size_t copy_without_useless_spaces_avx(const uint8_t *buf, size_t len,
uint8_t *out) {
// Useful constant masks
const uint64_t even_bits = 0x5555555555555555ULL;
const uint64_t odd_bits = ~even_bits;
// remember where the output started so the written length can be returned
uint8_t *initout(out);
// state carried from one 64-byte block to the next
uint64_t prev_iter_ends_odd_backslash =
0ULL; // either 0 or 1, but a 64-bit value
uint64_t prev_iter_inside_quote = 0ULL; // either all zeros or all ones
size_t idx = 0;
if (len >= 64) {
// only iterate while a full 64-byte block is available
size_t avxlen = len - 63;
for (; idx < avxlen; idx += 64) {
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buf + idx + 0));
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buf + idx + 32));
// --- step 1: find characters that follow an odd-length backslash run ---
// one bit per input byte, set where the byte is a backslash
uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi,
_mm256_set1_epi8('\\'));
// first backslash of every run
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
// flip the even/odd notion when the previous block ended inside an odd run
uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
uint64_t even_starts = start_edges & even_start_mask;
uint64_t odd_starts = start_edges & ~even_start_mask;
// adding the run start to the run carries a bit to just past the run's end
uint64_t even_carries = bs_bits + even_starts;
uint64_t odd_carries;
// an overflow here means a run starting at an odd position reaches the end
// of this block; remember it for the next block
bool iter_ends_odd_backslash = __builtin_uaddll_overflow(
bs_bits, odd_starts, (unsigned long long *)&odd_carries);
odd_carries |= prev_iter_ends_odd_backslash;
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
uint64_t even_carry_ends = even_carries & ~bs_bits;
uint64_t odd_carry_ends = odd_carries & ~bs_bits;
// a run has odd length when its start and end positions differ in parity
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
// --- step 2: build the "inside a quoted string" mask ---
uint64_t quote_bits = cmp_mask_against_input_mini(input_lo, input_hi,
_mm256_set1_epi8('"'));
// ignore quotes that come right after an odd-length backslash run (escaped)
quote_bits = quote_bits & ~odd_ends;
// carry-less multiplication by all-ones yields the running XOR of the quote
// bits, i.e. a mask that is set from each opening quote up to its closing one
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
quote_mask ^= prev_iter_inside_quote;
// replicate the top bit: all ones if the block ends inside a string
prev_iter_inside_quote = (uint64_t)((s64)quote_mask >> 63);
// --- step 3: classify bytes with two nibble-indexed lookup tables ---
const __m256i low_nibble_mask = _mm256_setr_epi8(
// 0 9 a b c d
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
const __m256i high_nibble_mask = _mm256_setr_epi8(
// 0 2 3 5 7
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0,
1, 0, 0, 0, 3, 2, 1, 0, 0);
// the class bits (0x18) that this routine treats as whitespace
__m256i whitespace_shufti_mask = _mm256_set1_epi8(0x18);
// AND of the low-nibble lookup and the high-nibble lookup for every byte
__m256i v_lo = _mm256_and_si256(
_mm256_shuffle_epi8(low_nibble_mask, input_lo),
_mm256_shuffle_epi8(high_nibble_mask,
_mm256_and_si256(_mm256_srli_epi32(input_lo, 4),
_mm256_set1_epi8(0x7f))));
__m256i v_hi = _mm256_and_si256(
_mm256_shuffle_epi8(low_nibble_mask, input_hi),
_mm256_shuffle_epi8(high_nibble_mask,
_mm256_and_si256(_mm256_srli_epi32(input_hi, 4),
_mm256_set1_epi8(0x7f))));
// these compares flag the bytes that are NOT whitespace (class bits zero)...
__m256i tmp_ws_lo = _mm256_cmpeq_epi8(
_mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0));
__m256i tmp_ws_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
// ...hence the complement to obtain the whitespace bitmap
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
// whitespace inside a string is significant and must be kept
whitespace &= ~quote_mask;
// --- step 4: compact each 16-byte chunk and store the chunks back to back ---
// per-chunk removal masks
int mask1 = whitespace & 0xFFFF;
int mask2 = (whitespace >> 16) & 0xFFFF;
int mask3 = (whitespace >> 32) & 0xFFFF;
int mask4 = (whitespace >> 48) & 0xFFFF;
// cumulative number of kept bytes after 16, 32, 48 and 64 input bytes
int pop1 = _popcnt64((~whitespace) & 0xFFFF);
int pop2 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFF));
int pop3 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
int pop4 = _popcnt64((~whitespace));
// mask128_epi8 is indexed by the low 15 bits of each removal mask;
// presumably it holds the byte-shuffle patterns that move the kept bytes to
// the front of a 16-byte lane - confirm against simdprune_tables.h
__m256i vmask1 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF));
__m256i vmask2 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF));
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
// each chunk is written at the running count of kept bytes; a later store
// overwrites the unused tail of the previous one
_mm256_storeu2_m128i((__m128i *)(out + pop1), (__m128i *)out, result1);
_mm256_storeu2_m128i((__m128i *)(out + pop3), (__m128i *)(out + pop2),
result2);
out += pop4;
}
}
// we finish off the job... copying and pasting the code is not ideal here,
// but it gets the job done.
if (idx < len) {
// stage the remaining (fewer than 64) bytes in a zero-padded block so that
// full-width loads and stores are safe
uint8_t buffer[64];
memset(buffer, 0, 64);
memcpy(buffer, buf + idx, len - idx);
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buffer));
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buffer + 32));
// same escaped-character detection as in the main loop
uint64_t bs_bits =
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\'));
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
uint64_t even_starts = start_edges & even_start_mask;
uint64_t odd_starts = start_edges & ~even_start_mask;
uint64_t even_carries = bs_bits + even_starts;
uint64_t odd_carries;
bool iter_ends_odd_backslash = __builtin_uaddll_overflow(
bs_bits, odd_starts, (unsigned long long *)&odd_carries);
odd_carries |= prev_iter_ends_odd_backslash;
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
uint64_t even_carry_ends = even_carries & ~bs_bits;
uint64_t odd_carry_ends = odd_carries & ~bs_bits;
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
// same quoted-string mask as in the main loop
uint64_t quote_bits =
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('"'));
quote_bits = quote_bits & ~odd_ends;
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (uint64_t)((s64)quote_mask >> 63);
// whitespace detection here uses a different formulation than the main loop:
// an explicit compare with the space character plus a 16-entry lookup that
// flags 0x09 (tab), 0x0a (LF) and 0x0d (CR)
__m256i mask_20 = _mm256_set1_epi8(0x20); // c==32
__m256i mask_70 =
_mm256_set1_epi8(0x70); // adding 0x70 does not check low 4-bits
// but moves any value >= 16 above 128
__m256i lut_cntrl = _mm256_setr_epi8(
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00,
0x00, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00);
__m256i tmp_ws_lo = _mm256_or_si256(
_mm256_cmpeq_epi8(mask_20, input_lo),
_mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_lo)));
__m256i tmp_ws_hi = _mm256_or_si256(
_mm256_cmpeq_epi8(mask_20, input_hi),
_mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_hi)));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
// here the compare results already flag whitespace, so no complement
uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32));
whitespace &= ~quote_mask;
// treat the zero padding beyond the real input as whitespace so that it is
// dropped from the output
if (len - idx < 64) {
whitespace |= UINT64_C(0xFFFFFFFFFFFFFFFF) << (len - idx);
}
// per-chunk removal masks and cumulative kept-byte counts, as in the main loop
int mask1 = whitespace & 0xFFFF;
int mask2 = (whitespace >> 16) & 0xFFFF;
int mask3 = (whitespace >> 32) & 0xFFFF;
int mask4 = (whitespace >> 48) & 0xFFFF;
int pop1 = _popcnt64((~whitespace) & 0xFFFF);
int pop2 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFF));
int pop3 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
int pop4 = _popcnt64((~whitespace));
__m256i vmask1 = _mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF));
__m256i vmask2 = _mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF));
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
// compact inside the stack buffer, then copy only the kept bytes to the
// caller's output so nothing is written past them
_mm256_storeu2_m128i((__m128i *)(buffer + pop1), (__m128i *)buffer,
result1);
_mm256_storeu2_m128i((__m128i *)(buffer + pop3), (__m128i *)(buffer + pop2),
result2);
memcpy(out, buffer, pop4);
out += pop4;
}
return out - initout;
}

File diff suppressed because it is too large Load Diff

View File

@ -1,55 +0,0 @@
#pragma once
#include <cassert>
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;
typedef signed char s8;
typedef signed short s16;
typedef signed int s32;
typedef signed long long s64;
#include <x86intrin.h>
#include <immintrin.h>
typedef __m128i m128;
typedef __m256i m256;
// Snippets from Hyperscan
// Align to N-byte boundary
#define ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1))
#define ROUNDDOWN_N(a, n) ((a) & ~((n)-1))
#define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n) - 1)) == 0)
#define really_inline inline __attribute__ ((always_inline, unused))
#define never_inline inline __attribute__ ((noinline, unused))
#define UNUSED __attribute__ ((unused))
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#endif
#ifndef unlikely
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
// Count trailing zero bits of a 64-bit value, i.e. return the index of the
// lowest set bit. The argument must be non-zero.
static inline
u32 ctz64(u64 x) {
  assert(x); // behaviour not defined for x == 0
#if defined(_WIN64)
  unsigned long bit_index;
  _BitScanForward64(&bit_index, x);
  return bit_index;
#elif defined(_WIN32)
  // No 64-bit scan available: look at the low half first, then the high half.
  unsigned long bit_index;
  if (!_BitScanForward(&bit_index, (u32)x)) {
    _BitScanForward(&bit_index, x >> 32);
    return (u32)(bit_index + 32);
  }
  return (u32)bit_index;
#else
  const int trailing_zeros = __builtin_ctzll(x);
  return (u32)trailing_zeros;
#endif
}

View File

@ -1,70 +0,0 @@
#pragma once
#include "common_defs.h"
// One node of the parsed JSON representation.
// NOTE(review): the code that fills these fields is not visible here; the
// meaning of `next` / `next_type` below is inferred from the names - confirm.
struct JsonNode {
u32 next; // presumably the index of a following node
u32 next_type; // presumably describes how `next` is to be interpreted
u64 payload; // a freeform 'payload' holding a parsed representation of *something*
};
// Working state / result of a parse. The struct owns nothing by itself: the
// buffers are allocated and released by whoever creates the object.
struct ParsedJson {
u8 * structurals; // raw byte buffer; presumably a bitmap with one bit per input byte - confirm
u32 n_structural_indexes; // number of valid entries in structural_indexes
u32 * structural_indexes; // offsets into the input buffer, one per structural position
JsonNode * nodes; // node storage; presumably one slot per structural index - confirm
};
#include <algorithm>
#include <iostream>
#include <iterator>
// https://stackoverflow.com/questions/2616906/how-do-i-output-coloured-text-to-a-linux-terminal
// Minimal helpers for emitting ANSI terminal color escape sequences.
namespace Color {
// Numeric codes placed inside the escape sequence; FG_* entries name
// foreground colors and BG_* entries name background colors.
enum Code {
FG_DEFAULT = 39, FG_BLACK = 30, FG_RED = 31, FG_GREEN = 32,
FG_YELLOW = 33, FG_BLUE = 34, FG_MAGENTA = 35, FG_CYAN = 36,
FG_LIGHT_GRAY = 37, FG_DARK_GRAY = 90, FG_LIGHT_RED = 91,
FG_LIGHT_GREEN = 92, FG_LIGHT_YELLOW = 93, FG_LIGHT_BLUE = 94,
FG_LIGHT_MAGENTA = 95, FG_LIGHT_CYAN = 96, FG_WHITE = 97,
BG_RED = 41, BG_GREEN = 42, BG_BLUE = 44, BG_DEFAULT = 49
};
// Stream manipulator: writing a Modifier to an ostream emits
// ESC '[' <code> 'm' for the wrapped code.
class Modifier {
Code code;
public:
// Implicit on purpose so a Code can be used wherever a Modifier is expected.
Modifier(Code pCode) : code(pCode) {}
friend std::ostream&
operator<<(std::ostream& os, const Modifier& mod) {
return os << "\033[" << mod.code << "m";
}
};
}
// Print the input to stdout with every structural character colorized:
// brackets and braces in green, every other structural character in yellow.
// The text between two consecutive structural positions is printed uncolored.
//
// pj:  parse result; only structural_indexes / n_structural_indexes are read
// buf: the input buffer the structural indexes refer to
//
// Declared inline because this definition lives in a header: without it,
// including the header from more than one translation unit produces
// multiple-definition link errors.
inline void colorfuldisplay(ParsedJson & pj, const u8 * buf) {
  Color::Modifier greenfg(Color::FG_GREEN);
  Color::Modifier yellowfg(Color::FG_YELLOW);
  Color::Modifier deffg(Color::FG_DEFAULT);
  size_t i = 0;
  // skip initial fluff: leading entries that repeat the same position
  while ((i + 1 < pj.n_structural_indexes) &&
         (pj.structural_indexes[i] == pj.structural_indexes[i + 1])) {
    i++;
  }
  for (; i < pj.n_structural_indexes; i++) {
    const u32 idx = pj.structural_indexes[i];
    const u8 c = buf[idx];
    // Clearing bit 0x20 folds '{' (0x7b) onto '[' (0x5b) and '}' (0x7d) onto
    // ']' (0x5d), so two comparisons cover all four bracket characters.
    const u8 folded = c & 0xdf;
    const bool is_bracket = (folded == 0x5b) || (folded == 0x5d);
    if (is_bracket) {
      std::cout << greenfg << c << deffg;
    } else {
      std::cout << yellowfg << c << deffg;
    }
    // echo the bytes up to (but excluding) the next structural position
    if (i + 1 < pj.n_structural_indexes) {
      const u32 nextidx = pj.structural_indexes[i + 1];
      for (u32 pos = idx + 1; pos < nextidx; pos++) {
        std::cout << buf[pos];
      }
    }
  }
  std::cout << std::endl;
}

View File

@ -1,55 +0,0 @@
#include <stdint.h>
static uint8_t jump_table[256 * 3] = {
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0,
1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
};
// Scalar (byte-at-a-time, branch-free) whitespace remover; `out` may be the
// same buffer as `bytes`.
//
// jump_table holds three flags per byte value:
//   [0] non-zero when the byte toggles the in-string state (if not escaped)
//   [1] zero when the byte escapes the following one
//   [2] non-zero when the byte is kept outside of strings
//
// Every byte is written to the output unconditionally; the write cursor only
// advances when the byte is to be kept, so dropped bytes get overwritten.
// Returns the number of bytes kept.
static inline size_t scalar_despace(const unsigned char *bytes, size_t howmany,
                                    unsigned char *out) {
  size_t written = 0;
  uint8_t in_string = 0;
  uint8_t unescaped = 1;
  for (size_t rd = 0; rd < howmany; rd += 1) {
    const unsigned char ch = bytes[rd];
    uint8_t *flags = &jump_table[3 * ch];
    in_string ^= (flags[0] & unescaped);
    out[written] = ch;
    written += flags[2] | in_string;
    unescaped = (~unescaped) | (flags[1]);
  }
  return written;
}

File diff suppressed because it is too large Load Diff

View File

@ -1,21 +0,0 @@
#include "common_defs.h"
// get a corpus; pad out to cache line so we can always use SIMD
//
// Reads the whole file `filename` into a freshly allocated, 64-byte-aligned
// buffer whose size is the file length rounded up to a multiple of 64 (at
// least 64). The bytes after the file contents are filled with spaces (0x20).
// The buffer is NOT NUL-terminated.
//
// Returns (buffer, file length). The caller owns the buffer and releases it
// with free().
// Throws a C string ("No corpus" / "Allocation failed") when the file cannot
// be opened or the allocation fails.
static pair<u8 *, size_t> get_corpus(string filename) {
  ifstream is(filename, ios::binary);
  if (!is) {
    throw "No corpus";
  }
  stringstream buffer;
  buffer << is.rdbuf();
  // str() returns a copy of the whole file, so take it exactly once
  const string contents = buffer.str();
  const size_t length = contents.size();
  size_t padded_length = ROUNDUP_N(length, 64);
  if (padded_length == 0) {
    // empty file: still hand back one real, space-filled block
    padded_length = 64;
  }
  char * aligned_buffer;
  if (posix_memalign( (void **)&aligned_buffer, 64, padded_length)) {
    throw "Allocation failed";
  };
  memset(aligned_buffer, 0x20, padded_length);
  memcpy(aligned_buffer, contents.data(), length);
  is.close();
  return make_pair((u8 *)aligned_buffer, length);
}

View File

@ -1,336 +0,0 @@
#ifndef VECDECODE_H
#define VECDECODE_H
#if defined(_MSC_VER)
#define ALIGNED(x) __declspec(align(x))
#else
#if defined(__GNUC__)
#define ALIGNED(x) __attribute__((aligned(x)))
#endif
#endif
static uint8_t lengthTable[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
static uint32_t vecDecodeTable[256][8] ALIGNED(16) = {
{0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */
{1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */
{2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */
{1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */
{3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */
{1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */
{2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */
{1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */
{4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */
{1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */
{2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */
{1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */
{3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */
{1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */
{2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */
{1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */
{5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */
{1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */
{2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */
{1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */
{3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */
{1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */
{2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */
{1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */
{4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */
{1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */
{2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */
{1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */
{3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */
{1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */
{2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */
{1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */
{6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */
{1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */
{2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */
{1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */
{3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */
{1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */
{2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */
{1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */
{4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */
{1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */
{2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */
{1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */
{3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */
{1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */
{2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */
{1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */
{5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */
{1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */
{2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */
{1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */
{3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */
{1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */
{2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */
{1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */
{4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */
{1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */
{2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */
{1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */
{3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */
{1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */
{2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */
{1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */
{7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */
{1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */
{2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */
{1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */
{3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */
{1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */
{2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */
{1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */
{4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */
{1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */
{2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */
{1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */
{3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */
{1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */
{2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */
{1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */
{5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */
{1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */
{2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */
{1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */
{3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */
{1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */
{2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */
{1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */
{4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */
{1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */
{2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */
{1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */
{3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */
{1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */
{2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */
{1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */
{6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */
{1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */
{2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */
{1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */
{3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */
{1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */
{2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */
{1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */
{4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */
{1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */
{2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */
{1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */
{3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */
{1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */
{2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */
{1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */
{5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */
{1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */
{2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */
{1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */
{3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */
{1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */
{2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */
{1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */
{4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */
{1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */
{2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */
{1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */
{3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */
{1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */
{2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */
{1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */
{8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */
{1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */
{2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */
{1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */
{3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */
{1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */
{2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */
{1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */
{4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */
{1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */
{2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */
{1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */
{3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */
{1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */
{2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */
{1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */
{5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */
{1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */
{2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */
{1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */
{3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */
{1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */
{2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */
{1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */
{4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */
{1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */
{2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */
{1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */
{3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */
{1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */
{2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */
{1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */
{6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */
{1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */
{2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */
{1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */
{3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */
{1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */
{2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */
{1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */
{4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */
{1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */
{2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */
{1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */
{3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */
{1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */
{2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */
{1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */
{5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */
{1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */
{2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */
{1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */
{3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */
{1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */
{2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */
{1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */
{4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */
{1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */
{2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */
{1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */
{3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */
{1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */
{2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */
{1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */
{7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */
{1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */
{2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */
{1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */
{3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */
{1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */
{2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */
{1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */
{4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */
{1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */
{2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */
{1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */
{3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */
{1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */
{2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */
{1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */
{5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */
{1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */
{2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */
{1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */
{3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */
{1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */
{2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */
{1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */
{4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */
{1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */
{2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */
{1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */
{3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */
{1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */
{2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */
{1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */
{6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */
{1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */
{2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */
{1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */
{3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */
{1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */
{2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */
{1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */
{4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */
{1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */
{2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */
{1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */
{3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */
{1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */
{2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */
{1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */
{5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */
{1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */
{2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */
{1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */
{3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */
{1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */
{2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */
{1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */
{4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */
{1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */
{2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */
{1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */
{3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */
{1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */
{2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */
{1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */
};
// Decode a bitmap into the list of positions of its set bits.
//
// bitmapInPtr: bitmap bytes; bit k of the bitmap is bit (k % 8) of byte k / 8
// bitsin:      number of valid bits in the bitmap
// out:         receives the zero-based indexes of the set bits in increasing
//              order; the vector path always stores 8 values at a time, so
//              the buffer needs room for up to 7 entries beyond the last
//              valid one
// returns the number of indexes written
static size_t bitmap_decode_avx2(uint8_t *bitmapInPtr, size_t bitsin, uint32_t *out) {
  uint32_t *initout = out;
  // vecDecodeTable stores 1-based bit positions, hence the -1 starting base
  __m256i baseVec = _mm256_set1_epi32(-1);
  __m256i incVec = _mm256_set1_epi32(64);
  __m256i add8 = _mm256_set1_epi32(8);
  const size_t sizeinwords = bitsin / 64;
  for (size_t i = 0; i < sizeinwords; ++i) {
    // memcpy instead of a uint64_t* cast: no alignment or aliasing assumptions
    uint64_t w;
    memcpy(&w, bitmapInPtr + i * sizeof(w), sizeof(w));
    if (w == 0) {
      // nothing set in this word: just advance the base by 64 positions
      baseVec = _mm256_add_epi32(baseVec, incVec);
    } else {
      // four rounds of two bytes each cover the 64-bit word
      for (int k = 0; k < 4; ++k) {
        uint8_t byteA = (uint8_t)w;
        uint8_t byteB = (uint8_t)(w >> 8);
        w >>= 16;
        // Unaligned loads: the table is only declared 16-byte aligned, while
        // the aligned 256-bit load requires 32-byte alignment and may fault.
        __m256i vecA =
            _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteA]);
        __m256i vecB =
            _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteB]);
        uint8_t advanceA = lengthTable[byteA];
        uint8_t advanceB = lengthTable[byteB];
        vecA = _mm256_add_epi32(baseVec, vecA);
        baseVec = _mm256_add_epi32(baseVec, add8);
        vecB = _mm256_add_epi32(baseVec, vecB);
        baseVec = _mm256_add_epi32(baseVec, add8);
        // store 8 candidates but advance only by the number of set bits; the
        // next store overwrites the unused slots
        _mm256_storeu_si256((__m256i *)out, vecA);
        out += advanceA;
        _mm256_storeu_si256((__m256i *)out, vecB);
        out += advanceB;
      }
    }
  }
  if ((bitsin % 64) != 0) {
    // finish off the work the slow way.
    uint64_t bitset = 0;
    size_t leftoverbits = bitsin - sizeinwords * 64;
    size_t leftoverbytes = (leftoverbits + 7) / 8;
    memcpy(&bitset, bitmapInPtr + sizeinwords * 8, leftoverbytes);
    while (bitset != 0) {
      uint64_t t = bitset & -bitset; // isolate the lowest set bit
      int r = __builtin_ctzll(bitset);
      *out = (uint32_t)(sizeinwords * 64 + (size_t)r);
      out++;
      bitset ^= t; // clear it
    }
  }
  return out - initout;
}
#endif

View File

@ -1,12 +0,0 @@
#!/bin/bash
# Build the benchmark and run it on every JSON file in ../jsonexamples.
echo "Note: the SIMD parser does a bit more work."
# Absolute, symlink-free directory of this script
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
# Paths are quoted so directories containing spaces work; stop right away if
# the directory change or the build fails instead of running a stale binary.
cd "$SCRIPTPATH" || exit 1
make bench || exit 1
echo
for i in "$SCRIPTPATH"/../jsonexamples/*.json; do
  # an unmatched glob stays literal and is not a file: nothing to run
  [ -f "$i" ] || break
  echo "$i"
  "$SCRIPTPATH/bench" "$i"
  echo
done