Add the high-performance but slightly unprincipled bitmask-to-index code.

This commit is contained in:
Geoff Langdale 2018-04-06 13:51:52 +10:00
parent e6478e33b3
commit 020109b20c
1 changed files with 27 additions and 7 deletions

View File

@ -281,14 +281,31 @@ never_inline bool flatten_indexes(size_t len, ParsedJson & pj) {
base_ptr[DUMMY_NODE] = base_ptr[ROOT_NODE] = 0; // really shouldn't matter
for (size_t idx = 0; idx < len; idx+=64) {
u64 s = *(u64 *)(pj.structurals + idx/8);
#ifdef SUPPRESS_CHEESY_FLATTEN
while (s) {
u32 si = (u32)idx + __builtin_ctzll(s);
#ifdef DEBUG
cout << "Putting structural index " << si << " at array location " << base << "\n";
#endif
base_ptr[base++] = si;
s &= s - 1ULL;
base_ptr[base++] = (u32)idx + __builtin_ctzll(s); s &= s - 1ULL;
}
#else
u32 cnt = __builtin_popcountll(s);
u32 next_base = base + cnt;
while (s) {
// spoil the suspense
u64 s3 = _pdep_u64(~0x7ULL, s); // s3 will have bottom 3 1-bits unset
u64 s5 = _pdep_u64(~0x1fULL, s); // s5 will have bottom 5 1-bits unset
base_ptr[base+0] = (u32)idx + __builtin_ctzll(s); u64 s1 = s & (s - 1ULL);
base_ptr[base+1] = (u32)idx + __builtin_ctzll(s1); u64 s2 = s1 & (s1 - 1ULL);
base_ptr[base+2] = (u32)idx + __builtin_ctzll(s2); //u64 s3 = s2 & (s2 - 1ULL);
base_ptr[base+3] = (u32)idx + __builtin_ctzll(s3); u64 s4 = s3 & (s3 - 1ULL);
base_ptr[base+4] = (u32)idx + __builtin_ctzll(s4); //u64 s5 = s4 & (s4 - 1ULL);
base_ptr[base+5] = (u32)idx + __builtin_ctzll(s5); u64 s6 = s5 & (s5 - 1ULL);
base_ptr[base+6] = (u32)idx + __builtin_ctzll(s6); u64 s7 = s6 & (s6 - 1ULL);
s = s7;
base += 7;
}
base = next_base;
#endif
}
pj.n_structural_indexes = base;
return true;
@ -364,7 +381,10 @@ int main(int argc, char * argv[]) {
pj.n_structural_indexes = 0;
// we have potentially 1 structure per byte of input
// as well as a dummy structure and a root structure
u32 max_structures = ROUNDUP_N(p.second, 64) + 2;
// we also potentially write up to 7 entries beyond the end
// in our 'cheesy flatten', so make some worst-case
// space for that too
u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7;
pj.structural_indexes = new u32[max_structures];
pj.nodes = new JsonNode[max_structures];