Add the high-performance but slightly unprincipled bitmask-to-index code.
This commit is contained in:
parent
e6478e33b3
commit
020109b20c
34
main.cpp
34
main.cpp
|
@ -281,14 +281,31 @@ never_inline bool flatten_indexes(size_t len, ParsedJson & pj) {
|
|||
base_ptr[DUMMY_NODE] = base_ptr[ROOT_NODE] = 0; // really shouldn't matter
|
||||
for (size_t idx = 0; idx < len; idx+=64) {
|
||||
u64 s = *(u64 *)(pj.structurals + idx/8);
|
||||
#ifdef SUPPRESS_CHEESY_FLATTEN
|
||||
while (s) {
|
||||
u32 si = (u32)idx + __builtin_ctzll(s);
|
||||
#ifdef DEBUG
|
||||
cout << "Putting structural index " << si << " at array location " << base << "\n";
|
||||
#endif
|
||||
base_ptr[base++] = si;
|
||||
s &= s - 1ULL;
|
||||
base_ptr[base++] = (u32)idx + __builtin_ctzll(s); s &= s - 1ULL;
|
||||
}
|
||||
#else
|
||||
u32 cnt = __builtin_popcountll(s);
|
||||
u32 next_base = base + cnt;
|
||||
while (s) {
|
||||
// spoil the suspense
|
||||
u64 s3 = _pdep_u64(~0x7ULL, s); // s3 will have bottom 3 1-bits unset
|
||||
u64 s5 = _pdep_u64(~0x1fULL, s); // s5 will have bottom 5 1-bits unset
|
||||
|
||||
base_ptr[base+0] = (u32)idx + __builtin_ctzll(s); u64 s1 = s & (s - 1ULL);
|
||||
base_ptr[base+1] = (u32)idx + __builtin_ctzll(s1); u64 s2 = s1 & (s1 - 1ULL);
|
||||
base_ptr[base+2] = (u32)idx + __builtin_ctzll(s2); //u64 s3 = s2 & (s2 - 1ULL);
|
||||
base_ptr[base+3] = (u32)idx + __builtin_ctzll(s3); u64 s4 = s3 & (s3 - 1ULL);
|
||||
|
||||
base_ptr[base+4] = (u32)idx + __builtin_ctzll(s4); //u64 s5 = s4 & (s4 - 1ULL);
|
||||
base_ptr[base+5] = (u32)idx + __builtin_ctzll(s5); u64 s6 = s5 & (s5 - 1ULL);
|
||||
base_ptr[base+6] = (u32)idx + __builtin_ctzll(s6); u64 s7 = s6 & (s6 - 1ULL);
|
||||
s = s7;
|
||||
base += 7;
|
||||
}
|
||||
base = next_base;
|
||||
#endif
|
||||
}
|
||||
pj.n_structural_indexes = base;
|
||||
return true;
|
||||
|
@ -364,7 +381,10 @@ int main(int argc, char * argv[]) {
|
|||
pj.n_structural_indexes = 0;
|
||||
// we have potentially 1 structure per byte of input
|
||||
// as well as a dummy structure and a root structure
|
||||
u32 max_structures = ROUNDUP_N(p.second, 64) + 2;
|
||||
// we also potentially write up to 7 iterations beyond
|
||||
// in our 'cheesy flatten', so make some worst-case
|
||||
// space for that too
|
||||
u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7;
|
||||
pj.structural_indexes = new u32[max_structures];
|
||||
pj.nodes = new JsonNode[max_structures];
|
||||
|
||||
|
|
Loading…
Reference in New Issue