From 467831e3d8fcc661ba6093496b8de061f321bb52 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 11 Jul 2019 09:40:42 -0400 Subject: [PATCH] Refreshing the single header files. --- singleheader/amalgamation_demo.cpp | 2 +- singleheader/simdjson.cpp | 821 +++++++++++++++++++++++++++++ singleheader/simdjson.h | 74 +-- 3 files changed, 823 insertions(+), 74 deletions(-) create mode 100644 singleheader/simdjson.cpp diff --git a/singleheader/amalgamation_demo.cpp b/singleheader/amalgamation_demo.cpp index e0071a14..2c31db17 100644 --- a/singleheader/amalgamation_demo.cpp +++ b/singleheader/amalgamation_demo.cpp @@ -1,4 +1,4 @@ -/* auto-generated on Tue Jul 9 19:40:16 EDT 2019. Do not edit! */ +/* auto-generated on Thu Jul 11 09:40:22 EDT 2019. Do not edit! */ #include #include "simdjson.h" diff --git a/singleheader/simdjson.cpp b/singleheader/simdjson.cpp new file mode 100644 index 00000000..25f4a6e4 --- /dev/null +++ b/singleheader/simdjson.cpp @@ -0,0 +1,821 @@ +/* auto-generated on Thu Jul 11 09:40:22 EDT 2019. Do not edit! */ +#include "simdjson.h" + +/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */ +#ifdef DMALLOC +#include "dmalloc.h" +#endif + +/* begin file src/simdjson.cpp */ +#include + +namespace simdjson { +const std::map errorStrings = { + {SUCCESS, "No errors"}, + {CAPACITY, "This ParsedJson can't support a document that big"}, + {MEMALLOC, "Error allocating memory, we're most likely out of memory"}, + {TAPE_ERROR, "Something went wrong while writing to the tape"}, + {STRING_ERROR, "Problem while parsing a string"}, + {T_ATOM_ERROR, "Problem while parsing an atom starting with the letter 't'"}, + {F_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'f'"}, + {N_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'n'"}, + {NUMBER_ERROR, "Problem while parsing a number"}, + {UTF8_ERROR, "The input is not valid UTF-8"}, + {UNITIALIZED, "Unitialized"}, + {EMPTY, "Empty"}, + {UNESCAPED_CHARS, "Within strings, some characters must be escapted, we found unescapted characters"}, + {UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson"}, +}; + +const std::string& errorMsg(const int errorCode) { + return errorStrings.at(errorCode); +} +} +/* end file src/simdjson.cpp */ +/* begin file src/jsonioutil.cpp */ +#include +#include + +namespace simdjson { +char * allocate_padded_buffer(size_t length) { + // we could do a simple malloc + //return (char *) malloc(length + SIMDJSON_PADDING); + // However, we might as well align to cache lines... + size_t totalpaddedlength = length + SIMDJSON_PADDING; + char *padded_buffer = aligned_malloc_char(64, totalpaddedlength); + return padded_buffer; +} + +padded_string get_corpus(const std::string& filename) { + std::FILE *fp = std::fopen(filename.c_str(), "rb"); + if (fp != nullptr) { + std::fseek(fp, 0, SEEK_END); + size_t len = std::ftell(fp); + padded_string s(len); + if(s.data() == nullptr) { + std::fclose(fp); + throw std::runtime_error("could not allocate memory"); + } + std::rewind(fp); + size_t readb = std::fread(s.data(), 1, len, fp); + std::fclose(fp); + if(readb != len) { + throw std::runtime_error("could not read the data"); + } + return s; + } + throw std::runtime_error("could not load corpus"); +} +} +/* end file src/jsonioutil.cpp */ +/* begin file src/jsonminifier.cpp */ +#include + +#ifndef __AVX2__ + +namespace simdjson { +static uint8_t jump_table[256 * 3] = { + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, + 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, +}; + +size_t jsonminify(const unsigned char *bytes, size_t howmany, + unsigned char *out) { + size_t i = 0, pos = 0; + uint8_t quote = 0; + uint8_t nonescape = 1; + + while (i < howmany) { + unsigned char c = bytes[i]; + uint8_t *meta = jump_table + 3 * c; + + quote = quote ^ (meta[0] & nonescape); + out[pos] = c; + pos += meta[2] | quote; + + i += 1; + nonescape = (~nonescape) | (meta[1]); + } + return pos; +} +} // namespace simdjson +#else +#include + +namespace simdjson { +// a straightforward comparison of a mask against input. +static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi, + __m256i mask) { + __m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask); + uint64_t res_0 = static_cast(_mm256_movemask_epi8(cmp_res_0)); + __m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask); + uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); + return res_0 | (res_1 << 32); +} + +// take input from buf and remove useless whitespace, input and output can be +// the same, result is null terminated, return the string length (minus the null termination) +size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) { + // Useful constant masks + const uint64_t even_bits = 0x5555555555555555ULL; + const uint64_t odd_bits = ~even_bits; + uint8_t *initout(out); + uint64_t prev_iter_ends_odd_backslash = + 0ULL; // either 0 or 1, but a 64-bit value + uint64_t prev_iter_inside_quote = 0ULL; // either all zeros or all ones + size_t idx = 0; + if (len >= 64) { + size_t avxlen = len - 63; + + for (; idx < avxlen; idx += 64) { + __m256i input_lo = _mm256_loadu_si256(reinterpret_cast(buf + idx + 0)); + __m256i input_hi = _mm256_loadu_si256(reinterpret_cast(buf + idx + 32)); + uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi, + _mm256_set1_epi8('\\')); + uint64_t start_edges = bs_bits & ~(bs_bits << 1); + uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash; + uint64_t even_starts = start_edges & even_start_mask; + uint64_t odd_starts = start_edges & ~even_start_mask; + uint64_t even_carries = bs_bits + even_starts; + uint64_t odd_carries; + bool iter_ends_odd_backslash = add_overflow( + bs_bits, odd_starts, &odd_carries); + odd_carries |= prev_iter_ends_odd_backslash; + prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL; + uint64_t even_carry_ends = even_carries & ~bs_bits; + uint64_t odd_carry_ends = odd_carries & ~bs_bits; + uint64_t even_start_odd_end = even_carry_ends & odd_bits; + uint64_t odd_start_even_end = odd_carry_ends & even_bits; + uint64_t odd_ends = even_start_odd_end | odd_start_even_end; + uint64_t quote_bits = cmp_mask_against_input_mini(input_lo, input_hi, + _mm256_set1_epi8('"')); + quote_bits = quote_bits & ~odd_ends; + uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128( + _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0)); + quote_mask ^= prev_iter_inside_quote; + prev_iter_inside_quote = static_cast(static_cast(quote_mask) >> 63);// might be undefined behavior, should be fully defined in C++20, ok according to John Regher from Utah University + const __m256i low_nibble_mask = _mm256_setr_epi8( + // 0 9 a b c d + 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, + 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); + const __m256i high_nibble_mask = _mm256_setr_epi8( + // 0 2 3 5 7 + 8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0, + 1, 0, 0, 0, 3, 2, 1, 0, 0); + __m256i whitespace_shufti_mask = _mm256_set1_epi8(0x18); + __m256i v_lo = _mm256_and_si256( + _mm256_shuffle_epi8(low_nibble_mask, input_lo), + _mm256_shuffle_epi8(high_nibble_mask, + _mm256_and_si256(_mm256_srli_epi32(input_lo, 4), + _mm256_set1_epi8(0x7f)))); + + __m256i v_hi = _mm256_and_si256( + _mm256_shuffle_epi8(low_nibble_mask, input_hi), + _mm256_shuffle_epi8(high_nibble_mask, + _mm256_and_si256(_mm256_srli_epi32(input_hi, 4), + _mm256_set1_epi8(0x7f)))); + __m256i tmp_ws_lo = _mm256_cmpeq_epi8( + _mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0)); + __m256i tmp_ws_hi = _mm256_cmpeq_epi8( + _mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0)); + + uint64_t ws_res_0 = static_cast(_mm256_movemask_epi8(tmp_ws_lo)); + uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi); + uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32)); + whitespace &= ~quote_mask; + int mask1 = whitespace & 0xFFFF; + int mask2 = (whitespace >> 16) & 0xFFFF; + int mask3 = (whitespace >> 32) & 0xFFFF; + int mask4 = (whitespace >> 48) & 0xFFFF; + int pop1 = hamming((~whitespace) & 0xFFFF); + int pop2 = hamming((~whitespace) & UINT64_C(0xFFFFFFFF)); + int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF)); + int pop4 = hamming((~whitespace)); + __m256i vmask1 = + _mm256_loadu2_m128i(reinterpret_cast(mask128_epi8) + (mask2 & 0x7FFF), + reinterpret_cast(mask128_epi8) + (mask1 & 0x7FFF)); + __m256i vmask2 = + _mm256_loadu2_m128i(reinterpret_cast(mask128_epi8) + (mask4 & 0x7FFF), + reinterpret_cast(mask128_epi8) + (mask3 & 0x7FFF)); + __m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1); + __m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2); + _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop1), reinterpret_cast<__m128i *>(out), result1); + _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop3), reinterpret_cast<__m128i *>(out + pop2), + result2); + out += pop4; + } + } + // we finish off the job... copying and pasting the code is not ideal here, + // but it gets the job done. + if (idx < len) { + uint8_t buffer[64]; + memset(buffer, 0, 64); + memcpy(buffer, buf + idx, len - idx); + __m256i input_lo = _mm256_loadu_si256(reinterpret_cast(buffer)); + __m256i input_hi = _mm256_loadu_si256(reinterpret_cast(buffer + 32)); + uint64_t bs_bits = + cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\')); + uint64_t start_edges = bs_bits & ~(bs_bits << 1); + uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash; + uint64_t even_starts = start_edges & even_start_mask; + uint64_t odd_starts = start_edges & ~even_start_mask; + uint64_t even_carries = bs_bits + even_starts; + uint64_t odd_carries; + //bool iter_ends_odd_backslash = + add_overflow( bs_bits, odd_starts, &odd_carries); + odd_carries |= prev_iter_ends_odd_backslash; + //prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL; // we never use it + uint64_t even_carry_ends = even_carries & ~bs_bits; + uint64_t odd_carry_ends = odd_carries & ~bs_bits; + uint64_t even_start_odd_end = even_carry_ends & odd_bits; + uint64_t odd_start_even_end = odd_carry_ends & even_bits; + uint64_t odd_ends = even_start_odd_end | odd_start_even_end; + uint64_t quote_bits = + cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('"')); + quote_bits = quote_bits & ~odd_ends; + uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128( + _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0)); + quote_mask ^= prev_iter_inside_quote; + // prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// we don't need this anymore + + __m256i mask_20 = _mm256_set1_epi8(0x20); // c==32 + __m256i mask_70 = + _mm256_set1_epi8(0x70); // adding 0x70 does not check low 4-bits + // but moves any value >= 16 above 128 + + __m256i lut_cntrl = _mm256_setr_epi8( + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00, + 0x00, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00); + + __m256i tmp_ws_lo = _mm256_or_si256( + _mm256_cmpeq_epi8(mask_20, input_lo), + _mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_lo))); + __m256i tmp_ws_hi = _mm256_or_si256( + _mm256_cmpeq_epi8(mask_20, input_hi), + _mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_hi))); + uint64_t ws_res_0 = static_cast(_mm256_movemask_epi8(tmp_ws_lo)); + uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi); + uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32)); + whitespace &= ~quote_mask; + + if (len - idx < 64) { + whitespace |= UINT64_C(0xFFFFFFFFFFFFFFFF) << (len - idx); + } + int mask1 = whitespace & 0xFFFF; + int mask2 = (whitespace >> 16) & 0xFFFF; + int mask3 = (whitespace >> 32) & 0xFFFF; + int mask4 = (whitespace >> 48) & 0xFFFF; + int pop1 = hamming((~whitespace) & 0xFFFF); + int pop2 = hamming((~whitespace) & UINT64_C(0xFFFFFFFF)); + int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF)); + int pop4 = hamming((~whitespace)); + __m256i vmask1 = + _mm256_loadu2_m128i(reinterpret_cast(mask128_epi8) + (mask2 & 0x7FFF), + reinterpret_cast(mask128_epi8) + (mask1 & 0x7FFF)); + __m256i vmask2 = + _mm256_loadu2_m128i(reinterpret_cast(mask128_epi8) + (mask4 & 0x7FFF), + reinterpret_cast(mask128_epi8) + (mask3 & 0x7FFF)); + __m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1); + __m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2); + _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop1), reinterpret_cast<__m128i *>(buffer), + result1); + _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop3), reinterpret_cast<__m128i *>(buffer + pop2), + result2); + memcpy(out, buffer, pop4); + out += pop4; + } + *out = '\0';// NULL termination + return out - initout; +} +} +#endif +/* end file src/jsonminifier.cpp */ +/* begin file src/jsonparser.cpp */ +#ifdef _MSC_VER +#include +#include +#else +#include +#endif + +namespace simdjson { +// Responsible to select the best json_parse implementation +int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded) { + // Versions for each implementation +#ifdef __AVX2__ + json_parse_functype* avx_implementation = &json_parse_implementation; +#endif +#if defined(__SSE4_2__) || (defined(_MSC_VER) && defined(_M_AMD64)) + json_parse_functype* sse4_2_implementation = &json_parse_implementation; +#endif +#if defined(__ARM_NEON) || (defined(_MSC_VER) && defined(_M_ARM64)) + json_parse_functype* neon_implementation = &json_parse_implementation; +#endif + + // Determining which implementation is the more suitable + // Should be done at runtime. Does not make any sense on preprocessor. +#ifdef __AVX2__ + instruction_set best_implementation = instruction_set::avx2; +#elif defined (__SSE4_2__) || (defined(_MSC_VER) && defined(_M_AMD64)) + instruction_set best_implementation = instruction_set::sse4_2; +#elif defined (__ARM_NEON) || (defined(_MSC_VER) && defined(_M_ARM64)) + instruction_set best_implementation = instruction_set::neon; +#else + instruction_set best_implementation = instruction_set::none; +#endif + + // Selecting the best implementation + switch (best_implementation) { +#ifdef __AVX2__ + case instruction_set::avx2 : + json_parse_ptr = avx_implementation; + break; +#endif +#if defined(__SSE4_2__) || (defined(_MSC_VER) && defined(_M_AMD64)) + case instruction_set::sse4_2 : + json_parse_ptr = sse4_2_implementation; + break; +#endif +#if defined(__ARM_NEON) || (defined(_MSC_VER) && defined(_M_ARM64)) + case instruction_set::neon : + json_parse_ptr = neon_implementation; + break; +#endif + default : + std::cerr << "No implemented simd instruction set supported" << std::endl; + return simdjson::UNEXPECTED_ERROR; + } + + return json_parse_ptr(buf, len, pj, reallocifneeded); +} + +json_parse_functype *json_parse_ptr = &json_parse_dispatch; + +WARN_UNUSED +ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneeded) { + ParsedJson pj; + bool ok = pj.allocateCapacity(len); + if(ok) { + json_parse(buf, len, pj, reallocifneeded); + } else { + std::cerr << "failure during memory allocation " << std::endl; + } + return pj; +} +} +/* end file src/jsonparser.cpp */ +/* begin file src/stage1_find_marks.cpp */ +// File kept in case we want to reuse it soon. (many configuration files to edit) +/* end file src/stage1_find_marks.cpp */ +/* begin file src/stage2_build_tape.cpp */ +// File kept in case we want to reuse it soon. (many configuration files to edit) +/* end file src/stage2_build_tape.cpp */ +/* begin file src/parsedjson.cpp */ + +namespace simdjson { +ParsedJson::ParsedJson() : + structural_indexes(nullptr), tape(nullptr), containing_scope_offset(nullptr), + ret_address(nullptr), string_buf(nullptr), current_string_buf_loc(nullptr) {} + +ParsedJson::~ParsedJson() { + deallocate(); +} + +ParsedJson::ParsedJson(ParsedJson && p) + : bytecapacity(p.bytecapacity), + depthcapacity(p.depthcapacity), + tapecapacity(p.tapecapacity), + stringcapacity(p.stringcapacity), + current_loc(p.current_loc), + n_structural_indexes(p.n_structural_indexes), + structural_indexes(p.structural_indexes), + tape(p.tape), + containing_scope_offset(p.containing_scope_offset), + ret_address(p.ret_address), + string_buf(p.string_buf), + current_string_buf_loc(p.current_string_buf_loc), + isvalid(p.isvalid) { + p.structural_indexes=nullptr; + p.tape=nullptr; + p.containing_scope_offset=nullptr; + p.ret_address=nullptr; + p.string_buf=nullptr; + p.current_string_buf_loc=nullptr; + } + + + +WARN_UNUSED +bool ParsedJson::allocateCapacity(size_t len, size_t maxdepth) { + if ((maxdepth == 0) || (len == 0)) { + std::cerr << "capacities must be non-zero " << std::endl; + return false; + } + if(len > SIMDJSON_MAXSIZE_BYTES) { + return false; + } + if ((len <= bytecapacity) && (depthcapacity < maxdepth)) { + return true; + } + deallocate(); + isvalid = false; + bytecapacity = 0; // will only set it to len after allocations are a success + n_structural_indexes = 0; + uint32_t max_structures = ROUNDUP_N(len, 64) + 2 + 7; + structural_indexes = new (std::nothrow) uint32_t[max_structures]; + // a pathological input like "[[[[..." would generate len tape elements, so need a capacity of len + 1 + size_t localtapecapacity = ROUNDUP_N(len + 1, 64); + // a document with only zero-length strings... could have len/3 string + // and we would need len/3 * 5 bytes on the string buffer + size_t localstringcapacity = ROUNDUP_N(5 * len / 3 + 32, 64); + string_buf = new (std::nothrow) uint8_t[localstringcapacity]; + tape = new (std::nothrow) uint64_t[localtapecapacity]; + containing_scope_offset = new (std::nothrow) uint32_t[maxdepth]; +#ifdef SIMDJSON_USE_COMPUTED_GOTO + ret_address = new (std::nothrow) void *[maxdepth]; +#else + ret_address = new (std::nothrow) char[maxdepth]; +#endif + if ((string_buf == nullptr) || (tape == nullptr) || + (containing_scope_offset == nullptr) || (ret_address == nullptr) || (structural_indexes == nullptr)) { + std::cerr << "Could not allocate memory" << std::endl; + delete[] ret_address; + delete[] containing_scope_offset; + delete[] tape; + delete[] string_buf; + delete[] structural_indexes; + + return false; + } + /* + // We do not need to initialize this content for parsing, though we could + // need to initialize it for safety. + memset(string_buf, 0 , localstringcapacity); + memset(structural_indexes, 0, max_structures * sizeof(uint32_t)); + memset(tape, 0, localtapecapacity * sizeof(uint64_t)); + */ + bytecapacity = len; + depthcapacity = maxdepth; + tapecapacity = localtapecapacity; + stringcapacity = localstringcapacity; + return true; +} + +bool ParsedJson::isValid() const { + return isvalid; +} + +int ParsedJson::getErrorCode() const { + return errorcode; +} + +std::string ParsedJson::getErrorMsg() const { + return errorMsg(errorcode); +} + +void ParsedJson::deallocate() { + bytecapacity = 0; + depthcapacity = 0; + tapecapacity = 0; + stringcapacity = 0; + delete[] ret_address; + delete[] containing_scope_offset; + delete[] tape; + delete[] string_buf; + delete[] structural_indexes; + isvalid = false; +} + +void ParsedJson::init() { + current_string_buf_loc = string_buf; + current_loc = 0; + isvalid = false; +} + +WARN_UNUSED +bool ParsedJson::printjson(std::ostream &os) { + if(!isvalid) { + return false; + } + uint32_t string_length; + size_t tapeidx = 0; + uint64_t tape_val = tape[tapeidx]; + uint8_t type = (tape_val >> 56); + size_t howmany = 0; + if (type == 'r') { + howmany = tape_val & JSONVALUEMASK; + } else { + fprintf(stderr, "Error: no starting root node?"); + return false; + } + if (howmany > tapecapacity) { + fprintf(stderr, + "We may be exceeding the tape capacity. Is this a valid document?\n"); + return false; + } + tapeidx++; + bool *inobject = new bool[depthcapacity]; + auto *inobjectidx = new size_t[depthcapacity]; + int depth = 1; // only root at level 0 + inobjectidx[depth] = 0; + inobject[depth] = false; + for (; tapeidx < howmany; tapeidx++) { + tape_val = tape[tapeidx]; + uint64_t payload = tape_val & JSONVALUEMASK; + type = (tape_val >> 56); + if (!inobject[depth]) { + if ((inobjectidx[depth] > 0) && (type != ']')) { + os << ","; + } + inobjectidx[depth]++; + } else { // if (inobject) { + if ((inobjectidx[depth] > 0) && ((inobjectidx[depth] & 1) == 0) && + (type != '}')) { + os << ","; + } + if (((inobjectidx[depth] & 1) == 1)) { + os << ":"; + } + inobjectidx[depth]++; + } + switch (type) { + case '"': // we have a string + os << '"'; + memcpy(&string_length,string_buf + payload, sizeof(uint32_t)); + print_with_escapes((const unsigned char *)(string_buf + payload + sizeof(uint32_t)), string_length); + os << '"'; + break; + case 'l': // we have a long int + if (tapeidx + 1 >= howmany) { + delete[] inobject; + delete[] inobjectidx; + return false; + } + os << static_cast(tape[++tapeidx]); + break; + case 'd': // we have a double + if (tapeidx + 1 >= howmany){ + delete[] inobject; + delete[] inobjectidx; + return false; + } + double answer; + memcpy(&answer, &tape[++tapeidx], sizeof(answer)); + os << answer; + break; + case 'n': // we have a null + os << "null"; + break; + case 't': // we have a true + os << "true"; + break; + case 'f': // we have a false + os << "false"; + break; + case '{': // we have an object + os << '{'; + depth++; + inobject[depth] = true; + inobjectidx[depth] = 0; + break; + case '}': // we end an object + depth--; + os << '}'; + break; + case '[': // we start an array + os << '['; + depth++; + inobject[depth] = false; + inobjectidx[depth] = 0; + break; + case ']': // we end an array + depth--; + os << ']'; + break; + case 'r': // we start and end with the root node + fprintf(stderr, "should we be hitting the root node?\n"); + delete[] inobject; + delete[] inobjectidx; + return false; + default: + fprintf(stderr, "bug %c\n", type); + delete[] inobject; + delete[] inobjectidx; + return false; + } + } + delete[] inobject; + delete[] inobjectidx; + return true; +} + +WARN_UNUSED +bool ParsedJson::dump_raw_tape(std::ostream &os) { + if(!isvalid) { + return false; + } + uint32_t string_length; + size_t tapeidx = 0; + uint64_t tape_val = tape[tapeidx]; + uint8_t type = (tape_val >> 56); + os << tapeidx << " : " << type; + tapeidx++; + size_t howmany = 0; + if (type == 'r') { + howmany = tape_val & JSONVALUEMASK; + } else { + fprintf(stderr, "Error: no starting root node?"); + return false; + } + os << "\t// pointing to " << howmany <<" (right after last node)\n"; + uint64_t payload; + for (; tapeidx < howmany; tapeidx++) { + os << tapeidx << " : "; + tape_val = tape[tapeidx]; + payload = tape_val & JSONVALUEMASK; + type = (tape_val >> 56); + switch (type) { + case '"': // we have a string + os << "string \""; + memcpy(&string_length,string_buf + payload, sizeof(uint32_t)); + print_with_escapes((const unsigned char *)(string_buf + payload + sizeof(uint32_t)), string_length); + os << '"'; + os << '\n'; + break; + case 'l': // we have a long int + if (tapeidx + 1 >= howmany) { + return false; + } + os << "integer " << static_cast(tape[++tapeidx]) << "\n"; + break; + case 'd': // we have a double + os << "float "; + if (tapeidx + 1 >= howmany) { + return false; + } + double answer; + memcpy(&answer, &tape[++tapeidx], sizeof(answer)); + os << answer << '\n'; + break; + case 'n': // we have a null + os << "null\n"; + break; + case 't': // we have a true + os << "true\n"; + break; + case 'f': // we have a false + os << "false\n"; + break; + case '{': // we have an object + os << "{\t// pointing to next tape location " << payload << " (first node after the scope) \n"; + break; + case '}': // we end an object + os << "}\t// pointing to previous tape location " << payload << " (start of the scope) \n"; + break; + case '[': // we start an array + os << "[\t// pointing to next tape location " << payload << " (first node after the scope) \n"; + break; + case ']': // we end an array + os << "]\t// pointing to previous tape location " << payload << " (start of the scope) \n"; + break; + case 'r': // we start and end with the root node + printf("end of root\n"); + return false; + default: + return false; + } + } + tape_val = tape[tapeidx]; + payload = tape_val & JSONVALUEMASK; + type = (tape_val >> 56); + os << tapeidx << " : "<< type <<"\t// pointing to " << payload <<" (start root)\n"; + return true; +} +} +/* end file src/parsedjson.cpp */ +/* begin file src/parsedjsoniterator.cpp */ +#include + +namespace simdjson { +ParsedJson::iterator::iterator(ParsedJson &pj_) : pj(pj_), depth(0), location(0), tape_length(0), depthindex(nullptr) { + if(!pj.isValid()) { + throw InvalidJSON(); + } + depthindex = new scopeindex_t[pj.depthcapacity]; + // memory allocation would throw + //if(depthindex == nullptr) { + // return; + //} + depthindex[0].start_of_scope = location; + current_val = pj.tape[location++]; + current_type = (current_val >> 56); + depthindex[0].scope_type = current_type; + if (current_type == 'r') { + tape_length = current_val & JSONVALUEMASK; + if(location < tape_length) { + current_val = pj.tape[location]; + current_type = (current_val >> 56); + depth++; + depthindex[depth].start_of_scope = location; + depthindex[depth].scope_type = current_type; + } + } else { + // should never happen + throw InvalidJSON(); + } +} + +ParsedJson::iterator::~iterator() { + delete[] depthindex; +} + +ParsedJson::iterator::iterator(const iterator &o): + pj(o.pj), depth(o.depth), location(o.location), + tape_length(0), current_type(o.current_type), + current_val(o.current_val), depthindex(nullptr) { + depthindex = new scopeindex_t[pj.depthcapacity]; + // allocation might throw + memcpy(depthindex, o.depthindex, pj.depthcapacity * sizeof(depthindex[0])); + tape_length = o.tape_length; +} + +ParsedJson::iterator::iterator(iterator &&o): + pj(o.pj), depth(o.depth), location(o.location), + tape_length(o.tape_length), current_type(o.current_type), + current_val(o.current_val), depthindex(o.depthindex) { + o.depthindex = nullptr;// we take ownership +} + +bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const { + if(!isOk()) { + return false; + } + switch (current_type) { + case '"': // we have a string + os << '"'; + if(escape_strings) { + print_with_escapes(get_string(), os, get_string_length()); + } else { + // was: os << get_string();, but given that we can include null chars, we have to do something crazier: + std::copy(get_string(), get_string() + get_string_length(), std::ostream_iterator(os)); + } + os << '"'; + break; + case 'l': // we have a long int + os << get_integer(); + break; + case 'd': + os << get_double(); + break; + case 'n': // we have a null + os << "null"; + break; + case 't': // we have a true + os << "true"; + break; + case 'f': // we have a false + os << "false"; + break; + case '{': // we have an object + case '}': // we end an object + case '[': // we start an array + case ']': // we end an array + os << static_cast(current_type); + break; + default: + return false; + } + return true; +} +} +/* end file src/parsedjsoniterator.cpp */ diff --git a/singleheader/simdjson.h b/singleheader/simdjson.h index 2c5acd23..50f27a9f 100644 --- a/singleheader/simdjson.h +++ b/singleheader/simdjson.h @@ -1,4 +1,4 @@ -/* auto-generated on Tue Jul 9 19:40:16 EDT 2019. Do not edit! */ +/* auto-generated on Thu Jul 11 09:40:22 EDT 2019. Do not edit! */ /* begin file include/simdjson/simdjson_version.h */ // /include/simdjson/simdjson_version.h automatically generated by release.py, do not change by hand #ifndef SIMDJSON_INCLUDE_SIMDJSON_VERSION @@ -37767,7 +37767,6 @@ void find_whitespace_and_structurals( simd_input in, uint64_t &whitespace, uint64_t &structurals) { -#ifndef FUNKY_BAD_TABLE const uint8x16_t low_nibble_mask = (uint8x16_t){ 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0}; const uint8x16_t high_nibble_mask = (uint8x16_t){ @@ -37811,77 +37810,6 @@ void find_whitespace_and_structurals( uint8x16_t tmp_ws_2 = vtstq_u8(v_2, whitespace_shufti_mask); uint8x16_t tmp_ws_3 = vtstq_u8(v_3, whitespace_shufti_mask); whitespace = neonmovemask_bulk(tmp_ws_0, tmp_ws_1, tmp_ws_2, tmp_ws_3); -#else - // I think this one is garbage. In order to save the expense - // of another shuffle, I use an equally expensive shift, and - // this gets glued to the end of the dependency chain. Seems a bit - // slower for no good reason. - // - // need to use a weird arrangement. Bytes in this bitvector - // are in conventional order, but bits are reversed as we are - // using a signed left shift (that is a +ve value from 0..7) to - // shift upwards to 0x80 in the bit. So we need to reverse bits. - - // note no structural/whitespace has the high bit on - // so it's OK to put the high 5 bits into our TBL shuffle - // - - // structurals are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c - // or in 5 bit, 3 bit form thats - // (15,3) (15, 5) (7,2) (11,3) (11,5) (5,4) - // bit-reversing (subtract low 3 bits from 7) yields: - // (15,4) (15, 2) (7,5) (11,4) (11,2) (5,3) - - const uint8x16_t structural_bitvec = (uint8x16_t){ - 0, 0, 0, 0, - 0, 8, 0, 32, - 0, 0, 0, 20, - 0, 0, 0, 20}; - // we are also interested in the four whitespace characters - // space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d - // (4,0) (1, 2) (1, 1) (1, 5) - // bit-reversing (subtract low 3 bits from 7) yields: - // (4,7) (1, 5) (1, 6) (1, 2) - - const uint8x16_t whitespace_bitvec = (uint8x16_t){ - 0, 100, 0, 0, - 128, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0}; - const uint8x16_t low_3bits_and_mask = vmovq_n_u8(0x7); - const uint8x16_t high_1bit_tst_mask = vmovq_n_u8(0x80); - - int8x16_t low_3bits_0 = vreinterpretq_s8_u8(vandq_u8(in.i0, low_3bits_and_mask)); - uint8x16_t high_5bits_0 = vshrq_n_u8(in.i0, 3); - uint8x16_t shuffle_structural_0 = vshlq_u8(vqtbl1q_u8(structural_bitvec, high_5bits_0), low_3bits_0); - uint8x16_t shuffle_ws_0 = vshlq_u8(vqtbl1q_u8(whitespace_bitvec, high_5bits_0), low_3bits_0); - uint8x16_t tmp_0 = vtstq_u8(shuffle_structural_0, high_1bit_tst_mask); - uint8x16_t tmp_ws_0 = vtstq_u8(shuffle_ws_0, high_1bit_tst_mask); - - int8x16_t low_3bits_1 = vreinterpretq_s8_u8(vandq_u8(in.i1, low_3bits_and_mask)); - uint8x16_t high_5bits_1 = vshrq_n_u8(in.i1, 3); - uint8x16_t shuffle_structural_1 = vshlq_u8(vqtbl1q_u8(structural_bitvec, high_5bits_1), low_3bits_1); - uint8x16_t shuffle_ws_1 = vshlq_u8(vqtbl1q_u8(whitespace_bitvec, high_5bits_1), low_3bits_1); - uint8x16_t tmp_1 = vtstq_u8(shuffle_structural_1, high_1bit_tst_mask); - uint8x16_t tmp_ws_1 = vtstq_u8(shuffle_ws_1, high_1bit_tst_mask); - - int8x16_t low_3bits_2 = vreinterpretq_s8_u8(vandq_u8(in.i2, low_3bits_and_mask)); - uint8x16_t high_5bits_2 = vshrq_n_u8(in.i2, 3); - uint8x16_t shuffle_structural_2 = vshlq_u8(vqtbl1q_u8(structural_bitvec, high_5bits_2), low_3bits_2); - uint8x16_t shuffle_ws_2 = vshlq_u8(vqtbl1q_u8(whitespace_bitvec, high_5bits_2), low_3bits_2); - uint8x16_t tmp_2 = vtstq_u8(shuffle_structural_2, high_1bit_tst_mask); - uint8x16_t tmp_ws_2 = vtstq_u8(shuffle_ws_2, high_1bit_tst_mask); - - int8x16_t low_3bits_3 = vreinterpretq_s8_u8(vandq_u8(in.i3, low_3bits_and_mask)); - uint8x16_t high_5bits_3 = vshrq_n_u8(in.i3, 3); - uint8x16_t shuffle_structural_3 = vshlq_u8(vqtbl1q_u8(structural_bitvec, high_5bits_3), low_3bits_3); - uint8x16_t shuffle_ws_3 = vshlq_u8(vqtbl1q_u8(whitespace_bitvec, high_5bits_3), low_3bits_3); - uint8x16_t tmp_3 = vtstq_u8(shuffle_structural_3, high_1bit_tst_mask); - uint8x16_t tmp_ws_3 = vtstq_u8(shuffle_ws_3, high_1bit_tst_mask); - - structurals = neonmovemask_bulk(tmp_0, tmp_1, tmp_2, tmp_3); - whitespace = neonmovemask_bulk(tmp_ws_0, tmp_ws_1, tmp_ws_2, tmp_ws_3); -#endif // FUNKY_BAD_TABLE } #endif // __ARM_NEON