First release (0.0.1)

This commit is contained in:
Daniel Lemire 2019-02-26 10:14:49 -05:00
parent 2228c7c29d
commit a24e701b4e
6 changed files with 433 additions and 372 deletions

View File

@ -24,6 +24,7 @@ $SCRIPTPATH/src/parsedjsoniterator.cpp
# order matters # order matters
ALLCHEADERS=" ALLCHEADERS="
$SCRIPTPATH/include/simdjson/simdjson_version.h
$SCRIPTPATH/include/simdjson/portability.h $SCRIPTPATH/include/simdjson/portability.h
$SCRIPTPATH/include/simdjson/common_defs.h $SCRIPTPATH/include/simdjson/common_defs.h
$SCRIPTPATH/include/simdjson/jsoncharutils.h $SCRIPTPATH/include/simdjson/jsoncharutils.h

View File

@ -0,0 +1,10 @@
// /include/simdjson/simdjson_version.h automatically generated by release.py, do not change by hand
#ifndef SIMDJSON_INCLUDE_SIMDJSON_VERSION
#define SIMDJSON_INCLUDE_SIMDJSON_VERSION
// Full library version as a string literal, "major.minor.revision".
// The earlier definition `= 0.0.1,` made the macro expand to the tokens
// `= 0.0.1,` (the '=' and the trailing ',' are part of a macro's replacement
// list, and 0.0.1 is not a valid numeric literal), so it could not be used in
// any expression.
// NOTE(review): this header is generated by release.py; the template there
// presumably needs the same correction or the next release will reintroduce
// the malformed definition -- confirm against the generator.
#define SIMDJSON_VERSION "0.0.1"
// Individual version components as integral constants, suitable for
// compile-time comparisons.
enum {
  SIMDJSON_VERSION_MAJOR = 0,
  SIMDJSON_VERSION_MINOR = 0,
  SIMDJSON_VERSION_REVISION = 1
};
#endif // SIMDJSON_INCLUDE_SIMDJSON_VERSION

View File

@ -1,4 +1,4 @@
/* auto-generated on Tue 26 Feb 13:29:52 AEDT 2019. Do not edit! */ /* auto-generated on Tue 26 Feb 2019 10:14:31 EST. Do not edit! */
#include <iostream> #include <iostream>
#include "simdjson.h" #include "simdjson.h"

View File

@ -1,4 +1,4 @@
/* auto-generated on Tue 26 Feb 13:29:52 AEDT 2019. Do not edit! */ /* auto-generated on Tue 26 Feb 2019 10:14:31 EST. Do not edit! */
#include "simdjson.h" #include "simdjson.h"
/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */ /* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */
@ -6,9 +6,9 @@
#include "dmalloc.h" #include "dmalloc.h"
#endif #endif
/* begin file /home/geoff/git/simdjson/src/jsonioutil.cpp */ /* begin file /Users/lemire/CVS/github/simdjson/src/jsonioutil.cpp */
#include <cstring> #include <cstring>
#include <stdlib.h> #include <cstdlib>
char * allocate_padded_buffer(size_t length) { char * allocate_padded_buffer(size_t length) {
// we could do a simple malloc // we could do a simple malloc
@ -21,18 +21,19 @@ char * allocate_padded_buffer(size_t length) {
#elif defined(__MINGW32__) || defined(__MINGW64__) #elif defined(__MINGW32__) || defined(__MINGW64__)
padded_buffer = __mingw_aligned_malloc(totalpaddedlength, 64); padded_buffer = __mingw_aligned_malloc(totalpaddedlength, 64);
#else #else
if (posix_memalign((void **)&padded_buffer, 64, totalpaddedlength) != 0) return NULL; if (posix_memalign(reinterpret_cast<void **>(&padded_buffer), 64, totalpaddedlength) != 0) { return nullptr;
}
#endif #endif
return padded_buffer; return padded_buffer;
} }
std::string_view get_corpus(std::string filename) { std::string_view get_corpus(const std::string& filename) {
std::FILE *fp = std::fopen(filename.c_str(), "rb"); std::FILE *fp = std::fopen(filename.c_str(), "rb");
if (fp) { if (fp != nullptr) {
std::fseek(fp, 0, SEEK_END); std::fseek(fp, 0, SEEK_END);
size_t len = std::ftell(fp); size_t len = std::ftell(fp);
char * buf = allocate_padded_buffer(len); char * buf = allocate_padded_buffer(len);
if(buf == NULL) { if(buf == nullptr) {
std::fclose(fp); std::fclose(fp);
throw std::runtime_error("could not allocate memory"); throw std::runtime_error("could not allocate memory");
} }
@ -47,8 +48,8 @@ std::string_view get_corpus(std::string filename) {
} }
throw std::runtime_error("could not load corpus"); throw std::runtime_error("could not load corpus");
} }
/* end file /home/geoff/git/simdjson/src/jsonioutil.cpp */ /* end file /Users/lemire/CVS/github/simdjson/src/jsonioutil.cpp */
/* begin file /home/geoff/git/simdjson/src/jsonminifier.cpp */ /* begin file /Users/lemire/CVS/github/simdjson/src/jsonminifier.cpp */
#include <cstdint> #include <cstdint>
#ifndef __AVX2__ #ifndef __AVX2__
@ -115,7 +116,7 @@ size_t jsonminify(const unsigned char *bytes, size_t howmany,
static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi, static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,
__m256i mask) { __m256i mask) {
__m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask); __m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
uint64_t res_0 = (uint32_t)_mm256_movemask_epi8(cmp_res_0); uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
__m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask); __m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
return res_0 | (res_1 << 32); return res_0 | (res_1 << 32);
@ -136,8 +137,8 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
size_t avxlen = len - 63; size_t avxlen = len - 63;
for (; idx < avxlen; idx += 64) { for (; idx < avxlen; idx += 64) {
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buf + idx + 0)); __m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 0));
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buf + idx + 32)); __m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 32));
uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi, uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi,
_mm256_set1_epi8('\\')); _mm256_set1_epi8('\\'));
uint64_t start_edges = bs_bits & ~(bs_bits << 1); uint64_t start_edges = bs_bits & ~(bs_bits << 1);
@ -161,7 +162,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128( uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0)); _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
quote_mask ^= prev_iter_inside_quote; quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// might be undefined behavior, should be fully defined in C++20, ok according to John Regher from Utah University prev_iter_inside_quote = static_cast<uint64_t>(static_cast<int64_t>(quote_mask) >> 63);// might be undefined behavior, should be fully defined in C++20, ok according to John Regher from Utah University
const __m256i low_nibble_mask = _mm256_setr_epi8( const __m256i low_nibble_mask = _mm256_setr_epi8(
// 0 9 a b c d // 0 9 a b c d
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
@ -187,7 +188,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
__m256i tmp_ws_hi = _mm256_cmpeq_epi8( __m256i tmp_ws_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0)); _mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo); uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi); uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32)); uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
whitespace &= ~quote_mask; whitespace &= ~quote_mask;
@ -200,15 +201,15 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF)); int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
int pop4 = hamming((~whitespace)); int pop4 = hamming((~whitespace));
__m256i vmask1 = __m256i vmask1 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF), _mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask2 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF)); reinterpret_cast<const __m128i *>(mask128_epi8) + (mask1 & 0x7FFF));
__m256i vmask2 = __m256i vmask2 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF), _mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask4 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF)); reinterpret_cast<const __m128i *>(mask128_epi8) + (mask3 & 0x7FFF));
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1); __m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2); __m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
_mm256_storeu2_m128i((__m128i *)(out + pop1), (__m128i *)out, result1); _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop1), reinterpret_cast<__m128i *>(out), result1);
_mm256_storeu2_m128i((__m128i *)(out + pop3), (__m128i *)(out + pop2), _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop3), reinterpret_cast<__m128i *>(out + pop2),
result2); result2);
out += pop4; out += pop4;
} }
@ -219,8 +220,8 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
uint8_t buffer[64]; uint8_t buffer[64];
memset(buffer, 0, 64); memset(buffer, 0, 64);
memcpy(buffer, buf + idx, len - idx); memcpy(buffer, buf + idx, len - idx);
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buffer)); __m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buffer));
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buffer + 32)); __m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buffer + 32));
uint64_t bs_bits = uint64_t bs_bits =
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\')); cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\'));
uint64_t start_edges = bs_bits & ~(bs_bits << 1); uint64_t start_edges = bs_bits & ~(bs_bits << 1);
@ -262,7 +263,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
__m256i tmp_ws_hi = _mm256_or_si256( __m256i tmp_ws_hi = _mm256_or_si256(
_mm256_cmpeq_epi8(mask_20, input_hi), _mm256_cmpeq_epi8(mask_20, input_hi),
_mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_hi))); _mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_hi)));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo); uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi); uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32)); uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32));
whitespace &= ~quote_mask; whitespace &= ~quote_mask;
@ -279,16 +280,16 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF)); int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
int pop4 = hamming((~whitespace)); int pop4 = hamming((~whitespace));
__m256i vmask1 = __m256i vmask1 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF), _mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask2 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF)); reinterpret_cast<const __m128i *>(mask128_epi8) + (mask1 & 0x7FFF));
__m256i vmask2 = __m256i vmask2 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF), _mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask4 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF)); reinterpret_cast<const __m128i *>(mask128_epi8) + (mask3 & 0x7FFF));
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1); __m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2); __m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
_mm256_storeu2_m128i((__m128i *)(buffer + pop1), (__m128i *)buffer, _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop1), reinterpret_cast<__m128i *>(buffer),
result1); result1);
_mm256_storeu2_m128i((__m128i *)(buffer + pop3), (__m128i *)(buffer + pop2), _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop3), reinterpret_cast<__m128i *>(buffer + pop2),
result2); result2);
memcpy(out, buffer, pop4); memcpy(out, buffer, pop4);
out += pop4; out += pop4;
@ -298,8 +299,8 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
} }
#endif #endif
/* end file /home/geoff/git/simdjson/src/jsonminifier.cpp */ /* end file /Users/lemire/CVS/github/simdjson/src/jsonminifier.cpp */
/* begin file /home/geoff/git/simdjson/src/jsonparser.cpp */ /* begin file /Users/lemire/CVS/github/simdjson/src/jsonparser.cpp */
#ifdef _MSC_VER #ifdef _MSC_VER
#include <windows.h> #include <windows.h>
#include <sysinfoapi.h> #include <sysinfoapi.h>
@ -308,10 +309,10 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
#endif #endif
extern bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded);
extern bool json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded);
extern ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded);
extern ParsedJson build_parsed_json(const std::string_view &s, bool reallocifneeded);
// parse a document found in buf, need to preallocate ParsedJson. // parse a document found in buf, need to preallocate ParsedJson.
@ -334,8 +335,9 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
#endif #endif
if ( (reinterpret_cast<uintptr_t>(buf + len - 1) % pagesize ) < SIMDJSON_PADDING ) { if ( (reinterpret_cast<uintptr_t>(buf + len - 1) % pagesize ) < SIMDJSON_PADDING ) {
const uint8_t *tmpbuf = buf; const uint8_t *tmpbuf = buf;
buf = (uint8_t *) allocate_padded_buffer(len); buf = reinterpret_cast<uint8_t *>(allocate_padded_buffer(len));
if(buf == NULL) return false; if(buf == nullptr) { return false;
}
memcpy((void*)buf,tmpbuf,len); memcpy((void*)buf,tmpbuf,len);
reallocated = true; reallocated = true;
} }
@ -344,10 +346,12 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
if (isok) { if (isok) {
isok = unified_machine(buf, len, pj); isok = unified_machine(buf, len, pj);
} else { } else {
if(reallocated) free((void*)buf); if(reallocated) { free((void*)buf);
}
return false; return false;
} }
if(reallocated) free((void*)buf); if(reallocated) { free((void*)buf);
}
return isok; return isok;
} }
@ -363,8 +367,8 @@ ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneede
} }
return pj; return pj;
} }
/* end file /home/geoff/git/simdjson/src/jsonparser.cpp */ /* end file /Users/lemire/CVS/github/simdjson/src/jsonparser.cpp */
/* begin file /home/geoff/git/simdjson/src/stage1_find_marks.cpp */ /* begin file /Users/lemire/CVS/github/simdjson/src/stage1_find_marks.cpp */
#include <cassert> #include <cassert>
#ifndef SIMDJSON_SKIPUTF8VALIDATION #ifndef SIMDJSON_SKIPUTF8VALIDATION
@ -384,7 +388,7 @@ using namespace std;
really_inline uint64_t cmp_mask_against_input(__m256i input_lo, __m256i input_hi, really_inline uint64_t cmp_mask_against_input(__m256i input_lo, __m256i input_hi,
__m256i mask) { __m256i mask) {
__m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask); __m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
uint64_t res_0 = (uint32_t)_mm256_movemask_epi8(cmp_res_0); uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
__m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask); __m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
return res_0 | (res_1 << 32); return res_0 | (res_1 << 32);
@ -401,7 +405,7 @@ WARN_UNUSED
uint32_t base = 0; uint32_t base = 0;
#ifdef SIMDJSON_UTF8VALIDATE #ifdef SIMDJSON_UTF8VALIDATE
__m256i has_error = _mm256_setzero_si256(); __m256i has_error = _mm256_setzero_si256();
struct avx_processed_utf_bytes previous; struct avx_processed_utf_bytes previous{};
previous.rawbytes = _mm256_setzero_si256(); previous.rawbytes = _mm256_setzero_si256();
previous.high_nibbles = _mm256_setzero_si256(); previous.high_nibbles = _mm256_setzero_si256();
previous.carried_continuations = _mm256_setzero_si256(); previous.carried_continuations = _mm256_setzero_si256();
@ -429,8 +433,8 @@ WARN_UNUSED
#ifndef _MSC_VER #ifndef _MSC_VER
__builtin_prefetch(buf + idx + 128); __builtin_prefetch(buf + idx + 128);
#endif #endif
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buf + idx + 0)); __m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 0));
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buf + idx + 32)); __m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 32));
#ifdef SIMDJSON_UTF8VALIDATE #ifdef SIMDJSON_UTF8VALIDATE
__m256i highbit = _mm256_set1_epi8(0x80); __m256i highbit = _mm256_set1_epi8(0x80);
if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) { if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) {
@ -493,29 +497,29 @@ WARN_UNUSED
uint32_t cnt = hamming(structurals); uint32_t cnt = hamming(structurals);
uint32_t next_base = base + cnt; uint32_t next_base = base + cnt;
while (structurals) { while (structurals != 0u) {
base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 0] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 1] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 2] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 3] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 4] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 5] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 6] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 7] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base += 8; base += 8;
} }
base = next_base; base = next_base;
quote_mask ^= prev_iter_inside_quote; quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20, John Regher from Utah U. says this is fine code prev_iter_inside_quote = static_cast<uint64_t>(static_cast<int64_t>(quote_mask) >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20, John Regher from Utah U. says this is fine code
// How do we build up a user traversable data structure // How do we build up a user traversable data structure
// first, do a 'shufti' to detect structural JSON characters // first, do a 'shufti' to detect structural JSON characters
@ -553,7 +557,7 @@ WARN_UNUSED
__m256i tmp_hi = _mm256_cmpeq_epi8( __m256i tmp_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0)); _mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0));
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo); uint64_t structural_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_lo));
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi); uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
structurals = ~(structural_res_0 | (structural_res_1 << 32)); structurals = ~(structural_res_0 | (structural_res_1 << 32));
@ -564,7 +568,7 @@ WARN_UNUSED
__m256i tmp_ws_hi = _mm256_cmpeq_epi8( __m256i tmp_ws_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0)); _mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo); uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi); uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32)); uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
// mask off anything inside quotes // mask off anything inside quotes
@ -607,8 +611,8 @@ WARN_UNUSED
uint8_t tmpbuf[64]; uint8_t tmpbuf[64];
memset(tmpbuf,0x20,64); memset(tmpbuf,0x20,64);
memcpy(tmpbuf,buf+idx,len - idx); memcpy(tmpbuf,buf+idx,len - idx);
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(tmpbuf + 0)); __m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(tmpbuf + 0));
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(tmpbuf + 32)); __m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(tmpbuf + 32));
#ifdef SIMDJSON_UTF8VALIDATE #ifdef SIMDJSON_UTF8VALIDATE
__m256i highbit = _mm256_set1_epi8(0x80); __m256i highbit = _mm256_set1_epi8(0x80);
if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) { if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) {
@ -671,22 +675,22 @@ WARN_UNUSED
uint32_t cnt = hamming(structurals); uint32_t cnt = hamming(structurals);
uint32_t next_base = base + cnt; uint32_t next_base = base + cnt;
while (structurals) { while (structurals != 0u) {
base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 0] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 1] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 2] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 3] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 4] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 5] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 6] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 7] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base += 8; base += 8;
} }
@ -727,7 +731,7 @@ WARN_UNUSED
__m256i tmp_hi = _mm256_cmpeq_epi8( __m256i tmp_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0)); _mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0));
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo); uint64_t structural_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_lo));
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi); uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
structurals = ~(structural_res_0 | (structural_res_1 << 32)); structurals = ~(structural_res_0 | (structural_res_1 << 32));
@ -738,7 +742,7 @@ WARN_UNUSED
__m256i tmp_ws_hi = _mm256_cmpeq_epi8( __m256i tmp_ws_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0)); _mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo); uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi); uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32)); uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
@ -775,22 +779,22 @@ WARN_UNUSED
} }
uint32_t cnt = hamming(structurals); uint32_t cnt = hamming(structurals);
uint32_t next_base = base + cnt; uint32_t next_base = base + cnt;
while (structurals) { while (structurals != 0u) {
base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 0] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 1] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 2] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 3] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 4] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 5] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 6] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 7] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base += 8; base += 8;
} }
@ -798,7 +802,7 @@ WARN_UNUSED
pj.n_structural_indexes = base; pj.n_structural_indexes = base;
// a valid JSON file cannot have zero structural indexes - we should have found something // a valid JSON file cannot have zero structural indexes - we should have found something
if (!pj.n_structural_indexes) { if (pj.n_structural_indexes == 0u) {
return false; return false;
} }
if(base_ptr[pj.n_structural_indexes-1] > len) { if(base_ptr[pj.n_structural_indexes-1] > len) {
@ -812,13 +816,13 @@ WARN_UNUSED
base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array
#ifdef SIMDJSON_UTF8VALIDATE #ifdef SIMDJSON_UTF8VALIDATE
return _mm256_testz_si256(has_error, has_error); return _mm256_testz_si256(has_error, has_error) != 0;
#else #else
return true; return true;
#endif #endif
} }
/* end file /home/geoff/git/simdjson/src/stage1_find_marks.cpp */ /* end file /Users/lemire/CVS/github/simdjson/src/stage1_find_marks.cpp */
/* begin file /home/geoff/git/simdjson/src/stage2_build_tape.cpp */ /* begin file /Users/lemire/CVS/github/simdjson/src/stage2_build_tape.cpp */
#ifdef _MSC_VER #ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */ /* Microsoft C/C++-compatible compiler */
#include <intrin.h> #include <intrin.h>
@ -838,7 +842,7 @@ using namespace std;
WARN_UNUSED WARN_UNUSED
really_inline bool is_valid_true_atom(const uint8_t *loc) { really_inline bool is_valid_true_atom(const uint8_t *loc) {
uint64_t tv = *(const uint64_t *)"true "; uint64_t tv = *reinterpret_cast<const uint64_t *>("true ");
uint64_t mask4 = 0x00000000ffffffff; uint64_t mask4 = 0x00000000ffffffff;
uint32_t error = 0; uint32_t error = 0;
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++) uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
@ -850,7 +854,7 @@ really_inline bool is_valid_true_atom(const uint8_t *loc) {
WARN_UNUSED WARN_UNUSED
really_inline bool is_valid_false_atom(const uint8_t *loc) { really_inline bool is_valid_false_atom(const uint8_t *loc) {
uint64_t fv = *(const uint64_t *)"false "; uint64_t fv = *reinterpret_cast<const uint64_t *>("false ");
uint64_t mask5 = 0x000000ffffffffff; uint64_t mask5 = 0x000000ffffffffff;
uint32_t error = 0; uint32_t error = 0;
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++) uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
@ -862,7 +866,7 @@ really_inline bool is_valid_false_atom(const uint8_t *loc) {
WARN_UNUSED WARN_UNUSED
really_inline bool is_valid_null_atom(const uint8_t *loc) { really_inline bool is_valid_null_atom(const uint8_t *loc) {
uint64_t nv = *(const uint64_t *)"null "; uint64_t nv = *reinterpret_cast<const uint64_t *>("null ");
uint64_t mask4 = 0x00000000ffffffff; uint64_t mask4 = 0x00000000ffffffff;
uint32_t error = 0; uint32_t error = 0;
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++) uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
@ -957,11 +961,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// we need to make a copy to make sure that the string is NULL terminated. // we need to make a copy to make sure that the string is NULL terminated.
// this only applies to the JSON document made solely of the true value. // this only applies to the JSON document made solely of the true value.
// this will almost never be called in practice // this will almost never be called in practice
char * copy = (char *) malloc(len + SIMDJSON_PADDING); char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == NULL) goto fail; if(copy == nullptr) { goto fail;
}
memcpy(copy, buf, len); memcpy(copy, buf, len);
copy[len] = '\0'; copy[len] = '\0';
if (!is_valid_true_atom((const uint8_t *)copy + idx)) { if (!is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
free(copy); free(copy);
goto fail; goto fail;
} }
@ -973,11 +978,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// we need to make a copy to make sure that the string is NULL terminated. // we need to make a copy to make sure that the string is NULL terminated.
// this only applies to the JSON document made solely of the false value. // this only applies to the JSON document made solely of the false value.
// this will almost never be called in practice // this will almost never be called in practice
char * copy = (char *) malloc(len + SIMDJSON_PADDING); char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == NULL) goto fail; if(copy == nullptr) { goto fail;
}
memcpy(copy, buf, len); memcpy(copy, buf, len);
copy[len] = '\0'; copy[len] = '\0';
if (!is_valid_false_atom((const uint8_t *)copy + idx)) { if (!is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
free(copy); free(copy);
goto fail; goto fail;
} }
@ -989,11 +995,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// we need to make a copy to make sure that the string is NULL terminated. // we need to make a copy to make sure that the string is NULL terminated.
// this only applies to the JSON document made solely of the null value. // this only applies to the JSON document made solely of the null value.
// this will almost never be called in practice // this will almost never be called in practice
char * copy = (char *) malloc(len + SIMDJSON_PADDING); char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == NULL) goto fail; if(copy == nullptr) { goto fail;
}
memcpy(copy, buf, len); memcpy(copy, buf, len);
copy[len] = '\0'; copy[len] = '\0';
if (!is_valid_null_atom((const uint8_t *)copy + idx)) { if (!is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
free(copy); free(copy);
goto fail; goto fail;
} }
@ -1014,11 +1021,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// we need to make a copy to make sure that the string is NULL terminated. // we need to make a copy to make sure that the string is NULL terminated.
// this is done only for JSON documents made of a sole number // this is done only for JSON documents made of a sole number
// this will almost never be called in practice // this will almost never be called in practice
char * copy = (char *) malloc(len + SIMDJSON_PADDING); char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == NULL) goto fail; if(copy == nullptr) { goto fail;
}
memcpy(copy, buf, len); memcpy(copy, buf, len);
copy[len] = '\0'; copy[len] = '\0';
if (!parse_number((const uint8_t *)copy, pj, idx, false)) { if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, false)) {
free(copy); free(copy);
goto fail; goto fail;
} }
@ -1029,11 +1037,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// we need to make a copy to make sure that the string is NULL terminated. // we need to make a copy to make sure that the string is NULL terminated.
// this is done only for JSON documents made of a sole number // this is done only for JSON documents made of a sole number
// this will almost never be called in practice // this will almost never be called in practice
char * copy = (char *) malloc(len + SIMDJSON_PADDING); char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == NULL) goto fail; if(copy == nullptr) { goto fail;
}
memcpy(copy, buf, len); memcpy(copy, buf, len);
copy[len] = '\0'; copy[len] = '\0';
if (!parse_number((const uint8_t *)copy, pj, idx, true)) { if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, true)) {
free(copy); free(copy);
goto fail; goto fail;
} }
@ -1325,38 +1334,37 @@ succeed:
fail: fail:
return false; return false;
} }
/* end file /home/geoff/git/simdjson/src/stage2_build_tape.cpp */ /* end file /Users/lemire/CVS/github/simdjson/src/stage2_build_tape.cpp */
/* begin file /home/geoff/git/simdjson/src/parsedjson.cpp */ /* begin file /Users/lemire/CVS/github/simdjson/src/parsedjson.cpp */
ParsedJson::ParsedJson() : bytecapacity(0), depthcapacity(0), tapecapacity(0), stringcapacity(0), ParsedJson::ParsedJson() :
current_loc(0), n_structural_indexes(0), structural_indexes(nullptr), tape(nullptr), containing_scope_offset(nullptr),
structural_indexes(NULL), tape(NULL), containing_scope_offset(NULL), ret_address(nullptr), string_buf(nullptr), current_string_buf_loc(nullptr) {}
ret_address(NULL), string_buf(NULL), current_string_buf_loc(NULL), isvalid(false) {}
ParsedJson::~ParsedJson() { ParsedJson::~ParsedJson() {
deallocate(); deallocate();
} }
ParsedJson::ParsedJson(ParsedJson && p) ParsedJson::ParsedJson(ParsedJson && p)
: bytecapacity(std::move(p.bytecapacity)), : bytecapacity(p.bytecapacity),
depthcapacity(std::move(p.depthcapacity)), depthcapacity(p.depthcapacity),
tapecapacity(std::move(p.tapecapacity)), tapecapacity(p.tapecapacity),
stringcapacity(std::move(p.stringcapacity)), stringcapacity(p.stringcapacity),
current_loc(std::move(p.current_loc)), current_loc(p.current_loc),
n_structural_indexes(std::move(p.n_structural_indexes)), n_structural_indexes(p.n_structural_indexes),
structural_indexes(std::move(p.structural_indexes)), structural_indexes(p.structural_indexes),
tape(std::move(p.tape)), tape(p.tape),
containing_scope_offset(std::move(p.containing_scope_offset)), containing_scope_offset(p.containing_scope_offset),
ret_address(std::move(p.ret_address)), ret_address(p.ret_address),
string_buf(std::move(p.string_buf)), string_buf(p.string_buf),
current_string_buf_loc(std::move(p.current_string_buf_loc)), current_string_buf_loc(p.current_string_buf_loc),
isvalid(std::move(p.isvalid)) { isvalid(p.isvalid) {
p.structural_indexes=NULL; p.structural_indexes=nullptr;
p.tape=NULL; p.tape=nullptr;
p.containing_scope_offset=NULL; p.containing_scope_offset=nullptr;
p.ret_address=NULL; p.ret_address=nullptr;
p.string_buf=NULL; p.string_buf=nullptr;
p.current_string_buf_loc=NULL; p.current_string_buf_loc=nullptr;
} }
@ -1368,8 +1376,9 @@ bool ParsedJson::allocateCapacity(size_t len, size_t maxdepth) {
return false; return false;
} }
if (len > 0) { if (len > 0) {
if ((len <= bytecapacity) && (depthcapacity < maxdepth)) if ((len <= bytecapacity) && (depthcapacity < maxdepth)) {
return true; return true;
}
deallocate(); deallocate();
} }
isvalid = false; isvalid = false;
@ -1387,14 +1396,15 @@ bool ParsedJson::allocateCapacity(size_t len, size_t maxdepth) {
#else #else
ret_address = new (std::nothrow) char[maxdepth]; ret_address = new (std::nothrow) char[maxdepth];
#endif #endif
if ((string_buf == NULL) || (tape == NULL) || if ((string_buf == nullptr) || (tape == nullptr) ||
(containing_scope_offset == NULL) || (ret_address == NULL) || (structural_indexes == NULL)) { (containing_scope_offset == nullptr) || (ret_address == nullptr) || (structural_indexes == nullptr)) {
std::cerr << "Could not allocate memory" << std::endl; std::cerr << "Could not allocate memory" << std::endl;
if(ret_address != NULL) delete[] ret_address; delete[] ret_address;
if(containing_scope_offset != NULL) delete[] containing_scope_offset; delete[] containing_scope_offset;
if(tape != NULL) delete[] tape; delete[] tape;
if(string_buf != NULL) delete[] string_buf; delete[] string_buf;
if(structural_indexes != NULL) delete[] structural_indexes; delete[] structural_indexes;
return false; return false;
} }
@ -1414,11 +1424,16 @@ void ParsedJson::deallocate() {
depthcapacity = 0; depthcapacity = 0;
tapecapacity = 0; tapecapacity = 0;
stringcapacity = 0; stringcapacity = 0;
if(ret_address != NULL) delete[] ret_address; {delete[] ret_address;
if(containing_scope_offset != NULL) delete[] containing_scope_offset; }
if(tape != NULL) delete[] tape; {delete[] containing_scope_offset;
if(string_buf != NULL) delete[] string_buf; }
if(structural_indexes != NULL) delete[] structural_indexes; {delete[] tape;
}
{delete[] string_buf;
}
{delete[] structural_indexes;
}
isvalid = false; isvalid = false;
} }
@ -1430,7 +1445,8 @@ void ParsedJson::init() {
WARN_UNUSED WARN_UNUSED
bool ParsedJson::printjson(std::ostream &os) { bool ParsedJson::printjson(std::ostream &os) {
if(!isvalid) return false; if(!isvalid) { return false;
}
size_t tapeidx = 0; size_t tapeidx = 0;
uint64_t tape_val = tape[tapeidx]; uint64_t tape_val = tape[tapeidx];
uint8_t type = (tape_val >> 56); uint8_t type = (tape_val >> 56);
@ -1448,7 +1464,7 @@ bool ParsedJson::printjson(std::ostream &os) {
} }
tapeidx++; tapeidx++;
bool *inobject = new bool[depthcapacity]; bool *inobject = new bool[depthcapacity];
size_t *inobjectidx = new size_t[depthcapacity]; auto *inobjectidx = new size_t[depthcapacity];
int depth = 1; // only root at level 0 int depth = 1; // only root at level 0
inobjectidx[depth] = 0; inobjectidx[depth] = 0;
inobject[depth] = false; inobject[depth] = false;
@ -1457,15 +1473,18 @@ bool ParsedJson::printjson(std::ostream &os) {
uint64_t payload = tape_val & JSONVALUEMASK; uint64_t payload = tape_val & JSONVALUEMASK;
type = (tape_val >> 56); type = (tape_val >> 56);
if (!inobject[depth]) { if (!inobject[depth]) {
if ((inobjectidx[depth] > 0) && (type != ']')) if ((inobjectidx[depth] > 0) && (type != ']')) {
os << ","; os << ",";
}
inobjectidx[depth]++; inobjectidx[depth]++;
} else { // if (inobject) { } else { // if (inobject) {
if ((inobjectidx[depth] > 0) && ((inobjectidx[depth] & 1) == 0) && if ((inobjectidx[depth] > 0) && ((inobjectidx[depth] & 1) == 0) &&
(type != '}')) (type != '}')) {
os << ","; os << ",";
if (((inobjectidx[depth] & 1) == 1)) }
if (((inobjectidx[depth] & 1) == 1)) {
os << ":"; os << ":";
}
inobjectidx[depth]++; inobjectidx[depth]++;
} }
switch (type) { switch (type) {
@ -1475,13 +1494,15 @@ bool ParsedJson::printjson(std::ostream &os) {
os << '"'; os << '"';
break; break;
case 'l': // we have a long int case 'l': // we have a long int
if (tapeidx + 1 >= howmany) if (tapeidx + 1 >= howmany) {
return false; return false;
os << (int64_t)tape[++tapeidx]; }
os << static_cast<int64_t>(tape[++tapeidx]);
break; break;
case 'd': // we have a double case 'd': // we have a double
if (tapeidx + 1 >= howmany) if (tapeidx + 1 >= howmany) {
return false; return false;
}
double answer; double answer;
memcpy(&answer, &tape[++tapeidx], sizeof(answer)); memcpy(&answer, &tape[++tapeidx], sizeof(answer));
os << answer; os << answer;
@ -1534,7 +1555,8 @@ bool ParsedJson::printjson(std::ostream &os) {
WARN_UNUSED WARN_UNUSED
bool ParsedJson::dump_raw_tape(std::ostream &os) { bool ParsedJson::dump_raw_tape(std::ostream &os) {
if(!isvalid) return false; if(!isvalid) { return false;
}
size_t tapeidx = 0; size_t tapeidx = 0;
uint64_t tape_val = tape[tapeidx]; uint64_t tape_val = tape[tapeidx];
uint8_t type = (tape_val >> 56); uint8_t type = (tape_val >> 56);
@ -1562,14 +1584,16 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) {
os << '\n'; os << '\n';
break; break;
case 'l': // we have a long int case 'l': // we have a long int
if (tapeidx + 1 >= howmany) if (tapeidx + 1 >= howmany) {
return false; return false;
os << "integer " << (int64_t)tape[++tapeidx] << "\n"; }
os << "integer " << static_cast<int64_t>(tape[++tapeidx]) << "\n";
break; break;
case 'd': // we have a double case 'd': // we have a double
os << "float "; os << "float ";
if (tapeidx + 1 >= howmany) if (tapeidx + 1 >= howmany) {
return false; return false;
}
double answer; double answer;
memcpy(&answer, &tape[++tapeidx], sizeof(answer)); memcpy(&answer, &tape[++tapeidx], sizeof(answer));
os << answer << '\n'; os << answer << '\n';
@ -1608,13 +1632,14 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) {
os << tapeidx << " : "<< type <<"\t// pointing to " << payload <<" (start root)\n"; os << tapeidx << " : "<< type <<"\t// pointing to " << payload <<" (start root)\n";
return true; return true;
} }
/* end file /home/geoff/git/simdjson/src/parsedjson.cpp */ /* end file /Users/lemire/CVS/github/simdjson/src/parsedjson.cpp */
/* begin file /home/geoff/git/simdjson/src/parsedjsoniterator.cpp */ /* begin file /Users/lemire/CVS/github/simdjson/src/parsedjsoniterator.cpp */
ParsedJson::iterator::iterator(ParsedJson &pj_) : pj(pj_), depth(0), location(0), tape_length(0), depthindex(NULL) { ParsedJson::iterator::iterator(ParsedJson &pj_) : pj(pj_), depth(0), location(0), tape_length(0), depthindex(nullptr) {
if(pj.isValid()) { if(pj.isValid()) {
depthindex = new scopeindex_t[pj.depthcapacity]; depthindex = new scopeindex_t[pj.depthcapacity];
if(depthindex == NULL) return; if(depthindex == nullptr) { return;
}
depthindex[0].start_of_scope = location; depthindex[0].start_of_scope = location;
current_val = pj.tape[location++]; current_val = pj.tape[location++];
current_type = (current_val >> 56); current_type = (current_val >> 56);
@ -1639,9 +1664,9 @@ ParsedJson::iterator::~iterator() {
ParsedJson::iterator::iterator(const iterator &o): ParsedJson::iterator::iterator(const iterator &o):
pj(o.pj), depth(o.depth), location(o.location), pj(o.pj), depth(o.depth), location(o.location),
tape_length(o.tape_length), current_type(o.current_type), tape_length(o.tape_length), current_type(o.current_type),
current_val(o.current_val), depthindex(NULL) { current_val(o.current_val), depthindex(nullptr) {
depthindex = new scopeindex_t[pj.depthcapacity]; depthindex = new scopeindex_t[pj.depthcapacity];
if(depthindex != NULL) { if(depthindex != nullptr) {
memcpy(o.depthindex, depthindex, pj.depthcapacity * sizeof(depthindex[0])); memcpy(o.depthindex, depthindex, pj.depthcapacity * sizeof(depthindex[0]));
} else { } else {
tape_length = 0; tape_length = 0;
@ -1649,10 +1674,10 @@ ParsedJson::iterator::iterator(const iterator &o):
} }
ParsedJson::iterator::iterator(iterator &&o): ParsedJson::iterator::iterator(iterator &&o):
pj(o.pj), depth(std::move(o.depth)), location(std::move(o.location)), pj(o.pj), depth(o.depth), location(o.location),
tape_length(std::move(o.tape_length)), current_type(std::move(o.current_type)), tape_length(o.tape_length), current_type(o.current_type),
current_val(std::move(o.current_val)), depthindex(std::move(o.depthindex)) { current_val(o.current_val), depthindex(o.depthindex) {
o.depthindex = NULL;// we take ownership o.depthindex = nullptr;// we take ownership
} }
WARN_UNUSED WARN_UNUSED
@ -1716,19 +1741,21 @@ uint8_t ParsedJson::iterator::get_type() const {
int64_t ParsedJson::iterator::get_integer() const { int64_t ParsedJson::iterator::get_integer() const {
if(location + 1 >= tape_length) return 0;// default value in case of error if(location + 1 >= tape_length) { return 0;// default value in case of error
return (int64_t) pj.tape[location + 1]; }
return static_cast<int64_t>(pj.tape[location + 1]);
} }
double ParsedJson::iterator::get_double() const { double ParsedJson::iterator::get_double() const {
if(location + 1 >= tape_length) return NAN;// default value in case of error if(location + 1 >= tape_length) { return NAN;// default value in case of error
}
double answer; double answer;
memcpy(&answer, & pj.tape[location + 1], sizeof(answer)); memcpy(&answer, & pj.tape[location + 1], sizeof(answer));
return answer; return answer;
} }
const char * ParsedJson::iterator::get_string() const { const char * ParsedJson::iterator::get_string() const {
return (const char *)(pj.string_buf + (current_val & JSONVALUEMASK)) ; return reinterpret_cast<const char *>(pj.string_buf + (current_val & JSONVALUEMASK)) ;
} }
@ -1766,7 +1793,8 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
assert(is_string()); assert(is_string());
bool rightkey = (strcmp(get_string(),key)==0); bool rightkey = (strcmp(get_string(),key)==0);
next(); next();
if(rightkey) return true; if(rightkey) { return true;
}
} while(next()); } while(next());
assert(up());// not found assert(up());// not found
} }
@ -1790,9 +1818,10 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
current_val = nextval; current_val = nextval;
current_type = nexttype; current_type = nexttype;
return true; return true;
} else { }
size_t increment = (current_type == 'd' || current_type == 'l') ? 2 : 1; size_t increment = (current_type == 'd' || current_type == 'l') ? 2 : 1;
if(location + increment >= tape_length) return false; if(location + increment >= tape_length) { return false;
}
uint64_t nextval = pj.tape[location + increment]; uint64_t nextval = pj.tape[location + increment];
uint8_t nexttype = (nextval >> 56); uint8_t nexttype = (nextval >> 56);
if((nexttype == ']') || (nexttype == '}')) { if((nexttype == ']') || (nexttype == '}')) {
@ -1802,12 +1831,13 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
current_val = nextval; current_val = nextval;
current_type = nexttype; current_type = nexttype;
return true; return true;
}
} }
bool ParsedJson::iterator::prev() { bool ParsedJson::iterator::prev() {
if(location - 1 < depthindex[depth].start_of_scope) return false; if(location - 1 < depthindex[depth].start_of_scope) { return false;
}
location -= 1; location -= 1;
current_val = pj.tape[location]; current_val = pj.tape[location];
current_type = (current_val >> 56); current_type = (current_val >> 56);
@ -1840,7 +1870,8 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
bool ParsedJson::iterator::down() { bool ParsedJson::iterator::down() {
if(location + 1 >= tape_length) return false; if(location + 1 >= tape_length) { return false;
}
if ((current_type == '[') || (current_type == '{')) { if ((current_type == '[') || (current_type == '{')) {
size_t npos = (current_val & JSONVALUEMASK); size_t npos = (current_val & JSONVALUEMASK);
if(npos == location + 2) { if(npos == location + 2) {
@ -1864,7 +1895,8 @@ void ParsedJson::iterator::to_start_scope() {
} }
bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const { bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const {
if(!isOk()) return false; if(!isOk()) { return false;
}
switch (current_type) { switch (current_type) {
case '"': // we have a string case '"': // we have a string
os << '"'; os << '"';
@ -1894,11 +1926,11 @@ bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const {
case '}': // we end an object case '}': // we end an object
case '[': // we start an array case '[': // we start an array
case ']': // we end an array case ']': // we end an array
os << (char) current_type; os << static_cast<char>(current_type);
break; break;
default: default:
return false; return false;
} }
return true; return true;
} }
/* end file /home/geoff/git/simdjson/src/parsedjsoniterator.cpp */ /* end file /Users/lemire/CVS/github/simdjson/src/parsedjsoniterator.cpp */

View File

@ -1,5 +1,17 @@
/* auto-generated on Tue 26 Feb 13:29:52 AEDT 2019. Do not edit! */ /* auto-generated on Tue 26 Feb 2019 10:14:31 EST. Do not edit! */
/* begin file /home/geoff/git/simdjson/include/simdjson/portability.h */ /* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/simdjson_version.h */
// /include/simdjson/simdjson_version.h automatically generated by release.py, do not change by hand
#ifndef SIMDJSON_INCLUDE_SIMDJSON_VERSION
#define SIMDJSON_INCLUDE_SIMDJSON_VERSION
#define SIMDJSON_VERSION = 0.0.1,
enum {
SIMDJSON_VERSION_MAJOR = 0,
SIMDJSON_VERSION_MINOR = 0,
SIMDJSON_VERSION_REVISION = 1
};
#endif // SIMDJSON_INCLUDE_SIMDJSON_VERSION
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/simdjson_version.h */
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/portability.h */
#ifndef SIMDJSON_PORTABILITY_H #ifndef SIMDJSON_PORTABILITY_H
#define SIMDJSON_PORTABILITY_H #define SIMDJSON_PORTABILITY_H
@ -44,8 +56,8 @@ static inline int hamming(uint64_t input_num) {
} }
#else #else
#include <x86intrin.h>
#include <cstdint> #include <cstdint>
#include <x86intrin.h>
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
return __builtin_uaddll_overflow(value1, value2, (unsigned long long*)result); return __builtin_uaddll_overflow(value1, value2, (unsigned long long*)result);
@ -88,7 +100,7 @@ static inline void *aligned_malloc(size_t alignment, size_t size) {
#else #else
// somehow, if this is used before including "x86intrin.h", it creates an // somehow, if this is used before including "x86intrin.h", it creates an
// implicit defined warning. // implicit defined warning.
if (posix_memalign(&p, alignment, size) != 0) return NULL; if (posix_memalign(&p, alignment, size) != 0) { return nullptr; }
#endif #endif
return p; return p;
} }
@ -116,7 +128,7 @@ static inline void _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo,
static inline void aligned_free(void *memblock) { static inline void aligned_free(void *memblock) {
if(memblock == NULL) return; if(memblock == nullptr) { return; }
#ifdef _MSC_VER #ifdef _MSC_VER
_aligned_free(memblock); _aligned_free(memblock);
#elif defined(__MINGW32__) || defined(__MINGW64__) #elif defined(__MINGW32__) || defined(__MINGW64__)
@ -126,9 +138,9 @@ static inline void aligned_free(void *memblock) {
#endif #endif
} }
#endif /* end of include PORTABILITY_H */ #endif // SIMDJSON_PORTABILITY_H
/* end file /home/geoff/git/simdjson/include/simdjson/portability.h */ /* end file /Users/lemire/CVS/github/simdjson/include/simdjson/portability.h */
/* begin file /home/geoff/git/simdjson/include/simdjson/common_defs.h */ /* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/common_defs.h */
#ifndef SIMDJSON_COMMON_DEFS_H #ifndef SIMDJSON_COMMON_DEFS_H
#define SIMDJSON_COMMON_DEFS_H #define SIMDJSON_COMMON_DEFS_H
@ -186,9 +198,9 @@ static inline void aligned_free(void *memblock) {
#endif // MSC_VER #endif // MSC_VER
#endif // COMMON_DEFS_H #endif // SIMDJSON_COMMON_DEFS_H
/* end file /home/geoff/git/simdjson/include/simdjson/common_defs.h */ /* end file /Users/lemire/CVS/github/simdjson/include/simdjson/common_defs.h */
/* begin file /home/geoff/git/simdjson/include/simdjson/jsoncharutils.h */ /* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/jsoncharutils.h */
#ifndef SIMDJSON_JSONCHARUTILS_H #ifndef SIMDJSON_JSONCHARUTILS_H
#define SIMDJSON_JSONCHARUTILS_H #define SIMDJSON_JSONCHARUTILS_H
@ -286,7 +298,7 @@ inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
if (cp <= 0x7F) { if (cp <= 0x7F) {
c[0] = cp; c[0] = cp;
return 1; // ascii return 1; // ascii
} else if (cp <= 0x7FF) { } if (cp <= 0x7FF) {
c[0] = (cp >> 6) + 192; c[0] = (cp >> 6) + 192;
c[1] = (cp & 63) + 128; c[1] = (cp & 63) + 128;
return 2; // universal plane return 2; // universal plane
@ -310,17 +322,17 @@ inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
} }
#endif #endif
/* end file /home/geoff/git/simdjson/include/simdjson/jsoncharutils.h */ /* end file /Users/lemire/CVS/github/simdjson/include/simdjson/jsoncharutils.h */
/* begin file /home/geoff/git/simdjson/include/simdjson/jsonformatutils.h */ /* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonformatutils.h */
#ifndef SIMDJSON_JSONFORMATUTILS_H #ifndef SIMDJSON_JSONFORMATUTILS_H
#define SIMDJSON_JSONFORMATUTILS_H #define SIMDJSON_JSONFORMATUTILS_H
#include <stdio.h> #include <cstdio>
#include <iostream>
#include <iomanip> #include <iomanip>
#include <iostream>
static inline void print_with_escapes(const unsigned char *src) { static inline void print_with_escapes(const unsigned char *src) {
while (*src) { while (*src != 0u) {
switch (*src) { switch (*src) {
case '\b': case '\b':
putchar('\\'); putchar('\\');
@ -353,15 +365,16 @@ static inline void print_with_escapes(const unsigned char *src) {
default: default:
if (*src <= 0x1F) { if (*src <= 0x1F) {
printf("\\u%04x", *src); printf("\\u%04x", *src);
} else } else {
putchar(*src); putchar(*src);
}
} }
src++; src++;
} }
} }
static inline void print_with_escapes(const unsigned char *src, std::ostream &os) { static inline void print_with_escapes(const unsigned char *src, std::ostream &os) {
while (*src) { while (*src != 0u) {
switch (*src) { switch (*src) {
case '\b': case '\b':
os << '\\'; os << '\\';
@ -394,22 +407,23 @@ static inline void print_with_escapes(const unsigned char *src, std::ostream &os
default: default:
if (*src <= 0x1F) { if (*src <= 0x1F) {
std::ios::fmtflags f(os.flags()); std::ios::fmtflags f(os.flags());
os << std::hex << std::setw(4) << std::setfill('0') << (int) *src; os << std::hex << std::setw(4) << std::setfill('0') << static_cast<int>(*src);
os.flags(f); os.flags(f);
} else } else {
os << *src; os << *src;
}
} }
src++; src++;
} }
} }
static inline void print_with_escapes(const char *src, std::ostream &os) { static inline void print_with_escapes(const char *src, std::ostream &os) {
print_with_escapes((const unsigned char *)src, os); print_with_escapes(reinterpret_cast<const unsigned char *>(src), os);
} }
#endif #endif
/* end file /home/geoff/git/simdjson/include/simdjson/jsonformatutils.h */ /* end file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonformatutils.h */
/* begin file /home/geoff/git/simdjson/include/simdjson/jsonioutil.h */ /* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonioutil.h */
#ifndef SIMDJSON_JSONIOUTIL_H #ifndef SIMDJSON_JSONIOUTIL_H
#define SIMDJSON_JSONIOUTIL_H #define SIMDJSON_JSONIOUTIL_H
@ -445,12 +459,12 @@ char * allocate_padded_buffer(size_t length);
// free((void*)p.data());//use aligned_free if you plan to use VisualStudio // free((void*)p.data());//use aligned_free if you plan to use VisualStudio
// std::cout << "Could not load the file " << filename << std::endl; // std::cout << "Could not load the file " << filename << std::endl;
// } // }
std::string_view get_corpus(std::string filename); std::string_view get_corpus(const std::string& filename);
#endif #endif
/* end file /home/geoff/git/simdjson/include/simdjson/jsonioutil.h */ /* end file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonioutil.h */
/* begin file /home/geoff/git/simdjson/include/simdjson/simdprune_tables.h */ /* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/simdprune_tables.h */
#ifndef SIMDJSON_SIMDPRUNE_TABLES_H #ifndef SIMDJSON_SIMDPRUNE_TABLES_H
#define SIMDJSON_SIMDPRUNE_TABLES_H #define SIMDJSON_SIMDPRUNE_TABLES_H
@ -35441,7 +35455,7 @@ static const unsigned char mask128_epi32[] = {
#ifdef __AVX2__ #ifdef __AVX2__
#include <stdint.h> #include <cstdint>
static const uint32_t mask256_epi32[] = { static const uint32_t mask256_epi32[] = {
0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 7, 0, 2, 3, 4, 5, 6, 7, 7, 2, 0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 7, 0, 2, 3, 4, 5, 6, 7, 7, 2,
@ -35529,13 +35543,13 @@ static const uint32_t mask256_epi32[] = {
#endif //__AVX2__ #endif //__AVX2__
#endif #endif
/* end file /home/geoff/git/simdjson/include/simdjson/simdprune_tables.h */ /* end file /Users/lemire/CVS/github/simdjson/include/simdjson/simdprune_tables.h */
/* begin file /home/geoff/git/simdjson/include/simdjson/simdutf8check.h */ /* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/simdutf8check.h */
#ifndef SIMDJSON_SIMDUTF8CHECK_H #ifndef SIMDJSON_SIMDUTF8CHECK_H
#define SIMDJSON_SIMDUTF8CHECK_H #define SIMDJSON_SIMDUTF8CHECK_H
#include <stdbool.h>
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include <string.h> #include <string.h>
@ -35700,7 +35714,7 @@ static struct avx_processed_utf_bytes
avxcheckUTF8Bytes(__m256i current_bytes, avxcheckUTF8Bytes(__m256i current_bytes,
struct avx_processed_utf_bytes *previous, struct avx_processed_utf_bytes *previous,
__m256i *has_error) { __m256i *has_error) {
struct avx_processed_utf_bytes pb; struct avx_processed_utf_bytes pb{};
avx_count_nibbles(current_bytes, &pb); avx_count_nibbles(current_bytes, &pb);
avxcheckSmallerThan0xF4(current_bytes, has_error); avxcheckSmallerThan0xF4(current_bytes, has_error);
@ -35725,8 +35739,8 @@ avxcheckUTF8Bytes(__m256i current_bytes,
#warning "We require AVX2 support!" #warning "We require AVX2 support!"
#endif // __AVX2__ #endif // __AVX2__
#endif #endif
/* end file /home/geoff/git/simdjson/include/simdjson/simdutf8check.h */ /* end file /Users/lemire/CVS/github/simdjson/include/simdjson/simdutf8check.h */
/* begin file /home/geoff/git/simdjson/include/simdjson/jsonminifier.h */ /* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonminifier.h */
#ifndef SIMDJSON_JSONMINIFIER_H #ifndef SIMDJSON_JSONMINIFIER_H
#define SIMDJSON_JSONMINIFIER_H #define SIMDJSON_JSONMINIFIER_H
@ -35740,7 +35754,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out);
static inline size_t jsonminify(const char *buf, size_t len, char *out) { static inline size_t jsonminify(const char *buf, size_t len, char *out) {
return jsonminify((const uint8_t *)buf, len, (uint8_t *)out); return jsonminify(reinterpret_cast<const uint8_t *>(buf), len, reinterpret_cast<uint8_t *>(out));
} }
@ -35749,11 +35763,12 @@ static inline size_t jsonminify(const std::string_view & p, char *out) {
} }
#endif #endif
/* end file /home/geoff/git/simdjson/include/simdjson/jsonminifier.h */ /* end file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonminifier.h */
/* begin file /home/geoff/git/simdjson/include/simdjson/parsedjson.h */ /* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/parsedjson.h */
#ifndef SIMDJSON_PARSEDJSON_H #ifndef SIMDJSON_PARSEDJSON_H
#define SIMDJSON_PARSEDJSON_H #define SIMDJSON_PARSEDJSON_H
#include <cinttypes>
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>
#include <iomanip> #include <iomanip>
@ -35817,12 +35832,12 @@ public:
// this should be considered a private function // this should be considered a private function
really_inline void write_tape(uint64_t val, uint8_t c) { really_inline void write_tape(uint64_t val, uint8_t c) {
tape[current_loc++] = val | (((uint64_t)c) << 56); tape[current_loc++] = val | ((static_cast<uint64_t>(c)) << 56);
} }
really_inline void write_tape_s64(int64_t i) { really_inline void write_tape_s64(int64_t i) {
write_tape(0, 'l'); write_tape(0, 'l');
tape[current_loc++] = *((uint64_t *)&i); tape[current_loc++] = *(reinterpret_cast<uint64_t *>(&i));
} }
really_inline void write_tape_double(double d) { really_inline void write_tape_double(double d) {
@ -35942,7 +35957,7 @@ public:
private: private:
iterator& operator=(const iterator& other) ; iterator& operator=(const iterator& other) = delete ;
ParsedJson &pj; ParsedJson &pj;
size_t depth; size_t depth;
@ -35953,13 +35968,13 @@ private:
scopeindex_t *depthindex; scopeindex_t *depthindex;
}; };
size_t bytecapacity; // indicates how many bits are meant to be supported size_t bytecapacity{0}; // indicates how many bits are meant to be supported
size_t depthcapacity; // how deep we can go size_t depthcapacity{0}; // how deep we can go
size_t tapecapacity; size_t tapecapacity{0};
size_t stringcapacity; size_t stringcapacity{0};
uint32_t current_loc; uint32_t current_loc{0};
uint32_t n_structural_indexes; uint32_t n_structural_indexes{0};
uint32_t *structural_indexes; uint32_t *structural_indexes;
@ -35973,10 +35988,13 @@ private:
uint8_t *string_buf; // should be at least bytecapacity uint8_t *string_buf; // should be at least bytecapacity
uint8_t *current_string_buf_loc; uint8_t *current_string_buf_loc;
bool isvalid; bool isvalid{false};
private : private :
ParsedJson(const ParsedJson & p) = delete;
// we don't want the default constructor to be called
ParsedJson(const ParsedJson & p) = delete; // we don't want the default constructor to be called
// we don't want the assignment to be called
ParsedJson & operator=(const ParsedJson&o) = delete; ParsedJson & operator=(const ParsedJson&o) = delete;
}; };
@ -35984,22 +36002,22 @@ private :
// dump bits low to high // dump bits low to high
inline void dumpbits_always(uint64_t v, const std::string &msg) { inline void dumpbits_always(uint64_t v, const std::string &msg) {
for (uint32_t i = 0; i < 64; i++) { for (uint32_t i = 0; i < 64; i++) {
std::cout << (((v >> (uint64_t)i) & 0x1ULL) ? "1" : "_"); std::cout << (((v >> static_cast<uint64_t>(i)) & 0x1ULL) ? "1" : "_");
} }
std::cout << " " << msg.c_str() << "\n"; std::cout << " " << msg.c_str() << "\n";
} }
inline void dumpbits32_always(uint32_t v, const std::string &msg) { inline void dumpbits32_always(uint32_t v, const std::string &msg) {
for (uint32_t i = 0; i < 32; i++) { for (uint32_t i = 0; i < 32; i++) {
std::cout << (((v >> (uint32_t)i) & 0x1ULL) ? "1" : "_"); std::cout << (((v >> i) & 0x1ULL) ? "1" : "_");
} }
std::cout << " " << msg.c_str() << "\n"; std::cout << " " << msg.c_str() << "\n";
} }
#endif #endif
/* end file /home/geoff/git/simdjson/include/simdjson/parsedjson.h */ /* end file /Users/lemire/CVS/github/simdjson/include/simdjson/parsedjson.h */
/* begin file /home/geoff/git/simdjson/include/simdjson/stage1_find_marks.h */ /* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/stage1_find_marks.h */
#ifndef SIMDJSON_STAGE1_FIND_MARKS_H #ifndef SIMDJSON_STAGE1_FIND_MARKS_H
#define SIMDJSON_STAGE1_FIND_MARKS_H #define SIMDJSON_STAGE1_FIND_MARKS_H
@ -36009,12 +36027,12 @@ bool find_structural_bits(const uint8_t *buf, size_t len, ParsedJson &pj);
WARN_UNUSED WARN_UNUSED
static inline bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) { static inline bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
return find_structural_bits((const uint8_t *)buf, len, pj); return find_structural_bits(reinterpret_cast<const uint8_t *>(buf), len, pj);
} }
#endif #endif
/* end file /home/geoff/git/simdjson/include/simdjson/stage1_find_marks.h */ /* end file /Users/lemire/CVS/github/simdjson/include/simdjson/stage1_find_marks.h */
/* begin file /home/geoff/git/simdjson/include/simdjson/stringparsing.h */ /* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/stringparsing.h */
#ifndef SIMDJSON_STRINGPARSING_H #ifndef SIMDJSON_STRINGPARSING_H
#define SIMDJSON_STRINGPARSING_H #define SIMDJSON_STRINGPARSING_H
@ -36099,11 +36117,11 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
uint8_t *const start_of_string = dst; uint8_t *const start_of_string = dst;
#endif #endif
while (1) { while (1) {
__m256i v = _mm256_loadu_si256((const __m256i *)(src)); __m256i v = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
uint32_t bs_bits = auto bs_bits =
(uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\'))); static_cast<uint32_t>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\'))));
uint32_t quote_bits = auto quote_bits =
(uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('"'))); static_cast<uint32_t>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('"'))));
#define CHECKUNESCAPED #define CHECKUNESCAPED
// All Unicode characters may be placed within the // All Unicode characters may be placed within the
// quotation marks, except for the characters that MUST be escaped: // quotation marks, except for the characters that MUST be escaped:
@ -36119,7 +36137,7 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
uint32_t bs_dist = trailingzeroes(bs_bits); uint32_t bs_dist = trailingzeroes(bs_bits);
// store to dest unconditionally - we can overwrite the bits we don't like // store to dest unconditionally - we can overwrite the bits we don't like
// later // later
_mm256_storeu_si256((__m256i *)(dst), v); _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), v);
if (quote_dist < bs_dist) { if (quote_dist < bs_dist) {
// we encountered quotes first. Move dst to point to quotes and exit // we encountered quotes first. Move dst to point to quotes and exit
dst[quote_dist] = 0; // null terminate and get out dst[quote_dist] = 0; // null terminate and get out
@ -36129,7 +36147,7 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
pj.current_string_buf_loc = dst + quote_dist + 1; // the +1 is due to the 0 value pj.current_string_buf_loc = dst + quote_dist + 1; // the +1 is due to the 0 value
#ifdef CHECKUNESCAPED #ifdef CHECKUNESCAPED
// check that there is no unescaped char before the quote // check that there is no unescaped char before the quote
uint32_t unescaped_bits = (uint32_t)_mm256_movemask_epi8(unescaped_vec); auto unescaped_bits = static_cast<uint32_t>(_mm256_movemask_epi8(unescaped_vec));
bool is_ok = ((quote_bits - 1) & (~ quote_bits) & unescaped_bits) == 0; bool is_ok = ((quote_bits - 1) & (~ quote_bits) & unescaped_bits) == 0;
#ifdef JSON_TEST_STRINGS // for unit testing #ifdef JSON_TEST_STRINGS // for unit testing
if(is_ok) foundString(buf + offset,start_of_string,pj.current_string_buf_loc - 1); if(is_ok) foundString(buf + offset,start_of_string,pj.current_string_buf_loc - 1);
@ -36142,11 +36160,11 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
#endif // JSON_TEST_STRINGS #endif // JSON_TEST_STRINGS
return true; return true;
#endif //CHECKUNESCAPED #endif //CHECKUNESCAPED
} else if (quote_dist > bs_dist) { } if (quote_dist > bs_dist) {
uint8_t escape_char = src[bs_dist + 1]; uint8_t escape_char = src[bs_dist + 1];
#ifdef CHECKUNESCAPED #ifdef CHECKUNESCAPED
// we are going to need the unescaped_bits to check for unescaped chars // we are going to need the unescaped_bits to check for unescaped chars
uint32_t unescaped_bits = (uint32_t)_mm256_movemask_epi8(unescaped_vec); auto unescaped_bits = static_cast<uint32_t>(_mm256_movemask_epi8(unescaped_vec));
if(((bs_bits - 1) & (~ bs_bits) & unescaped_bits) != 0) { if(((bs_bits - 1) & (~ bs_bits) & unescaped_bits) != 0) {
#ifdef JSON_TEST_STRINGS // for unit testing #ifdef JSON_TEST_STRINGS // for unit testing
foundBadString(buf + offset); foundBadString(buf + offset);
@ -36172,7 +36190,7 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
// note this may reach beyond the part of the buffer we've actually // note this may reach beyond the part of the buffer we've actually
// seen. I think this is ok // seen. I think this is ok
uint8_t escape_result = escape_map[escape_char]; uint8_t escape_result = escape_map[escape_char];
if (!escape_result) { if (escape_result == 0u) {
#ifdef JSON_TEST_STRINGS // for unit testing #ifdef JSON_TEST_STRINGS // for unit testing
foundBadString(buf + offset); foundBadString(buf + offset);
#endif // JSON_TEST_STRINGS #endif // JSON_TEST_STRINGS
@ -36205,8 +36223,8 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
#endif #endif
/* end file /home/geoff/git/simdjson/include/simdjson/stringparsing.h */ /* end file /Users/lemire/CVS/github/simdjson/include/simdjson/stringparsing.h */
/* begin file /home/geoff/git/simdjson/include/simdjson/numberparsing.h */ /* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/numberparsing.h */
#ifndef SIMDJSON_NUMBERPARSING_H #ifndef SIMDJSON_NUMBERPARSING_H
#define SIMDJSON_NUMBERPARSING_H #define SIMDJSON_NUMBERPARSING_H
@ -36346,7 +36364,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1);
const __m128i mul_1_10000 = const __m128i mul_1_10000 =
_mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1);
const __m128i input = _mm_sub_epi8(_mm_loadu_si128((const __m128i *)chars), ascii0); const __m128i input = _mm_sub_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(chars)), ascii0);
const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10);
const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); const __m128i t2 = _mm_madd_epi16(t1, mul_1_100);
const __m128i t3 = _mm_packus_epi32(t2, t2); const __m128i t3 = _mm_packus_epi32(t2, t2);
@ -36371,7 +36389,7 @@ static never_inline bool
parse_float(const uint8_t *const buf, parse_float(const uint8_t *const buf,
ParsedJson &pj, const uint32_t offset, ParsedJson &pj, const uint32_t offset,
bool found_minus) { bool found_minus) {
const char *p = (const char *)(buf + offset); const char *p = reinterpret_cast<const char *>(buf + offset);
bool negative = false; bool negative = false;
if (found_minus) { if (found_minus) {
++p; ++p;
@ -36485,7 +36503,7 @@ static never_inline bool parse_large_integer(const uint8_t *const buf,
ParsedJson &pj, ParsedJson &pj,
const uint32_t offset, const uint32_t offset,
bool found_minus) { bool found_minus) {
const char *p = (const char *)(buf + offset); const char *p = reinterpret_cast<const char *>(buf + offset);
bool negative = false; bool negative = false;
if (found_minus) { if (found_minus) {
@ -36557,7 +36575,7 @@ static really_inline bool parse_number(const uint8_t *const buf,
pj.write_tape_s64(0); // always write zero pj.write_tape_s64(0); // always write zero
return true; // always succeeds return true; // always succeeds
#else #else
const char *p = (const char *)(buf + offset); const char *p = reinterpret_cast<const char *>(buf + offset);
bool negative = false; bool negative = false;
if (found_minus) { if (found_minus) {
++p; ++p;
@ -36723,10 +36741,10 @@ static really_inline bool parse_number(const uint8_t *const buf,
} }
#endif #endif
/* end file /home/geoff/git/simdjson/include/simdjson/numberparsing.h */ /* end file /Users/lemire/CVS/github/simdjson/include/simdjson/numberparsing.h */
/* begin file /home/geoff/git/simdjson/include/simdjson/stage2_build_tape.h */ /* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/stage2_build_tape.h */
#ifndef SIMDJSON_STAGE34_UNIFIED_H #ifndef SIMDJSON_STAGE2_BUILD_TAPE_H
#define SIMDJSON_STAGE34_UNIFIED_H #define SIMDJSON_STAGE2_BUILD_TAPE_H
void init_state_machine(); void init_state_machine();
@ -36737,12 +36755,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj);
WARN_UNUSED WARN_UNUSED
static inline bool unified_machine(const char *buf, size_t len, ParsedJson &pj) { static inline bool unified_machine(const char *buf, size_t len, ParsedJson &pj) {
return unified_machine((const uint8_t *)buf,len,pj); return unified_machine(reinterpret_cast<const uint8_t *>(buf),len,pj);
} }
#endif #endif
/* end file /home/geoff/git/simdjson/include/simdjson/stage2_build_tape.h */ /* end file /Users/lemire/CVS/github/simdjson/include/simdjson/stage2_build_tape.h */
/* begin file /home/geoff/git/simdjson/include/simdjson/jsonparser.h */ /* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonparser.h */
#ifndef SIMDJSON_JSONPARSER_H #ifndef SIMDJSON_JSONPARSER_H
#define SIMDJSON_JSONPARSER_H #define SIMDJSON_JSONPARSER_H
@ -36771,7 +36789,7 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
// all bytes at and after buf + len are ignored (can be garbage). // all bytes at and after buf + len are ignored (can be garbage).
WARN_UNUSED WARN_UNUSED
inline bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) { inline bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
return json_parse((const uint8_t *) buf, len, pj, reallocifneeded); return json_parse(reinterpret_cast<const uint8_t *>(buf), len, pj, reallocifneeded);
} }
// Parse a document found in buf, need to preallocate ParsedJson. // Parse a document found in buf, need to preallocate ParsedJson.
@ -36806,7 +36824,7 @@ WARN_UNUSED
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false, // The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
// all bytes at and after buf + len are ignored (can be garbage). // all bytes at and after buf + len are ignored (can be garbage).
inline ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded = true) { inline ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded = true) {
return build_parsed_json((const uint8_t *) buf, len, reallocifneeded); return build_parsed_json(reinterpret_cast<const uint8_t *>(buf), len, reallocifneeded);
} }
// convenience function // convenience function
@ -36822,4 +36840,4 @@ inline ParsedJson build_parsed_json(const std::string_view &s, bool reallocifnee
} }
#endif #endif
/* end file /home/geoff/git/simdjson/include/simdjson/jsonparser.h */ /* end file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonparser.h */