First release (0.0.1)
This commit is contained in:
parent
2228c7c29d
commit
a24e701b4e
|
@ -24,6 +24,7 @@ $SCRIPTPATH/src/parsedjsoniterator.cpp
|
||||||
|
|
||||||
# order matters
|
# order matters
|
||||||
ALLCHEADERS="
|
ALLCHEADERS="
|
||||||
|
$SCRIPTPATH/include/simdjson/simdjson_version.h
|
||||||
$SCRIPTPATH/include/simdjson/portability.h
|
$SCRIPTPATH/include/simdjson/portability.h
|
||||||
$SCRIPTPATH/include/simdjson/common_defs.h
|
$SCRIPTPATH/include/simdjson/common_defs.h
|
||||||
$SCRIPTPATH/include/simdjson/jsoncharutils.h
|
$SCRIPTPATH/include/simdjson/jsoncharutils.h
|
||||||
|
|
|
@ -0,0 +1,10 @@
|
||||||
|
// /include/simdjson/simdjson_version.h automatically generated by release.py, do not change by hand
|
||||||
|
#ifndef SIMDJSON_INCLUDE_SIMDJSON_VERSION
|
||||||
|
#define SIMDJSON_INCLUDE_SIMDJSON_VERSION
|
||||||
|
// Library version as a string literal in "major.minor.revision" form.
// The replacement list must not contain '=' or a trailing ',' -- a macro is
// substituted token-for-token, so those characters would be pasted into every
// use site and make the macro unusable.
// NOTE(review): this header is generated by release.py; the generator's
// template presumably needs the same correction -- confirm.
#define SIMDJSON_VERSION "0.0.1"
|
||||||
|
enum {
|
||||||
|
SIMDJSON_VERSION_MAJOR = 0,
|
||||||
|
SIMDJSON_VERSION_MINOR = 0,
|
||||||
|
SIMDJSON_VERSION_REVISION = 1
|
||||||
|
};
|
||||||
|
#endif // SIMDJSON_INCLUDE_SIMDJSON_VERSION
|
|
@ -1,4 +1,4 @@
|
||||||
/* auto-generated on Tue 26 Feb 13:29:52 AEDT 2019. Do not edit! */
|
/* auto-generated on Tue 26 Feb 2019 10:14:31 EST. Do not edit! */
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include "simdjson.h"
|
#include "simdjson.h"
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* auto-generated on Tue 26 Feb 13:29:52 AEDT 2019. Do not edit! */
|
/* auto-generated on Tue 26 Feb 2019 10:14:31 EST. Do not edit! */
|
||||||
#include "simdjson.h"
|
#include "simdjson.h"
|
||||||
|
|
||||||
/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */
|
/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */
|
||||||
|
@ -6,9 +6,9 @@
|
||||||
#include "dmalloc.h"
|
#include "dmalloc.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* begin file /home/geoff/git/simdjson/src/jsonioutil.cpp */
|
/* begin file /Users/lemire/CVS/github/simdjson/src/jsonioutil.cpp */
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <stdlib.h>
|
#include <cstdlib>
|
||||||
|
|
||||||
char * allocate_padded_buffer(size_t length) {
|
char * allocate_padded_buffer(size_t length) {
|
||||||
// we could do a simple malloc
|
// we could do a simple malloc
|
||||||
|
@ -21,18 +21,19 @@ char * allocate_padded_buffer(size_t length) {
|
||||||
#elif defined(__MINGW32__) || defined(__MINGW64__)
|
#elif defined(__MINGW32__) || defined(__MINGW64__)
|
||||||
padded_buffer = __mingw_aligned_malloc(totalpaddedlength, 64);
|
padded_buffer = __mingw_aligned_malloc(totalpaddedlength, 64);
|
||||||
#else
|
#else
|
||||||
if (posix_memalign((void **)&padded_buffer, 64, totalpaddedlength) != 0) return NULL;
|
if (posix_memalign(reinterpret_cast<void **>(&padded_buffer), 64, totalpaddedlength) != 0) { return nullptr;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
return padded_buffer;
|
return padded_buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string_view get_corpus(std::string filename) {
|
std::string_view get_corpus(const std::string& filename) {
|
||||||
std::FILE *fp = std::fopen(filename.c_str(), "rb");
|
std::FILE *fp = std::fopen(filename.c_str(), "rb");
|
||||||
if (fp) {
|
if (fp != nullptr) {
|
||||||
std::fseek(fp, 0, SEEK_END);
|
std::fseek(fp, 0, SEEK_END);
|
||||||
size_t len = std::ftell(fp);
|
size_t len = std::ftell(fp);
|
||||||
char * buf = allocate_padded_buffer(len);
|
char * buf = allocate_padded_buffer(len);
|
||||||
if(buf == NULL) {
|
if(buf == nullptr) {
|
||||||
std::fclose(fp);
|
std::fclose(fp);
|
||||||
throw std::runtime_error("could not allocate memory");
|
throw std::runtime_error("could not allocate memory");
|
||||||
}
|
}
|
||||||
|
@ -47,8 +48,8 @@ std::string_view get_corpus(std::string filename) {
|
||||||
}
|
}
|
||||||
throw std::runtime_error("could not load corpus");
|
throw std::runtime_error("could not load corpus");
|
||||||
}
|
}
|
||||||
/* end file /home/geoff/git/simdjson/src/jsonioutil.cpp */
|
/* end file /Users/lemire/CVS/github/simdjson/src/jsonioutil.cpp */
|
||||||
/* begin file /home/geoff/git/simdjson/src/jsonminifier.cpp */
|
/* begin file /Users/lemire/CVS/github/simdjson/src/jsonminifier.cpp */
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#ifndef __AVX2__
|
#ifndef __AVX2__
|
||||||
|
|
||||||
|
@ -115,7 +116,7 @@ size_t jsonminify(const unsigned char *bytes, size_t howmany,
|
||||||
static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,
|
static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,
|
||||||
__m256i mask) {
|
__m256i mask) {
|
||||||
__m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
|
__m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
|
||||||
uint64_t res_0 = (uint32_t)_mm256_movemask_epi8(cmp_res_0);
|
uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
|
||||||
__m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
|
__m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
|
||||||
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
|
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
|
||||||
return res_0 | (res_1 << 32);
|
return res_0 | (res_1 << 32);
|
||||||
|
@ -136,8 +137,8 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
||||||
size_t avxlen = len - 63;
|
size_t avxlen = len - 63;
|
||||||
|
|
||||||
for (; idx < avxlen; idx += 64) {
|
for (; idx < avxlen; idx += 64) {
|
||||||
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buf + idx + 0));
|
__m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 0));
|
||||||
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buf + idx + 32));
|
__m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 32));
|
||||||
uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi,
|
uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi,
|
||||||
_mm256_set1_epi8('\\'));
|
_mm256_set1_epi8('\\'));
|
||||||
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
|
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
|
||||||
|
@ -161,7 +162,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
||||||
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
||||||
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
||||||
quote_mask ^= prev_iter_inside_quote;
|
quote_mask ^= prev_iter_inside_quote;
|
||||||
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// might be undefined behavior, should be fully defined in C++20, ok according to John Regher from Utah University
|
prev_iter_inside_quote = static_cast<uint64_t>(static_cast<int64_t>(quote_mask) >> 63);// might be undefined behavior, should be fully defined in C++20, ok according to John Regher from Utah University
|
||||||
const __m256i low_nibble_mask = _mm256_setr_epi8(
|
const __m256i low_nibble_mask = _mm256_setr_epi8(
|
||||||
// 0 9 a b c d
|
// 0 9 a b c d
|
||||||
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
|
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
|
||||||
|
@ -187,7 +188,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
||||||
__m256i tmp_ws_hi = _mm256_cmpeq_epi8(
|
__m256i tmp_ws_hi = _mm256_cmpeq_epi8(
|
||||||
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
|
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
|
||||||
|
|
||||||
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
|
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
|
||||||
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
||||||
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
|
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
|
||||||
whitespace &= ~quote_mask;
|
whitespace &= ~quote_mask;
|
||||||
|
@ -200,15 +201,15 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
||||||
int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
|
int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
|
||||||
int pop4 = hamming((~whitespace));
|
int pop4 = hamming((~whitespace));
|
||||||
__m256i vmask1 =
|
__m256i vmask1 =
|
||||||
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF),
|
_mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask2 & 0x7FFF),
|
||||||
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF));
|
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask1 & 0x7FFF));
|
||||||
__m256i vmask2 =
|
__m256i vmask2 =
|
||||||
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF),
|
_mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask4 & 0x7FFF),
|
||||||
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF));
|
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask3 & 0x7FFF));
|
||||||
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
|
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
|
||||||
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
|
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
|
||||||
_mm256_storeu2_m128i((__m128i *)(out + pop1), (__m128i *)out, result1);
|
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop1), reinterpret_cast<__m128i *>(out), result1);
|
||||||
_mm256_storeu2_m128i((__m128i *)(out + pop3), (__m128i *)(out + pop2),
|
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop3), reinterpret_cast<__m128i *>(out + pop2),
|
||||||
result2);
|
result2);
|
||||||
out += pop4;
|
out += pop4;
|
||||||
}
|
}
|
||||||
|
@ -219,8 +220,8 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
||||||
uint8_t buffer[64];
|
uint8_t buffer[64];
|
||||||
memset(buffer, 0, 64);
|
memset(buffer, 0, 64);
|
||||||
memcpy(buffer, buf + idx, len - idx);
|
memcpy(buffer, buf + idx, len - idx);
|
||||||
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buffer));
|
__m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buffer));
|
||||||
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buffer + 32));
|
__m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buffer + 32));
|
||||||
uint64_t bs_bits =
|
uint64_t bs_bits =
|
||||||
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\'));
|
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\'));
|
||||||
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
|
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
|
||||||
|
@ -262,7 +263,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
||||||
__m256i tmp_ws_hi = _mm256_or_si256(
|
__m256i tmp_ws_hi = _mm256_or_si256(
|
||||||
_mm256_cmpeq_epi8(mask_20, input_hi),
|
_mm256_cmpeq_epi8(mask_20, input_hi),
|
||||||
_mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_hi)));
|
_mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_hi)));
|
||||||
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
|
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
|
||||||
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
||||||
uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32));
|
uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32));
|
||||||
whitespace &= ~quote_mask;
|
whitespace &= ~quote_mask;
|
||||||
|
@ -279,16 +280,16 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
||||||
int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
|
int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
|
||||||
int pop4 = hamming((~whitespace));
|
int pop4 = hamming((~whitespace));
|
||||||
__m256i vmask1 =
|
__m256i vmask1 =
|
||||||
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF),
|
_mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask2 & 0x7FFF),
|
||||||
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF));
|
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask1 & 0x7FFF));
|
||||||
__m256i vmask2 =
|
__m256i vmask2 =
|
||||||
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF),
|
_mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask4 & 0x7FFF),
|
||||||
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF));
|
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask3 & 0x7FFF));
|
||||||
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
|
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
|
||||||
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
|
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
|
||||||
_mm256_storeu2_m128i((__m128i *)(buffer + pop1), (__m128i *)buffer,
|
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop1), reinterpret_cast<__m128i *>(buffer),
|
||||||
result1);
|
result1);
|
||||||
_mm256_storeu2_m128i((__m128i *)(buffer + pop3), (__m128i *)(buffer + pop2),
|
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop3), reinterpret_cast<__m128i *>(buffer + pop2),
|
||||||
result2);
|
result2);
|
||||||
memcpy(out, buffer, pop4);
|
memcpy(out, buffer, pop4);
|
||||||
out += pop4;
|
out += pop4;
|
||||||
|
@ -298,8 +299,8 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
/* end file /home/geoff/git/simdjson/src/jsonminifier.cpp */
|
/* end file /Users/lemire/CVS/github/simdjson/src/jsonminifier.cpp */
|
||||||
/* begin file /home/geoff/git/simdjson/src/jsonparser.cpp */
|
/* begin file /Users/lemire/CVS/github/simdjson/src/jsonparser.cpp */
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
#include <sysinfoapi.h>
|
#include <sysinfoapi.h>
|
||||||
|
@ -308,10 +309,10 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
extern bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded);
|
|
||||||
extern bool json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded);
|
|
||||||
extern ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded);
|
|
||||||
extern ParsedJson build_parsed_json(const std::string_view &s, bool reallocifneeded);
|
|
||||||
|
|
||||||
|
|
||||||
// parse a document found in buf, need to preallocate ParsedJson.
|
// parse a document found in buf, need to preallocate ParsedJson.
|
||||||
|
@ -334,8 +335,9 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
|
||||||
#endif
|
#endif
|
||||||
if ( (reinterpret_cast<uintptr_t>(buf + len - 1) % pagesize ) < SIMDJSON_PADDING ) {
|
if ( (reinterpret_cast<uintptr_t>(buf + len - 1) % pagesize ) < SIMDJSON_PADDING ) {
|
||||||
const uint8_t *tmpbuf = buf;
|
const uint8_t *tmpbuf = buf;
|
||||||
buf = (uint8_t *) allocate_padded_buffer(len);
|
buf = reinterpret_cast<uint8_t *>(allocate_padded_buffer(len));
|
||||||
if(buf == NULL) return false;
|
if(buf == nullptr) { return false;
|
||||||
|
}
|
||||||
memcpy((void*)buf,tmpbuf,len);
|
memcpy((void*)buf,tmpbuf,len);
|
||||||
reallocated = true;
|
reallocated = true;
|
||||||
}
|
}
|
||||||
|
@ -344,10 +346,12 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
|
||||||
if (isok) {
|
if (isok) {
|
||||||
isok = unified_machine(buf, len, pj);
|
isok = unified_machine(buf, len, pj);
|
||||||
} else {
|
} else {
|
||||||
if(reallocated) free((void*)buf);
|
if(reallocated) { free((void*)buf);
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if(reallocated) free((void*)buf);
|
if(reallocated) { free((void*)buf);
|
||||||
|
}
|
||||||
return isok;
|
return isok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -363,8 +367,8 @@ ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneede
|
||||||
}
|
}
|
||||||
return pj;
|
return pj;
|
||||||
}
|
}
|
||||||
/* end file /home/geoff/git/simdjson/src/jsonparser.cpp */
|
/* end file /Users/lemire/CVS/github/simdjson/src/jsonparser.cpp */
|
||||||
/* begin file /home/geoff/git/simdjson/src/stage1_find_marks.cpp */
|
/* begin file /Users/lemire/CVS/github/simdjson/src/stage1_find_marks.cpp */
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|
||||||
#ifndef SIMDJSON_SKIPUTF8VALIDATION
|
#ifndef SIMDJSON_SKIPUTF8VALIDATION
|
||||||
|
@ -384,7 +388,7 @@ using namespace std;
|
||||||
really_inline uint64_t cmp_mask_against_input(__m256i input_lo, __m256i input_hi,
|
really_inline uint64_t cmp_mask_against_input(__m256i input_lo, __m256i input_hi,
|
||||||
__m256i mask) {
|
__m256i mask) {
|
||||||
__m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
|
__m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
|
||||||
uint64_t res_0 = (uint32_t)_mm256_movemask_epi8(cmp_res_0);
|
uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
|
||||||
__m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
|
__m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
|
||||||
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
|
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
|
||||||
return res_0 | (res_1 << 32);
|
return res_0 | (res_1 << 32);
|
||||||
|
@ -401,7 +405,7 @@ WARN_UNUSED
|
||||||
uint32_t base = 0;
|
uint32_t base = 0;
|
||||||
#ifdef SIMDJSON_UTF8VALIDATE
|
#ifdef SIMDJSON_UTF8VALIDATE
|
||||||
__m256i has_error = _mm256_setzero_si256();
|
__m256i has_error = _mm256_setzero_si256();
|
||||||
struct avx_processed_utf_bytes previous;
|
struct avx_processed_utf_bytes previous{};
|
||||||
previous.rawbytes = _mm256_setzero_si256();
|
previous.rawbytes = _mm256_setzero_si256();
|
||||||
previous.high_nibbles = _mm256_setzero_si256();
|
previous.high_nibbles = _mm256_setzero_si256();
|
||||||
previous.carried_continuations = _mm256_setzero_si256();
|
previous.carried_continuations = _mm256_setzero_si256();
|
||||||
|
@ -429,8 +433,8 @@ WARN_UNUSED
|
||||||
#ifndef _MSC_VER
|
#ifndef _MSC_VER
|
||||||
__builtin_prefetch(buf + idx + 128);
|
__builtin_prefetch(buf + idx + 128);
|
||||||
#endif
|
#endif
|
||||||
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buf + idx + 0));
|
__m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 0));
|
||||||
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buf + idx + 32));
|
__m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 32));
|
||||||
#ifdef SIMDJSON_UTF8VALIDATE
|
#ifdef SIMDJSON_UTF8VALIDATE
|
||||||
__m256i highbit = _mm256_set1_epi8(0x80);
|
__m256i highbit = _mm256_set1_epi8(0x80);
|
||||||
if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) {
|
if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) {
|
||||||
|
@ -493,29 +497,29 @@ WARN_UNUSED
|
||||||
|
|
||||||
uint32_t cnt = hamming(structurals);
|
uint32_t cnt = hamming(structurals);
|
||||||
uint32_t next_base = base + cnt;
|
uint32_t next_base = base + cnt;
|
||||||
while (structurals) {
|
while (structurals != 0u) {
|
||||||
base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 0] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 1] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 2] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 3] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 4] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 5] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 6] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 7] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base += 8;
|
base += 8;
|
||||||
}
|
}
|
||||||
base = next_base;
|
base = next_base;
|
||||||
|
|
||||||
quote_mask ^= prev_iter_inside_quote;
|
quote_mask ^= prev_iter_inside_quote;
|
||||||
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20, John Regher from Utah U. says this is fine code
|
prev_iter_inside_quote = static_cast<uint64_t>(static_cast<int64_t>(quote_mask) >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20, John Regher from Utah U. says this is fine code
|
||||||
|
|
||||||
// How do we build up a user traversable data structure
|
// How do we build up a user traversable data structure
|
||||||
// first, do a 'shufti' to detect structural JSON characters
|
// first, do a 'shufti' to detect structural JSON characters
|
||||||
|
@ -553,7 +557,7 @@ WARN_UNUSED
|
||||||
__m256i tmp_hi = _mm256_cmpeq_epi8(
|
__m256i tmp_hi = _mm256_cmpeq_epi8(
|
||||||
_mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0));
|
_mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0));
|
||||||
|
|
||||||
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo);
|
uint64_t structural_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_lo));
|
||||||
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
|
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
|
||||||
structurals = ~(structural_res_0 | (structural_res_1 << 32));
|
structurals = ~(structural_res_0 | (structural_res_1 << 32));
|
||||||
|
|
||||||
|
@ -564,7 +568,7 @@ WARN_UNUSED
|
||||||
__m256i tmp_ws_hi = _mm256_cmpeq_epi8(
|
__m256i tmp_ws_hi = _mm256_cmpeq_epi8(
|
||||||
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
|
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
|
||||||
|
|
||||||
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
|
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
|
||||||
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
||||||
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
|
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
|
||||||
// mask off anything inside quotes
|
// mask off anything inside quotes
|
||||||
|
@ -607,8 +611,8 @@ WARN_UNUSED
|
||||||
uint8_t tmpbuf[64];
|
uint8_t tmpbuf[64];
|
||||||
memset(tmpbuf,0x20,64);
|
memset(tmpbuf,0x20,64);
|
||||||
memcpy(tmpbuf,buf+idx,len - idx);
|
memcpy(tmpbuf,buf+idx,len - idx);
|
||||||
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(tmpbuf + 0));
|
__m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(tmpbuf + 0));
|
||||||
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(tmpbuf + 32));
|
__m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(tmpbuf + 32));
|
||||||
#ifdef SIMDJSON_UTF8VALIDATE
|
#ifdef SIMDJSON_UTF8VALIDATE
|
||||||
__m256i highbit = _mm256_set1_epi8(0x80);
|
__m256i highbit = _mm256_set1_epi8(0x80);
|
||||||
if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) {
|
if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) {
|
||||||
|
@ -671,22 +675,22 @@ WARN_UNUSED
|
||||||
|
|
||||||
uint32_t cnt = hamming(structurals);
|
uint32_t cnt = hamming(structurals);
|
||||||
uint32_t next_base = base + cnt;
|
uint32_t next_base = base + cnt;
|
||||||
while (structurals) {
|
while (structurals != 0u) {
|
||||||
base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 0] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 1] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 2] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 3] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 4] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 5] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 6] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 7] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base += 8;
|
base += 8;
|
||||||
}
|
}
|
||||||
|
@ -727,7 +731,7 @@ WARN_UNUSED
|
||||||
__m256i tmp_hi = _mm256_cmpeq_epi8(
|
__m256i tmp_hi = _mm256_cmpeq_epi8(
|
||||||
_mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0));
|
_mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0));
|
||||||
|
|
||||||
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo);
|
uint64_t structural_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_lo));
|
||||||
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
|
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
|
||||||
structurals = ~(structural_res_0 | (structural_res_1 << 32));
|
structurals = ~(structural_res_0 | (structural_res_1 << 32));
|
||||||
|
|
||||||
|
@ -738,7 +742,7 @@ WARN_UNUSED
|
||||||
__m256i tmp_ws_hi = _mm256_cmpeq_epi8(
|
__m256i tmp_ws_hi = _mm256_cmpeq_epi8(
|
||||||
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
|
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
|
||||||
|
|
||||||
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
|
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
|
||||||
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
||||||
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
|
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
|
||||||
|
|
||||||
|
@ -775,22 +779,22 @@ WARN_UNUSED
|
||||||
}
|
}
|
||||||
uint32_t cnt = hamming(structurals);
|
uint32_t cnt = hamming(structurals);
|
||||||
uint32_t next_base = base + cnt;
|
uint32_t next_base = base + cnt;
|
||||||
while (structurals) {
|
while (structurals != 0u) {
|
||||||
base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 0] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 1] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 2] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 3] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 4] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 5] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 6] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
base_ptr[base + 7] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||||
structurals = structurals & (structurals - 1);
|
structurals = structurals & (structurals - 1);
|
||||||
base += 8;
|
base += 8;
|
||||||
}
|
}
|
||||||
|
@ -798,7 +802,7 @@ WARN_UNUSED
|
||||||
|
|
||||||
pj.n_structural_indexes = base;
|
pj.n_structural_indexes = base;
|
||||||
// a valid JSON file cannot have zero structural indexes - we should have found something
|
// a valid JSON file cannot have zero structural indexes - we should have found something
|
||||||
if (!pj.n_structural_indexes) {
|
if (pj.n_structural_indexes == 0u) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if(base_ptr[pj.n_structural_indexes-1] > len) {
|
if(base_ptr[pj.n_structural_indexes-1] > len) {
|
||||||
|
@ -812,13 +816,13 @@ WARN_UNUSED
|
||||||
base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array
|
base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array
|
||||||
|
|
||||||
#ifdef SIMDJSON_UTF8VALIDATE
|
#ifdef SIMDJSON_UTF8VALIDATE
|
||||||
return _mm256_testz_si256(has_error, has_error);
|
return _mm256_testz_si256(has_error, has_error) != 0;
|
||||||
#else
|
#else
|
||||||
return true;
|
return true;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
/* end file /home/geoff/git/simdjson/src/stage1_find_marks.cpp */
|
/* end file /Users/lemire/CVS/github/simdjson/src/stage1_find_marks.cpp */
|
||||||
/* begin file /home/geoff/git/simdjson/src/stage2_build_tape.cpp */
|
/* begin file /Users/lemire/CVS/github/simdjson/src/stage2_build_tape.cpp */
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
/* Microsoft C/C++-compatible compiler */
|
/* Microsoft C/C++-compatible compiler */
|
||||||
#include <intrin.h>
|
#include <intrin.h>
|
||||||
|
@ -838,7 +842,7 @@ using namespace std;
|
||||||
|
|
||||||
WARN_UNUSED
|
WARN_UNUSED
|
||||||
really_inline bool is_valid_true_atom(const uint8_t *loc) {
|
really_inline bool is_valid_true_atom(const uint8_t *loc) {
|
||||||
uint64_t tv = *(const uint64_t *)"true ";
|
uint64_t tv = *reinterpret_cast<const uint64_t *>("true ");
|
||||||
uint64_t mask4 = 0x00000000ffffffff;
|
uint64_t mask4 = 0x00000000ffffffff;
|
||||||
uint32_t error = 0;
|
uint32_t error = 0;
|
||||||
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
||||||
|
@ -850,7 +854,7 @@ really_inline bool is_valid_true_atom(const uint8_t *loc) {
|
||||||
|
|
||||||
WARN_UNUSED
|
WARN_UNUSED
|
||||||
really_inline bool is_valid_false_atom(const uint8_t *loc) {
|
really_inline bool is_valid_false_atom(const uint8_t *loc) {
|
||||||
uint64_t fv = *(const uint64_t *)"false ";
|
uint64_t fv = *reinterpret_cast<const uint64_t *>("false ");
|
||||||
uint64_t mask5 = 0x000000ffffffffff;
|
uint64_t mask5 = 0x000000ffffffffff;
|
||||||
uint32_t error = 0;
|
uint32_t error = 0;
|
||||||
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
||||||
|
@ -862,7 +866,7 @@ really_inline bool is_valid_false_atom(const uint8_t *loc) {
|
||||||
|
|
||||||
WARN_UNUSED
|
WARN_UNUSED
|
||||||
really_inline bool is_valid_null_atom(const uint8_t *loc) {
|
really_inline bool is_valid_null_atom(const uint8_t *loc) {
|
||||||
uint64_t nv = *(const uint64_t *)"null ";
|
uint64_t nv = *reinterpret_cast<const uint64_t *>("null ");
|
||||||
uint64_t mask4 = 0x00000000ffffffff;
|
uint64_t mask4 = 0x00000000ffffffff;
|
||||||
uint32_t error = 0;
|
uint32_t error = 0;
|
||||||
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
||||||
|
@ -957,11 +961,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||||
// we need to make a copy to make sure that the string is NULL terminated.
|
// we need to make a copy to make sure that the string is NULL terminated.
|
||||||
// this only applies to the JSON document made solely of the true value.
|
// this only applies to the JSON document made solely of the true value.
|
||||||
// this will almost never be called in practice
|
// this will almost never be called in practice
|
||||||
char * copy = (char *) malloc(len + SIMDJSON_PADDING);
|
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||||
if(copy == NULL) goto fail;
|
if(copy == nullptr) { goto fail;
|
||||||
|
}
|
||||||
memcpy(copy, buf, len);
|
memcpy(copy, buf, len);
|
||||||
copy[len] = '\0';
|
copy[len] = '\0';
|
||||||
if (!is_valid_true_atom((const uint8_t *)copy + idx)) {
|
if (!is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
|
||||||
free(copy);
|
free(copy);
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
@ -973,11 +978,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||||
// we need to make a copy to make sure that the string is NULL terminated.
|
// we need to make a copy to make sure that the string is NULL terminated.
|
||||||
// this only applies to the JSON document made solely of the false value.
|
// this only applies to the JSON document made solely of the false value.
|
||||||
// this will almost never be called in practice
|
// this will almost never be called in practice
|
||||||
char * copy = (char *) malloc(len + SIMDJSON_PADDING);
|
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||||
if(copy == NULL) goto fail;
|
if(copy == nullptr) { goto fail;
|
||||||
|
}
|
||||||
memcpy(copy, buf, len);
|
memcpy(copy, buf, len);
|
||||||
copy[len] = '\0';
|
copy[len] = '\0';
|
||||||
if (!is_valid_false_atom((const uint8_t *)copy + idx)) {
|
if (!is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
|
||||||
free(copy);
|
free(copy);
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
@ -989,11 +995,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||||
// we need to make a copy to make sure that the string is NULL terminated.
|
// we need to make a copy to make sure that the string is NULL terminated.
|
||||||
// this only applies to the JSON document made solely of the null value.
|
// this only applies to the JSON document made solely of the null value.
|
||||||
// this will almost never be called in practice
|
// this will almost never be called in practice
|
||||||
char * copy = (char *) malloc(len + SIMDJSON_PADDING);
|
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||||
if(copy == NULL) goto fail;
|
if(copy == nullptr) { goto fail;
|
||||||
|
}
|
||||||
memcpy(copy, buf, len);
|
memcpy(copy, buf, len);
|
||||||
copy[len] = '\0';
|
copy[len] = '\0';
|
||||||
if (!is_valid_null_atom((const uint8_t *)copy + idx)) {
|
if (!is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
|
||||||
free(copy);
|
free(copy);
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
@ -1014,11 +1021,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||||
// we need to make a copy to make sure that the string is NULL terminated.
|
// we need to make a copy to make sure that the string is NULL terminated.
|
||||||
// this is done only for JSON documents made of a sole number
|
// this is done only for JSON documents made of a sole number
|
||||||
// this will almost never be called in practice
|
// this will almost never be called in practice
|
||||||
char * copy = (char *) malloc(len + SIMDJSON_PADDING);
|
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||||
if(copy == NULL) goto fail;
|
if(copy == nullptr) { goto fail;
|
||||||
|
}
|
||||||
memcpy(copy, buf, len);
|
memcpy(copy, buf, len);
|
||||||
copy[len] = '\0';
|
copy[len] = '\0';
|
||||||
if (!parse_number((const uint8_t *)copy, pj, idx, false)) {
|
if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, false)) {
|
||||||
free(copy);
|
free(copy);
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
@ -1029,11 +1037,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||||
// we need to make a copy to make sure that the string is NULL terminated.
|
// we need to make a copy to make sure that the string is NULL terminated.
|
||||||
// this is done only for JSON documents made of a sole number
|
// this is done only for JSON documents made of a sole number
|
||||||
// this will almost never be called in practice
|
// this will almost never be called in practice
|
||||||
char * copy = (char *) malloc(len + SIMDJSON_PADDING);
|
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||||
if(copy == NULL) goto fail;
|
if(copy == nullptr) { goto fail;
|
||||||
|
}
|
||||||
memcpy(copy, buf, len);
|
memcpy(copy, buf, len);
|
||||||
copy[len] = '\0';
|
copy[len] = '\0';
|
||||||
if (!parse_number((const uint8_t *)copy, pj, idx, true)) {
|
if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, true)) {
|
||||||
free(copy);
|
free(copy);
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
@ -1325,38 +1334,37 @@ succeed:
|
||||||
fail:
|
fail:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
/* end file /home/geoff/git/simdjson/src/stage2_build_tape.cpp */
|
/* end file /Users/lemire/CVS/github/simdjson/src/stage2_build_tape.cpp */
|
||||||
/* begin file /home/geoff/git/simdjson/src/parsedjson.cpp */
|
/* begin file /Users/lemire/CVS/github/simdjson/src/parsedjson.cpp */
|
||||||
|
|
||||||
// Default constructor: every buffer pointer member starts out null.
// NOTE(review): the capacity counters, current_loc, n_structural_indexes and
// isvalid are not initialized here -- presumably they carry in-class default
// initializers; confirm against the class declaration.
ParsedJson::ParsedJson()
    : structural_indexes(nullptr),
      tape(nullptr),
      containing_scope_offset(nullptr),
      ret_address(nullptr),
      string_buf(nullptr),
      current_string_buf_loc(nullptr) {}
|
|
||||||
|
|
||||||
// Destructor: all cleanup is delegated to deallocate().
ParsedJson::~ParsedJson() { deallocate(); }
|
||||||
|
|
||||||
// Move constructor: copies the bookkeeping fields of `p`, takes over its
// buffers, and leaves `p` in the same empty state that deallocate() produces.
ParsedJson::ParsedJson(ParsedJson && p)
    : bytecapacity(p.bytecapacity),
      depthcapacity(p.depthcapacity),
      tapecapacity(p.tapecapacity),
      stringcapacity(p.stringcapacity),
      current_loc(p.current_loc),
      n_structural_indexes(p.n_structural_indexes),
      structural_indexes(p.structural_indexes),
      tape(p.tape),
      containing_scope_offset(p.containing_scope_offset),
      ret_address(p.ret_address),
      string_buf(p.string_buf),
      current_string_buf_loc(p.current_string_buf_loc),
      isvalid(p.isvalid) {
  // Detach the buffers from the source so its destructor cannot free what
  // this object now holds.
  p.structural_indexes = nullptr;
  p.tape = nullptr;
  p.containing_scope_offset = nullptr;
  p.ret_address = nullptr;
  p.string_buf = nullptr;
  p.current_string_buf_loc = nullptr;
  // The source no longer has any storage, so it must not keep advertising
  // capacity or validity: printjson()/dump_raw_tape() only test isvalid before
  // reading tape[0], and allocateCapacity() consults the capacities to decide
  // whether existing buffers can be reused.
  p.bytecapacity = 0;
  p.depthcapacity = 0;
  p.tapecapacity = 0;
  p.stringcapacity = 0;
  p.isvalid = false;
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1368,8 +1376,9 @@ bool ParsedJson::allocateCapacity(size_t len, size_t maxdepth) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (len > 0) {
|
if (len > 0) {
|
||||||
if ((len <= bytecapacity) && (depthcapacity < maxdepth))
|
// Existing buffers may be reused only when they are big enough in BOTH
// dimensions; the previous test (depthcapacity < maxdepth) accepted them
// exactly when the depth capacity was too small.
if ((len <= bytecapacity) && (maxdepth <= depthcapacity)) {
|
||||||
return true;
|
return true;
|
||||||
|
}
|
||||||
deallocate();
|
deallocate();
|
||||||
}
|
}
|
||||||
isvalid = false;
|
isvalid = false;
|
||||||
|
@ -1387,14 +1396,15 @@ bool ParsedJson::allocateCapacity(size_t len, size_t maxdepth) {
|
||||||
#else
|
#else
|
||||||
ret_address = new (std::nothrow) char[maxdepth];
|
ret_address = new (std::nothrow) char[maxdepth];
|
||||||
#endif
|
#endif
|
||||||
if ((string_buf == NULL) || (tape == NULL) ||
|
if ((string_buf == nullptr) || (tape == nullptr) ||
|
||||||
(containing_scope_offset == NULL) || (ret_address == NULL) || (structural_indexes == NULL)) {
|
(containing_scope_offset == nullptr) || (ret_address == nullptr) || (structural_indexes == nullptr)) {
|
||||||
std::cerr << "Could not allocate memory" << std::endl;
|
std::cerr << "Could not allocate memory" << std::endl;
|
||||||
if(ret_address != NULL) delete[] ret_address;
|
delete[] ret_address;
|
||||||
if(containing_scope_offset != NULL) delete[] containing_scope_offset;
|
delete[] containing_scope_offset;
|
||||||
if(tape != NULL) delete[] tape;
|
delete[] tape;
|
||||||
if(string_buf != NULL) delete[] string_buf;
|
delete[] string_buf;
|
||||||
if(structural_indexes != NULL) delete[] structural_indexes;
|
delete[] structural_indexes;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1414,11 +1424,16 @@ void ParsedJson::deallocate() {
|
||||||
depthcapacity = 0;
|
depthcapacity = 0;
|
||||||
tapecapacity = 0;
|
tapecapacity = 0;
|
||||||
stringcapacity = 0;
|
stringcapacity = 0;
|
||||||
if(ret_address != NULL) delete[] ret_address;
|
{delete[] ret_address;
|
||||||
if(containing_scope_offset != NULL) delete[] containing_scope_offset;
|
}
|
||||||
if(tape != NULL) delete[] tape;
|
{delete[] containing_scope_offset;
|
||||||
if(string_buf != NULL) delete[] string_buf;
|
}
|
||||||
if(structural_indexes != NULL) delete[] structural_indexes;
|
{delete[] tape;
|
||||||
|
}
|
||||||
|
{delete[] string_buf;
|
||||||
|
}
|
||||||
|
{delete[] structural_indexes;
|
||||||
|
}
|
||||||
isvalid = false;
|
isvalid = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1430,7 +1445,8 @@ void ParsedJson::init() {
|
||||||
|
|
||||||
WARN_UNUSED
|
WARN_UNUSED
|
||||||
bool ParsedJson::printjson(std::ostream &os) {
|
bool ParsedJson::printjson(std::ostream &os) {
|
||||||
if(!isvalid) return false;
|
if(!isvalid) { return false;
|
||||||
|
}
|
||||||
size_t tapeidx = 0;
|
size_t tapeidx = 0;
|
||||||
uint64_t tape_val = tape[tapeidx];
|
uint64_t tape_val = tape[tapeidx];
|
||||||
uint8_t type = (tape_val >> 56);
|
uint8_t type = (tape_val >> 56);
|
||||||
|
@ -1448,7 +1464,7 @@ bool ParsedJson::printjson(std::ostream &os) {
|
||||||
}
|
}
|
||||||
tapeidx++;
|
tapeidx++;
|
||||||
bool *inobject = new bool[depthcapacity];
|
bool *inobject = new bool[depthcapacity];
|
||||||
size_t *inobjectidx = new size_t[depthcapacity];
|
auto *inobjectidx = new size_t[depthcapacity];
|
||||||
int depth = 1; // only root at level 0
|
int depth = 1; // only root at level 0
|
||||||
inobjectidx[depth] = 0;
|
inobjectidx[depth] = 0;
|
||||||
inobject[depth] = false;
|
inobject[depth] = false;
|
||||||
|
@ -1457,15 +1473,18 @@ bool ParsedJson::printjson(std::ostream &os) {
|
||||||
uint64_t payload = tape_val & JSONVALUEMASK;
|
uint64_t payload = tape_val & JSONVALUEMASK;
|
||||||
type = (tape_val >> 56);
|
type = (tape_val >> 56);
|
||||||
if (!inobject[depth]) {
|
if (!inobject[depth]) {
|
||||||
if ((inobjectidx[depth] > 0) && (type != ']'))
|
if ((inobjectidx[depth] > 0) && (type != ']')) {
|
||||||
os << ",";
|
os << ",";
|
||||||
|
}
|
||||||
inobjectidx[depth]++;
|
inobjectidx[depth]++;
|
||||||
} else { // if (inobject) {
|
} else { // if (inobject) {
|
||||||
if ((inobjectidx[depth] > 0) && ((inobjectidx[depth] & 1) == 0) &&
|
if ((inobjectidx[depth] > 0) && ((inobjectidx[depth] & 1) == 0) &&
|
||||||
(type != '}'))
|
(type != '}')) {
|
||||||
os << ",";
|
os << ",";
|
||||||
if (((inobjectidx[depth] & 1) == 1))
|
}
|
||||||
|
if (((inobjectidx[depth] & 1) == 1)) {
|
||||||
os << ":";
|
os << ":";
|
||||||
|
}
|
||||||
inobjectidx[depth]++;
|
inobjectidx[depth]++;
|
||||||
}
|
}
|
||||||
switch (type) {
|
switch (type) {
|
||||||
|
@ -1475,13 +1494,15 @@ bool ParsedJson::printjson(std::ostream &os) {
|
||||||
os << '"';
|
os << '"';
|
||||||
break;
|
break;
|
||||||
case 'l': // we have a long int
|
case 'l': // we have a long int
|
||||||
if (tapeidx + 1 >= howmany)
|
if (tapeidx + 1 >= howmany) {
|
||||||
return false;
|
return false;
|
||||||
os << (int64_t)tape[++tapeidx];
|
}
|
||||||
|
os << static_cast<int64_t>(tape[++tapeidx]);
|
||||||
break;
|
break;
|
||||||
case 'd': // we have a double
|
case 'd': // we have a double
|
||||||
if (tapeidx + 1 >= howmany)
|
if (tapeidx + 1 >= howmany) {
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
double answer;
|
double answer;
|
||||||
memcpy(&answer, &tape[++tapeidx], sizeof(answer));
|
memcpy(&answer, &tape[++tapeidx], sizeof(answer));
|
||||||
os << answer;
|
os << answer;
|
||||||
|
@ -1534,7 +1555,8 @@ bool ParsedJson::printjson(std::ostream &os) {
|
||||||
|
|
||||||
WARN_UNUSED
|
WARN_UNUSED
|
||||||
bool ParsedJson::dump_raw_tape(std::ostream &os) {
|
bool ParsedJson::dump_raw_tape(std::ostream &os) {
|
||||||
if(!isvalid) return false;
|
if(!isvalid) { return false;
|
||||||
|
}
|
||||||
size_t tapeidx = 0;
|
size_t tapeidx = 0;
|
||||||
uint64_t tape_val = tape[tapeidx];
|
uint64_t tape_val = tape[tapeidx];
|
||||||
uint8_t type = (tape_val >> 56);
|
uint8_t type = (tape_val >> 56);
|
||||||
|
@ -1562,14 +1584,16 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) {
|
||||||
os << '\n';
|
os << '\n';
|
||||||
break;
|
break;
|
||||||
case 'l': // we have a long int
|
case 'l': // we have a long int
|
||||||
if (tapeidx + 1 >= howmany)
|
if (tapeidx + 1 >= howmany) {
|
||||||
return false;
|
return false;
|
||||||
os << "integer " << (int64_t)tape[++tapeidx] << "\n";
|
}
|
||||||
|
os << "integer " << static_cast<int64_t>(tape[++tapeidx]) << "\n";
|
||||||
break;
|
break;
|
||||||
case 'd': // we have a double
|
case 'd': // we have a double
|
||||||
os << "float ";
|
os << "float ";
|
||||||
if (tapeidx + 1 >= howmany)
|
if (tapeidx + 1 >= howmany) {
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
double answer;
|
double answer;
|
||||||
memcpy(&answer, &tape[++tapeidx], sizeof(answer));
|
memcpy(&answer, &tape[++tapeidx], sizeof(answer));
|
||||||
os << answer << '\n';
|
os << answer << '\n';
|
||||||
|
@ -1608,13 +1632,14 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) {
|
||||||
os << tapeidx << " : "<< type <<"\t// pointing to " << payload <<" (start root)\n";
|
os << tapeidx << " : "<< type <<"\t// pointing to " << payload <<" (start root)\n";
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
/* end file /home/geoff/git/simdjson/src/parsedjson.cpp */
|
/* end file /Users/lemire/CVS/github/simdjson/src/parsedjson.cpp */
|
||||||
/* begin file /home/geoff/git/simdjson/src/parsedjsoniterator.cpp */
|
/* begin file /Users/lemire/CVS/github/simdjson/src/parsedjsoniterator.cpp */
|
||||||
|
|
||||||
ParsedJson::iterator::iterator(ParsedJson &pj_) : pj(pj_), depth(0), location(0), tape_length(0), depthindex(NULL) {
|
ParsedJson::iterator::iterator(ParsedJson &pj_) : pj(pj_), depth(0), location(0), tape_length(0), depthindex(nullptr) {
|
||||||
if(pj.isValid()) {
|
if(pj.isValid()) {
|
||||||
depthindex = new scopeindex_t[pj.depthcapacity];
|
depthindex = new scopeindex_t[pj.depthcapacity];
|
||||||
if(depthindex == NULL) return;
|
if(depthindex == nullptr) { return;
|
||||||
|
}
|
||||||
depthindex[0].start_of_scope = location;
|
depthindex[0].start_of_scope = location;
|
||||||
current_val = pj.tape[location++];
|
current_val = pj.tape[location++];
|
||||||
current_type = (current_val >> 56);
|
current_type = (current_val >> 56);
|
||||||
|
@ -1639,9 +1664,9 @@ ParsedJson::iterator::~iterator() {
|
||||||
ParsedJson::iterator::iterator(const iterator &o):
|
ParsedJson::iterator::iterator(const iterator &o):
|
||||||
pj(o.pj), depth(o.depth), location(o.location),
|
pj(o.pj), depth(o.depth), location(o.location),
|
||||||
tape_length(o.tape_length), current_type(o.current_type),
|
tape_length(o.tape_length), current_type(o.current_type),
|
||||||
current_val(o.current_val), depthindex(NULL) {
|
current_val(o.current_val), depthindex(nullptr) {
|
||||||
depthindex = new scopeindex_t[pj.depthcapacity];
|
depthindex = new scopeindex_t[pj.depthcapacity];
|
||||||
if(depthindex != NULL) {
|
if(depthindex != nullptr) {
|
||||||
memcpy(o.depthindex, depthindex, pj.depthcapacity * sizeof(depthindex[0]));
|
// memcpy takes (destination, source, count): fill our freshly allocated
// array from the source iterator, not the other way around.
memcpy(depthindex, o.depthindex, pj.depthcapacity * sizeof(depthindex[0]));
|
||||||
} else {
|
} else {
|
||||||
tape_length = 0;
|
tape_length = 0;
|
||||||
|
@ -1649,10 +1674,10 @@ ParsedJson::iterator::iterator(const iterator &o):
|
||||||
}
|
}
|
||||||
|
|
||||||
// Move constructor: copies the cursor state of `o` and takes over its
// depthindex array.
ParsedJson::iterator::iterator(iterator &&o)
    : pj(o.pj),
      depth(o.depth),
      location(o.location),
      tape_length(o.tape_length),
      current_type(o.current_type),
      current_val(o.current_val),
      depthindex(o.depthindex) {
  // ownership of the array has moved to this object
  o.depthindex = nullptr;
}
|
||||||
|
|
||||||
WARN_UNUSED
|
WARN_UNUSED
|
||||||
|
@ -1716,19 +1741,21 @@ uint8_t ParsedJson::iterator::get_type() const {
|
||||||
|
|
||||||
|
|
||||||
int64_t ParsedJson::iterator::get_integer() const {
|
int64_t ParsedJson::iterator::get_integer() const {
|
||||||
if(location + 1 >= tape_length) return 0;// default value in case of error
|
if(location + 1 >= tape_length) { return 0;// default value in case of error
|
||||||
return (int64_t) pj.tape[location + 1];
|
}
|
||||||
|
return static_cast<int64_t>(pj.tape[location + 1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
double ParsedJson::iterator::get_double() const {
|
double ParsedJson::iterator::get_double() const {
|
||||||
if(location + 1 >= tape_length) return NAN;// default value in case of error
|
if(location + 1 >= tape_length) { return NAN;// default value in case of error
|
||||||
|
}
|
||||||
double answer;
|
double answer;
|
||||||
memcpy(&answer, & pj.tape[location + 1], sizeof(answer));
|
memcpy(&answer, & pj.tape[location + 1], sizeof(answer));
|
||||||
return answer;
|
return answer;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char * ParsedJson::iterator::get_string() const {
|
const char * ParsedJson::iterator::get_string() const {
|
||||||
return (const char *)(pj.string_buf + (current_val & JSONVALUEMASK)) ;
|
return reinterpret_cast<const char *>(pj.string_buf + (current_val & JSONVALUEMASK)) ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1766,7 +1793,8 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
|
||||||
assert(is_string());
|
assert(is_string());
|
||||||
bool rightkey = (strcmp(get_string(),key)==0);
|
bool rightkey = (strcmp(get_string(),key)==0);
|
||||||
next();
|
next();
|
||||||
if(rightkey) return true;
|
if(rightkey) { return true;
|
||||||
|
}
|
||||||
} while(next());
|
} while(next());
|
||||||
assert(up());// not found
|
assert(up());// not found
|
||||||
}
|
}
|
||||||
|
@ -1790,9 +1818,10 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
|
||||||
current_val = nextval;
|
current_val = nextval;
|
||||||
current_type = nexttype;
|
current_type = nexttype;
|
||||||
return true;
|
return true;
|
||||||
} else {
|
}
|
||||||
size_t increment = (current_type == 'd' || current_type == 'l') ? 2 : 1;
|
size_t increment = (current_type == 'd' || current_type == 'l') ? 2 : 1;
|
||||||
if(location + increment >= tape_length) return false;
|
if(location + increment >= tape_length) { return false;
|
||||||
|
}
|
||||||
uint64_t nextval = pj.tape[location + increment];
|
uint64_t nextval = pj.tape[location + increment];
|
||||||
uint8_t nexttype = (nextval >> 56);
|
uint8_t nexttype = (nextval >> 56);
|
||||||
if((nexttype == ']') || (nexttype == '}')) {
|
if((nexttype == ']') || (nexttype == '}')) {
|
||||||
|
@ -1802,12 +1831,13 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
|
||||||
current_val = nextval;
|
current_val = nextval;
|
||||||
current_type = nexttype;
|
current_type = nexttype;
|
||||||
return true;
|
return true;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool ParsedJson::iterator::prev() {
|
bool ParsedJson::iterator::prev() {
|
||||||
if(location - 1 < depthindex[depth].start_of_scope) return false;
|
if(location - 1 < depthindex[depth].start_of_scope) { return false;
|
||||||
|
}
|
||||||
location -= 1;
|
location -= 1;
|
||||||
current_val = pj.tape[location];
|
current_val = pj.tape[location];
|
||||||
current_type = (current_val >> 56);
|
current_type = (current_val >> 56);
|
||||||
|
@ -1840,7 +1870,8 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
|
||||||
|
|
||||||
|
|
||||||
bool ParsedJson::iterator::down() {
|
bool ParsedJson::iterator::down() {
|
||||||
if(location + 1 >= tape_length) return false;
|
if(location + 1 >= tape_length) { return false;
|
||||||
|
}
|
||||||
if ((current_type == '[') || (current_type == '{')) {
|
if ((current_type == '[') || (current_type == '{')) {
|
||||||
size_t npos = (current_val & JSONVALUEMASK);
|
size_t npos = (current_val & JSONVALUEMASK);
|
||||||
if(npos == location + 2) {
|
if(npos == location + 2) {
|
||||||
|
@ -1864,7 +1895,8 @@ void ParsedJson::iterator::to_start_scope() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const {
|
bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const {
|
||||||
if(!isOk()) return false;
|
if(!isOk()) { return false;
|
||||||
|
}
|
||||||
switch (current_type) {
|
switch (current_type) {
|
||||||
case '"': // we have a string
|
case '"': // we have a string
|
||||||
os << '"';
|
os << '"';
|
||||||
|
@ -1894,11 +1926,11 @@ bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const {
|
||||||
case '}': // we end an object
|
case '}': // we end an object
|
||||||
case '[': // we start an array
|
case '[': // we start an array
|
||||||
case ']': // we end an array
|
case ']': // we end an array
|
||||||
os << (char) current_type;
|
os << static_cast<char>(current_type);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
/* end file /home/geoff/git/simdjson/src/parsedjsoniterator.cpp */
|
/* end file /Users/lemire/CVS/github/simdjson/src/parsedjsoniterator.cpp */
|
||||||
|
|
|
@ -1,5 +1,17 @@
|
||||||
/* auto-generated on Tue 26 Feb 13:29:52 AEDT 2019. Do not edit! */
|
/* auto-generated on Tue 26 Feb 2019 10:14:31 EST. Do not edit! */
|
||||||
/* begin file /home/geoff/git/simdjson/include/simdjson/portability.h */
|
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/simdjson_version.h */
|
||||||
|
// /include/simdjson/simdjson_version.h automatically generated by release.py, do not change by hand
|
||||||
|
#ifndef SIMDJSON_INCLUDE_SIMDJSON_VERSION
|
||||||
|
#define SIMDJSON_INCLUDE_SIMDJSON_VERSION
|
||||||
|
// Library version as a "major.minor.revision" string literal. The previous
// replacement text ("= 0.0.1,") made every use of the macro ill-formed; since
// this header is generated by release.py, the generator template needs the
// same correction.
#define SIMDJSON_VERSION "0.0.1"
|
||||||
|
enum {
|
||||||
|
SIMDJSON_VERSION_MAJOR = 0,
|
||||||
|
SIMDJSON_VERSION_MINOR = 0,
|
||||||
|
SIMDJSON_VERSION_REVISION = 1
|
||||||
|
};
|
||||||
|
#endif // SIMDJSON_INCLUDE_SIMDJSON_VERSION
|
||||||
|
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/simdjson_version.h */
|
||||||
|
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/portability.h */
|
||||||
#ifndef SIMDJSON_PORTABILITY_H
|
#ifndef SIMDJSON_PORTABILITY_H
|
||||||
#define SIMDJSON_PORTABILITY_H
|
#define SIMDJSON_PORTABILITY_H
|
||||||
|
|
||||||
|
@ -44,8 +56,8 @@ static inline int hamming(uint64_t input_num) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#include <x86intrin.h>
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
#include <x86intrin.h>
|
||||||
|
|
||||||
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
|
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
|
||||||
return __builtin_uaddll_overflow(value1, value2, (unsigned long long*)result);
|
return __builtin_uaddll_overflow(value1, value2, (unsigned long long*)result);
|
||||||
|
@ -88,7 +100,7 @@ static inline void *aligned_malloc(size_t alignment, size_t size) {
|
||||||
#else
|
#else
|
||||||
// somehow, if this is used before including "x86intrin.h", it creates an
|
// somehow, if this is used before including "x86intrin.h", it creates an
|
||||||
// implicit defined warning.
|
// implicit defined warning.
|
||||||
if (posix_memalign(&p, alignment, size) != 0) return NULL;
|
if (posix_memalign(&p, alignment, size) != 0) { return nullptr; }
|
||||||
#endif
|
#endif
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
@ -116,7 +128,7 @@ static inline void _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo,
|
||||||
|
|
||||||
|
|
||||||
static inline void aligned_free(void *memblock) {
|
static inline void aligned_free(void *memblock) {
|
||||||
if(memblock == NULL) return;
|
if(memblock == nullptr) { return; }
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
_aligned_free(memblock);
|
_aligned_free(memblock);
|
||||||
#elif defined(__MINGW32__) || defined(__MINGW64__)
|
#elif defined(__MINGW32__) || defined(__MINGW64__)
|
||||||
|
@ -126,9 +138,9 @@ static inline void aligned_free(void *memblock) {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* end of include PORTABILITY_H */
|
#endif // SIMDJSON_PORTABILITY_H
|
||||||
/* end file /home/geoff/git/simdjson/include/simdjson/portability.h */
|
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/portability.h */
|
||||||
/* begin file /home/geoff/git/simdjson/include/simdjson/common_defs.h */
|
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/common_defs.h */
|
||||||
#ifndef SIMDJSON_COMMON_DEFS_H
|
#ifndef SIMDJSON_COMMON_DEFS_H
|
||||||
#define SIMDJSON_COMMON_DEFS_H
|
#define SIMDJSON_COMMON_DEFS_H
|
||||||
|
|
||||||
|
@ -186,9 +198,9 @@ static inline void aligned_free(void *memblock) {
|
||||||
|
|
||||||
#endif // MSC_VER
|
#endif // MSC_VER
|
||||||
|
|
||||||
#endif // COMMON_DEFS_H
|
#endif // SIMDJSON_COMMON_DEFS_H
|
||||||
/* end file /home/geoff/git/simdjson/include/simdjson/common_defs.h */
|
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/common_defs.h */
|
||||||
/* begin file /home/geoff/git/simdjson/include/simdjson/jsoncharutils.h */
|
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/jsoncharutils.h */
|
||||||
#ifndef SIMDJSON_JSONCHARUTILS_H
|
#ifndef SIMDJSON_JSONCHARUTILS_H
|
||||||
#define SIMDJSON_JSONCHARUTILS_H
|
#define SIMDJSON_JSONCHARUTILS_H
|
||||||
|
|
||||||
|
@ -286,7 +298,7 @@ inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
|
||||||
if (cp <= 0x7F) {
|
if (cp <= 0x7F) {
|
||||||
c[0] = cp;
|
c[0] = cp;
|
||||||
return 1; // ascii
|
return 1; // ascii
|
||||||
} else if (cp <= 0x7FF) {
|
} if (cp <= 0x7FF) {
|
||||||
c[0] = (cp >> 6) + 192;
|
c[0] = (cp >> 6) + 192;
|
||||||
c[1] = (cp & 63) + 128;
|
c[1] = (cp & 63) + 128;
|
||||||
return 2; // universal plane
|
return 2; // universal plane
|
||||||
|
@ -310,17 +322,17 @@ inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
/* end file /home/geoff/git/simdjson/include/simdjson/jsoncharutils.h */
|
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/jsoncharutils.h */
|
||||||
/* begin file /home/geoff/git/simdjson/include/simdjson/jsonformatutils.h */
|
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonformatutils.h */
|
||||||
#ifndef SIMDJSON_JSONFORMATUTILS_H
|
#ifndef SIMDJSON_JSONFORMATUTILS_H
|
||||||
#define SIMDJSON_JSONFORMATUTILS_H
|
#define SIMDJSON_JSONFORMATUTILS_H
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <cstdio>
|
||||||
#include <iostream>
|
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
static inline void print_with_escapes(const unsigned char *src) {
|
static inline void print_with_escapes(const unsigned char *src) {
|
||||||
while (*src) {
|
while (*src != 0u) {
|
||||||
switch (*src) {
|
switch (*src) {
|
||||||
case '\b':
|
case '\b':
|
||||||
putchar('\\');
|
putchar('\\');
|
||||||
|
@ -353,15 +365,16 @@ static inline void print_with_escapes(const unsigned char *src) {
|
||||||
default:
|
default:
|
||||||
if (*src <= 0x1F) {
|
if (*src <= 0x1F) {
|
||||||
printf("\\u%04x", *src);
|
printf("\\u%04x", *src);
|
||||||
} else
|
} else {
|
||||||
putchar(*src);
|
putchar(*src);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
src++;
|
src++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void print_with_escapes(const unsigned char *src, std::ostream &os) {
|
static inline void print_with_escapes(const unsigned char *src, std::ostream &os) {
|
||||||
while (*src) {
|
while (*src != 0u) {
|
||||||
switch (*src) {
|
switch (*src) {
|
||||||
case '\b':
|
case '\b':
|
||||||
os << '\\';
|
os << '\\';
|
||||||
|
@ -394,22 +407,23 @@ static inline void print_with_escapes(const unsigned char *src, std::ostream &os
|
||||||
default:
|
default:
|
||||||
if (*src <= 0x1F) {
|
if (*src <= 0x1F) {
|
||||||
std::ios::fmtflags f(os.flags());
|
std::ios::fmtflags f(os.flags());
|
||||||
os << std::hex << std::setw(4) << std::setfill('0') << (int) *src;
|
os << std::hex << std::setw(4) << std::setfill('0') << static_cast<int>(*src);
|
||||||
os.flags(f);
|
os.flags(f);
|
||||||
} else
|
} else {
|
||||||
os << *src;
|
os << *src;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
src++;
|
src++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convenience overload for plain char buffers: reinterprets src as
// const unsigned char * and forwards it, together with os, to the
// unsigned char overload of print_with_escapes.
// NOTE(review): this span is a diff rendering; each line appears twice
// (pre-change, then post-change). The only change is the C-style cast
// becoming reinterpret_cast.
static inline void print_with_escapes(const char *src, std::ostream &os) {
|
static inline void print_with_escapes(const char *src, std::ostream &os) {
|
||||||
print_with_escapes((const unsigned char *)src, os);
|
print_with_escapes(reinterpret_cast<const unsigned char *>(src), os);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
/* end file /home/geoff/git/simdjson/include/simdjson/jsonformatutils.h */
|
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonformatutils.h */
|
||||||
/* begin file /home/geoff/git/simdjson/include/simdjson/jsonioutil.h */
|
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonioutil.h */
|
||||||
#ifndef SIMDJSON_JSONIOUTIL_H
|
#ifndef SIMDJSON_JSONIOUTIL_H
|
||||||
#define SIMDJSON_JSONIOUTIL_H
|
#define SIMDJSON_JSONIOUTIL_H
|
||||||
|
|
||||||
|
@ -445,12 +459,12 @@ char * allocate_padded_buffer(size_t length);
|
||||||
// free((void*)p.data());//use aligned_free if you plan to use VisualStudio
|
// free((void*)p.data());//use aligned_free if you plan to use VisualStudio
|
||||||
// std::cout << "Could not load the file " << filename << std::endl;
|
// std::cout << "Could not load the file " << filename << std::endl;
|
||||||
// }
|
// }
|
||||||
std::string_view get_corpus(std::string filename);
|
std::string_view get_corpus(const std::string& filename);
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
/* end file /home/geoff/git/simdjson/include/simdjson/jsonioutil.h */
|
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonioutil.h */
|
||||||
/* begin file /home/geoff/git/simdjson/include/simdjson/simdprune_tables.h */
|
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/simdprune_tables.h */
|
||||||
#ifndef SIMDJSON_SIMDPRUNE_TABLES_H
|
#ifndef SIMDJSON_SIMDPRUNE_TABLES_H
|
||||||
#define SIMDJSON_SIMDPRUNE_TABLES_H
|
#define SIMDJSON_SIMDPRUNE_TABLES_H
|
||||||
|
|
||||||
|
@ -35441,7 +35455,7 @@ static const unsigned char mask128_epi32[] = {
|
||||||
|
|
||||||
#ifdef __AVX2__
|
#ifdef __AVX2__
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <cstdint>
|
||||||
|
|
||||||
static const uint32_t mask256_epi32[] = {
|
static const uint32_t mask256_epi32[] = {
|
||||||
0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 7, 0, 2, 3, 4, 5, 6, 7, 7, 2,
|
0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 7, 0, 2, 3, 4, 5, 6, 7, 7, 2,
|
||||||
|
@ -35529,13 +35543,13 @@ static const uint32_t mask256_epi32[] = {
|
||||||
#endif //__AVX2__
|
#endif //__AVX2__
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
/* end file /home/geoff/git/simdjson/include/simdjson/simdprune_tables.h */
|
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/simdprune_tables.h */
|
||||||
/* begin file /home/geoff/git/simdjson/include/simdjson/simdutf8check.h */
|
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/simdutf8check.h */
|
||||||
|
|
||||||
#ifndef SIMDJSON_SIMDUTF8CHECK_H
|
#ifndef SIMDJSON_SIMDUTF8CHECK_H
|
||||||
#define SIMDJSON_SIMDUTF8CHECK_H
|
#define SIMDJSON_SIMDUTF8CHECK_H
|
||||||
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
@ -35700,7 +35714,7 @@ static struct avx_processed_utf_bytes
|
||||||
avxcheckUTF8Bytes(__m256i current_bytes,
|
avxcheckUTF8Bytes(__m256i current_bytes,
|
||||||
struct avx_processed_utf_bytes *previous,
|
struct avx_processed_utf_bytes *previous,
|
||||||
__m256i *has_error) {
|
__m256i *has_error) {
|
||||||
struct avx_processed_utf_bytes pb;
|
struct avx_processed_utf_bytes pb{};
|
||||||
avx_count_nibbles(current_bytes, &pb);
|
avx_count_nibbles(current_bytes, &pb);
|
||||||
|
|
||||||
avxcheckSmallerThan0xF4(current_bytes, has_error);
|
avxcheckSmallerThan0xF4(current_bytes, has_error);
|
||||||
|
@ -35725,8 +35739,8 @@ avxcheckUTF8Bytes(__m256i current_bytes,
|
||||||
#warning "We require AVX2 support!"
|
#warning "We require AVX2 support!"
|
||||||
#endif // __AVX2__
|
#endif // __AVX2__
|
||||||
#endif
|
#endif
|
||||||
/* end file /home/geoff/git/simdjson/include/simdjson/simdutf8check.h */
|
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/simdutf8check.h */
|
||||||
/* begin file /home/geoff/git/simdjson/include/simdjson/jsonminifier.h */
|
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonminifier.h */
|
||||||
#ifndef SIMDJSON_JSONMINIFIER_H
|
#ifndef SIMDJSON_JSONMINIFIER_H
|
||||||
#define SIMDJSON_JSONMINIFIER_H
|
#define SIMDJSON_JSONMINIFIER_H
|
||||||
|
|
||||||
|
@ -35740,7 +35754,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out);
|
||||||
|
|
||||||
|
|
||||||
// char-buffer overload of jsonminify: casts buf and out to their uint8_t
// pointer equivalents and returns whatever the uint8_t overload
// (jsonminify(const uint8_t *, size_t, uint8_t *)) returns.
// NOTE(review): diff rendering; the pre-change line uses C-style casts, the
// post-change line uses reinterpret_cast.
static inline size_t jsonminify(const char *buf, size_t len, char *out) {
|
static inline size_t jsonminify(const char *buf, size_t len, char *out) {
|
||||||
return jsonminify((const uint8_t *)buf, len, (uint8_t *)out);
|
return jsonminify(reinterpret_cast<const uint8_t *>(buf), len, reinterpret_cast<uint8_t *>(out));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -35749,11 +35763,12 @@ static inline size_t jsonminify(const std::string_view & p, char *out) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
/* end file /home/geoff/git/simdjson/include/simdjson/jsonminifier.h */
|
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonminifier.h */
|
||||||
/* begin file /home/geoff/git/simdjson/include/simdjson/parsedjson.h */
|
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/parsedjson.h */
|
||||||
#ifndef SIMDJSON_PARSEDJSON_H
|
#ifndef SIMDJSON_PARSEDJSON_H
|
||||||
#define SIMDJSON_PARSEDJSON_H
|
#define SIMDJSON_PARSEDJSON_H
|
||||||
|
|
||||||
|
#include <cinttypes>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
|
@ -35817,12 +35832,12 @@ public:
|
||||||
|
|
||||||
// this should be considered a private function
|
// this should be considered a private function
|
||||||
// Stores one 64-bit word at tape[current_loc] and then advances current_loc.
// The word is val OR'ed with c shifted into the top byte (bits 56..63).
// NOTE(review): presumably val is expected to fit in the low 56 bits, since
// any higher bits would be OR'ed together with c; confirm against callers.
// NOTE(review): diff rendering; the pre-change line uses a C-style cast, the
// post-change line uses static_cast.
really_inline void write_tape(uint64_t val, uint8_t c) {
|
really_inline void write_tape(uint64_t val, uint8_t c) {
|
||||||
tape[current_loc++] = val | (((uint64_t)c) << 56);
|
tape[current_loc++] = val | ((static_cast<uint64_t>(c)) << 56);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Writes a signed 64-bit integer as two consecutive tape words:
//   1. a word produced by write_tape(0, 'l') (payload 0, 'l' in the top byte);
//   2. the bit pattern of i, read through a uint64_t pointer and stored as-is.
// current_loc advances by two in total (once inside write_tape, once here).
// NOTE(review): diff rendering; the pre-change line uses a C-style pointer
// cast, the post-change line uses reinterpret_cast.
really_inline void write_tape_s64(int64_t i) {
|
really_inline void write_tape_s64(int64_t i) {
|
||||||
write_tape(0, 'l');
|
write_tape(0, 'l');
|
||||||
tape[current_loc++] = *((uint64_t *)&i);
|
tape[current_loc++] = *(reinterpret_cast<uint64_t *>(&i));
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline void write_tape_double(double d) {
|
really_inline void write_tape_double(double d) {
|
||||||
|
@ -35942,7 +35957,7 @@ public:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
iterator& operator=(const iterator& other) ;
|
iterator& operator=(const iterator& other) = delete ;
|
||||||
|
|
||||||
ParsedJson &pj;
|
ParsedJson &pj;
|
||||||
size_t depth;
|
size_t depth;
|
||||||
|
@ -35953,13 +35968,13 @@ private:
|
||||||
scopeindex_t *depthindex;
|
scopeindex_t *depthindex;
|
||||||
};
|
};
|
||||||
|
|
||||||
size_t bytecapacity; // indicates how many bits are meant to be supported
|
size_t bytecapacity{0}; // indicates how many bits are meant to be supported
|
||||||
|
|
||||||
size_t depthcapacity; // how deep we can go
|
size_t depthcapacity{0}; // how deep we can go
|
||||||
size_t tapecapacity;
|
size_t tapecapacity{0};
|
||||||
size_t stringcapacity;
|
size_t stringcapacity{0};
|
||||||
uint32_t current_loc;
|
uint32_t current_loc{0};
|
||||||
uint32_t n_structural_indexes;
|
uint32_t n_structural_indexes{0};
|
||||||
|
|
||||||
uint32_t *structural_indexes;
|
uint32_t *structural_indexes;
|
||||||
|
|
||||||
|
@ -35973,10 +35988,13 @@ private:
|
||||||
|
|
||||||
uint8_t *string_buf; // should be at least bytecapacity
|
uint8_t *string_buf; // should be at least bytecapacity
|
||||||
uint8_t *current_string_buf_loc;
|
uint8_t *current_string_buf_loc;
|
||||||
bool isvalid;
|
bool isvalid{false};
|
||||||
|
|
||||||
private :
|
private :
|
||||||
ParsedJson(const ParsedJson & p) = delete;
|
|
||||||
|
// we don't want the copy constructor to be called
|
||||||
|
ParsedJson(const ParsedJson & p) = delete; // copying a ParsedJson is disabled
|
||||||
|
// we don't want the copy assignment to be called
|
||||||
ParsedJson & operator=(const ParsedJson&o) = delete;
|
ParsedJson & operator=(const ParsedJson&o) = delete;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -35984,22 +36002,22 @@ private :
|
||||||
// dump bits low to high
|
// dump bits low to high
|
||||||
// Prints the 64 bits of v to std::cout, bit 0 first: "1" for a set bit and
// "_" for a clear bit, followed by a space, msg, and a newline ("\n", so no
// explicit flush).
// NOTE(review): diff rendering; each line appears twice (pre-change, then
// post-change). The only change is the cast style applied to the shift count.
inline void dumpbits_always(uint64_t v, const std::string &msg) {
|
inline void dumpbits_always(uint64_t v, const std::string &msg) {
|
||||||
for (uint32_t i = 0; i < 64; i++) {
|
for (uint32_t i = 0; i < 64; i++) {
|
||||||
std::cout << (((v >> (uint64_t)i) & 0x1ULL) ? "1" : "_");
|
std::cout << (((v >> static_cast<uint64_t>(i)) & 0x1ULL) ? "1" : "_");
|
||||||
}
|
}
|
||||||
std::cout << " " << msg.c_str() << "\n";
|
std::cout << " " << msg.c_str() << "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
// 32-bit counterpart of dumpbits_always: prints the 32 bits of v to std::cout,
// bit 0 first, as "1" (set) or "_" (clear), then a space, msg, and "\n".
// NOTE(review): diff rendering; the post-change line drops the redundant
// (uint32_t) cast on i, which is already declared as uint32_t.
inline void dumpbits32_always(uint32_t v, const std::string &msg) {
|
inline void dumpbits32_always(uint32_t v, const std::string &msg) {
|
||||||
for (uint32_t i = 0; i < 32; i++) {
|
for (uint32_t i = 0; i < 32; i++) {
|
||||||
std::cout << (((v >> (uint32_t)i) & 0x1ULL) ? "1" : "_");
|
std::cout << (((v >> i) & 0x1ULL) ? "1" : "_");
|
||||||
}
|
}
|
||||||
std::cout << " " << msg.c_str() << "\n";
|
std::cout << " " << msg.c_str() << "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
/* end file /home/geoff/git/simdjson/include/simdjson/parsedjson.h */
|
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/parsedjson.h */
|
||||||
/* begin file /home/geoff/git/simdjson/include/simdjson/stage1_find_marks.h */
|
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/stage1_find_marks.h */
|
||||||
#ifndef SIMDJSON_STAGE1_FIND_MARKS_H
|
#ifndef SIMDJSON_STAGE1_FIND_MARKS_H
|
||||||
#define SIMDJSON_STAGE1_FIND_MARKS_H
|
#define SIMDJSON_STAGE1_FIND_MARKS_H
|
||||||
|
|
||||||
|
@ -36009,12 +36027,12 @@ bool find_structural_bits(const uint8_t *buf, size_t len, ParsedJson &pj);
|
||||||
|
|
||||||
// char-buffer overload of find_structural_bits: casts buf to const uint8_t *
// and returns the result of the uint8_t overload
// (find_structural_bits(const uint8_t *, size_t, ParsedJson &)), passing len
// and pj through unchanged.
// NOTE(review): diff rendering; the pre-change line uses a C-style cast, the
// post-change line uses reinterpret_cast.
WARN_UNUSED
|
WARN_UNUSED
|
||||||
static inline bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
|
static inline bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
|
||||||
return find_structural_bits((const uint8_t *)buf, len, pj);
|
return find_structural_bits(reinterpret_cast<const uint8_t *>(buf), len, pj);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
/* end file /home/geoff/git/simdjson/include/simdjson/stage1_find_marks.h */
|
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/stage1_find_marks.h */
|
||||||
/* begin file /home/geoff/git/simdjson/include/simdjson/stringparsing.h */
|
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/stringparsing.h */
|
||||||
#ifndef SIMDJSON_STRINGPARSING_H
|
#ifndef SIMDJSON_STRINGPARSING_H
|
||||||
#define SIMDJSON_STRINGPARSING_H
|
#define SIMDJSON_STRINGPARSING_H
|
||||||
|
|
||||||
|
@ -36099,11 +36117,11 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
|
||||||
uint8_t *const start_of_string = dst;
|
uint8_t *const start_of_string = dst;
|
||||||
#endif
|
#endif
|
||||||
while (1) {
|
while (1) {
|
||||||
__m256i v = _mm256_loadu_si256((const __m256i *)(src));
|
__m256i v = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
|
||||||
uint32_t bs_bits =
|
auto bs_bits =
|
||||||
(uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\')));
|
static_cast<uint32_t>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\'))));
|
||||||
uint32_t quote_bits =
|
auto quote_bits =
|
||||||
(uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('"')));
|
static_cast<uint32_t>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('"'))));
|
||||||
#define CHECKUNESCAPED
|
#define CHECKUNESCAPED
|
||||||
// All Unicode characters may be placed within the
|
// All Unicode characters may be placed within the
|
||||||
// quotation marks, except for the characters that MUST be escaped:
|
// quotation marks, except for the characters that MUST be escaped:
|
||||||
|
@ -36119,7 +36137,7 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
|
||||||
uint32_t bs_dist = trailingzeroes(bs_bits);
|
uint32_t bs_dist = trailingzeroes(bs_bits);
|
||||||
// store to dest unconditionally - we can overwrite the bits we don't like
|
// store to dest unconditionally - we can overwrite the bits we don't like
|
||||||
// later
|
// later
|
||||||
_mm256_storeu_si256((__m256i *)(dst), v);
|
_mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), v);
|
||||||
if (quote_dist < bs_dist) {
|
if (quote_dist < bs_dist) {
|
||||||
// we encountered quotes first. Move dst to point to quotes and exit
|
// we encountered quotes first. Move dst to point to quotes and exit
|
||||||
dst[quote_dist] = 0; // null terminate and get out
|
dst[quote_dist] = 0; // null terminate and get out
|
||||||
|
@ -36129,7 +36147,7 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
|
||||||
pj.current_string_buf_loc = dst + quote_dist + 1; // the +1 is due to the 0 value
|
pj.current_string_buf_loc = dst + quote_dist + 1; // the +1 is due to the 0 value
|
||||||
#ifdef CHECKUNESCAPED
|
#ifdef CHECKUNESCAPED
|
||||||
// check that there is no unescaped char before the quote
|
// check that there is no unescaped char before the quote
|
||||||
uint32_t unescaped_bits = (uint32_t)_mm256_movemask_epi8(unescaped_vec);
|
auto unescaped_bits = static_cast<uint32_t>(_mm256_movemask_epi8(unescaped_vec));
|
||||||
bool is_ok = ((quote_bits - 1) & (~ quote_bits) & unescaped_bits) == 0;
|
bool is_ok = ((quote_bits - 1) & (~ quote_bits) & unescaped_bits) == 0;
|
||||||
#ifdef JSON_TEST_STRINGS // for unit testing
|
#ifdef JSON_TEST_STRINGS // for unit testing
|
||||||
if(is_ok) foundString(buf + offset,start_of_string,pj.current_string_buf_loc - 1);
|
if(is_ok) foundString(buf + offset,start_of_string,pj.current_string_buf_loc - 1);
|
||||||
|
@ -36142,11 +36160,11 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
|
||||||
#endif // JSON_TEST_STRINGS
|
#endif // JSON_TEST_STRINGS
|
||||||
return true;
|
return true;
|
||||||
#endif //CHECKUNESCAPED
|
#endif //CHECKUNESCAPED
|
||||||
} else if (quote_dist > bs_dist) {
|
} if (quote_dist > bs_dist) {
|
||||||
uint8_t escape_char = src[bs_dist + 1];
|
uint8_t escape_char = src[bs_dist + 1];
|
||||||
#ifdef CHECKUNESCAPED
|
#ifdef CHECKUNESCAPED
|
||||||
// we are going to need the unescaped_bits to check for unescaped chars
|
// we are going to need the unescaped_bits to check for unescaped chars
|
||||||
uint32_t unescaped_bits = (uint32_t)_mm256_movemask_epi8(unescaped_vec);
|
auto unescaped_bits = static_cast<uint32_t>(_mm256_movemask_epi8(unescaped_vec));
|
||||||
if(((bs_bits - 1) & (~ bs_bits) & unescaped_bits) != 0) {
|
if(((bs_bits - 1) & (~ bs_bits) & unescaped_bits) != 0) {
|
||||||
#ifdef JSON_TEST_STRINGS // for unit testing
|
#ifdef JSON_TEST_STRINGS // for unit testing
|
||||||
foundBadString(buf + offset);
|
foundBadString(buf + offset);
|
||||||
|
@ -36172,7 +36190,7 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
|
||||||
// note this may reach beyond the part of the buffer we've actually
|
// note this may reach beyond the part of the buffer we've actually
|
||||||
// seen. I think this is ok
|
// seen. I think this is ok
|
||||||
uint8_t escape_result = escape_map[escape_char];
|
uint8_t escape_result = escape_map[escape_char];
|
||||||
if (!escape_result) {
|
if (escape_result == 0u) {
|
||||||
#ifdef JSON_TEST_STRINGS // for unit testing
|
#ifdef JSON_TEST_STRINGS // for unit testing
|
||||||
foundBadString(buf + offset);
|
foundBadString(buf + offset);
|
||||||
#endif // JSON_TEST_STRINGS
|
#endif // JSON_TEST_STRINGS
|
||||||
|
@ -36205,8 +36223,8 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
/* end file /home/geoff/git/simdjson/include/simdjson/stringparsing.h */
|
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/stringparsing.h */
|
||||||
/* begin file /home/geoff/git/simdjson/include/simdjson/numberparsing.h */
|
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/numberparsing.h */
|
||||||
#ifndef SIMDJSON_NUMBERPARSING_H
|
#ifndef SIMDJSON_NUMBERPARSING_H
|
||||||
#define SIMDJSON_NUMBERPARSING_H
|
#define SIMDJSON_NUMBERPARSING_H
|
||||||
|
|
||||||
|
@ -36346,7 +36364,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
|
||||||
const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1);
|
const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1);
|
||||||
const __m128i mul_1_10000 =
|
const __m128i mul_1_10000 =
|
||||||
_mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1);
|
_mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1);
|
||||||
const __m128i input = _mm_sub_epi8(_mm_loadu_si128((const __m128i *)chars), ascii0);
|
const __m128i input = _mm_sub_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(chars)), ascii0);
|
||||||
const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10);
|
const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10);
|
||||||
const __m128i t2 = _mm_madd_epi16(t1, mul_1_100);
|
const __m128i t2 = _mm_madd_epi16(t1, mul_1_100);
|
||||||
const __m128i t3 = _mm_packus_epi32(t2, t2);
|
const __m128i t3 = _mm_packus_epi32(t2, t2);
|
||||||
|
@ -36371,7 +36389,7 @@ static never_inline bool
|
||||||
parse_float(const uint8_t *const buf,
|
parse_float(const uint8_t *const buf,
|
||||||
ParsedJson &pj, const uint32_t offset,
|
ParsedJson &pj, const uint32_t offset,
|
||||||
bool found_minus) {
|
bool found_minus) {
|
||||||
const char *p = (const char *)(buf + offset);
|
const char *p = reinterpret_cast<const char *>(buf + offset);
|
||||||
bool negative = false;
|
bool negative = false;
|
||||||
if (found_minus) {
|
if (found_minus) {
|
||||||
++p;
|
++p;
|
||||||
|
@ -36485,7 +36503,7 @@ static never_inline bool parse_large_integer(const uint8_t *const buf,
|
||||||
ParsedJson &pj,
|
ParsedJson &pj,
|
||||||
const uint32_t offset,
|
const uint32_t offset,
|
||||||
bool found_minus) {
|
bool found_minus) {
|
||||||
const char *p = (const char *)(buf + offset);
|
const char *p = reinterpret_cast<const char *>(buf + offset);
|
||||||
|
|
||||||
bool negative = false;
|
bool negative = false;
|
||||||
if (found_minus) {
|
if (found_minus) {
|
||||||
|
@ -36557,7 +36575,7 @@ static really_inline bool parse_number(const uint8_t *const buf,
|
||||||
pj.write_tape_s64(0); // always write zero
|
pj.write_tape_s64(0); // always write zero
|
||||||
return true; // always succeeds
|
return true; // always succeeds
|
||||||
#else
|
#else
|
||||||
const char *p = (const char *)(buf + offset);
|
const char *p = reinterpret_cast<const char *>(buf + offset);
|
||||||
bool negative = false;
|
bool negative = false;
|
||||||
if (found_minus) {
|
if (found_minus) {
|
||||||
++p;
|
++p;
|
||||||
|
@ -36723,10 +36741,10 @@ static really_inline bool parse_number(const uint8_t *const buf,
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
/* end file /home/geoff/git/simdjson/include/simdjson/numberparsing.h */
|
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/numberparsing.h */
|
||||||
/* begin file /home/geoff/git/simdjson/include/simdjson/stage2_build_tape.h */
|
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/stage2_build_tape.h */
|
||||||
#ifndef SIMDJSON_STAGE34_UNIFIED_H
|
#ifndef SIMDJSON_STAGE2_BUILD_TAPE_H
|
||||||
#define SIMDJSON_STAGE34_UNIFIED_H
|
#define SIMDJSON_STAGE2_BUILD_TAPE_H
|
||||||
|
|
||||||
|
|
||||||
void init_state_machine();
|
void init_state_machine();
|
||||||
|
@ -36737,12 +36755,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj);
|
||||||
|
|
||||||
// char-buffer overload of unified_machine: casts buf to const uint8_t * and
// returns the result of the uint8_t overload
// (unified_machine(const uint8_t *, size_t, ParsedJson &)), passing len and
// pj through unchanged.
// NOTE(review): diff rendering; the pre-change line uses a C-style cast, the
// post-change line uses reinterpret_cast.
WARN_UNUSED
|
WARN_UNUSED
|
||||||
static inline bool unified_machine(const char *buf, size_t len, ParsedJson &pj) {
|
static inline bool unified_machine(const char *buf, size_t len, ParsedJson &pj) {
|
||||||
return unified_machine((const uint8_t *)buf,len,pj);
|
return unified_machine(reinterpret_cast<const uint8_t *>(buf),len,pj);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
/* end file /home/geoff/git/simdjson/include/simdjson/stage2_build_tape.h */
|
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/stage2_build_tape.h */
|
||||||
/* begin file /home/geoff/git/simdjson/include/simdjson/jsonparser.h */
|
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonparser.h */
|
||||||
#ifndef SIMDJSON_JSONPARSER_H
|
#ifndef SIMDJSON_JSONPARSER_H
|
||||||
#define SIMDJSON_JSONPARSER_H
|
#define SIMDJSON_JSONPARSER_H
|
||||||
|
|
||||||
|
@ -36771,7 +36789,7 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
|
||||||
// all bytes at and after buf + len are ignored (can be garbage).
|
// all bytes at and after buf + len are ignored (can be garbage).
|
||||||
// char-buffer overload of json_parse: casts buf to const uint8_t * and
// returns the result of the uint8_t overload, passing len, pj and
// reallocifneeded (default: true) through unchanged.
// NOTE(review): diff rendering; the pre-change line uses a C-style cast, the
// post-change line uses reinterpret_cast.
WARN_UNUSED
|
WARN_UNUSED
|
||||||
inline bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
|
inline bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
|
||||||
return json_parse((const uint8_t *) buf, len, pj, reallocifneeded);
|
return json_parse(reinterpret_cast<const uint8_t *>(buf), len, pj, reallocifneeded);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse a document found in buf, need to preallocate ParsedJson.
|
// Parse a document found in buf, need to preallocate ParsedJson.
|
||||||
|
@ -36806,7 +36824,7 @@ WARN_UNUSED
|
||||||
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
|
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
|
||||||
// all bytes at and after buf + len are ignored (can be garbage).
|
// all bytes at and after buf + len are ignored (can be garbage).
|
||||||
// char-buffer overload of build_parsed_json: casts buf to const uint8_t * and
// returns the ParsedJson produced by the overload taking const uint8_t *,
// passing len and reallocifneeded (default: true) through unchanged.
// NOTE(review): diff rendering; the pre-change line uses a C-style cast, the
// post-change line uses reinterpret_cast.
inline ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded = true) {
|
inline ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded = true) {
|
||||||
return build_parsed_json((const uint8_t *) buf, len, reallocifneeded);
|
return build_parsed_json(reinterpret_cast<const uint8_t *>(buf), len, reallocifneeded);
|
||||||
}
|
}
|
||||||
|
|
||||||
// convenience function
|
// convenience function
|
||||||
|
@ -36822,4 +36840,4 @@ inline ParsedJson build_parsed_json(const std::string_view &s, bool reallocifnee
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
/* end file /home/geoff/git/simdjson/include/simdjson/jsonparser.h */
|
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonparser.h */
|
||||||
|
|
Loading…
Reference in New Issue