Merge pull request #61 from NewProggie/fix_minor_problems
Fix minor problems
This commit is contained in:
commit
bdc2bc693f
|
@ -1,17 +1,17 @@
|
|||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#ifndef _MSC_VER
|
||||
#include <dirent.h>
|
||||
#include <unistd.h>
|
||||
#include <x86intrin.h>
|
||||
#include <dirent.h>
|
||||
#else
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#include <inttypes.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <cinttypes>
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
|
@ -47,7 +47,7 @@ int main(int argc, char *argv[]) {
|
|||
#ifndef _MSC_VER
|
||||
int c;
|
||||
|
||||
while ((c = getopt(argc, argv, "1vdt")) != -1)
|
||||
while ((c = getopt(argc, argv, "1vdt")) != -1) {
|
||||
switch (c) {
|
||||
case 't':
|
||||
justdata = true;
|
||||
|
@ -67,6 +67,7 @@ int main(int argc, char *argv[]) {
|
|||
default:
|
||||
abort();
|
||||
}
|
||||
}
|
||||
#else
|
||||
int optind = 1;
|
||||
#endif
|
||||
|
@ -78,8 +79,9 @@ int main(int argc, char *argv[]) {
|
|||
if (optind + 1 < argc) {
|
||||
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
|
||||
}
|
||||
if (verbose)
|
||||
if (verbose) {
|
||||
cout << "[verbose] loading " << filename << endl;
|
||||
}
|
||||
std::string_view p;
|
||||
try {
|
||||
p = get_corpus(filename);
|
||||
|
@ -87,9 +89,10 @@ int main(int argc, char *argv[]) {
|
|||
std::cout << "Could not load the file " << filename << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
if (verbose)
|
||||
if (verbose) {
|
||||
cout << "[verbose] loaded " << filename << " (" << p.size() << " bytes)"
|
||||
<< endl;
|
||||
}
|
||||
#if defined(DEBUG)
|
||||
const uint32_t iterations = 1;
|
||||
#else
|
||||
|
@ -125,8 +128,9 @@ int main(int argc, char *argv[]) {
|
|||
bool isok = true;
|
||||
|
||||
for (uint32_t i = 0; i < iterations; i++) {
|
||||
if (verbose)
|
||||
if (verbose) {
|
||||
cout << "[verbose] iteration # " << i << endl;
|
||||
}
|
||||
#ifndef SQUASH_COUNTERS
|
||||
unified.start();
|
||||
#endif
|
||||
|
@ -144,8 +148,9 @@ int main(int argc, char *argv[]) {
|
|||
cref0 += results[3];
|
||||
cmis0 += results[4];
|
||||
#endif
|
||||
if (verbose)
|
||||
if (verbose) {
|
||||
cout << "[verbose] allocated memory for parsed JSON " << endl;
|
||||
}
|
||||
|
||||
auto start = std::chrono::steady_clock::now();
|
||||
#ifndef SQUASH_COUNTERS
|
||||
|
@ -248,10 +253,11 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
#endif
|
||||
double min_result = *min_element(res.begin(), res.end());
|
||||
if (!justdata)
|
||||
if (!justdata) {
|
||||
cout << "Min: " << min_result << " bytes read: " << p.size()
|
||||
<< " Gigabytes/second: " << (p.size()) / (min_result * 1000000000.0)
|
||||
<< "\n";
|
||||
}
|
||||
if (jsonoutput) {
|
||||
isok = isok && pj.printjson(std::cout);
|
||||
}
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
#endif //__linux__
|
||||
#endif // _MSC_VER
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "benchmark.h"
|
||||
|
||||
|
||||
|
@ -225,7 +227,7 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
|
||||
|
||||
jsmntok_t * tokens = new jsmntok_t[p.size()];
|
||||
auto * tokens = make_unique<jsmntok_t[](p.size());
|
||||
if(tokens == NULL) {
|
||||
printf("Failed to alloc memory for jsmn\n");
|
||||
} else {
|
||||
|
@ -234,9 +236,8 @@ int main(int argc, char *argv[]) {
|
|||
memcpy(buffer, p.data(), p.size());
|
||||
buffer[p.size()] = '\0';
|
||||
BEST_TIME("jsmn ",
|
||||
(jsmn_parse(&parser, buffer, p.size(), tokens, p.size()) > 0), true,
|
||||
(jsmn_parse(&parser, buffer, p.size(), tokens.get(), p.size()) > 0), true,
|
||||
jsmn_init(&parser), repeat, volume, !justdata);
|
||||
delete[] tokens;
|
||||
}
|
||||
|
||||
memcpy(buffer, p.data(), p.size());
|
||||
|
|
|
@ -42,7 +42,7 @@ struct stat_s {
|
|||
bool valid;
|
||||
};
|
||||
|
||||
typedef struct stat_s stat_t;
|
||||
using stat_t = struct stat_s;
|
||||
|
||||
stat_t simdjson_computestats(const std::string_view &p) {
|
||||
stat_t answer;
|
||||
|
@ -51,9 +51,9 @@ stat_t simdjson_computestats(const std::string_view &p) {
|
|||
if (!answer.valid) {
|
||||
return answer;
|
||||
}
|
||||
answer.backslash_count = count_backslash((const uint8_t *)p.data(), p.size());
|
||||
answer.backslash_count = count_backslash(reinterpret_cast<const uint8_t *>(p.data()), p.size());
|
||||
answer.nonasciibyte_count =
|
||||
count_nonasciibytes((const uint8_t *)p.data(), p.size());
|
||||
count_nonasciibytes(reinterpret_cast<const uint8_t *>(p.data()), p.size());
|
||||
answer.byte_count = p.size();
|
||||
answer.integer_count = 0;
|
||||
answer.float_count = 0;
|
||||
|
@ -115,12 +115,13 @@ stat_t simdjson_computestats(const std::string_view &p) {
|
|||
int main(int argc, char *argv[]) {
|
||||
#ifndef _MSC_VER
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "")) != -1)
|
||||
while ((c = getopt(argc, argv, "")) != -1) {
|
||||
switch (c) {
|
||||
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
}
|
||||
#else
|
||||
int optind = 1;
|
||||
#endif
|
||||
|
|
|
@ -56,4 +56,4 @@
|
|||
|
||||
#endif // MSC_VER
|
||||
|
||||
#endif // COMMON_DEFS_H
|
||||
#endif // SIMDJSON_COMMON_DEFS_H
|
||||
|
|
|
@ -97,7 +97,7 @@ inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
|
|||
if (cp <= 0x7F) {
|
||||
c[0] = cp;
|
||||
return 1; // ascii
|
||||
} else if (cp <= 0x7FF) {
|
||||
} if (cp <= 0x7FF) {
|
||||
c[0] = (cp >> 6) + 192;
|
||||
c[1] = (cp & 63) + 128;
|
||||
return 2; // universal plane
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
#ifndef SIMDJSON_JSONFORMATUTILS_H
|
||||
#define SIMDJSON_JSONFORMATUTILS_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include <cstdio>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
|
||||
static inline void print_with_escapes(const unsigned char *src) {
|
||||
while (*src) {
|
||||
while (*src != 0u) {
|
||||
switch (*src) {
|
||||
case '\b':
|
||||
putchar('\\');
|
||||
|
@ -39,15 +39,16 @@ static inline void print_with_escapes(const unsigned char *src) {
|
|||
default:
|
||||
if (*src <= 0x1F) {
|
||||
printf("\\u%04x", *src);
|
||||
} else
|
||||
} else {
|
||||
putchar(*src);
|
||||
}
|
||||
}
|
||||
src++;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void print_with_escapes(const unsigned char *src, std::ostream &os) {
|
||||
while (*src) {
|
||||
while (*src != 0u) {
|
||||
switch (*src) {
|
||||
case '\b':
|
||||
os << '\\';
|
||||
|
@ -80,17 +81,18 @@ static inline void print_with_escapes(const unsigned char *src, std::ostream &os
|
|||
default:
|
||||
if (*src <= 0x1F) {
|
||||
std::ios::fmtflags f(os.flags());
|
||||
os << std::hex << std::setw(4) << std::setfill('0') << (int) *src;
|
||||
os << std::hex << std::setw(4) << std::setfill('0') << static_cast<int>(*src);
|
||||
os.flags(f);
|
||||
} else
|
||||
} else {
|
||||
os << *src;
|
||||
}
|
||||
}
|
||||
src++;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void print_with_escapes(const char *src, std::ostream &os) {
|
||||
print_with_escapes((const unsigned char *)src, os);
|
||||
print_with_escapes(reinterpret_cast<const unsigned char *>(src), os);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
#ifndef SIMDJSON_JSONIOUTIL_H
|
||||
#define SIMDJSON_JSONIOUTIL_H
|
||||
|
||||
#include "simdjson/common_defs.h"
|
||||
#include <exception>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include "simdjson/common_defs.h"
|
||||
|
||||
|
||||
// low-level function to allocate memory with padding so we can read past the "length" bytes
|
||||
|
@ -34,7 +34,7 @@ char * allocate_padded_buffer(size_t length);
|
|||
// free((void*)p.data());//use aligned_free if you plan to use VisualStudio
|
||||
// std::cout << "Could not load the file " << filename << std::endl;
|
||||
// }
|
||||
std::string_view get_corpus(std::string filename);
|
||||
std::string_view get_corpus(const std::string& filename);
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -11,7 +11,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out);
|
|||
|
||||
|
||||
static inline size_t jsonminify(const char *buf, size_t len, char *out) {
|
||||
return jsonminify((const uint8_t *)buf, len, (uint8_t *)out);
|
||||
return jsonminify(reinterpret_cast<const uint8_t *>(buf), len, reinterpret_cast<uint8_t *>(out));
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
|
|||
// all bytes at and after buf + len are ignored (can be garbage).
|
||||
WARN_UNUSED
|
||||
inline bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
|
||||
return json_parse((const uint8_t *) buf, len, pj, reallocifneeded);
|
||||
return json_parse(reinterpret_cast<const uint8_t *>(buf), len, pj, reallocifneeded);
|
||||
}
|
||||
|
||||
// Parse a document found in buf, need to preallocate ParsedJson.
|
||||
|
@ -66,7 +66,7 @@ WARN_UNUSED
|
|||
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// all bytes at and after buf + len are ignored (can be garbage).
|
||||
inline ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded = true) {
|
||||
return build_parsed_json((const uint8_t *) buf, len, reallocifneeded);
|
||||
return build_parsed_json(reinterpret_cast<const uint8_t *>(buf), len, reallocifneeded);
|
||||
}
|
||||
|
||||
// convenience function
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
#ifndef SIMDJSON_NUMBERPARSING_H
|
||||
#define SIMDJSON_NUMBERPARSING_H
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/jsoncharutils.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/portability.h"
|
||||
|
||||
static const double power_of_ten[] = {
|
||||
1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300,
|
||||
|
@ -141,7 +141,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
|
|||
const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1);
|
||||
const __m128i mul_1_10000 =
|
||||
_mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1);
|
||||
const __m128i input = _mm_sub_epi8(_mm_loadu_si128((const __m128i *)chars), ascii0);
|
||||
const __m128i input = _mm_sub_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(chars)), ascii0);
|
||||
const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10);
|
||||
const __m128i t2 = _mm_madd_epi16(t1, mul_1_100);
|
||||
const __m128i t3 = _mm_packus_epi32(t2, t2);
|
||||
|
@ -166,7 +166,7 @@ static never_inline bool
|
|||
parse_float(const uint8_t *const buf,
|
||||
ParsedJson &pj, const uint32_t offset,
|
||||
bool found_minus) {
|
||||
const char *p = (const char *)(buf + offset);
|
||||
const char *p = reinterpret_cast<const char *>(buf + offset);
|
||||
bool negative = false;
|
||||
if (found_minus) {
|
||||
++p;
|
||||
|
@ -280,7 +280,7 @@ static never_inline bool parse_large_integer(const uint8_t *const buf,
|
|||
ParsedJson &pj,
|
||||
const uint32_t offset,
|
||||
bool found_minus) {
|
||||
const char *p = (const char *)(buf + offset);
|
||||
const char *p = reinterpret_cast<const char *>(buf + offset);
|
||||
|
||||
bool negative = false;
|
||||
if (found_minus) {
|
||||
|
@ -352,7 +352,7 @@ static really_inline bool parse_number(const uint8_t *const buf,
|
|||
pj.write_tape_s64(0); // always write zero
|
||||
return true; // always succeeds
|
||||
#else
|
||||
const char *p = (const char *)(buf + offset);
|
||||
const char *p = reinterpret_cast<const char *>(buf + offset);
|
||||
bool negative = false;
|
||||
if (found_minus) {
|
||||
++p;
|
||||
|
|
|
@ -1,14 +1,15 @@
|
|||
#ifndef SIMDJSON_PARSEDJSON_H
|
||||
#define SIMDJSON_PARSEDJSON_H
|
||||
|
||||
#include <cinttypes>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
#include "simdjson/jsonformatutils.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/jsonformatutils.h"
|
||||
#include "simdjson/portability.h"
|
||||
|
||||
#define JSONVALUEMASK 0xFFFFFFFFFFFFFF
|
||||
|
||||
|
@ -67,12 +68,12 @@ public:
|
|||
|
||||
// this should be considered a private function
|
||||
really_inline void write_tape(uint64_t val, uint8_t c) {
|
||||
tape[current_loc++] = val | (((uint64_t)c) << 56);
|
||||
tape[current_loc++] = val | ((static_cast<uint64_t>(c)) << 56);
|
||||
}
|
||||
|
||||
really_inline void write_tape_s64(int64_t i) {
|
||||
write_tape(0, 'l');
|
||||
tape[current_loc++] = *((uint64_t *)&i);
|
||||
tape[current_loc++] = *(reinterpret_cast<uint64_t *>(&i));
|
||||
}
|
||||
|
||||
really_inline void write_tape_double(double d) {
|
||||
|
@ -192,7 +193,7 @@ public:
|
|||
|
||||
private:
|
||||
|
||||
iterator& operator=(const iterator& other) ;
|
||||
iterator& operator=(const iterator& other) = delete ;
|
||||
|
||||
ParsedJson &pj;
|
||||
size_t depth;
|
||||
|
@ -203,13 +204,13 @@ private:
|
|||
scopeindex_t *depthindex;
|
||||
};
|
||||
|
||||
size_t bytecapacity; // indicates how many bits are meant to be supported
|
||||
size_t bytecapacity{0}; // indicates how many bits are meant to be supported
|
||||
|
||||
size_t depthcapacity; // how deep we can go
|
||||
size_t tapecapacity;
|
||||
size_t stringcapacity;
|
||||
uint32_t current_loc;
|
||||
uint32_t n_structural_indexes;
|
||||
size_t depthcapacity{0}; // how deep we can go
|
||||
size_t tapecapacity{0};
|
||||
size_t stringcapacity{0};
|
||||
uint32_t current_loc{0};
|
||||
uint32_t n_structural_indexes{0};
|
||||
|
||||
uint32_t *structural_indexes;
|
||||
|
||||
|
@ -223,10 +224,13 @@ private:
|
|||
|
||||
uint8_t *string_buf; // should be at least bytecapacity
|
||||
uint8_t *current_string_buf_loc;
|
||||
bool isvalid;
|
||||
bool isvalid{false};
|
||||
|
||||
private :
|
||||
ParsedJson(const ParsedJson & p) = delete;
|
||||
|
||||
// we don't want the copy constructor to be called
|
||||
ParsedJson(const ParsedJson & p) = delete; // we don't want the default constructor to be called
|
||||
// we don't want the assignment to be called
|
||||
ParsedJson & operator=(const ParsedJson&o) = delete;
|
||||
};
|
||||
|
||||
|
@ -234,14 +238,14 @@ private :
|
|||
// dump bits low to high
|
||||
inline void dumpbits_always(uint64_t v, const std::string &msg) {
|
||||
for (uint32_t i = 0; i < 64; i++) {
|
||||
std::cout << (((v >> (uint64_t)i) & 0x1ULL) ? "1" : "_");
|
||||
std::cout << (((v >> static_cast<uint64_t>(i)) & 0x1ULL) ? "1" : "_");
|
||||
}
|
||||
std::cout << " " << msg.c_str() << "\n";
|
||||
}
|
||||
|
||||
inline void dumpbits32_always(uint32_t v, const std::string &msg) {
|
||||
for (uint32_t i = 0; i < 32; i++) {
|
||||
std::cout << (((v >> (uint32_t)i) & 0x1ULL) ? "1" : "_");
|
||||
std::cout << (((v >> i) & 0x1ULL) ? "1" : "_");
|
||||
}
|
||||
std::cout << " " << msg.c_str() << "\n";
|
||||
}
|
||||
|
|
|
@ -42,8 +42,8 @@ static inline int hamming(uint64_t input_num) {
|
|||
}
|
||||
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
#include <cstdint>
|
||||
#include <x86intrin.h>
|
||||
|
||||
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
|
||||
return __builtin_uaddll_overflow(value1, value2, (unsigned long long*)result);
|
||||
|
@ -86,7 +86,7 @@ static inline void *aligned_malloc(size_t alignment, size_t size) {
|
|||
#else
|
||||
// somehow, if this is used before including "x86intrin.h", it creates an
|
||||
// implicit-declaration warning.
|
||||
if (posix_memalign(&p, alignment, size) != 0) return NULL;
|
||||
if (posix_memalign(&p, alignment, size) != 0) { return nullptr; }
|
||||
#endif
|
||||
return p;
|
||||
}
|
||||
|
@ -114,7 +114,7 @@ static inline void _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo,
|
|||
|
||||
|
||||
static inline void aligned_free(void *memblock) {
|
||||
if(memblock == NULL) return;
|
||||
if(memblock == nullptr) { return; }
|
||||
#ifdef _MSC_VER
|
||||
_aligned_free(memblock);
|
||||
#elif defined(__MINGW32__) || defined(__MINGW64__)
|
||||
|
@ -124,4 +124,4 @@ static inline void aligned_free(void *memblock) {
|
|||
#endif
|
||||
}
|
||||
|
||||
#endif /* end of include PORTABILITY_H */
|
||||
#endif // SIMDJSON_PORTABILITY_H
|
||||
|
|
|
@ -34989,7 +34989,7 @@ static const unsigned char mask128_epi32[] = {
|
|||
|
||||
#ifdef __AVX2__
|
||||
|
||||
#include <stdint.h>
|
||||
#include <cstdint>
|
||||
|
||||
static const uint32_t mask256_epi32[] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 7, 0, 2, 3, 4, 5, 6, 7, 7, 2,
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
#ifndef SIMDJSON_SIMDUTF8CHECK_H
|
||||
#define SIMDJSON_SIMDUTF8CHECK_H
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
@ -168,7 +168,7 @@ static struct avx_processed_utf_bytes
|
|||
avxcheckUTF8Bytes(__m256i current_bytes,
|
||||
struct avx_processed_utf_bytes *previous,
|
||||
__m256i *has_error) {
|
||||
struct avx_processed_utf_bytes pb;
|
||||
struct avx_processed_utf_bytes pb{};
|
||||
avx_count_nibbles(current_bytes, &pb);
|
||||
|
||||
avxcheckSmallerThan0xF4(current_bytes, has_error);
|
||||
|
|
|
@ -9,7 +9,7 @@ bool find_structural_bits(const uint8_t *buf, size_t len, ParsedJson &pj);
|
|||
|
||||
WARN_UNUSED
|
||||
static inline bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
|
||||
return find_structural_bits((const uint8_t *)buf, len, pj);
|
||||
return find_structural_bits(reinterpret_cast<const uint8_t *>(buf), len, pj);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#ifndef SIMDJSON_STAGE34_UNIFIED_H
|
||||
#define SIMDJSON_STAGE34_UNIFIED_H
|
||||
#ifndef SIMDJSON_STAGE2_BUILD_TAPE_H
|
||||
#define SIMDJSON_STAGE2_BUILD_TAPE_H
|
||||
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
@ -12,7 +12,7 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj);
|
|||
|
||||
WARN_UNUSED
|
||||
static inline bool unified_machine(const char *buf, size_t len, ParsedJson &pj) {
|
||||
return unified_machine((const uint8_t *)buf,len,pj);
|
||||
return unified_machine(reinterpret_cast<const uint8_t *>(buf),len,pj);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
#define SIMDJSON_STRINGPARSING_H
|
||||
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/jsoncharutils.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
||||
|
||||
// begin copypasta
|
||||
|
@ -85,11 +85,11 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
|
|||
uint8_t *const start_of_string = dst;
|
||||
#endif
|
||||
while (1) {
|
||||
__m256i v = _mm256_loadu_si256((const __m256i *)(src));
|
||||
uint32_t bs_bits =
|
||||
(uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\')));
|
||||
uint32_t quote_bits =
|
||||
(uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('"')));
|
||||
__m256i v = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
|
||||
auto bs_bits =
|
||||
static_cast<uint32_t>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\'))));
|
||||
auto quote_bits =
|
||||
static_cast<uint32_t>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('"'))));
|
||||
#define CHECKUNESCAPED
|
||||
// All Unicode characters may be placed within the
|
||||
// quotation marks, except for the characters that MUST be escaped:
|
||||
|
@ -105,7 +105,7 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
|
|||
uint32_t bs_dist = trailingzeroes(bs_bits);
|
||||
// store to dest unconditionally - we can overwrite the bits we don't like
|
||||
// later
|
||||
_mm256_storeu_si256((__m256i *)(dst), v);
|
||||
_mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), v);
|
||||
if (quote_dist < bs_dist) {
|
||||
// we encountered quotes first. Move dst to point to quotes and exit
|
||||
dst[quote_dist] = 0; // null terminate and get out
|
||||
|
@ -115,7 +115,7 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
|
|||
pj.current_string_buf_loc = dst + quote_dist + 1; // the +1 is due to the 0 value
|
||||
#ifdef CHECKUNESCAPED
|
||||
// check that there is no unescaped char before the quote
|
||||
uint32_t unescaped_bits = (uint32_t)_mm256_movemask_epi8(unescaped_vec);
|
||||
auto unescaped_bits = static_cast<uint32_t>(_mm256_movemask_epi8(unescaped_vec));
|
||||
bool is_ok = ((quote_bits - 1) & (~ quote_bits) & unescaped_bits) == 0;
|
||||
#ifdef JSON_TEST_STRINGS // for unit testing
|
||||
if(is_ok) foundString(buf + offset,start_of_string,pj.current_string_buf_loc - 1);
|
||||
|
@ -128,11 +128,11 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
|
|||
#endif // JSON_TEST_STRINGS
|
||||
return true;
|
||||
#endif //CHECKUNESCAPED
|
||||
} else if (quote_dist > bs_dist) {
|
||||
} if (quote_dist > bs_dist) {
|
||||
uint8_t escape_char = src[bs_dist + 1];
|
||||
#ifdef CHECKUNESCAPED
|
||||
// we are going to need the unescaped_bits to check for unescaped chars
|
||||
uint32_t unescaped_bits = (uint32_t)_mm256_movemask_epi8(unescaped_vec);
|
||||
auto unescaped_bits = static_cast<uint32_t>(_mm256_movemask_epi8(unescaped_vec));
|
||||
if(((bs_bits - 1) & (~ bs_bits) & unescaped_bits) != 0) {
|
||||
#ifdef JSON_TEST_STRINGS // for unit testing
|
||||
foundBadString(buf + offset);
|
||||
|
@ -158,7 +158,7 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
|
|||
// note this may reach beyond the part of the buffer we've actually
|
||||
// seen. I think this is ok
|
||||
uint8_t escape_result = escape_map[escape_char];
|
||||
if (!escape_result) {
|
||||
if (escape_result == 0u) {
|
||||
#ifdef JSON_TEST_STRINGS // for unit testing
|
||||
foundBadString(buf + offset);
|
||||
#endif // JSON_TEST_STRINGS
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#include "simdjson/jsonioutil.h"
|
||||
#include <cstring>
|
||||
#include <stdlib.h>
|
||||
#include <cstdlib>
|
||||
|
||||
char * allocate_padded_buffer(size_t length) {
|
||||
// we could do a simple malloc
|
||||
|
@ -13,18 +13,19 @@ char * allocate_padded_buffer(size_t length) {
|
|||
#elif defined(__MINGW32__) || defined(__MINGW64__)
|
||||
padded_buffer = __mingw_aligned_malloc(totalpaddedlength, 64);
|
||||
#else
|
||||
if (posix_memalign((void **)&padded_buffer, 64, totalpaddedlength) != 0) return NULL;
|
||||
if (posix_memalign(reinterpret_cast<void **>(&padded_buffer), 64, totalpaddedlength) != 0) { return nullptr;
|
||||
}
|
||||
#endif
|
||||
return padded_buffer;
|
||||
}
|
||||
|
||||
std::string_view get_corpus(std::string filename) {
|
||||
std::string_view get_corpus(const std::string& filename) {
|
||||
std::FILE *fp = std::fopen(filename.c_str(), "rb");
|
||||
if (fp) {
|
||||
if (fp != nullptr) {
|
||||
std::fseek(fp, 0, SEEK_END);
|
||||
size_t len = std::ftell(fp);
|
||||
char * buf = allocate_padded_buffer(len);
|
||||
if(buf == NULL) {
|
||||
if(buf == nullptr) {
|
||||
std::fclose(fp);
|
||||
throw std::runtime_error("could not allocate memory");
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#include <cstdint>
|
||||
#include "simdjson/portability.h"
|
||||
#include <cstdint>
|
||||
#ifndef __AVX2__
|
||||
|
||||
|
||||
|
@ -66,7 +66,7 @@ size_t jsonminify(const unsigned char *bytes, size_t howmany,
|
|||
static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,
|
||||
__m256i mask) {
|
||||
__m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
|
||||
uint64_t res_0 = (uint32_t)_mm256_movemask_epi8(cmp_res_0);
|
||||
uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
|
||||
__m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
|
||||
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
|
||||
return res_0 | (res_1 << 32);
|
||||
|
@ -87,8 +87,8 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
|||
size_t avxlen = len - 63;
|
||||
|
||||
for (; idx < avxlen; idx += 64) {
|
||||
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buf + idx + 0));
|
||||
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buf + idx + 32));
|
||||
__m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 0));
|
||||
__m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 32));
|
||||
uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi,
|
||||
_mm256_set1_epi8('\\'));
|
||||
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
|
||||
|
@ -112,7 +112,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
|||
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
||||
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
||||
quote_mask ^= prev_iter_inside_quote;
|
||||
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// might be undefined behavior, should be fully defined in C++20, ok according to John Regher from Utah University
|
||||
prev_iter_inside_quote = static_cast<uint64_t>(static_cast<int64_t>(quote_mask) >> 63);// might be undefined behavior, should be fully defined in C++20, ok according to John Regher from Utah University
|
||||
const __m256i low_nibble_mask = _mm256_setr_epi8(
|
||||
// 0 9 a b c d
|
||||
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
|
||||
|
@ -138,7 +138,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
|||
__m256i tmp_ws_hi = _mm256_cmpeq_epi8(
|
||||
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
|
||||
|
||||
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
|
||||
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
|
||||
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
||||
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
|
||||
whitespace &= ~quote_mask;
|
||||
|
@ -151,15 +151,15 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
|||
int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
|
||||
int pop4 = hamming((~whitespace));
|
||||
__m256i vmask1 =
|
||||
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF),
|
||||
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF));
|
||||
_mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask2 & 0x7FFF),
|
||||
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask1 & 0x7FFF));
|
||||
__m256i vmask2 =
|
||||
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF),
|
||||
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF));
|
||||
_mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask4 & 0x7FFF),
|
||||
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask3 & 0x7FFF));
|
||||
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
|
||||
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
|
||||
_mm256_storeu2_m128i((__m128i *)(out + pop1), (__m128i *)out, result1);
|
||||
_mm256_storeu2_m128i((__m128i *)(out + pop3), (__m128i *)(out + pop2),
|
||||
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop1), reinterpret_cast<__m128i *>(out), result1);
|
||||
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop3), reinterpret_cast<__m128i *>(out + pop2),
|
||||
result2);
|
||||
out += pop4;
|
||||
}
|
||||
|
@ -170,8 +170,8 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
|||
uint8_t buffer[64];
|
||||
memset(buffer, 0, 64);
|
||||
memcpy(buffer, buf + idx, len - idx);
|
||||
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buffer));
|
||||
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buffer + 32));
|
||||
__m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buffer));
|
||||
__m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buffer + 32));
|
||||
uint64_t bs_bits =
|
||||
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\'));
|
||||
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
|
||||
|
@ -213,7 +213,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
|||
__m256i tmp_ws_hi = _mm256_or_si256(
|
||||
_mm256_cmpeq_epi8(mask_20, input_hi),
|
||||
_mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_hi)));
|
||||
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
|
||||
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
|
||||
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
||||
uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32));
|
||||
whitespace &= ~quote_mask;
|
||||
|
@ -230,16 +230,16 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
|||
int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
|
||||
int pop4 = hamming((~whitespace));
|
||||
__m256i vmask1 =
|
||||
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF),
|
||||
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF));
|
||||
_mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask2 & 0x7FFF),
|
||||
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask1 & 0x7FFF));
|
||||
__m256i vmask2 =
|
||||
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF),
|
||||
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF));
|
||||
_mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask4 & 0x7FFF),
|
||||
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask3 & 0x7FFF));
|
||||
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
|
||||
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
|
||||
_mm256_storeu2_m128i((__m128i *)(buffer + pop1), (__m128i *)buffer,
|
||||
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop1), reinterpret_cast<__m128i *>(buffer),
|
||||
result1);
|
||||
_mm256_storeu2_m128i((__m128i *)(buffer + pop3), (__m128i *)(buffer + pop2),
|
||||
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop3), reinterpret_cast<__m128i *>(buffer + pop2),
|
||||
result2);
|
||||
memcpy(out, buffer, pop4);
|
||||
out += pop4;
|
||||
|
|
|
@ -7,10 +7,10 @@
|
|||
#endif
|
||||
|
||||
|
||||
extern bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded);
|
||||
extern bool json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded);
|
||||
extern ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded);
|
||||
extern ParsedJson build_parsed_json(const std::string_view &s, bool reallocifneeded);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// parse a document found in buf, need to preallocate ParsedJson.
|
||||
|
@ -33,8 +33,9 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
|
|||
#endif
|
||||
if ( (reinterpret_cast<uintptr_t>(buf + len - 1) % pagesize ) < SIMDJSON_PADDING ) {
|
||||
const uint8_t *tmpbuf = buf;
|
||||
buf = (uint8_t *) allocate_padded_buffer(len);
|
||||
if(buf == NULL) return false;
|
||||
buf = reinterpret_cast<uint8_t *>(allocate_padded_buffer(len));
|
||||
if(buf == nullptr) { return false;
|
||||
}
|
||||
memcpy((void*)buf,tmpbuf,len);
|
||||
reallocated = true;
|
||||
}
|
||||
|
@ -43,10 +44,12 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
|
|||
if (isok) {
|
||||
isok = unified_machine(buf, len, pj);
|
||||
} else {
|
||||
if(reallocated) free((void*)buf);
|
||||
if(reallocated) { free((void*)buf);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if(reallocated) free((void*)buf);
|
||||
if(reallocated) { free((void*)buf);
|
||||
}
|
||||
return isok;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,34 +1,33 @@
|
|||
#include "simdjson/parsedjson.h"
|
||||
|
||||
ParsedJson::ParsedJson() : bytecapacity(0), depthcapacity(0), tapecapacity(0), stringcapacity(0),
|
||||
current_loc(0), n_structural_indexes(0),
|
||||
structural_indexes(NULL), tape(NULL), containing_scope_offset(NULL),
|
||||
ret_address(NULL), string_buf(NULL), current_string_buf_loc(NULL), isvalid(false) {}
|
||||
ParsedJson::ParsedJson() :
|
||||
structural_indexes(nullptr), tape(nullptr), containing_scope_offset(nullptr),
|
||||
ret_address(nullptr), string_buf(nullptr), current_string_buf_loc(nullptr) {}
|
||||
|
||||
ParsedJson::~ParsedJson() {
|
||||
deallocate();
|
||||
}
|
||||
|
||||
ParsedJson::ParsedJson(ParsedJson && p)
|
||||
: bytecapacity(std::move(p.bytecapacity)),
|
||||
depthcapacity(std::move(p.depthcapacity)),
|
||||
tapecapacity(std::move(p.tapecapacity)),
|
||||
stringcapacity(std::move(p.stringcapacity)),
|
||||
current_loc(std::move(p.current_loc)),
|
||||
n_structural_indexes(std::move(p.n_structural_indexes)),
|
||||
structural_indexes(std::move(p.structural_indexes)),
|
||||
tape(std::move(p.tape)),
|
||||
containing_scope_offset(std::move(p.containing_scope_offset)),
|
||||
ret_address(std::move(p.ret_address)),
|
||||
string_buf(std::move(p.string_buf)),
|
||||
current_string_buf_loc(std::move(p.current_string_buf_loc)),
|
||||
isvalid(std::move(p.isvalid)) {
|
||||
p.structural_indexes=NULL;
|
||||
p.tape=NULL;
|
||||
p.containing_scope_offset=NULL;
|
||||
p.ret_address=NULL;
|
||||
p.string_buf=NULL;
|
||||
p.current_string_buf_loc=NULL;
|
||||
: bytecapacity(p.bytecapacity),
|
||||
depthcapacity(p.depthcapacity),
|
||||
tapecapacity(p.tapecapacity),
|
||||
stringcapacity(p.stringcapacity),
|
||||
current_loc(p.current_loc),
|
||||
n_structural_indexes(p.n_structural_indexes),
|
||||
structural_indexes(p.structural_indexes),
|
||||
tape(p.tape),
|
||||
containing_scope_offset(p.containing_scope_offset),
|
||||
ret_address(p.ret_address),
|
||||
string_buf(p.string_buf),
|
||||
current_string_buf_loc(p.current_string_buf_loc),
|
||||
isvalid(p.isvalid) {
|
||||
p.structural_indexes=nullptr;
|
||||
p.tape=nullptr;
|
||||
p.containing_scope_offset=nullptr;
|
||||
p.ret_address=nullptr;
|
||||
p.string_buf=nullptr;
|
||||
p.current_string_buf_loc=nullptr;
|
||||
}
|
||||
|
||||
|
||||
|
@ -40,8 +39,9 @@ bool ParsedJson::allocateCapacity(size_t len, size_t maxdepth) {
|
|||
return false;
|
||||
}
|
||||
if (len > 0) {
|
||||
if ((len <= bytecapacity) && (depthcapacity < maxdepth))
|
||||
if ((len <= bytecapacity) && (depthcapacity < maxdepth)) {
|
||||
return true;
|
||||
}
|
||||
deallocate();
|
||||
}
|
||||
isvalid = false;
|
||||
|
@ -59,14 +59,15 @@ bool ParsedJson::allocateCapacity(size_t len, size_t maxdepth) {
|
|||
#else
|
||||
ret_address = new (std::nothrow) char[maxdepth];
|
||||
#endif
|
||||
if ((string_buf == NULL) || (tape == NULL) ||
|
||||
(containing_scope_offset == NULL) || (ret_address == NULL) || (structural_indexes == NULL)) {
|
||||
if ((string_buf == nullptr) || (tape == nullptr) ||
|
||||
(containing_scope_offset == nullptr) || (ret_address == nullptr) || (structural_indexes == nullptr)) {
|
||||
std::cerr << "Could not allocate memory" << std::endl;
|
||||
if(ret_address != NULL) delete[] ret_address;
|
||||
if(containing_scope_offset != NULL) delete[] containing_scope_offset;
|
||||
if(tape != NULL) delete[] tape;
|
||||
if(string_buf != NULL) delete[] string_buf;
|
||||
if(structural_indexes != NULL) delete[] structural_indexes;
|
||||
delete[] ret_address;
|
||||
delete[] containing_scope_offset;
|
||||
delete[] tape;
|
||||
delete[] string_buf;
|
||||
delete[] structural_indexes;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -86,11 +87,16 @@ void ParsedJson::deallocate() {
|
|||
depthcapacity = 0;
|
||||
tapecapacity = 0;
|
||||
stringcapacity = 0;
|
||||
if(ret_address != NULL) delete[] ret_address;
|
||||
if(containing_scope_offset != NULL) delete[] containing_scope_offset;
|
||||
if(tape != NULL) delete[] tape;
|
||||
if(string_buf != NULL) delete[] string_buf;
|
||||
if(structural_indexes != NULL) delete[] structural_indexes;
|
||||
{delete[] ret_address;
|
||||
}
|
||||
{delete[] containing_scope_offset;
|
||||
}
|
||||
{delete[] tape;
|
||||
}
|
||||
{delete[] string_buf;
|
||||
}
|
||||
{delete[] structural_indexes;
|
||||
}
|
||||
isvalid = false;
|
||||
}
|
||||
|
||||
|
@ -102,7 +108,8 @@ void ParsedJson::init() {
|
|||
|
||||
WARN_UNUSED
|
||||
bool ParsedJson::printjson(std::ostream &os) {
|
||||
if(!isvalid) return false;
|
||||
if(!isvalid) { return false;
|
||||
}
|
||||
size_t tapeidx = 0;
|
||||
uint64_t tape_val = tape[tapeidx];
|
||||
uint8_t type = (tape_val >> 56);
|
||||
|
@ -120,7 +127,7 @@ bool ParsedJson::printjson(std::ostream &os) {
|
|||
}
|
||||
tapeidx++;
|
||||
bool *inobject = new bool[depthcapacity];
|
||||
size_t *inobjectidx = new size_t[depthcapacity];
|
||||
auto *inobjectidx = new size_t[depthcapacity];
|
||||
int depth = 1; // only root at level 0
|
||||
inobjectidx[depth] = 0;
|
||||
inobject[depth] = false;
|
||||
|
@ -129,15 +136,18 @@ bool ParsedJson::printjson(std::ostream &os) {
|
|||
uint64_t payload = tape_val & JSONVALUEMASK;
|
||||
type = (tape_val >> 56);
|
||||
if (!inobject[depth]) {
|
||||
if ((inobjectidx[depth] > 0) && (type != ']'))
|
||||
if ((inobjectidx[depth] > 0) && (type != ']')) {
|
||||
os << ",";
|
||||
}
|
||||
inobjectidx[depth]++;
|
||||
} else { // if (inobject) {
|
||||
if ((inobjectidx[depth] > 0) && ((inobjectidx[depth] & 1) == 0) &&
|
||||
(type != '}'))
|
||||
(type != '}')) {
|
||||
os << ",";
|
||||
if (((inobjectidx[depth] & 1) == 1))
|
||||
}
|
||||
if (((inobjectidx[depth] & 1) == 1)) {
|
||||
os << ":";
|
||||
}
|
||||
inobjectidx[depth]++;
|
||||
}
|
||||
switch (type) {
|
||||
|
@ -147,13 +157,15 @@ bool ParsedJson::printjson(std::ostream &os) {
|
|||
os << '"';
|
||||
break;
|
||||
case 'l': // we have a long int
|
||||
if (tapeidx + 1 >= howmany)
|
||||
if (tapeidx + 1 >= howmany) {
|
||||
return false;
|
||||
os << (int64_t)tape[++tapeidx];
|
||||
}
|
||||
os << static_cast<int64_t>(tape[++tapeidx]);
|
||||
break;
|
||||
case 'd': // we have a double
|
||||
if (tapeidx + 1 >= howmany)
|
||||
if (tapeidx + 1 >= howmany) {
|
||||
return false;
|
||||
}
|
||||
double answer;
|
||||
memcpy(&answer, &tape[++tapeidx], sizeof(answer));
|
||||
os << answer;
|
||||
|
@ -206,7 +218,8 @@ bool ParsedJson::printjson(std::ostream &os) {
|
|||
|
||||
WARN_UNUSED
|
||||
bool ParsedJson::dump_raw_tape(std::ostream &os) {
|
||||
if(!isvalid) return false;
|
||||
if(!isvalid) { return false;
|
||||
}
|
||||
size_t tapeidx = 0;
|
||||
uint64_t tape_val = tape[tapeidx];
|
||||
uint8_t type = (tape_val >> 56);
|
||||
|
@ -234,14 +247,16 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) {
|
|||
os << '\n';
|
||||
break;
|
||||
case 'l': // we have a long int
|
||||
if (tapeidx + 1 >= howmany)
|
||||
if (tapeidx + 1 >= howmany) {
|
||||
return false;
|
||||
os << "integer " << (int64_t)tape[++tapeidx] << "\n";
|
||||
}
|
||||
os << "integer " << static_cast<int64_t>(tape[++tapeidx]) << "\n";
|
||||
break;
|
||||
case 'd': // we have a double
|
||||
os << "float ";
|
||||
if (tapeidx + 1 >= howmany)
|
||||
if (tapeidx + 1 >= howmany) {
|
||||
return false;
|
||||
}
|
||||
double answer;
|
||||
memcpy(&answer, &tape[++tapeidx], sizeof(answer));
|
||||
os << answer << '\n';
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
|
||||
ParsedJson::iterator::iterator(ParsedJson &pj_) : pj(pj_), depth(0), location(0), tape_length(0), depthindex(NULL) {
|
||||
ParsedJson::iterator::iterator(ParsedJson &pj_) : pj(pj_), depth(0), location(0), tape_length(0), depthindex(nullptr) {
|
||||
if(pj.isValid()) {
|
||||
depthindex = new scopeindex_t[pj.depthcapacity];
|
||||
if(depthindex == NULL) return;
|
||||
if(depthindex == nullptr) { return;
|
||||
}
|
||||
depthindex[0].start_of_scope = location;
|
||||
current_val = pj.tape[location++];
|
||||
current_type = (current_val >> 56);
|
||||
|
@ -29,9 +30,9 @@ ParsedJson::iterator::~iterator() {
|
|||
ParsedJson::iterator::iterator(const iterator &o):
|
||||
pj(o.pj), depth(o.depth), location(o.location),
|
||||
tape_length(o.tape_length), current_type(o.current_type),
|
||||
current_val(o.current_val), depthindex(NULL) {
|
||||
current_val(o.current_val), depthindex(nullptr) {
|
||||
depthindex = new scopeindex_t[pj.depthcapacity];
|
||||
if(depthindex != NULL) {
|
||||
if(depthindex != nullptr) {
|
||||
memcpy(o.depthindex, depthindex, pj.depthcapacity * sizeof(depthindex[0]));
|
||||
} else {
|
||||
tape_length = 0;
|
||||
|
@ -39,10 +40,10 @@ ParsedJson::iterator::iterator(const iterator &o):
|
|||
}
|
||||
|
||||
ParsedJson::iterator::iterator(iterator &&o):
|
||||
pj(o.pj), depth(std::move(o.depth)), location(std::move(o.location)),
|
||||
tape_length(std::move(o.tape_length)), current_type(std::move(o.current_type)),
|
||||
current_val(std::move(o.current_val)), depthindex(std::move(o.depthindex)) {
|
||||
o.depthindex = NULL;// we take ownership
|
||||
pj(o.pj), depth(o.depth), location(o.location),
|
||||
tape_length(o.tape_length), current_type(o.current_type),
|
||||
current_val(o.current_val), depthindex(o.depthindex) {
|
||||
o.depthindex = nullptr;// we take ownership
|
||||
}
|
||||
|
||||
WARN_UNUSED
|
||||
|
@ -106,19 +107,21 @@ uint8_t ParsedJson::iterator::get_type() const {
|
|||
|
||||
|
||||
int64_t ParsedJson::iterator::get_integer() const {
|
||||
if(location + 1 >= tape_length) return 0;// default value in case of error
|
||||
return (int64_t) pj.tape[location + 1];
|
||||
if(location + 1 >= tape_length) { return 0;// default value in case of error
|
||||
}
|
||||
return static_cast<int64_t>(pj.tape[location + 1]);
|
||||
}
|
||||
|
||||
double ParsedJson::iterator::get_double() const {
|
||||
if(location + 1 >= tape_length) return NAN;// default value in case of error
|
||||
if(location + 1 >= tape_length) { return NAN;// default value in case of error
|
||||
}
|
||||
double answer;
|
||||
memcpy(&answer, & pj.tape[location + 1], sizeof(answer));
|
||||
return answer;
|
||||
}
|
||||
|
||||
const char * ParsedJson::iterator::get_string() const {
|
||||
return (const char *)(pj.string_buf + (current_val & JSONVALUEMASK)) ;
|
||||
return reinterpret_cast<const char *>(pj.string_buf + (current_val & JSONVALUEMASK)) ;
|
||||
}
|
||||
|
||||
|
||||
|
@ -156,7 +159,8 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
|
|||
assert(is_string());
|
||||
bool rightkey = (strcmp(get_string(),key)==0);
|
||||
next();
|
||||
if(rightkey) return true;
|
||||
if(rightkey) { return true;
|
||||
}
|
||||
} while(next());
|
||||
assert(up());// not found
|
||||
}
|
||||
|
@ -180,9 +184,10 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
|
|||
current_val = nextval;
|
||||
current_type = nexttype;
|
||||
return true;
|
||||
} else {
|
||||
}
|
||||
size_t increment = (current_type == 'd' || current_type == 'l') ? 2 : 1;
|
||||
if(location + increment >= tape_length) return false;
|
||||
if(location + increment >= tape_length) { return false;
|
||||
}
|
||||
uint64_t nextval = pj.tape[location + increment];
|
||||
uint8_t nexttype = (nextval >> 56);
|
||||
if((nexttype == ']') || (nexttype == '}')) {
|
||||
|
@ -192,12 +197,13 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
|
|||
current_val = nextval;
|
||||
current_type = nexttype;
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
bool ParsedJson::iterator::prev() {
|
||||
if(location - 1 < depthindex[depth].start_of_scope) return false;
|
||||
if(location - 1 < depthindex[depth].start_of_scope) { return false;
|
||||
}
|
||||
location -= 1;
|
||||
current_val = pj.tape[location];
|
||||
current_type = (current_val >> 56);
|
||||
|
@ -230,7 +236,8 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
|
|||
|
||||
|
||||
bool ParsedJson::iterator::down() {
|
||||
if(location + 1 >= tape_length) return false;
|
||||
if(location + 1 >= tape_length) { return false;
|
||||
}
|
||||
if ((current_type == '[') || (current_type == '{')) {
|
||||
size_t npos = (current_val & JSONVALUEMASK);
|
||||
if(npos == location + 2) {
|
||||
|
@ -254,7 +261,8 @@ void ParsedJson::iterator::to_start_scope() {
|
|||
}
|
||||
|
||||
bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const {
|
||||
if(!isOk()) return false;
|
||||
if(!isOk()) { return false;
|
||||
}
|
||||
switch (current_type) {
|
||||
case '"': // we have a string
|
||||
os << '"';
|
||||
|
@ -284,7 +292,7 @@ bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const {
|
|||
case '}': // we end an object
|
||||
case '[': // we start an array
|
||||
case ']': // we end an array
|
||||
os << (char) current_type;
|
||||
os << static_cast<char>(current_type);
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#include "simdjson/portability.h"
|
||||
#include <cassert>
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include <cassert>
|
||||
|
||||
#ifndef SIMDJSON_SKIPUTF8VALIDATION
|
||||
#define SIMDJSON_UTF8VALIDATE
|
||||
|
@ -21,7 +21,7 @@ using namespace std;
|
|||
really_inline uint64_t cmp_mask_against_input(__m256i input_lo, __m256i input_hi,
|
||||
__m256i mask) {
|
||||
__m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
|
||||
uint64_t res_0 = (uint32_t)_mm256_movemask_epi8(cmp_res_0);
|
||||
uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
|
||||
__m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
|
||||
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
|
||||
return res_0 | (res_1 << 32);
|
||||
|
@ -38,7 +38,7 @@ WARN_UNUSED
|
|||
uint32_t base = 0;
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
__m256i has_error = _mm256_setzero_si256();
|
||||
struct avx_processed_utf_bytes previous;
|
||||
struct avx_processed_utf_bytes previous{};
|
||||
previous.rawbytes = _mm256_setzero_si256();
|
||||
previous.high_nibbles = _mm256_setzero_si256();
|
||||
previous.carried_continuations = _mm256_setzero_si256();
|
||||
|
@ -66,8 +66,8 @@ WARN_UNUSED
|
|||
#ifndef _MSC_VER
|
||||
__builtin_prefetch(buf + idx + 128);
|
||||
#endif
|
||||
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buf + idx + 0));
|
||||
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buf + idx + 32));
|
||||
__m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 0));
|
||||
__m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 32));
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
__m256i highbit = _mm256_set1_epi8(0x80);
|
||||
if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) {
|
||||
|
@ -130,29 +130,29 @@ WARN_UNUSED
|
|||
|
||||
uint32_t cnt = hamming(structurals);
|
||||
uint32_t next_base = base + cnt;
|
||||
while (structurals) {
|
||||
base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
while (structurals != 0u) {
|
||||
base_ptr[base + 0] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 1] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 2] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 3] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 4] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 5] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 6] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 7] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base += 8;
|
||||
}
|
||||
base = next_base;
|
||||
|
||||
quote_mask ^= prev_iter_inside_quote;
|
||||
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20, John Regher from Utah U. says this is fine code
|
||||
prev_iter_inside_quote = static_cast<uint64_t>(static_cast<int64_t>(quote_mask) >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20, John Regher from Utah U. says this is fine code
|
||||
|
||||
// How do we build up a user traversable data structure
|
||||
// first, do a 'shufti' to detect structural JSON characters
|
||||
|
@ -190,7 +190,7 @@ WARN_UNUSED
|
|||
__m256i tmp_hi = _mm256_cmpeq_epi8(
|
||||
_mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0));
|
||||
|
||||
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo);
|
||||
uint64_t structural_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_lo));
|
||||
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
|
||||
structurals = ~(structural_res_0 | (structural_res_1 << 32));
|
||||
|
||||
|
@ -201,7 +201,7 @@ WARN_UNUSED
|
|||
__m256i tmp_ws_hi = _mm256_cmpeq_epi8(
|
||||
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
|
||||
|
||||
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
|
||||
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
|
||||
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
||||
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
|
||||
// mask off anything inside quotes
|
||||
|
@ -244,8 +244,8 @@ WARN_UNUSED
|
|||
uint8_t tmpbuf[64];
|
||||
memset(tmpbuf,0x20,64);
|
||||
memcpy(tmpbuf,buf+idx,len - idx);
|
||||
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(tmpbuf + 0));
|
||||
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(tmpbuf + 32));
|
||||
__m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(tmpbuf + 0));
|
||||
__m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(tmpbuf + 32));
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
__m256i highbit = _mm256_set1_epi8(0x80);
|
||||
if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) {
|
||||
|
@ -308,22 +308,22 @@ WARN_UNUSED
|
|||
|
||||
uint32_t cnt = hamming(structurals);
|
||||
uint32_t next_base = base + cnt;
|
||||
while (structurals) {
|
||||
base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
while (structurals != 0u) {
|
||||
base_ptr[base + 0] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 1] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 2] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 3] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 4] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 5] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 6] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 7] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base += 8;
|
||||
}
|
||||
|
@ -364,7 +364,7 @@ WARN_UNUSED
|
|||
__m256i tmp_hi = _mm256_cmpeq_epi8(
|
||||
_mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0));
|
||||
|
||||
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo);
|
||||
uint64_t structural_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_lo));
|
||||
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
|
||||
structurals = ~(structural_res_0 | (structural_res_1 << 32));
|
||||
|
||||
|
@ -375,7 +375,7 @@ WARN_UNUSED
|
|||
__m256i tmp_ws_hi = _mm256_cmpeq_epi8(
|
||||
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
|
||||
|
||||
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
|
||||
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
|
||||
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
||||
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
|
||||
|
||||
|
@ -412,22 +412,22 @@ WARN_UNUSED
|
|||
}
|
||||
uint32_t cnt = hamming(structurals);
|
||||
uint32_t next_base = base + cnt;
|
||||
while (structurals) {
|
||||
base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
while (structurals != 0u) {
|
||||
base_ptr[base + 0] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 1] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 2] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 3] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 4] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 5] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 6] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals);
|
||||
base_ptr[base + 7] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
|
||||
structurals = structurals & (structurals - 1);
|
||||
base += 8;
|
||||
}
|
||||
|
@ -435,7 +435,7 @@ WARN_UNUSED
|
|||
|
||||
pj.n_structural_indexes = base;
|
||||
// a valid JSON file cannot have zero structural indexes - we should have found something
|
||||
if (!pj.n_structural_indexes) {
|
||||
if (pj.n_structural_indexes == 0u) {
|
||||
return false;
|
||||
}
|
||||
if(base_ptr[pj.n_structural_indexes-1] > len) {
|
||||
|
@ -449,7 +449,7 @@ WARN_UNUSED
|
|||
base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array
|
||||
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
return _mm256_testz_si256(has_error, has_error);
|
||||
return _mm256_testz_si256(has_error, has_error) != 0;
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
|
|
|
@ -22,7 +22,7 @@ using namespace std;
|
|||
|
||||
WARN_UNUSED
|
||||
really_inline bool is_valid_true_atom(const uint8_t *loc) {
|
||||
uint64_t tv = *(const uint64_t *)"true ";
|
||||
uint64_t tv = *reinterpret_cast<const uint64_t *>("true ");
|
||||
uint64_t mask4 = 0x00000000ffffffff;
|
||||
uint32_t error = 0;
|
||||
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
||||
|
@ -34,7 +34,7 @@ really_inline bool is_valid_true_atom(const uint8_t *loc) {
|
|||
|
||||
WARN_UNUSED
|
||||
really_inline bool is_valid_false_atom(const uint8_t *loc) {
|
||||
uint64_t fv = *(const uint64_t *)"false ";
|
||||
uint64_t fv = *reinterpret_cast<const uint64_t *>("false ");
|
||||
uint64_t mask5 = 0x000000ffffffffff;
|
||||
uint32_t error = 0;
|
||||
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
||||
|
@ -46,7 +46,7 @@ really_inline bool is_valid_false_atom(const uint8_t *loc) {
|
|||
|
||||
WARN_UNUSED
|
||||
really_inline bool is_valid_null_atom(const uint8_t *loc) {
|
||||
uint64_t nv = *(const uint64_t *)"null ";
|
||||
uint64_t nv = *reinterpret_cast<const uint64_t *>("null ");
|
||||
uint64_t mask4 = 0x00000000ffffffff;
|
||||
uint32_t error = 0;
|
||||
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
||||
|
@ -141,11 +141,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
// we need to make a copy to make sure that the string is NULL terminated.
|
||||
// this only applies to the JSON document made solely of the true value.
|
||||
// this will almost never be called in practice
|
||||
char * copy = (char *) malloc(len + SIMDJSON_PADDING);
|
||||
if(copy == NULL) goto fail;
|
||||
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if(copy == nullptr) { goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
copy[len] = '\0';
|
||||
if (!is_valid_true_atom((const uint8_t *)copy + idx)) {
|
||||
if (!is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
|
||||
free(copy);
|
||||
goto fail;
|
||||
}
|
||||
|
@ -157,11 +158,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
// we need to make a copy to make sure that the string is NULL terminated.
|
||||
// this only applies to the JSON document made solely of the false value.
|
||||
// this will almost never be called in practice
|
||||
char * copy = (char *) malloc(len + SIMDJSON_PADDING);
|
||||
if(copy == NULL) goto fail;
|
||||
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if(copy == nullptr) { goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
copy[len] = '\0';
|
||||
if (!is_valid_false_atom((const uint8_t *)copy + idx)) {
|
||||
if (!is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
|
||||
free(copy);
|
||||
goto fail;
|
||||
}
|
||||
|
@ -173,11 +175,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
// we need to make a copy to make sure that the string is NULL terminated.
|
||||
// this only applies to the JSON document made solely of the null value.
|
||||
// this will almost never be called in practice
|
||||
char * copy = (char *) malloc(len + SIMDJSON_PADDING);
|
||||
if(copy == NULL) goto fail;
|
||||
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if(copy == nullptr) { goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
copy[len] = '\0';
|
||||
if (!is_valid_null_atom((const uint8_t *)copy + idx)) {
|
||||
if (!is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
|
||||
free(copy);
|
||||
goto fail;
|
||||
}
|
||||
|
@ -198,11 +201,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
// we need to make a copy to make sure that the string is NULL terminated.
|
||||
// this is done only for JSON documents made of a sole number
|
||||
// this will almost never be called in practice
|
||||
char * copy = (char *) malloc(len + SIMDJSON_PADDING);
|
||||
if(copy == NULL) goto fail;
|
||||
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if(copy == nullptr) { goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
copy[len] = '\0';
|
||||
if (!parse_number((const uint8_t *)copy, pj, idx, false)) {
|
||||
if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, false)) {
|
||||
free(copy);
|
||||
goto fail;
|
||||
}
|
||||
|
@ -213,11 +217,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
// we need to make a copy to make sure that the string is NULL terminated.
|
||||
// this is done only for JSON documents made of a sole number
|
||||
// this will almost never be called in practice
|
||||
char * copy = (char *) malloc(len + SIMDJSON_PADDING);
|
||||
if(copy == NULL) goto fail;
|
||||
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
|
||||
if(copy == nullptr) { goto fail;
|
||||
}
|
||||
memcpy(copy, buf, len);
|
||||
copy[len] = '\0';
|
||||
if (!parse_number((const uint8_t *)copy, pj, idx, true)) {
|
||||
if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, true)) {
|
||||
free(copy);
|
||||
goto fail;
|
||||
}
|
||||
|
|
|
@ -106,18 +106,17 @@ int main(int argc, char *argv[]) {
|
|||
void *state;
|
||||
bool ultrajson_correct = ((UJDecode(buffer, p.size(), NULL, &state) == NULL) == false);
|
||||
|
||||
jsmntok_t * tokens = new jsmntok_t[p.size()];
|
||||
auto * tokens = make_unique<jsmntok_t[](p.size());
|
||||
bool jsmn_correct = false;
|
||||
if(tokens == NULL) {
|
||||
if(tokens == nullptr) {
|
||||
printf("Failed to alloc memory for jsmn\n");
|
||||
} else {
|
||||
jsmn_parser parser;
|
||||
jsmn_init(&parser);
|
||||
memcpy(buffer, p.data(), p.size());
|
||||
buffer[p.size()] = '\0';
|
||||
int r = jsmn_parse(&parser, buffer, p.size(), tokens, p.size());
|
||||
delete[] tokens;
|
||||
tokens = NULL;
|
||||
int r = jsmn_parse(&parser, buffer, p.size(), tokens.get(), p.size());
|
||||
tokens = nullptr;
|
||||
jsmn_correct = (r > 0);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#include <assert.h>
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#ifndef _MSC_VER
|
||||
#include <dirent.h>
|
||||
|
@ -7,10 +7,10 @@
|
|||
// Microsoft can't be bothered to provide standard utils.
|
||||
#include <dirent_portable.h>
|
||||
#endif
|
||||
#include <inttypes.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <cinttypes>
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "simdjson/jsonparser.h"
|
||||
|
||||
|
@ -19,7 +19,7 @@
|
|||
*/
|
||||
static bool hasExtension(const char *filename, const char *extension) {
|
||||
const char *ext = strrchr(filename, '.');
|
||||
return (ext && !strcmp(ext, extension));
|
||||
return ((ext != nullptr) && (strcmp(ext, extension) == 0));
|
||||
}
|
||||
|
||||
bool startsWith(const char *pre, const char *str) {
|
||||
|
@ -28,7 +28,7 @@ bool startsWith(const char *pre, const char *str) {
|
|||
}
|
||||
|
||||
bool contains(const char *pre, const char *str) {
|
||||
return (strstr(str, pre) != NULL);
|
||||
return (strstr(str, pre) != nullptr);
|
||||
}
|
||||
|
||||
|
||||
|
@ -37,7 +37,7 @@ bool validate(const char *dirname) {
|
|||
const char *extension = ".json";
|
||||
size_t dirlen = strlen(dirname);
|
||||
struct dirent **entry_list;
|
||||
int c = scandir(dirname, &entry_list, 0, alphasort);
|
||||
int c = scandir(dirname, &entry_list, nullptr, alphasort);
|
||||
if (c < 0) {
|
||||
fprintf(stderr, "error accessing %s \n", dirname);
|
||||
return false;
|
||||
|
@ -47,16 +47,17 @@ bool validate(const char *dirname) {
|
|||
return false;
|
||||
}
|
||||
bool * isfileasexpected = new bool[c];
|
||||
for(int i = 0; i < c; i++) isfileasexpected[i] = true;
|
||||
for(int i = 0; i < c; i++) { isfileasexpected[i] = true;
|
||||
}
|
||||
size_t howmany = 0;
|
||||
bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
|
||||
for (int i = 0; i < c; i++) {
|
||||
const char *name = entry_list[i]->d_name;
|
||||
if (hasExtension(name, extension)) {
|
||||
printf("validating: file %s ", name);
|
||||
fflush(NULL);
|
||||
fflush(nullptr);
|
||||
size_t filelen = strlen(name);
|
||||
char *fullpath = (char *)malloc(dirlen + filelen + 1 + 1);
|
||||
char *fullpath = static_cast<char *>(malloc(dirlen + filelen + 1 + 1));
|
||||
strcpy(fullpath, dirname);
|
||||
if (needsep) {
|
||||
fullpath[dirlen] = '/';
|
||||
|
@ -106,11 +107,13 @@ bool validate(const char *dirname) {
|
|||
} else {
|
||||
fprintf(stderr, "There were problems! Consider reviewing the following files:\n");
|
||||
for(int i = 0; i < c; i++) {
|
||||
if(!isfileasexpected[i]) fprintf(stderr, "%s \n", entry_list[i]->d_name);
|
||||
if(!isfileasexpected[i]) { fprintf(stderr, "%s \n", entry_list[i]->d_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < c; ++i)
|
||||
for (int i = 0; i < c; ++i) {
|
||||
free(entry_list[i]);
|
||||
}
|
||||
free(entry_list);
|
||||
delete[] isfileasexpected;
|
||||
return everythingfine;
|
||||
|
|
|
@ -48,7 +48,7 @@ int main(int argc, char *argv[]) {
|
|||
#ifndef _MSC_VER
|
||||
int c;
|
||||
|
||||
while ((c = getopt(argc, argv, "da")) != -1)
|
||||
while ((c = getopt(argc, argv, "da")) != -1) {
|
||||
switch (c) {
|
||||
case 'd':
|
||||
rawdump = true;
|
||||
|
@ -59,6 +59,7 @@ int main(int argc, char *argv[]) {
|
|||
default:
|
||||
abort();
|
||||
}
|
||||
}
|
||||
#else
|
||||
int optind = 1;
|
||||
#endif
|
||||
|
|
|
@ -39,7 +39,7 @@ struct stat_s {
|
|||
bool valid;
|
||||
};
|
||||
|
||||
typedef struct stat_s stat_t;
|
||||
using stat_t = struct stat_s;
|
||||
|
||||
|
||||
|
||||
|
@ -50,8 +50,8 @@ stat_t simdjson_computestats(const std::string_view &p) {
|
|||
if (!answer.valid) {
|
||||
return answer;
|
||||
}
|
||||
answer.backslash_count = count_backslash((const uint8_t*)p.data(), p.size());
|
||||
answer.nonasciibyte_count = count_nonasciibytes((const uint8_t*)p.data(), p.size());
|
||||
answer.backslash_count = count_backslash(reinterpret_cast<const uint8_t*>(p.data()), p.size());
|
||||
answer.nonasciibyte_count = count_nonasciibytes(reinterpret_cast<const uint8_t*>(p.data()), p.size());
|
||||
answer.byte_count = p.size();
|
||||
answer.integer_count = 0;
|
||||
answer.float_count = 0;
|
||||
|
|
|
@ -16,7 +16,7 @@ int main(int argc, char *argv[]) {
|
|||
std::cout << "Could not load the file " << filename << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
jsonminify(p, (char *)p.data());
|
||||
jsonminify(p, const_cast<char *>(p.data()));
|
||||
printf("%s",p.data());
|
||||
aligned_free((void*)p.data());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue