Merge pull request #61 from NewProggie/fix_minor_problems

Fix minor problems
This commit is contained in:
geofflangdale 2019-02-26 20:50:03 +11:00 committed by GitHub
commit bdc2bc693f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
29 changed files with 311 additions and 262 deletions

View File

@ -1,17 +1,17 @@
#include <assert.h> #include <cassert>
#include <ctype.h> #include <cctype>
#ifndef _MSC_VER #ifndef _MSC_VER
#include <dirent.h>
#include <unistd.h> #include <unistd.h>
#include <x86intrin.h> #include <x86intrin.h>
#include <dirent.h>
#else #else
#include <intrin.h> #include <intrin.h>
#endif #endif
#include <inttypes.h> #include <cinttypes>
#include <stdbool.h>
#include <stdio.h> #include <cstdio>
#include <stdlib.h> #include <cstdlib>
#include <string.h> #include <cstring>
#include <algorithm> #include <algorithm>
#include <chrono> #include <chrono>
@ -47,7 +47,7 @@ int main(int argc, char *argv[]) {
#ifndef _MSC_VER #ifndef _MSC_VER
int c; int c;
while ((c = getopt(argc, argv, "1vdt")) != -1) while ((c = getopt(argc, argv, "1vdt")) != -1) {
switch (c) { switch (c) {
case 't': case 't':
justdata = true; justdata = true;
@ -67,6 +67,7 @@ int main(int argc, char *argv[]) {
default: default:
abort(); abort();
} }
}
#else #else
int optind = 1; int optind = 1;
#endif #endif
@ -78,8 +79,9 @@ int main(int argc, char *argv[]) {
if (optind + 1 < argc) { if (optind + 1 < argc) {
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl; cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
} }
if (verbose) if (verbose) {
cout << "[verbose] loading " << filename << endl; cout << "[verbose] loading " << filename << endl;
}
std::string_view p; std::string_view p;
try { try {
p = get_corpus(filename); p = get_corpus(filename);
@ -87,9 +89,10 @@ int main(int argc, char *argv[]) {
std::cout << "Could not load the file " << filename << std::endl; std::cout << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE; return EXIT_FAILURE;
} }
if (verbose) if (verbose) {
cout << "[verbose] loaded " << filename << " (" << p.size() << " bytes)" cout << "[verbose] loaded " << filename << " (" << p.size() << " bytes)"
<< endl; << endl;
}
#if defined(DEBUG) #if defined(DEBUG)
const uint32_t iterations = 1; const uint32_t iterations = 1;
#else #else
@ -125,8 +128,9 @@ int main(int argc, char *argv[]) {
bool isok = true; bool isok = true;
for (uint32_t i = 0; i < iterations; i++) { for (uint32_t i = 0; i < iterations; i++) {
if (verbose) if (verbose) {
cout << "[verbose] iteration # " << i << endl; cout << "[verbose] iteration # " << i << endl;
}
#ifndef SQUASH_COUNTERS #ifndef SQUASH_COUNTERS
unified.start(); unified.start();
#endif #endif
@ -144,8 +148,9 @@ int main(int argc, char *argv[]) {
cref0 += results[3]; cref0 += results[3];
cmis0 += results[4]; cmis0 += results[4];
#endif #endif
if (verbose) if (verbose) {
cout << "[verbose] allocated memory for parsed JSON " << endl; cout << "[verbose] allocated memory for parsed JSON " << endl;
}
auto start = std::chrono::steady_clock::now(); auto start = std::chrono::steady_clock::now();
#ifndef SQUASH_COUNTERS #ifndef SQUASH_COUNTERS
@ -248,10 +253,11 @@ int main(int argc, char *argv[]) {
} }
#endif #endif
double min_result = *min_element(res.begin(), res.end()); double min_result = *min_element(res.begin(), res.end());
if (!justdata) if (!justdata) {
cout << "Min: " << min_result << " bytes read: " << p.size() cout << "Min: " << min_result << " bytes read: " << p.size()
<< " Gigabytes/second: " << (p.size()) / (min_result * 1000000000.0) << " Gigabytes/second: " << (p.size()) / (min_result * 1000000000.0)
<< "\n"; << "\n";
}
if (jsonoutput) { if (jsonoutput) {
isok = isok && pj.printjson(std::cout); isok = isok && pj.printjson(std::cout);
} }

View File

@ -7,6 +7,8 @@
#endif //__linux__ #endif //__linux__
#endif // _MSC_VER #endif // _MSC_VER
#include <memory>
#include "benchmark.h" #include "benchmark.h"
@ -225,7 +227,7 @@ int main(int argc, char *argv[]) {
jsmntok_t * tokens = new jsmntok_t[p.size()]; auto * tokens = make_unique<jsmntok_t[](p.size());
if(tokens == NULL) { if(tokens == NULL) {
printf("Failed to alloc memory for jsmn\n"); printf("Failed to alloc memory for jsmn\n");
} else { } else {
@ -234,9 +236,8 @@ int main(int argc, char *argv[]) {
memcpy(buffer, p.data(), p.size()); memcpy(buffer, p.data(), p.size());
buffer[p.size()] = '\0'; buffer[p.size()] = '\0';
BEST_TIME("jsmn ", BEST_TIME("jsmn ",
(jsmn_parse(&parser, buffer, p.size(), tokens, p.size()) > 0), true, (jsmn_parse(&parser, buffer, p.size(), tokens.get(), p.size()) > 0), true,
jsmn_init(&parser), repeat, volume, !justdata); jsmn_init(&parser), repeat, volume, !justdata);
delete[] tokens;
} }
memcpy(buffer, p.data(), p.size()); memcpy(buffer, p.data(), p.size());

View File

@ -42,7 +42,7 @@ struct stat_s {
bool valid; bool valid;
}; };
typedef struct stat_s stat_t; using stat_t = struct stat_s;
stat_t simdjson_computestats(const std::string_view &p) { stat_t simdjson_computestats(const std::string_view &p) {
stat_t answer; stat_t answer;
@ -51,9 +51,9 @@ stat_t simdjson_computestats(const std::string_view &p) {
if (!answer.valid) { if (!answer.valid) {
return answer; return answer;
} }
answer.backslash_count = count_backslash((const uint8_t *)p.data(), p.size()); answer.backslash_count = count_backslash(reinterpret_cast<const uint8_t *>(p.data()), p.size());
answer.nonasciibyte_count = answer.nonasciibyte_count =
count_nonasciibytes((const uint8_t *)p.data(), p.size()); count_nonasciibytes(reinterpret_cast<const uint8_t *>(p.data()), p.size());
answer.byte_count = p.size(); answer.byte_count = p.size();
answer.integer_count = 0; answer.integer_count = 0;
answer.float_count = 0; answer.float_count = 0;
@ -115,12 +115,13 @@ stat_t simdjson_computestats(const std::string_view &p) {
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
#ifndef _MSC_VER #ifndef _MSC_VER
int c; int c;
while ((c = getopt(argc, argv, "")) != -1) while ((c = getopt(argc, argv, "")) != -1) {
switch (c) { switch (c) {
default: default:
abort(); abort();
} }
}
#else #else
int optind = 1; int optind = 1;
#endif #endif

View File

@ -56,4 +56,4 @@
#endif // MSC_VER #endif // MSC_VER
#endif // COMMON_DEFS_H #endif // SIMDJSON_COMMON_DEFS_H

View File

@ -97,7 +97,7 @@ inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
if (cp <= 0x7F) { if (cp <= 0x7F) {
c[0] = cp; c[0] = cp;
return 1; // ascii return 1; // ascii
} else if (cp <= 0x7FF) { } if (cp <= 0x7FF) {
c[0] = (cp >> 6) + 192; c[0] = (cp >> 6) + 192;
c[1] = (cp & 63) + 128; c[1] = (cp & 63) + 128;
return 2; // universal plane return 2; // universal plane

View File

@ -1,12 +1,12 @@
#ifndef SIMDJSON_JSONFORMATUTILS_H #ifndef SIMDJSON_JSONFORMATUTILS_H
#define SIMDJSON_JSONFORMATUTILS_H #define SIMDJSON_JSONFORMATUTILS_H
#include <stdio.h> #include <cstdio>
#include <iostream>
#include <iomanip> #include <iomanip>
#include <iostream>
static inline void print_with_escapes(const unsigned char *src) { static inline void print_with_escapes(const unsigned char *src) {
while (*src) { while (*src != 0u) {
switch (*src) { switch (*src) {
case '\b': case '\b':
putchar('\\'); putchar('\\');
@ -39,15 +39,16 @@ static inline void print_with_escapes(const unsigned char *src) {
default: default:
if (*src <= 0x1F) { if (*src <= 0x1F) {
printf("\\u%04x", *src); printf("\\u%04x", *src);
} else } else {
putchar(*src); putchar(*src);
}
} }
src++; src++;
} }
} }
static inline void print_with_escapes(const unsigned char *src, std::ostream &os) { static inline void print_with_escapes(const unsigned char *src, std::ostream &os) {
while (*src) { while (*src != 0u) {
switch (*src) { switch (*src) {
case '\b': case '\b':
os << '\\'; os << '\\';
@ -80,17 +81,18 @@ static inline void print_with_escapes(const unsigned char *src, std::ostream &os
default: default:
if (*src <= 0x1F) { if (*src <= 0x1F) {
std::ios::fmtflags f(os.flags()); std::ios::fmtflags f(os.flags());
os << std::hex << std::setw(4) << std::setfill('0') << (int) *src; os << std::hex << std::setw(4) << std::setfill('0') << static_cast<int>(*src);
os.flags(f); os.flags(f);
} else } else {
os << *src; os << *src;
}
} }
src++; src++;
} }
} }
static inline void print_with_escapes(const char *src, std::ostream &os) { static inline void print_with_escapes(const char *src, std::ostream &os) {
print_with_escapes((const unsigned char *)src, os); print_with_escapes(reinterpret_cast<const unsigned char *>(src), os);
} }
#endif #endif

View File

@ -1,12 +1,12 @@
#ifndef SIMDJSON_JSONIOUTIL_H #ifndef SIMDJSON_JSONIOUTIL_H
#define SIMDJSON_JSONIOUTIL_H #define SIMDJSON_JSONIOUTIL_H
#include "simdjson/common_defs.h"
#include <exception> #include <exception>
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include <string> #include <string>
#include "simdjson/common_defs.h"
// low-level function to allocate memory with padding so we can read past the "length" bytes // low-level function to allocate memory with padding so we can read past the "length" bytes
@ -34,7 +34,7 @@ char * allocate_padded_buffer(size_t length);
// free((void*)p.data());//use aligned_free if you plan to use VisualStudio // free((void*)p.data());//use aligned_free if you plan to use VisualStudio
// std::cout << "Could not load the file " << filename << std::endl; // std::cout << "Could not load the file " << filename << std::endl;
// } // }
std::string_view get_corpus(std::string filename); std::string_view get_corpus(const std::string& filename);
#endif #endif

View File

@ -11,7 +11,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out);
static inline size_t jsonminify(const char *buf, size_t len, char *out) { static inline size_t jsonminify(const char *buf, size_t len, char *out) {
return jsonminify((const uint8_t *)buf, len, (uint8_t *)out); return jsonminify(reinterpret_cast<const uint8_t *>(buf), len, reinterpret_cast<uint8_t *>(out));
} }

View File

@ -31,7 +31,7 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
// all bytes at and after buf + len are ignored (can be garbage). // all bytes at and after buf + len are ignored (can be garbage).
WARN_UNUSED WARN_UNUSED
inline bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) { inline bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
return json_parse((const uint8_t *) buf, len, pj, reallocifneeded); return json_parse(reinterpret_cast<const uint8_t *>(buf), len, pj, reallocifneeded);
} }
// Parse a document found in buf, need to preallocate ParsedJson. // Parse a document found in buf, need to preallocate ParsedJson.
@ -66,7 +66,7 @@ WARN_UNUSED
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false, // The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
// all bytes at and after buf + len are ignored (can be garbage). // all bytes at and after buf + len are ignored (can be garbage).
inline ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded = true) { inline ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded = true) {
return build_parsed_json((const uint8_t *) buf, len, reallocifneeded); return build_parsed_json(reinterpret_cast<const uint8_t *>(buf), len, reallocifneeded);
} }
// convenience function // convenience function

View File

@ -1,10 +1,10 @@
#ifndef SIMDJSON_NUMBERPARSING_H #ifndef SIMDJSON_NUMBERPARSING_H
#define SIMDJSON_NUMBERPARSING_H #define SIMDJSON_NUMBERPARSING_H
#include "simdjson/portability.h"
#include "simdjson/common_defs.h" #include "simdjson/common_defs.h"
#include "simdjson/jsoncharutils.h" #include "simdjson/jsoncharutils.h"
#include "simdjson/parsedjson.h" #include "simdjson/parsedjson.h"
#include "simdjson/portability.h"
static const double power_of_ten[] = { static const double power_of_ten[] = {
1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, 1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300,
@ -141,7 +141,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1);
const __m128i mul_1_10000 = const __m128i mul_1_10000 =
_mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1);
const __m128i input = _mm_sub_epi8(_mm_loadu_si128((const __m128i *)chars), ascii0); const __m128i input = _mm_sub_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(chars)), ascii0);
const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10);
const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); const __m128i t2 = _mm_madd_epi16(t1, mul_1_100);
const __m128i t3 = _mm_packus_epi32(t2, t2); const __m128i t3 = _mm_packus_epi32(t2, t2);
@ -166,7 +166,7 @@ static never_inline bool
parse_float(const uint8_t *const buf, parse_float(const uint8_t *const buf,
ParsedJson &pj, const uint32_t offset, ParsedJson &pj, const uint32_t offset,
bool found_minus) { bool found_minus) {
const char *p = (const char *)(buf + offset); const char *p = reinterpret_cast<const char *>(buf + offset);
bool negative = false; bool negative = false;
if (found_minus) { if (found_minus) {
++p; ++p;
@ -280,7 +280,7 @@ static never_inline bool parse_large_integer(const uint8_t *const buf,
ParsedJson &pj, ParsedJson &pj,
const uint32_t offset, const uint32_t offset,
bool found_minus) { bool found_minus) {
const char *p = (const char *)(buf + offset); const char *p = reinterpret_cast<const char *>(buf + offset);
bool negative = false; bool negative = false;
if (found_minus) { if (found_minus) {
@ -352,7 +352,7 @@ static really_inline bool parse_number(const uint8_t *const buf,
pj.write_tape_s64(0); // always write zero pj.write_tape_s64(0); // always write zero
return true; // always succeeds return true; // always succeeds
#else #else
const char *p = (const char *)(buf + offset); const char *p = reinterpret_cast<const char *>(buf + offset);
bool negative = false; bool negative = false;
if (found_minus) { if (found_minus) {
++p; ++p;

View File

@ -1,14 +1,15 @@
#ifndef SIMDJSON_PARSEDJSON_H #ifndef SIMDJSON_PARSEDJSON_H
#define SIMDJSON_PARSEDJSON_H #define SIMDJSON_PARSEDJSON_H
#include <cinttypes>
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
#include "simdjson/portability.h"
#include "simdjson/jsonformatutils.h"
#include "simdjson/common_defs.h" #include "simdjson/common_defs.h"
#include "simdjson/jsonformatutils.h"
#include "simdjson/portability.h"
#define JSONVALUEMASK 0xFFFFFFFFFFFFFF #define JSONVALUEMASK 0xFFFFFFFFFFFFFF
@ -67,12 +68,12 @@ public:
// this should be considered a private function // this should be considered a private function
really_inline void write_tape(uint64_t val, uint8_t c) { really_inline void write_tape(uint64_t val, uint8_t c) {
tape[current_loc++] = val | (((uint64_t)c) << 56); tape[current_loc++] = val | ((static_cast<uint64_t>(c)) << 56);
} }
really_inline void write_tape_s64(int64_t i) { really_inline void write_tape_s64(int64_t i) {
write_tape(0, 'l'); write_tape(0, 'l');
tape[current_loc++] = *((uint64_t *)&i); tape[current_loc++] = *(reinterpret_cast<uint64_t *>(&i));
} }
really_inline void write_tape_double(double d) { really_inline void write_tape_double(double d) {
@ -192,7 +193,7 @@ public:
private: private:
iterator& operator=(const iterator& other) ; iterator& operator=(const iterator& other) = delete ;
ParsedJson &pj; ParsedJson &pj;
size_t depth; size_t depth;
@ -203,13 +204,13 @@ private:
scopeindex_t *depthindex; scopeindex_t *depthindex;
}; };
size_t bytecapacity; // indicates how many bits are meant to be supported size_t bytecapacity{0}; // indicates how many bits are meant to be supported
size_t depthcapacity; // how deep we can go size_t depthcapacity{0}; // how deep we can go
size_t tapecapacity; size_t tapecapacity{0};
size_t stringcapacity; size_t stringcapacity{0};
uint32_t current_loc; uint32_t current_loc{0};
uint32_t n_structural_indexes; uint32_t n_structural_indexes{0};
uint32_t *structural_indexes; uint32_t *structural_indexes;
@ -223,10 +224,13 @@ private:
uint8_t *string_buf; // should be at least bytecapacity uint8_t *string_buf; // should be at least bytecapacity
uint8_t *current_string_buf_loc; uint8_t *current_string_buf_loc;
bool isvalid; bool isvalid{false};
private : private :
ParsedJson(const ParsedJson & p) = delete;
// we don't want the copy constructor to be called
ParsedJson(const ParsedJson & p) = delete; // we don't want the copy constructor to be called
// we don't want the assignment to be called
ParsedJson & operator=(const ParsedJson&o) = delete; ParsedJson & operator=(const ParsedJson&o) = delete;
}; };
@ -234,14 +238,14 @@ private :
// dump bits low to high // dump bits low to high
inline void dumpbits_always(uint64_t v, const std::string &msg) { inline void dumpbits_always(uint64_t v, const std::string &msg) {
for (uint32_t i = 0; i < 64; i++) { for (uint32_t i = 0; i < 64; i++) {
std::cout << (((v >> (uint64_t)i) & 0x1ULL) ? "1" : "_"); std::cout << (((v >> static_cast<uint64_t>(i)) & 0x1ULL) ? "1" : "_");
} }
std::cout << " " << msg.c_str() << "\n"; std::cout << " " << msg.c_str() << "\n";
} }
inline void dumpbits32_always(uint32_t v, const std::string &msg) { inline void dumpbits32_always(uint32_t v, const std::string &msg) {
for (uint32_t i = 0; i < 32; i++) { for (uint32_t i = 0; i < 32; i++) {
std::cout << (((v >> (uint32_t)i) & 0x1ULL) ? "1" : "_"); std::cout << (((v >> i) & 0x1ULL) ? "1" : "_");
} }
std::cout << " " << msg.c_str() << "\n"; std::cout << " " << msg.c_str() << "\n";
} }

View File

@ -42,8 +42,8 @@ static inline int hamming(uint64_t input_num) {
} }
#else #else
#include <x86intrin.h>
#include <cstdint> #include <cstdint>
#include <x86intrin.h>
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
return __builtin_uaddll_overflow(value1, value2, (unsigned long long*)result); return __builtin_uaddll_overflow(value1, value2, (unsigned long long*)result);
@ -86,7 +86,7 @@ static inline void *aligned_malloc(size_t alignment, size_t size) {
#else #else
// somehow, if this is used before including "x86intrin.h", it creates an // somehow, if this is used before including "x86intrin.h", it creates an
// implicit defined warning. // implicit defined warning.
if (posix_memalign(&p, alignment, size) != 0) return NULL; if (posix_memalign(&p, alignment, size) != 0) { return nullptr; }
#endif #endif
return p; return p;
} }
@ -114,7 +114,7 @@ static inline void _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo,
static inline void aligned_free(void *memblock) { static inline void aligned_free(void *memblock) {
if(memblock == NULL) return; if(memblock == nullptr) { return; }
#ifdef _MSC_VER #ifdef _MSC_VER
_aligned_free(memblock); _aligned_free(memblock);
#elif defined(__MINGW32__) || defined(__MINGW64__) #elif defined(__MINGW32__) || defined(__MINGW64__)
@ -124,4 +124,4 @@ static inline void aligned_free(void *memblock) {
#endif #endif
} }
#endif /* end of include PORTABILITY_H */ #endif // SIMDJSON_PORTABILITY_H

View File

@ -34989,7 +34989,7 @@ static const unsigned char mask128_epi32[] = {
#ifdef __AVX2__ #ifdef __AVX2__
#include <stdint.h> #include <cstdint>
static const uint32_t mask256_epi32[] = { static const uint32_t mask256_epi32[] = {
0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 7, 0, 2, 3, 4, 5, 6, 7, 7, 2, 0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 7, 0, 2, 3, 4, 5, 6, 7, 7, 2,

View File

@ -2,7 +2,7 @@
#ifndef SIMDJSON_SIMDUTF8CHECK_H #ifndef SIMDJSON_SIMDUTF8CHECK_H
#define SIMDJSON_SIMDUTF8CHECK_H #define SIMDJSON_SIMDUTF8CHECK_H
#include <stdbool.h>
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include <string.h> #include <string.h>
@ -168,7 +168,7 @@ static struct avx_processed_utf_bytes
avxcheckUTF8Bytes(__m256i current_bytes, avxcheckUTF8Bytes(__m256i current_bytes,
struct avx_processed_utf_bytes *previous, struct avx_processed_utf_bytes *previous,
__m256i *has_error) { __m256i *has_error) {
struct avx_processed_utf_bytes pb; struct avx_processed_utf_bytes pb{};
avx_count_nibbles(current_bytes, &pb); avx_count_nibbles(current_bytes, &pb);
avxcheckSmallerThan0xF4(current_bytes, has_error); avxcheckSmallerThan0xF4(current_bytes, has_error);

View File

@ -9,7 +9,7 @@ bool find_structural_bits(const uint8_t *buf, size_t len, ParsedJson &pj);
WARN_UNUSED WARN_UNUSED
static inline bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) { static inline bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
return find_structural_bits((const uint8_t *)buf, len, pj); return find_structural_bits(reinterpret_cast<const uint8_t *>(buf), len, pj);
} }
#endif #endif

View File

@ -1,5 +1,5 @@
#ifndef SIMDJSON_STAGE34_UNIFIED_H #ifndef SIMDJSON_STAGE2_BUILD_TAPE_H
#define SIMDJSON_STAGE34_UNIFIED_H #define SIMDJSON_STAGE2_BUILD_TAPE_H
#include "simdjson/common_defs.h" #include "simdjson/common_defs.h"
#include "simdjson/parsedjson.h" #include "simdjson/parsedjson.h"
@ -12,7 +12,7 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj);
WARN_UNUSED WARN_UNUSED
static inline bool unified_machine(const char *buf, size_t len, ParsedJson &pj) { static inline bool unified_machine(const char *buf, size_t len, ParsedJson &pj) {
return unified_machine((const uint8_t *)buf,len,pj); return unified_machine(reinterpret_cast<const uint8_t *>(buf),len,pj);
} }
#endif #endif

View File

@ -2,8 +2,8 @@
#define SIMDJSON_STRINGPARSING_H #define SIMDJSON_STRINGPARSING_H
#include "simdjson/common_defs.h" #include "simdjson/common_defs.h"
#include "simdjson/parsedjson.h"
#include "simdjson/jsoncharutils.h" #include "simdjson/jsoncharutils.h"
#include "simdjson/parsedjson.h"
// begin copypasta // begin copypasta
@ -85,11 +85,11 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
uint8_t *const start_of_string = dst; uint8_t *const start_of_string = dst;
#endif #endif
while (1) { while (1) {
__m256i v = _mm256_loadu_si256((const __m256i *)(src)); __m256i v = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
uint32_t bs_bits = auto bs_bits =
(uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\'))); static_cast<uint32_t>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\'))));
uint32_t quote_bits = auto quote_bits =
(uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('"'))); static_cast<uint32_t>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('"'))));
#define CHECKUNESCAPED #define CHECKUNESCAPED
// All Unicode characters may be placed within the // All Unicode characters may be placed within the
// quotation marks, except for the characters that MUST be escaped: // quotation marks, except for the characters that MUST be escaped:
@ -105,7 +105,7 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
uint32_t bs_dist = trailingzeroes(bs_bits); uint32_t bs_dist = trailingzeroes(bs_bits);
// store to dest unconditionally - we can overwrite the bits we don't like // store to dest unconditionally - we can overwrite the bits we don't like
// later // later
_mm256_storeu_si256((__m256i *)(dst), v); _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), v);
if (quote_dist < bs_dist) { if (quote_dist < bs_dist) {
// we encountered quotes first. Move dst to point to quotes and exit // we encountered quotes first. Move dst to point to quotes and exit
dst[quote_dist] = 0; // null terminate and get out dst[quote_dist] = 0; // null terminate and get out
@ -115,7 +115,7 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
pj.current_string_buf_loc = dst + quote_dist + 1; // the +1 is due to the 0 value pj.current_string_buf_loc = dst + quote_dist + 1; // the +1 is due to the 0 value
#ifdef CHECKUNESCAPED #ifdef CHECKUNESCAPED
// check that there is no unescaped char before the quote // check that there is no unescaped char before the quote
uint32_t unescaped_bits = (uint32_t)_mm256_movemask_epi8(unescaped_vec); auto unescaped_bits = static_cast<uint32_t>(_mm256_movemask_epi8(unescaped_vec));
bool is_ok = ((quote_bits - 1) & (~ quote_bits) & unescaped_bits) == 0; bool is_ok = ((quote_bits - 1) & (~ quote_bits) & unescaped_bits) == 0;
#ifdef JSON_TEST_STRINGS // for unit testing #ifdef JSON_TEST_STRINGS // for unit testing
if(is_ok) foundString(buf + offset,start_of_string,pj.current_string_buf_loc - 1); if(is_ok) foundString(buf + offset,start_of_string,pj.current_string_buf_loc - 1);
@ -128,11 +128,11 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
#endif // JSON_TEST_STRINGS #endif // JSON_TEST_STRINGS
return true; return true;
#endif //CHECKUNESCAPED #endif //CHECKUNESCAPED
} else if (quote_dist > bs_dist) { } if (quote_dist > bs_dist) {
uint8_t escape_char = src[bs_dist + 1]; uint8_t escape_char = src[bs_dist + 1];
#ifdef CHECKUNESCAPED #ifdef CHECKUNESCAPED
// we are going to need the unescaped_bits to check for unescaped chars // we are going to need the unescaped_bits to check for unescaped chars
uint32_t unescaped_bits = (uint32_t)_mm256_movemask_epi8(unescaped_vec); auto unescaped_bits = static_cast<uint32_t>(_mm256_movemask_epi8(unescaped_vec));
if(((bs_bits - 1) & (~ bs_bits) & unescaped_bits) != 0) { if(((bs_bits - 1) & (~ bs_bits) & unescaped_bits) != 0) {
#ifdef JSON_TEST_STRINGS // for unit testing #ifdef JSON_TEST_STRINGS // for unit testing
foundBadString(buf + offset); foundBadString(buf + offset);
@ -158,7 +158,7 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
// note this may reach beyond the part of the buffer we've actually // note this may reach beyond the part of the buffer we've actually
// seen. I think this is ok // seen. I think this is ok
uint8_t escape_result = escape_map[escape_char]; uint8_t escape_result = escape_map[escape_char];
if (!escape_result) { if (escape_result == 0u) {
#ifdef JSON_TEST_STRINGS // for unit testing #ifdef JSON_TEST_STRINGS // for unit testing
foundBadString(buf + offset); foundBadString(buf + offset);
#endif // JSON_TEST_STRINGS #endif // JSON_TEST_STRINGS

View File

@ -1,6 +1,6 @@
#include "simdjson/jsonioutil.h" #include "simdjson/jsonioutil.h"
#include <cstring> #include <cstring>
#include <stdlib.h> #include <cstdlib>
char * allocate_padded_buffer(size_t length) { char * allocate_padded_buffer(size_t length) {
// we could do a simple malloc // we could do a simple malloc
@ -13,18 +13,19 @@ char * allocate_padded_buffer(size_t length) {
#elif defined(__MINGW32__) || defined(__MINGW64__) #elif defined(__MINGW32__) || defined(__MINGW64__)
padded_buffer = __mingw_aligned_malloc(totalpaddedlength, 64); padded_buffer = __mingw_aligned_malloc(totalpaddedlength, 64);
#else #else
if (posix_memalign((void **)&padded_buffer, 64, totalpaddedlength) != 0) return NULL; if (posix_memalign(reinterpret_cast<void **>(&padded_buffer), 64, totalpaddedlength) != 0) { return nullptr;
}
#endif #endif
return padded_buffer; return padded_buffer;
} }
std::string_view get_corpus(std::string filename) { std::string_view get_corpus(const std::string& filename) {
std::FILE *fp = std::fopen(filename.c_str(), "rb"); std::FILE *fp = std::fopen(filename.c_str(), "rb");
if (fp) { if (fp != nullptr) {
std::fseek(fp, 0, SEEK_END); std::fseek(fp, 0, SEEK_END);
size_t len = std::ftell(fp); size_t len = std::ftell(fp);
char * buf = allocate_padded_buffer(len); char * buf = allocate_padded_buffer(len);
if(buf == NULL) { if(buf == nullptr) {
std::fclose(fp); std::fclose(fp);
throw std::runtime_error("could not allocate memory"); throw std::runtime_error("could not allocate memory");
} }

View File

@ -1,5 +1,5 @@
#include <cstdint>
#include "simdjson/portability.h" #include "simdjson/portability.h"
#include <cstdint>
#ifndef __AVX2__ #ifndef __AVX2__
@ -66,7 +66,7 @@ size_t jsonminify(const unsigned char *bytes, size_t howmany,
static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi, static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,
__m256i mask) { __m256i mask) {
__m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask); __m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
uint64_t res_0 = (uint32_t)_mm256_movemask_epi8(cmp_res_0); uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
__m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask); __m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
return res_0 | (res_1 << 32); return res_0 | (res_1 << 32);
@ -87,8 +87,8 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
size_t avxlen = len - 63; size_t avxlen = len - 63;
for (; idx < avxlen; idx += 64) { for (; idx < avxlen; idx += 64) {
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buf + idx + 0)); __m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 0));
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buf + idx + 32)); __m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 32));
uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi, uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi,
_mm256_set1_epi8('\\')); _mm256_set1_epi8('\\'));
uint64_t start_edges = bs_bits & ~(bs_bits << 1); uint64_t start_edges = bs_bits & ~(bs_bits << 1);
@ -112,7 +112,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128( uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0)); _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
quote_mask ^= prev_iter_inside_quote; quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// might be undefined behavior, should be fully defined in C++20, ok according to John Regehr from the University of Utah prev_iter_inside_quote = static_cast<uint64_t>(static_cast<int64_t>(quote_mask) >> 63);// might be undefined behavior, should be fully defined in C++20, ok according to John Regehr from the University of Utah
const __m256i low_nibble_mask = _mm256_setr_epi8( const __m256i low_nibble_mask = _mm256_setr_epi8(
// 0 9 a b c d // 0 9 a b c d
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
@ -138,7 +138,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
__m256i tmp_ws_hi = _mm256_cmpeq_epi8( __m256i tmp_ws_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0)); _mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo); uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi); uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32)); uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
whitespace &= ~quote_mask; whitespace &= ~quote_mask;
@ -151,15 +151,15 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF)); int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
int pop4 = hamming((~whitespace)); int pop4 = hamming((~whitespace));
__m256i vmask1 = __m256i vmask1 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF), _mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask2 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF)); reinterpret_cast<const __m128i *>(mask128_epi8) + (mask1 & 0x7FFF));
__m256i vmask2 = __m256i vmask2 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF), _mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask4 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF)); reinterpret_cast<const __m128i *>(mask128_epi8) + (mask3 & 0x7FFF));
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1); __m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2); __m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
_mm256_storeu2_m128i((__m128i *)(out + pop1), (__m128i *)out, result1); _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop1), reinterpret_cast<__m128i *>(out), result1);
_mm256_storeu2_m128i((__m128i *)(out + pop3), (__m128i *)(out + pop2), _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop3), reinterpret_cast<__m128i *>(out + pop2),
result2); result2);
out += pop4; out += pop4;
} }
@ -170,8 +170,8 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
uint8_t buffer[64]; uint8_t buffer[64];
memset(buffer, 0, 64); memset(buffer, 0, 64);
memcpy(buffer, buf + idx, len - idx); memcpy(buffer, buf + idx, len - idx);
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buffer)); __m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buffer));
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buffer + 32)); __m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buffer + 32));
uint64_t bs_bits = uint64_t bs_bits =
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\')); cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\'));
uint64_t start_edges = bs_bits & ~(bs_bits << 1); uint64_t start_edges = bs_bits & ~(bs_bits << 1);
@ -213,7 +213,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
__m256i tmp_ws_hi = _mm256_or_si256( __m256i tmp_ws_hi = _mm256_or_si256(
_mm256_cmpeq_epi8(mask_20, input_hi), _mm256_cmpeq_epi8(mask_20, input_hi),
_mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_hi))); _mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_hi)));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo); uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi); uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32)); uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32));
whitespace &= ~quote_mask; whitespace &= ~quote_mask;
@ -230,16 +230,16 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF)); int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
int pop4 = hamming((~whitespace)); int pop4 = hamming((~whitespace));
__m256i vmask1 = __m256i vmask1 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF), _mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask2 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF)); reinterpret_cast<const __m128i *>(mask128_epi8) + (mask1 & 0x7FFF));
__m256i vmask2 = __m256i vmask2 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF), _mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask4 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF)); reinterpret_cast<const __m128i *>(mask128_epi8) + (mask3 & 0x7FFF));
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1); __m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2); __m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
_mm256_storeu2_m128i((__m128i *)(buffer + pop1), (__m128i *)buffer, _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop1), reinterpret_cast<__m128i *>(buffer),
result1); result1);
_mm256_storeu2_m128i((__m128i *)(buffer + pop3), (__m128i *)(buffer + pop2), _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop3), reinterpret_cast<__m128i *>(buffer + pop2),
result2); result2);
memcpy(out, buffer, pop4); memcpy(out, buffer, pop4);
out += pop4; out += pop4;

View File

@ -7,10 +7,10 @@
#endif #endif
extern bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded);
extern bool json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded);
extern ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded);
extern ParsedJson build_parsed_json(const std::string_view &s, bool reallocifneeded);
// parse a document found in buf, need to preallocate ParsedJson. // parse a document found in buf, need to preallocate ParsedJson.
@ -33,8 +33,9 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
#endif #endif
if ( (reinterpret_cast<uintptr_t>(buf + len - 1) % pagesize ) < SIMDJSON_PADDING ) { if ( (reinterpret_cast<uintptr_t>(buf + len - 1) % pagesize ) < SIMDJSON_PADDING ) {
const uint8_t *tmpbuf = buf; const uint8_t *tmpbuf = buf;
buf = (uint8_t *) allocate_padded_buffer(len); buf = reinterpret_cast<uint8_t *>(allocate_padded_buffer(len));
if(buf == NULL) return false; if(buf == nullptr) { return false;
}
memcpy((void*)buf,tmpbuf,len); memcpy((void*)buf,tmpbuf,len);
reallocated = true; reallocated = true;
} }
@ -43,10 +44,12 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
if (isok) { if (isok) {
isok = unified_machine(buf, len, pj); isok = unified_machine(buf, len, pj);
} else { } else {
if(reallocated) free((void*)buf); if(reallocated) { free((void*)buf);
}
return false; return false;
} }
if(reallocated) free((void*)buf); if(reallocated) { free((void*)buf);
}
return isok; return isok;
} }

View File

@ -1,34 +1,33 @@
#include "simdjson/parsedjson.h" #include "simdjson/parsedjson.h"
ParsedJson::ParsedJson() : bytecapacity(0), depthcapacity(0), tapecapacity(0), stringcapacity(0), ParsedJson::ParsedJson() :
current_loc(0), n_structural_indexes(0), structural_indexes(nullptr), tape(nullptr), containing_scope_offset(nullptr),
structural_indexes(NULL), tape(NULL), containing_scope_offset(NULL), ret_address(nullptr), string_buf(nullptr), current_string_buf_loc(nullptr) {}
ret_address(NULL), string_buf(NULL), current_string_buf_loc(NULL), isvalid(false) {}
ParsedJson::~ParsedJson() { ParsedJson::~ParsedJson() {
deallocate(); deallocate();
} }
ParsedJson::ParsedJson(ParsedJson && p) ParsedJson::ParsedJson(ParsedJson && p)
: bytecapacity(std::move(p.bytecapacity)), : bytecapacity(p.bytecapacity),
depthcapacity(std::move(p.depthcapacity)), depthcapacity(p.depthcapacity),
tapecapacity(std::move(p.tapecapacity)), tapecapacity(p.tapecapacity),
stringcapacity(std::move(p.stringcapacity)), stringcapacity(p.stringcapacity),
current_loc(std::move(p.current_loc)), current_loc(p.current_loc),
n_structural_indexes(std::move(p.n_structural_indexes)), n_structural_indexes(p.n_structural_indexes),
structural_indexes(std::move(p.structural_indexes)), structural_indexes(p.structural_indexes),
tape(std::move(p.tape)), tape(p.tape),
containing_scope_offset(std::move(p.containing_scope_offset)), containing_scope_offset(p.containing_scope_offset),
ret_address(std::move(p.ret_address)), ret_address(p.ret_address),
string_buf(std::move(p.string_buf)), string_buf(p.string_buf),
current_string_buf_loc(std::move(p.current_string_buf_loc)), current_string_buf_loc(p.current_string_buf_loc),
isvalid(std::move(p.isvalid)) { isvalid(p.isvalid) {
p.structural_indexes=NULL; p.structural_indexes=nullptr;
p.tape=NULL; p.tape=nullptr;
p.containing_scope_offset=NULL; p.containing_scope_offset=nullptr;
p.ret_address=NULL; p.ret_address=nullptr;
p.string_buf=NULL; p.string_buf=nullptr;
p.current_string_buf_loc=NULL; p.current_string_buf_loc=nullptr;
} }
@ -40,8 +39,9 @@ bool ParsedJson::allocateCapacity(size_t len, size_t maxdepth) {
return false; return false;
} }
if (len > 0) { if (len > 0) {
if ((len <= bytecapacity) && (depthcapacity < maxdepth)) if ((len <= bytecapacity) && (depthcapacity < maxdepth)) {
return true; return true;
}
deallocate(); deallocate();
} }
isvalid = false; isvalid = false;
@ -59,14 +59,15 @@ bool ParsedJson::allocateCapacity(size_t len, size_t maxdepth) {
#else #else
ret_address = new (std::nothrow) char[maxdepth]; ret_address = new (std::nothrow) char[maxdepth];
#endif #endif
if ((string_buf == NULL) || (tape == NULL) || if ((string_buf == nullptr) || (tape == nullptr) ||
(containing_scope_offset == NULL) || (ret_address == NULL) || (structural_indexes == NULL)) { (containing_scope_offset == nullptr) || (ret_address == nullptr) || (structural_indexes == nullptr)) {
std::cerr << "Could not allocate memory" << std::endl; std::cerr << "Could not allocate memory" << std::endl;
if(ret_address != NULL) delete[] ret_address; delete[] ret_address;
if(containing_scope_offset != NULL) delete[] containing_scope_offset; delete[] containing_scope_offset;
if(tape != NULL) delete[] tape; delete[] tape;
if(string_buf != NULL) delete[] string_buf; delete[] string_buf;
if(structural_indexes != NULL) delete[] structural_indexes; delete[] structural_indexes;
return false; return false;
} }
@ -86,11 +87,16 @@ void ParsedJson::deallocate() {
depthcapacity = 0; depthcapacity = 0;
tapecapacity = 0; tapecapacity = 0;
stringcapacity = 0; stringcapacity = 0;
if(ret_address != NULL) delete[] ret_address; {delete[] ret_address;
if(containing_scope_offset != NULL) delete[] containing_scope_offset; }
if(tape != NULL) delete[] tape; {delete[] containing_scope_offset;
if(string_buf != NULL) delete[] string_buf; }
if(structural_indexes != NULL) delete[] structural_indexes; {delete[] tape;
}
{delete[] string_buf;
}
{delete[] structural_indexes;
}
isvalid = false; isvalid = false;
} }
@ -102,7 +108,8 @@ void ParsedJson::init() {
WARN_UNUSED WARN_UNUSED
bool ParsedJson::printjson(std::ostream &os) { bool ParsedJson::printjson(std::ostream &os) {
if(!isvalid) return false; if(!isvalid) { return false;
}
size_t tapeidx = 0; size_t tapeidx = 0;
uint64_t tape_val = tape[tapeidx]; uint64_t tape_val = tape[tapeidx];
uint8_t type = (tape_val >> 56); uint8_t type = (tape_val >> 56);
@ -120,7 +127,7 @@ bool ParsedJson::printjson(std::ostream &os) {
} }
tapeidx++; tapeidx++;
bool *inobject = new bool[depthcapacity]; bool *inobject = new bool[depthcapacity];
size_t *inobjectidx = new size_t[depthcapacity]; auto *inobjectidx = new size_t[depthcapacity];
int depth = 1; // only root at level 0 int depth = 1; // only root at level 0
inobjectidx[depth] = 0; inobjectidx[depth] = 0;
inobject[depth] = false; inobject[depth] = false;
@ -129,15 +136,18 @@ bool ParsedJson::printjson(std::ostream &os) {
uint64_t payload = tape_val & JSONVALUEMASK; uint64_t payload = tape_val & JSONVALUEMASK;
type = (tape_val >> 56); type = (tape_val >> 56);
if (!inobject[depth]) { if (!inobject[depth]) {
if ((inobjectidx[depth] > 0) && (type != ']')) if ((inobjectidx[depth] > 0) && (type != ']')) {
os << ","; os << ",";
}
inobjectidx[depth]++; inobjectidx[depth]++;
} else { // if (inobject) { } else { // if (inobject) {
if ((inobjectidx[depth] > 0) && ((inobjectidx[depth] & 1) == 0) && if ((inobjectidx[depth] > 0) && ((inobjectidx[depth] & 1) == 0) &&
(type != '}')) (type != '}')) {
os << ","; os << ",";
if (((inobjectidx[depth] & 1) == 1)) }
if (((inobjectidx[depth] & 1) == 1)) {
os << ":"; os << ":";
}
inobjectidx[depth]++; inobjectidx[depth]++;
} }
switch (type) { switch (type) {
@ -147,13 +157,15 @@ bool ParsedJson::printjson(std::ostream &os) {
os << '"'; os << '"';
break; break;
case 'l': // we have a long int case 'l': // we have a long int
if (tapeidx + 1 >= howmany) if (tapeidx + 1 >= howmany) {
return false; return false;
os << (int64_t)tape[++tapeidx]; }
os << static_cast<int64_t>(tape[++tapeidx]);
break; break;
case 'd': // we have a double case 'd': // we have a double
if (tapeidx + 1 >= howmany) if (tapeidx + 1 >= howmany) {
return false; return false;
}
double answer; double answer;
memcpy(&answer, &tape[++tapeidx], sizeof(answer)); memcpy(&answer, &tape[++tapeidx], sizeof(answer));
os << answer; os << answer;
@ -206,7 +218,8 @@ bool ParsedJson::printjson(std::ostream &os) {
WARN_UNUSED WARN_UNUSED
bool ParsedJson::dump_raw_tape(std::ostream &os) { bool ParsedJson::dump_raw_tape(std::ostream &os) {
if(!isvalid) return false; if(!isvalid) { return false;
}
size_t tapeidx = 0; size_t tapeidx = 0;
uint64_t tape_val = tape[tapeidx]; uint64_t tape_val = tape[tapeidx];
uint8_t type = (tape_val >> 56); uint8_t type = (tape_val >> 56);
@ -234,14 +247,16 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) {
os << '\n'; os << '\n';
break; break;
case 'l': // we have a long int case 'l': // we have a long int
if (tapeidx + 1 >= howmany) if (tapeidx + 1 >= howmany) {
return false; return false;
os << "integer " << (int64_t)tape[++tapeidx] << "\n"; }
os << "integer " << static_cast<int64_t>(tape[++tapeidx]) << "\n";
break; break;
case 'd': // we have a double case 'd': // we have a double
os << "float "; os << "float ";
if (tapeidx + 1 >= howmany) if (tapeidx + 1 >= howmany) {
return false; return false;
}
double answer; double answer;
memcpy(&answer, &tape[++tapeidx], sizeof(answer)); memcpy(&answer, &tape[++tapeidx], sizeof(answer));
os << answer << '\n'; os << answer << '\n';

View File

@ -1,10 +1,11 @@
#include "simdjson/parsedjson.h" #include "simdjson/parsedjson.h"
#include "simdjson/common_defs.h" #include "simdjson/common_defs.h"
ParsedJson::iterator::iterator(ParsedJson &pj_) : pj(pj_), depth(0), location(0), tape_length(0), depthindex(NULL) { ParsedJson::iterator::iterator(ParsedJson &pj_) : pj(pj_), depth(0), location(0), tape_length(0), depthindex(nullptr) {
if(pj.isValid()) { if(pj.isValid()) {
depthindex = new scopeindex_t[pj.depthcapacity]; depthindex = new scopeindex_t[pj.depthcapacity];
if(depthindex == NULL) return; if(depthindex == nullptr) { return;
}
depthindex[0].start_of_scope = location; depthindex[0].start_of_scope = location;
current_val = pj.tape[location++]; current_val = pj.tape[location++];
current_type = (current_val >> 56); current_type = (current_val >> 56);
@ -29,9 +30,9 @@ ParsedJson::iterator::~iterator() {
ParsedJson::iterator::iterator(const iterator &o): ParsedJson::iterator::iterator(const iterator &o):
pj(o.pj), depth(o.depth), location(o.location), pj(o.pj), depth(o.depth), location(o.location),
tape_length(o.tape_length), current_type(o.current_type), tape_length(o.tape_length), current_type(o.current_type),
current_val(o.current_val), depthindex(NULL) { current_val(o.current_val), depthindex(nullptr) {
depthindex = new scopeindex_t[pj.depthcapacity]; depthindex = new scopeindex_t[pj.depthcapacity];
if(depthindex != NULL) { if(depthindex != nullptr) {
memcpy(o.depthindex, depthindex, pj.depthcapacity * sizeof(depthindex[0])); memcpy(o.depthindex, depthindex, pj.depthcapacity * sizeof(depthindex[0]));
} else { } else {
tape_length = 0; tape_length = 0;
@ -39,10 +40,10 @@ ParsedJson::iterator::iterator(const iterator &o):
} }
ParsedJson::iterator::iterator(iterator &&o): ParsedJson::iterator::iterator(iterator &&o):
pj(o.pj), depth(std::move(o.depth)), location(std::move(o.location)), pj(o.pj), depth(o.depth), location(o.location),
tape_length(std::move(o.tape_length)), current_type(std::move(o.current_type)), tape_length(o.tape_length), current_type(o.current_type),
current_val(std::move(o.current_val)), depthindex(std::move(o.depthindex)) { current_val(o.current_val), depthindex(o.depthindex) {
o.depthindex = NULL;// we take ownership o.depthindex = nullptr;// we take ownership
} }
WARN_UNUSED WARN_UNUSED
@ -106,19 +107,21 @@ uint8_t ParsedJson::iterator::get_type() const {
int64_t ParsedJson::iterator::get_integer() const { int64_t ParsedJson::iterator::get_integer() const {
if(location + 1 >= tape_length) return 0;// default value in case of error if(location + 1 >= tape_length) { return 0;// default value in case of error
return (int64_t) pj.tape[location + 1]; }
return static_cast<int64_t>(pj.tape[location + 1]);
} }
double ParsedJson::iterator::get_double() const { double ParsedJson::iterator::get_double() const {
if(location + 1 >= tape_length) return NAN;// default value in case of error if(location + 1 >= tape_length) { return NAN;// default value in case of error
}
double answer; double answer;
memcpy(&answer, & pj.tape[location + 1], sizeof(answer)); memcpy(&answer, & pj.tape[location + 1], sizeof(answer));
return answer; return answer;
} }
const char * ParsedJson::iterator::get_string() const { const char * ParsedJson::iterator::get_string() const {
return (const char *)(pj.string_buf + (current_val & JSONVALUEMASK)) ; return reinterpret_cast<const char *>(pj.string_buf + (current_val & JSONVALUEMASK)) ;
} }
@ -156,7 +159,8 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
assert(is_string()); assert(is_string());
bool rightkey = (strcmp(get_string(),key)==0); bool rightkey = (strcmp(get_string(),key)==0);
next(); next();
if(rightkey) return true; if(rightkey) { return true;
}
} while(next()); } while(next());
assert(up());// not found assert(up());// not found
} }
@ -180,9 +184,10 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
current_val = nextval; current_val = nextval;
current_type = nexttype; current_type = nexttype;
return true; return true;
} else { }
size_t increment = (current_type == 'd' || current_type == 'l') ? 2 : 1; size_t increment = (current_type == 'd' || current_type == 'l') ? 2 : 1;
if(location + increment >= tape_length) return false; if(location + increment >= tape_length) { return false;
}
uint64_t nextval = pj.tape[location + increment]; uint64_t nextval = pj.tape[location + increment];
uint8_t nexttype = (nextval >> 56); uint8_t nexttype = (nextval >> 56);
if((nexttype == ']') || (nexttype == '}')) { if((nexttype == ']') || (nexttype == '}')) {
@ -192,12 +197,13 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
current_val = nextval; current_val = nextval;
current_type = nexttype; current_type = nexttype;
return true; return true;
}
} }
bool ParsedJson::iterator::prev() { bool ParsedJson::iterator::prev() {
if(location - 1 < depthindex[depth].start_of_scope) return false; if(location - 1 < depthindex[depth].start_of_scope) { return false;
}
location -= 1; location -= 1;
current_val = pj.tape[location]; current_val = pj.tape[location];
current_type = (current_val >> 56); current_type = (current_val >> 56);
@ -230,7 +236,8 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
bool ParsedJson::iterator::down() { bool ParsedJson::iterator::down() {
if(location + 1 >= tape_length) return false; if(location + 1 >= tape_length) { return false;
}
if ((current_type == '[') || (current_type == '{')) { if ((current_type == '[') || (current_type == '{')) {
size_t npos = (current_val & JSONVALUEMASK); size_t npos = (current_val & JSONVALUEMASK);
if(npos == location + 2) { if(npos == location + 2) {
@ -254,7 +261,8 @@ void ParsedJson::iterator::to_start_scope() {
} }
bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const { bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const {
if(!isOk()) return false; if(!isOk()) { return false;
}
switch (current_type) { switch (current_type) {
case '"': // we have a string case '"': // we have a string
os << '"'; os << '"';
@ -284,7 +292,7 @@ bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const {
case '}': // we end an object case '}': // we end an object
case '[': // we start an array case '[': // we start an array
case ']': // we end an array case ']': // we end an array
os << (char) current_type; os << static_cast<char>(current_type);
break; break;
default: default:
return false; return false;

View File

@ -1,7 +1,7 @@
#include "simdjson/portability.h" #include "simdjson/portability.h"
#include <cassert>
#include "simdjson/common_defs.h" #include "simdjson/common_defs.h"
#include "simdjson/parsedjson.h" #include "simdjson/parsedjson.h"
#include <cassert>
#ifndef SIMDJSON_SKIPUTF8VALIDATION #ifndef SIMDJSON_SKIPUTF8VALIDATION
#define SIMDJSON_UTF8VALIDATE #define SIMDJSON_UTF8VALIDATE
@ -21,7 +21,7 @@ using namespace std;
really_inline uint64_t cmp_mask_against_input(__m256i input_lo, __m256i input_hi, really_inline uint64_t cmp_mask_against_input(__m256i input_lo, __m256i input_hi,
__m256i mask) { __m256i mask) {
__m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask); __m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
uint64_t res_0 = (uint32_t)_mm256_movemask_epi8(cmp_res_0); uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
__m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask); __m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
return res_0 | (res_1 << 32); return res_0 | (res_1 << 32);
@ -38,7 +38,7 @@ WARN_UNUSED
uint32_t base = 0; uint32_t base = 0;
#ifdef SIMDJSON_UTF8VALIDATE #ifdef SIMDJSON_UTF8VALIDATE
__m256i has_error = _mm256_setzero_si256(); __m256i has_error = _mm256_setzero_si256();
struct avx_processed_utf_bytes previous; struct avx_processed_utf_bytes previous{};
previous.rawbytes = _mm256_setzero_si256(); previous.rawbytes = _mm256_setzero_si256();
previous.high_nibbles = _mm256_setzero_si256(); previous.high_nibbles = _mm256_setzero_si256();
previous.carried_continuations = _mm256_setzero_si256(); previous.carried_continuations = _mm256_setzero_si256();
@ -66,8 +66,8 @@ WARN_UNUSED
#ifndef _MSC_VER #ifndef _MSC_VER
__builtin_prefetch(buf + idx + 128); __builtin_prefetch(buf + idx + 128);
#endif #endif
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buf + idx + 0)); __m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 0));
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buf + idx + 32)); __m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 32));
#ifdef SIMDJSON_UTF8VALIDATE #ifdef SIMDJSON_UTF8VALIDATE
__m256i highbit = _mm256_set1_epi8(0x80); __m256i highbit = _mm256_set1_epi8(0x80);
if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) { if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) {
@ -130,29 +130,29 @@ WARN_UNUSED
uint32_t cnt = hamming(structurals); uint32_t cnt = hamming(structurals);
uint32_t next_base = base + cnt; uint32_t next_base = base + cnt;
while (structurals) { while (structurals != 0u) {
base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 0] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 1] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 2] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 3] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 4] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 5] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 6] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 7] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base += 8; base += 8;
} }
base = next_base; base = next_base;
quote_mask ^= prev_iter_inside_quote; quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20, John Regher from Utah U. says this is fine code prev_iter_inside_quote = static_cast<uint64_t>(static_cast<int64_t>(quote_mask) >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20, John Regher from Utah U. says this is fine code
// How do we build up a user traversable data structure // How do we build up a user traversable data structure
// first, do a 'shufti' to detect structural JSON characters // first, do a 'shufti' to detect structural JSON characters
@ -190,7 +190,7 @@ WARN_UNUSED
__m256i tmp_hi = _mm256_cmpeq_epi8( __m256i tmp_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0)); _mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0));
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo); uint64_t structural_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_lo));
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi); uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
structurals = ~(structural_res_0 | (structural_res_1 << 32)); structurals = ~(structural_res_0 | (structural_res_1 << 32));
@ -201,7 +201,7 @@ WARN_UNUSED
__m256i tmp_ws_hi = _mm256_cmpeq_epi8( __m256i tmp_ws_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0)); _mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo); uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi); uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32)); uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
// mask off anything inside quotes // mask off anything inside quotes
@ -244,8 +244,8 @@ WARN_UNUSED
uint8_t tmpbuf[64]; uint8_t tmpbuf[64];
memset(tmpbuf,0x20,64); memset(tmpbuf,0x20,64);
memcpy(tmpbuf,buf+idx,len - idx); memcpy(tmpbuf,buf+idx,len - idx);
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(tmpbuf + 0)); __m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(tmpbuf + 0));
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(tmpbuf + 32)); __m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(tmpbuf + 32));
#ifdef SIMDJSON_UTF8VALIDATE #ifdef SIMDJSON_UTF8VALIDATE
__m256i highbit = _mm256_set1_epi8(0x80); __m256i highbit = _mm256_set1_epi8(0x80);
if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) { if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) {
@ -308,22 +308,22 @@ WARN_UNUSED
uint32_t cnt = hamming(structurals); uint32_t cnt = hamming(structurals);
uint32_t next_base = base + cnt; uint32_t next_base = base + cnt;
while (structurals) { while (structurals != 0u) {
base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 0] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 1] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 2] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 3] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 4] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 5] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 6] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 7] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base += 8; base += 8;
} }
@ -364,7 +364,7 @@ WARN_UNUSED
__m256i tmp_hi = _mm256_cmpeq_epi8( __m256i tmp_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0)); _mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0));
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo); uint64_t structural_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_lo));
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi); uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
structurals = ~(structural_res_0 | (structural_res_1 << 32)); structurals = ~(structural_res_0 | (structural_res_1 << 32));
@ -375,7 +375,7 @@ WARN_UNUSED
__m256i tmp_ws_hi = _mm256_cmpeq_epi8( __m256i tmp_ws_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0)); _mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo); uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi); uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32)); uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
@ -412,22 +412,22 @@ WARN_UNUSED
} }
uint32_t cnt = hamming(structurals); uint32_t cnt = hamming(structurals);
uint32_t next_base = base + cnt; uint32_t next_base = base + cnt;
while (structurals) { while (structurals != 0u) {
base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 0] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 1] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 2] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 3] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 4] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 5] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 6] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals); base_ptr[base + 7] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1); structurals = structurals & (structurals - 1);
base += 8; base += 8;
} }
@ -435,7 +435,7 @@ WARN_UNUSED
pj.n_structural_indexes = base; pj.n_structural_indexes = base;
// a valid JSON file cannot have zero structural indexes - we should have found something // a valid JSON file cannot have zero structural indexes - we should have found something
if (!pj.n_structural_indexes) { if (pj.n_structural_indexes == 0u) {
return false; return false;
} }
if(base_ptr[pj.n_structural_indexes-1] > len) { if(base_ptr[pj.n_structural_indexes-1] > len) {
@ -449,7 +449,7 @@ WARN_UNUSED
base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array
#ifdef SIMDJSON_UTF8VALIDATE #ifdef SIMDJSON_UTF8VALIDATE
return _mm256_testz_si256(has_error, has_error); return _mm256_testz_si256(has_error, has_error) != 0;
#else #else
return true; return true;
#endif #endif

View File

@ -22,7 +22,7 @@ using namespace std;
WARN_UNUSED WARN_UNUSED
really_inline bool is_valid_true_atom(const uint8_t *loc) { really_inline bool is_valid_true_atom(const uint8_t *loc) {
uint64_t tv = *(const uint64_t *)"true "; uint64_t tv = *reinterpret_cast<const uint64_t *>("true ");
uint64_t mask4 = 0x00000000ffffffff; uint64_t mask4 = 0x00000000ffffffff;
uint32_t error = 0; uint32_t error = 0;
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++) uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
@ -34,7 +34,7 @@ really_inline bool is_valid_true_atom(const uint8_t *loc) {
WARN_UNUSED WARN_UNUSED
really_inline bool is_valid_false_atom(const uint8_t *loc) { really_inline bool is_valid_false_atom(const uint8_t *loc) {
uint64_t fv = *(const uint64_t *)"false "; uint64_t fv = *reinterpret_cast<const uint64_t *>("false ");
uint64_t mask5 = 0x000000ffffffffff; uint64_t mask5 = 0x000000ffffffffff;
uint32_t error = 0; uint32_t error = 0;
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++) uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
@ -46,7 +46,7 @@ really_inline bool is_valid_false_atom(const uint8_t *loc) {
WARN_UNUSED WARN_UNUSED
really_inline bool is_valid_null_atom(const uint8_t *loc) { really_inline bool is_valid_null_atom(const uint8_t *loc) {
uint64_t nv = *(const uint64_t *)"null "; uint64_t nv = *reinterpret_cast<const uint64_t *>("null ");
uint64_t mask4 = 0x00000000ffffffff; uint64_t mask4 = 0x00000000ffffffff;
uint32_t error = 0; uint32_t error = 0;
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++) uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
@ -141,11 +141,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// we need to make a copy to make sure that the string is NULL terminated. // we need to make a copy to make sure that the string is NULL terminated.
// this only applies to the JSON document made solely of the true value. // this only applies to the JSON document made solely of the true value.
// this will almost never be called in practice // this will almost never be called in practice
char * copy = (char *) malloc(len + SIMDJSON_PADDING); char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == NULL) goto fail; if(copy == nullptr) { goto fail;
}
memcpy(copy, buf, len); memcpy(copy, buf, len);
copy[len] = '\0'; copy[len] = '\0';
if (!is_valid_true_atom((const uint8_t *)copy + idx)) { if (!is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
free(copy); free(copy);
goto fail; goto fail;
} }
@ -157,11 +158,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// we need to make a copy to make sure that the string is NULL terminated. // we need to make a copy to make sure that the string is NULL terminated.
// this only applies to the JSON document made solely of the false value. // this only applies to the JSON document made solely of the false value.
// this will almost never be called in practice // this will almost never be called in practice
char * copy = (char *) malloc(len + SIMDJSON_PADDING); char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == NULL) goto fail; if(copy == nullptr) { goto fail;
}
memcpy(copy, buf, len); memcpy(copy, buf, len);
copy[len] = '\0'; copy[len] = '\0';
if (!is_valid_false_atom((const uint8_t *)copy + idx)) { if (!is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
free(copy); free(copy);
goto fail; goto fail;
} }
@ -173,11 +175,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// we need to make a copy to make sure that the string is NULL terminated. // we need to make a copy to make sure that the string is NULL terminated.
// this only applies to the JSON document made solely of the null value. // this only applies to the JSON document made solely of the null value.
// this will almost never be called in practice // this will almost never be called in practice
char * copy = (char *) malloc(len + SIMDJSON_PADDING); char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == NULL) goto fail; if(copy == nullptr) { goto fail;
}
memcpy(copy, buf, len); memcpy(copy, buf, len);
copy[len] = '\0'; copy[len] = '\0';
if (!is_valid_null_atom((const uint8_t *)copy + idx)) { if (!is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
free(copy); free(copy);
goto fail; goto fail;
} }
@ -198,11 +201,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// we need to make a copy to make sure that the string is NULL terminated. // we need to make a copy to make sure that the string is NULL terminated.
// this is done only for JSON documents made of a sole number // this is done only for JSON documents made of a sole number
// this will almost never be called in practice // this will almost never be called in practice
char * copy = (char *) malloc(len + SIMDJSON_PADDING); char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == NULL) goto fail; if(copy == nullptr) { goto fail;
}
memcpy(copy, buf, len); memcpy(copy, buf, len);
copy[len] = '\0'; copy[len] = '\0';
if (!parse_number((const uint8_t *)copy, pj, idx, false)) { if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, false)) {
free(copy); free(copy);
goto fail; goto fail;
} }
@ -213,11 +217,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// we need to make a copy to make sure that the string is NULL terminated. // we need to make a copy to make sure that the string is NULL terminated.
// this is done only for JSON documents made of a sole number // this is done only for JSON documents made of a sole number
// this will almost never be called in practice // this will almost never be called in practice
char * copy = (char *) malloc(len + SIMDJSON_PADDING); char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == NULL) goto fail; if(copy == nullptr) { goto fail;
}
memcpy(copy, buf, len); memcpy(copy, buf, len);
copy[len] = '\0'; copy[len] = '\0';
if (!parse_number((const uint8_t *)copy, pj, idx, true)) { if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, true)) {
free(copy); free(copy);
goto fail; goto fail;
} }

View File

@ -106,18 +106,17 @@ int main(int argc, char *argv[]) {
void *state; void *state;
bool ultrajson_correct = ((UJDecode(buffer, p.size(), NULL, &state) == NULL) == false); bool ultrajson_correct = ((UJDecode(buffer, p.size(), NULL, &state) == NULL) == false);
jsmntok_t * tokens = new jsmntok_t[p.size()]; auto * tokens = make_unique<jsmntok_t[](p.size());
bool jsmn_correct = false; bool jsmn_correct = false;
if(tokens == NULL) { if(tokens == nullptr) {
printf("Failed to alloc memory for jsmn\n"); printf("Failed to alloc memory for jsmn\n");
} else { } else {
jsmn_parser parser; jsmn_parser parser;
jsmn_init(&parser); jsmn_init(&parser);
memcpy(buffer, p.data(), p.size()); memcpy(buffer, p.data(), p.size());
buffer[p.size()] = '\0'; buffer[p.size()] = '\0';
int r = jsmn_parse(&parser, buffer, p.size(), tokens, p.size()); int r = jsmn_parse(&parser, buffer, p.size(), tokens.get(), p.size());
delete[] tokens; tokens = nullptr;
tokens = NULL;
jsmn_correct = (r > 0); jsmn_correct = (r > 0);
} }

View File

@ -1,4 +1,4 @@
#include <assert.h> #include <cassert>
#include <cstring> #include <cstring>
#ifndef _MSC_VER #ifndef _MSC_VER
#include <dirent.h> #include <dirent.h>
@ -7,10 +7,10 @@
// Microsoft can't be bothered to provide standard utils. // Microsoft can't be bothered to provide standard utils.
#include <dirent_portable.h> #include <dirent_portable.h>
#endif #endif
#include <inttypes.h> #include <cinttypes>
#include <stdbool.h>
#include <stdio.h> #include <cstdio>
#include <stdlib.h> #include <cstdlib>
#include "simdjson/jsonparser.h" #include "simdjson/jsonparser.h"
@ -19,7 +19,7 @@
*/ */
static bool hasExtension(const char *filename, const char *extension) { static bool hasExtension(const char *filename, const char *extension) {
const char *ext = strrchr(filename, '.'); const char *ext = strrchr(filename, '.');
return (ext && !strcmp(ext, extension)); return ((ext != nullptr) && (strcmp(ext, extension) == 0));
} }
bool startsWith(const char *pre, const char *str) { bool startsWith(const char *pre, const char *str) {
@ -28,7 +28,7 @@ bool startsWith(const char *pre, const char *str) {
} }
bool contains(const char *pre, const char *str) { bool contains(const char *pre, const char *str) {
return (strstr(str, pre) != NULL); return (strstr(str, pre) != nullptr);
} }
@ -37,7 +37,7 @@ bool validate(const char *dirname) {
const char *extension = ".json"; const char *extension = ".json";
size_t dirlen = strlen(dirname); size_t dirlen = strlen(dirname);
struct dirent **entry_list; struct dirent **entry_list;
int c = scandir(dirname, &entry_list, 0, alphasort); int c = scandir(dirname, &entry_list, nullptr, alphasort);
if (c < 0) { if (c < 0) {
fprintf(stderr, "error accessing %s \n", dirname); fprintf(stderr, "error accessing %s \n", dirname);
return false; return false;
@ -47,16 +47,17 @@ bool validate(const char *dirname) {
return false; return false;
} }
bool * isfileasexpected = new bool[c]; bool * isfileasexpected = new bool[c];
for(int i = 0; i < c; i++) isfileasexpected[i] = true; for(int i = 0; i < c; i++) { isfileasexpected[i] = true;
}
size_t howmany = 0; size_t howmany = 0;
bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/'); bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
for (int i = 0; i < c; i++) { for (int i = 0; i < c; i++) {
const char *name = entry_list[i]->d_name; const char *name = entry_list[i]->d_name;
if (hasExtension(name, extension)) { if (hasExtension(name, extension)) {
printf("validating: file %s ", name); printf("validating: file %s ", name);
fflush(NULL); fflush(nullptr);
size_t filelen = strlen(name); size_t filelen = strlen(name);
char *fullpath = (char *)malloc(dirlen + filelen + 1 + 1); char *fullpath = static_cast<char *>(malloc(dirlen + filelen + 1 + 1));
strcpy(fullpath, dirname); strcpy(fullpath, dirname);
if (needsep) { if (needsep) {
fullpath[dirlen] = '/'; fullpath[dirlen] = '/';
@ -106,11 +107,13 @@ bool validate(const char *dirname) {
} else { } else {
fprintf(stderr, "There were problems! Consider reviewing the following files:\n"); fprintf(stderr, "There were problems! Consider reviewing the following files:\n");
for(int i = 0; i < c; i++) { for(int i = 0; i < c; i++) {
if(!isfileasexpected[i]) fprintf(stderr, "%s \n", entry_list[i]->d_name); if(!isfileasexpected[i]) { fprintf(stderr, "%s \n", entry_list[i]->d_name);
}
} }
} }
for (int i = 0; i < c; ++i) for (int i = 0; i < c; ++i) {
free(entry_list[i]); free(entry_list[i]);
}
free(entry_list); free(entry_list);
delete[] isfileasexpected; delete[] isfileasexpected;
return everythingfine; return everythingfine;

View File

@ -48,7 +48,7 @@ int main(int argc, char *argv[]) {
#ifndef _MSC_VER #ifndef _MSC_VER
int c; int c;
while ((c = getopt(argc, argv, "da")) != -1) while ((c = getopt(argc, argv, "da")) != -1) {
switch (c) { switch (c) {
case 'd': case 'd':
rawdump = true; rawdump = true;
@ -59,6 +59,7 @@ int main(int argc, char *argv[]) {
default: default:
abort(); abort();
} }
}
#else #else
int optind = 1; int optind = 1;
#endif #endif

View File

@ -39,7 +39,7 @@ struct stat_s {
bool valid; bool valid;
}; };
typedef struct stat_s stat_t; using stat_t = struct stat_s;
@ -50,8 +50,8 @@ stat_t simdjson_computestats(const std::string_view &p) {
if (!answer.valid) { if (!answer.valid) {
return answer; return answer;
} }
answer.backslash_count = count_backslash((const uint8_t*)p.data(), p.size()); answer.backslash_count = count_backslash(reinterpret_cast<const uint8_t*>(p.data()), p.size());
answer.nonasciibyte_count = count_nonasciibytes((const uint8_t*)p.data(), p.size()); answer.nonasciibyte_count = count_nonasciibytes(reinterpret_cast<const uint8_t*>(p.data()), p.size());
answer.byte_count = p.size(); answer.byte_count = p.size();
answer.integer_count = 0; answer.integer_count = 0;
answer.float_count = 0; answer.float_count = 0;

View File

@ -16,7 +16,7 @@ int main(int argc, char *argv[]) {
std::cout << "Could not load the file " << filename << std::endl; std::cout << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE; return EXIT_FAILURE;
} }
jsonminify(p, (char *)p.data()); jsonminify(p, const_cast<char *>(p.data()));
printf("%s",p.data()); printf("%s",p.data());
aligned_free((void*)p.data()); aligned_free((void*)p.data());
} }