Merge pull request #61 from NewProggie/fix_minor_problems

Fix minor problems
This commit is contained in:
geofflangdale 2019-02-26 20:50:03 +11:00 committed by GitHub
commit bdc2bc693f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
29 changed files with 311 additions and 262 deletions

View File

@ -1,17 +1,17 @@
#include <assert.h>
#include <ctype.h>
#include <cassert>
#include <cctype>
#ifndef _MSC_VER
#include <dirent.h>
#include <unistd.h>
#include <x86intrin.h>
#include <dirent.h>
#else
#include <intrin.h>
#endif
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cinttypes>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
#include <chrono>
@ -47,7 +47,7 @@ int main(int argc, char *argv[]) {
#ifndef _MSC_VER
int c;
while ((c = getopt(argc, argv, "1vdt")) != -1)
while ((c = getopt(argc, argv, "1vdt")) != -1) {
switch (c) {
case 't':
justdata = true;
@ -67,6 +67,7 @@ int main(int argc, char *argv[]) {
default:
abort();
}
}
#else
int optind = 1;
#endif
@ -78,8 +79,9 @@ int main(int argc, char *argv[]) {
if (optind + 1 < argc) {
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
}
if (verbose)
if (verbose) {
cout << "[verbose] loading " << filename << endl;
}
std::string_view p;
try {
p = get_corpus(filename);
@ -87,9 +89,10 @@ int main(int argc, char *argv[]) {
std::cout << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE;
}
if (verbose)
if (verbose) {
cout << "[verbose] loaded " << filename << " (" << p.size() << " bytes)"
<< endl;
}
#if defined(DEBUG)
const uint32_t iterations = 1;
#else
@ -125,8 +128,9 @@ int main(int argc, char *argv[]) {
bool isok = true;
for (uint32_t i = 0; i < iterations; i++) {
if (verbose)
if (verbose) {
cout << "[verbose] iteration # " << i << endl;
}
#ifndef SQUASH_COUNTERS
unified.start();
#endif
@ -144,8 +148,9 @@ int main(int argc, char *argv[]) {
cref0 += results[3];
cmis0 += results[4];
#endif
if (verbose)
if (verbose) {
cout << "[verbose] allocated memory for parsed JSON " << endl;
}
auto start = std::chrono::steady_clock::now();
#ifndef SQUASH_COUNTERS
@ -248,10 +253,11 @@ int main(int argc, char *argv[]) {
}
#endif
double min_result = *min_element(res.begin(), res.end());
if (!justdata)
if (!justdata) {
cout << "Min: " << min_result << " bytes read: " << p.size()
<< " Gigabytes/second: " << (p.size()) / (min_result * 1000000000.0)
<< "\n";
}
if (jsonoutput) {
isok = isok && pj.printjson(std::cout);
}

View File

@ -7,6 +7,8 @@
#endif //__linux__
#endif // _MSC_VER
#include <memory>
#include "benchmark.h"
@ -225,7 +227,7 @@ int main(int argc, char *argv[]) {
jsmntok_t * tokens = new jsmntok_t[p.size()];
auto * tokens = make_unique<jsmntok_t[](p.size());
if(tokens == NULL) {
printf("Failed to alloc memory for jsmn\n");
} else {
@ -234,9 +236,8 @@ int main(int argc, char *argv[]) {
memcpy(buffer, p.data(), p.size());
buffer[p.size()] = '\0';
BEST_TIME("jsmn ",
(jsmn_parse(&parser, buffer, p.size(), tokens, p.size()) > 0), true,
(jsmn_parse(&parser, buffer, p.size(), tokens.get(), p.size()) > 0), true,
jsmn_init(&parser), repeat, volume, !justdata);
delete[] tokens;
}
memcpy(buffer, p.data(), p.size());

View File

@ -42,7 +42,7 @@ struct stat_s {
bool valid;
};
// Short alias so the statistics record can be named without the struct keyword.
using stat_t = struct stat_s;
stat_t simdjson_computestats(const std::string_view &p) {
stat_t answer;
@ -51,9 +51,9 @@ stat_t simdjson_computestats(const std::string_view &p) {
if (!answer.valid) {
return answer;
}
answer.backslash_count = count_backslash((const uint8_t *)p.data(), p.size());
answer.backslash_count = count_backslash(reinterpret_cast<const uint8_t *>(p.data()), p.size());
answer.nonasciibyte_count =
count_nonasciibytes((const uint8_t *)p.data(), p.size());
count_nonasciibytes(reinterpret_cast<const uint8_t *>(p.data()), p.size());
answer.byte_count = p.size();
answer.integer_count = 0;
answer.float_count = 0;
@ -115,12 +115,13 @@ stat_t simdjson_computestats(const std::string_view &p) {
int main(int argc, char *argv[]) {
#ifndef _MSC_VER
int c;
while ((c = getopt(argc, argv, "")) != -1)
while ((c = getopt(argc, argv, "")) != -1) {
switch (c) {
default:
abort();
}
}
#else
int optind = 1;
#endif

View File

@ -56,4 +56,4 @@
#endif // MSC_VER
#endif // COMMON_DEFS_H
#endif // SIMDJSON_COMMON_DEFS_H

View File

@ -97,7 +97,7 @@ inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
if (cp <= 0x7F) {
c[0] = cp;
return 1; // ascii
} else if (cp <= 0x7FF) {
} if (cp <= 0x7FF) {
c[0] = (cp >> 6) + 192;
c[1] = (cp & 63) + 128;
return 2; // universal plane

View File

@ -1,12 +1,12 @@
#ifndef SIMDJSON_JSONFORMATUTILS_H
#define SIMDJSON_JSONFORMATUTILS_H
#include <stdio.h>
#include <iostream>
#include <cstdio>
#include <iomanip>
#include <iostream>
static inline void print_with_escapes(const unsigned char *src) {
while (*src) {
while (*src != 0u) {
switch (*src) {
case '\b':
putchar('\\');
@ -39,15 +39,16 @@ static inline void print_with_escapes(const unsigned char *src) {
default:
if (*src <= 0x1F) {
printf("\\u%04x", *src);
} else
} else {
putchar(*src);
}
}
src++;
}
}
static inline void print_with_escapes(const unsigned char *src, std::ostream &os) {
while (*src) {
while (*src != 0u) {
switch (*src) {
case '\b':
os << '\\';
@ -80,17 +81,18 @@ static inline void print_with_escapes(const unsigned char *src, std::ostream &os
default:
if (*src <= 0x1F) {
std::ios::fmtflags f(os.flags());
os << std::hex << std::setw(4) << std::setfill('0') << (int) *src;
os << std::hex << std::setw(4) << std::setfill('0') << static_cast<int>(*src);
os.flags(f);
} else
} else {
os << *src;
}
}
src++;
}
}
// Overload for plain char strings: reinterprets src as unsigned char and
// forwards exactly once to the unsigned char / std::ostream overload.
static inline void print_with_escapes(const char *src, std::ostream &os) {
  print_with_escapes(reinterpret_cast<const unsigned char *>(src), os);
}
#endif

View File

@ -1,12 +1,12 @@
#ifndef SIMDJSON_JSONIOUTIL_H
#define SIMDJSON_JSONIOUTIL_H
#include "simdjson/common_defs.h"
#include <exception>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include "simdjson/common_defs.h"
// low-level function to allocate memory with padding so we can read past the "length" bytes
@ -34,7 +34,7 @@ char * allocate_padded_buffer(size_t length);
// free((void*)p.data());//use aligned_free if you plan to use VisualStudio
// std::cout << "Could not load the file " << filename << std::endl;
// }
std::string_view get_corpus(std::string filename);
std::string_view get_corpus(const std::string& filename);
#endif

View File

@ -11,7 +11,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out);
// char-based convenience overload: reinterprets both buffers as uint8_t and
// forwards to jsonminify(const uint8_t *, size_t, uint8_t *), returning that
// call's result unchanged.
static inline size_t jsonminify(const char *buf, size_t len, char *out) {
  return jsonminify(reinterpret_cast<const uint8_t *>(buf), len,
                    reinterpret_cast<uint8_t *>(out));
}

View File

@ -31,7 +31,7 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
// all bytes at and after buf + len are ignored (can be garbage).
WARN_UNUSED
inline bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
  // Only the pointer type differs from the uint8_t overload: reinterpret the
  // buffer and forward every argument unchanged.
  return json_parse(reinterpret_cast<const uint8_t *>(buf), len, pj, reallocifneeded);
}
// Parse a document found in buf, need to preallocate ParsedJson.
@ -66,7 +66,7 @@ WARN_UNUSED
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
// all bytes at and after buf + len are ignored (can be garbage).
inline ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded = true) {
  // Only the pointer type differs from the uint8_t overload: reinterpret the
  // buffer and forward every argument unchanged.
  return build_parsed_json(reinterpret_cast<const uint8_t *>(buf), len, reallocifneeded);
}
// convenience function

View File

@ -1,10 +1,10 @@
#ifndef SIMDJSON_NUMBERPARSING_H
#define SIMDJSON_NUMBERPARSING_H
#include "simdjson/portability.h"
#include "simdjson/common_defs.h"
#include "simdjson/jsoncharutils.h"
#include "simdjson/parsedjson.h"
#include "simdjson/portability.h"
static const double power_of_ten[] = {
1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300,
@ -141,7 +141,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1);
const __m128i mul_1_10000 =
_mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1);
const __m128i input = _mm_sub_epi8(_mm_loadu_si128((const __m128i *)chars), ascii0);
const __m128i input = _mm_sub_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(chars)), ascii0);
const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10);
const __m128i t2 = _mm_madd_epi16(t1, mul_1_100);
const __m128i t3 = _mm_packus_epi32(t2, t2);
@ -166,7 +166,7 @@ static never_inline bool
parse_float(const uint8_t *const buf,
ParsedJson &pj, const uint32_t offset,
bool found_minus) {
const char *p = (const char *)(buf + offset);
const char *p = reinterpret_cast<const char *>(buf + offset);
bool negative = false;
if (found_minus) {
++p;
@ -280,7 +280,7 @@ static never_inline bool parse_large_integer(const uint8_t *const buf,
ParsedJson &pj,
const uint32_t offset,
bool found_minus) {
const char *p = (const char *)(buf + offset);
const char *p = reinterpret_cast<const char *>(buf + offset);
bool negative = false;
if (found_minus) {
@ -352,7 +352,7 @@ static really_inline bool parse_number(const uint8_t *const buf,
pj.write_tape_s64(0); // always write zero
return true; // always succeeds
#else
const char *p = (const char *)(buf + offset);
const char *p = reinterpret_cast<const char *>(buf + offset);
bool negative = false;
if (found_minus) {
++p;

View File

@ -1,14 +1,15 @@
#ifndef SIMDJSON_PARSEDJSON_H
#define SIMDJSON_PARSEDJSON_H
#include <cinttypes>
#include <cmath>
#include <cstring>
#include <iomanip>
#include <iostream>
#include "simdjson/portability.h"
#include "simdjson/jsonformatutils.h"
#include "simdjson/common_defs.h"
#include "simdjson/jsonformatutils.h"
#include "simdjson/portability.h"
#define JSONVALUEMASK 0xFFFFFFFFFFFFFF
@ -67,12 +68,12 @@ public:
// this should be considered a private function
really_inline void write_tape(uint64_t val, uint8_t c) {
  // Pack the tag byte c into bits 56-63 of the tape word, store the word at
  // current_loc, and advance current_loc by exactly one slot.
  tape[current_loc++] = val | (static_cast<uint64_t>(c) << 56);
}
really_inline void write_tape_s64(int64_t i) {
  // An integer takes two tape slots: an 'l' tag word, then the value itself.
  write_tape(0, 'l');
  // A value conversion (modulo 2^64) stores the same bits as the old pointer
  // pun without going through a reinterpreted pointer; the printing code
  // converts the slot back with static_cast<int64_t>.
  tape[current_loc++] = static_cast<uint64_t>(i);
}
really_inline void write_tape_double(double d) {
@ -192,7 +193,7 @@ public:
private:
iterator& operator=(const iterator& other) ;
iterator& operator=(const iterator& other) = delete ;
ParsedJson &pj;
size_t depth;
@ -203,13 +204,13 @@ private:
scopeindex_t *depthindex;
};
size_t bytecapacity; // indicates how many bits are meant to be supported
size_t bytecapacity{0}; // indicates how many bits are meant to be supported
size_t depthcapacity; // how deep we can go
size_t tapecapacity;
size_t stringcapacity;
uint32_t current_loc;
uint32_t n_structural_indexes;
size_t depthcapacity{0}; // how deep we can go
size_t tapecapacity{0};
size_t stringcapacity{0};
uint32_t current_loc{0};
uint32_t n_structural_indexes{0};
uint32_t *structural_indexes;
@ -223,10 +224,13 @@ private:
uint8_t *string_buf; // should be at least bytecapacity
uint8_t *current_string_buf_loc;
bool isvalid;
bool isvalid{false};
private :
ParsedJson(const ParsedJson & p) = delete;
// we don't want the copy constructor to be called
ParsedJson(const ParsedJson & p) = delete; // we don't want the default constructor to be called
// we don't want the assignment to be called
ParsedJson & operator=(const ParsedJson&o) = delete;
};
@ -234,14 +238,14 @@ private :
// dump bits low to high
// Writes the 64 bits of v to std::cout, least-significant bit first ("1" for a
// set bit, "_" for a clear one), followed by a space, msg and a newline.
inline void dumpbits_always(uint64_t v, const std::string &msg) {
  for (uint32_t i = 0; i < 64; i++) {
    // One character per bit; the shift count needs no cast.
    std::cout << (((v >> i) & 0x1ULL) ? "1" : "_");
  }
  std::cout << " " << msg.c_str() << "\n";
}
// Writes the 32 bits of v to std::cout, least-significant bit first ("1" for a
// set bit, "_" for a clear one), followed by a space, msg and a newline.
inline void dumpbits32_always(uint32_t v, const std::string &msg) {
  for (uint32_t i = 0; i < 32; i++) {
    // One character per bit; the shift count needs no cast.
    std::cout << (((v >> i) & 0x1U) ? "1" : "_");
  }
  std::cout << " " << msg.c_str() << "\n";
}

View File

@ -42,8 +42,8 @@ static inline int hamming(uint64_t input_num) {
}
#else
#include <x86intrin.h>
#include <cstdint>
#include <x86intrin.h>
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
return __builtin_uaddll_overflow(value1, value2, (unsigned long long*)result);
@ -86,7 +86,7 @@ static inline void *aligned_malloc(size_t alignment, size_t size) {
#else
// somehow, if this is used before including "x86intrin.h", it creates an
// implicit defined warning.
if (posix_memalign(&p, alignment, size) != 0) return NULL;
if (posix_memalign(&p, alignment, size) != 0) { return nullptr; }
#endif
return p;
}
@ -114,7 +114,7 @@ static inline void _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo,
static inline void aligned_free(void *memblock) {
if(memblock == NULL) return;
if(memblock == nullptr) { return; }
#ifdef _MSC_VER
_aligned_free(memblock);
#elif defined(__MINGW32__) || defined(__MINGW64__)
@ -124,4 +124,4 @@ static inline void aligned_free(void *memblock) {
#endif
}
#endif /* end of include PORTABILITY_H */
#endif // SIMDJSON_PORTABILITY_H

View File

@ -34989,7 +34989,7 @@ static const unsigned char mask128_epi32[] = {
#ifdef __AVX2__
#include <stdint.h>
#include <cstdint>
static const uint32_t mask256_epi32[] = {
0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 7, 0, 2, 3, 4, 5, 6, 7, 7, 2,

View File

@ -2,7 +2,7 @@
#ifndef SIMDJSON_SIMDUTF8CHECK_H
#define SIMDJSON_SIMDUTF8CHECK_H
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
@ -168,7 +168,7 @@ static struct avx_processed_utf_bytes
avxcheckUTF8Bytes(__m256i current_bytes,
struct avx_processed_utf_bytes *previous,
__m256i *has_error) {
struct avx_processed_utf_bytes pb;
struct avx_processed_utf_bytes pb{};
avx_count_nibbles(current_bytes, &pb);
avxcheckSmallerThan0xF4(current_bytes, has_error);

View File

@ -9,7 +9,7 @@ bool find_structural_bits(const uint8_t *buf, size_t len, ParsedJson &pj);
WARN_UNUSED
static inline bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
  // char-based convenience overload: reinterpret the buffer as uint8_t and
  // forward to the uint8_t overload, returning its result unchanged.
  return find_structural_bits(reinterpret_cast<const uint8_t *>(buf), len, pj);
}
#endif

View File

@ -1,5 +1,5 @@
#ifndef SIMDJSON_STAGE34_UNIFIED_H
#define SIMDJSON_STAGE34_UNIFIED_H
#ifndef SIMDJSON_STAGE2_BUILD_TAPE_H
#define SIMDJSON_STAGE2_BUILD_TAPE_H
#include "simdjson/common_defs.h"
#include "simdjson/parsedjson.h"
@ -12,7 +12,7 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj);
WARN_UNUSED
static inline bool unified_machine(const char *buf, size_t len, ParsedJson &pj) {
  // char-based convenience overload: reinterpret the buffer as uint8_t and
  // forward to the uint8_t overload, returning its result unchanged.
  return unified_machine(reinterpret_cast<const uint8_t *>(buf), len, pj);
}
#endif

View File

@ -2,8 +2,8 @@
#define SIMDJSON_STRINGPARSING_H
#include "simdjson/common_defs.h"
#include "simdjson/parsedjson.h"
#include "simdjson/jsoncharutils.h"
#include "simdjson/parsedjson.h"
// begin copypasta
@ -85,11 +85,11 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
uint8_t *const start_of_string = dst;
#endif
while (1) {
__m256i v = _mm256_loadu_si256((const __m256i *)(src));
uint32_t bs_bits =
(uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\')));
uint32_t quote_bits =
(uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('"')));
__m256i v = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
auto bs_bits =
static_cast<uint32_t>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\'))));
auto quote_bits =
static_cast<uint32_t>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('"'))));
#define CHECKUNESCAPED
// All Unicode characters may be placed within the
// quotation marks, except for the characters that MUST be escaped:
@ -105,7 +105,7 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
uint32_t bs_dist = trailingzeroes(bs_bits);
// store to dest unconditionally - we can overwrite the bits we don't like
// later
_mm256_storeu_si256((__m256i *)(dst), v);
_mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), v);
if (quote_dist < bs_dist) {
// we encountered quotes first. Move dst to point to quotes and exit
dst[quote_dist] = 0; // null terminate and get out
@ -115,7 +115,7 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
pj.current_string_buf_loc = dst + quote_dist + 1; // the +1 is due to the 0 value
#ifdef CHECKUNESCAPED
// check that there is no unescaped char before the quote
uint32_t unescaped_bits = (uint32_t)_mm256_movemask_epi8(unescaped_vec);
auto unescaped_bits = static_cast<uint32_t>(_mm256_movemask_epi8(unescaped_vec));
bool is_ok = ((quote_bits - 1) & (~ quote_bits) & unescaped_bits) == 0;
#ifdef JSON_TEST_STRINGS // for unit testing
if(is_ok) foundString(buf + offset,start_of_string,pj.current_string_buf_loc - 1);
@ -128,11 +128,11 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
#endif // JSON_TEST_STRINGS
return true;
#endif //CHECKUNESCAPED
} else if (quote_dist > bs_dist) {
} if (quote_dist > bs_dist) {
uint8_t escape_char = src[bs_dist + 1];
#ifdef CHECKUNESCAPED
// we are going to need the unescaped_bits to check for unescaped chars
uint32_t unescaped_bits = (uint32_t)_mm256_movemask_epi8(unescaped_vec);
auto unescaped_bits = static_cast<uint32_t>(_mm256_movemask_epi8(unescaped_vec));
if(((bs_bits - 1) & (~ bs_bits) & unescaped_bits) != 0) {
#ifdef JSON_TEST_STRINGS // for unit testing
foundBadString(buf + offset);
@ -158,7 +158,7 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
// note this may reach beyond the part of the buffer we've actually
// seen. I think this is ok
uint8_t escape_result = escape_map[escape_char];
if (!escape_result) {
if (escape_result == 0u) {
#ifdef JSON_TEST_STRINGS // for unit testing
foundBadString(buf + offset);
#endif // JSON_TEST_STRINGS

View File

@ -1,6 +1,6 @@
#include "simdjson/jsonioutil.h"
#include <cstring>
#include <stdlib.h>
#include <cstdlib>
char * allocate_padded_buffer(size_t length) {
// we could do a simple malloc
@ -13,18 +13,19 @@ char * allocate_padded_buffer(size_t length) {
#elif defined(__MINGW32__) || defined(__MINGW64__)
padded_buffer = __mingw_aligned_malloc(totalpaddedlength, 64);
#else
if (posix_memalign((void **)&padded_buffer, 64, totalpaddedlength) != 0) return NULL;
if (posix_memalign(reinterpret_cast<void **>(&padded_buffer), 64, totalpaddedlength) != 0) { return nullptr;
}
#endif
return padded_buffer;
}
std::string_view get_corpus(std::string filename) {
std::string_view get_corpus(const std::string& filename) {
std::FILE *fp = std::fopen(filename.c_str(), "rb");
if (fp) {
if (fp != nullptr) {
std::fseek(fp, 0, SEEK_END);
size_t len = std::ftell(fp);
char * buf = allocate_padded_buffer(len);
if(buf == NULL) {
if(buf == nullptr) {
std::fclose(fp);
throw std::runtime_error("could not allocate memory");
}

View File

@ -1,5 +1,5 @@
#include <cstdint>
#include "simdjson/portability.h"
#include <cstdint>
#ifndef __AVX2__
@ -66,7 +66,7 @@ size_t jsonminify(const unsigned char *bytes, size_t howmany,
static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,
__m256i mask) {
__m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
uint64_t res_0 = (uint32_t)_mm256_movemask_epi8(cmp_res_0);
uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
__m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
return res_0 | (res_1 << 32);
@ -87,8 +87,8 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
size_t avxlen = len - 63;
for (; idx < avxlen; idx += 64) {
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buf + idx + 0));
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buf + idx + 32));
__m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 0));
__m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 32));
uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi,
_mm256_set1_epi8('\\'));
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
@ -112,7 +112,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// might be undefined behavior, should be fully defined in C++20, ok according to John Regher from Utah University
prev_iter_inside_quote = static_cast<uint64_t>(static_cast<int64_t>(quote_mask) >> 63);// might be undefined behavior, should be fully defined in C++20, ok according to John Regher from Utah University
const __m256i low_nibble_mask = _mm256_setr_epi8(
// 0 9 a b c d
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
@ -138,7 +138,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
__m256i tmp_ws_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
whitespace &= ~quote_mask;
@ -151,15 +151,15 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
int pop4 = hamming((~whitespace));
__m256i vmask1 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF));
_mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask2 & 0x7FFF),
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask1 & 0x7FFF));
__m256i vmask2 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF));
_mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask4 & 0x7FFF),
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask3 & 0x7FFF));
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
_mm256_storeu2_m128i((__m128i *)(out + pop1), (__m128i *)out, result1);
_mm256_storeu2_m128i((__m128i *)(out + pop3), (__m128i *)(out + pop2),
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop1), reinterpret_cast<__m128i *>(out), result1);
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop3), reinterpret_cast<__m128i *>(out + pop2),
result2);
out += pop4;
}
@ -170,8 +170,8 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
uint8_t buffer[64];
memset(buffer, 0, 64);
memcpy(buffer, buf + idx, len - idx);
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buffer));
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buffer + 32));
__m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buffer));
__m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buffer + 32));
uint64_t bs_bits =
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\'));
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
@ -213,7 +213,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
__m256i tmp_ws_hi = _mm256_or_si256(
_mm256_cmpeq_epi8(mask_20, input_hi),
_mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_hi)));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32));
whitespace &= ~quote_mask;
@ -230,16 +230,16 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
int pop4 = hamming((~whitespace));
__m256i vmask1 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF));
_mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask2 & 0x7FFF),
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask1 & 0x7FFF));
__m256i vmask2 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF));
_mm256_loadu2_m128i(reinterpret_cast<const __m128i *>(mask128_epi8) + (mask4 & 0x7FFF),
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask3 & 0x7FFF));
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
_mm256_storeu2_m128i((__m128i *)(buffer + pop1), (__m128i *)buffer,
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop1), reinterpret_cast<__m128i *>(buffer),
result1);
_mm256_storeu2_m128i((__m128i *)(buffer + pop3), (__m128i *)(buffer + pop2),
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop3), reinterpret_cast<__m128i *>(buffer + pop2),
result2);
memcpy(out, buffer, pop4);
out += pop4;

View File

@ -7,10 +7,10 @@
#endif
extern bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded);
extern bool json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded);
extern ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded);
extern ParsedJson build_parsed_json(const std::string_view &s, bool reallocifneeded);
// parse a document found in buf, need to preallocate ParsedJson.
@ -33,8 +33,9 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
#endif
if ( (reinterpret_cast<uintptr_t>(buf + len - 1) % pagesize ) < SIMDJSON_PADDING ) {
const uint8_t *tmpbuf = buf;
buf = (uint8_t *) allocate_padded_buffer(len);
if(buf == NULL) return false;
buf = reinterpret_cast<uint8_t *>(allocate_padded_buffer(len));
if(buf == nullptr) { return false;
}
memcpy((void*)buf,tmpbuf,len);
reallocated = true;
}
@ -43,10 +44,12 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
if (isok) {
isok = unified_machine(buf, len, pj);
} else {
if(reallocated) free((void*)buf);
if(reallocated) { free((void*)buf);
}
return false;
}
if(reallocated) free((void*)buf);
if(reallocated) { free((void*)buf);
}
return isok;
}

View File

@ -1,34 +1,33 @@
#include "simdjson/parsedjson.h"
// Default constructor: creates an empty, invalid document that owns no
// buffers. Every member is initialized here, so the result does not depend
// on in-class default member initializers being present.
ParsedJson::ParsedJson()
    : bytecapacity(0),
      depthcapacity(0),
      tapecapacity(0),
      stringcapacity(0),
      current_loc(0),
      n_structural_indexes(0),
      structural_indexes(nullptr),
      tape(nullptr),
      containing_scope_offset(nullptr),
      ret_address(nullptr),
      string_buf(nullptr),
      current_string_buf_loc(nullptr),
      isvalid(false) {}
// Destructor: hands all cleanup to deallocate().
ParsedJson::~ParsedJson() { deallocate(); }
// Move constructor: takes over p's buffers and bookkeeping, then leaves p in
// the same state as a default-constructed object (null buffers, zero
// capacities, invalid).
ParsedJson::ParsedJson(ParsedJson && p)
    : bytecapacity(p.bytecapacity),
      depthcapacity(p.depthcapacity),
      tapecapacity(p.tapecapacity),
      stringcapacity(p.stringcapacity),
      current_loc(p.current_loc),
      n_structural_indexes(p.n_structural_indexes),
      structural_indexes(p.structural_indexes),
      tape(p.tape),
      containing_scope_offset(p.containing_scope_offset),
      ret_address(p.ret_address),
      string_buf(p.string_buf),
      current_string_buf_loc(p.current_string_buf_loc),
      isvalid(p.isvalid) {
  // Detach the buffers from p so its destructor does not free them too.
  p.structural_indexes = nullptr;
  p.tape = nullptr;
  p.containing_scope_offset = nullptr;
  p.ret_address = nullptr;
  p.string_buf = nullptr;
  p.current_string_buf_loc = nullptr;
  // Also clear the bookkeeping: stale capacities or a stale isvalid flag
  // would let later calls on p assume buffers that are now null.
  p.bytecapacity = 0;
  p.depthcapacity = 0;
  p.tapecapacity = 0;
  p.stringcapacity = 0;
  p.current_loc = 0;
  p.n_structural_indexes = 0;
  p.isvalid = false;
}
@ -40,8 +39,9 @@ bool ParsedJson::allocateCapacity(size_t len, size_t maxdepth) {
return false;
}
if (len > 0) {
if ((len <= bytecapacity) && (depthcapacity < maxdepth))
if ((len <= bytecapacity) && (depthcapacity < maxdepth)) {
return true;
}
deallocate();
}
isvalid = false;
@ -59,14 +59,15 @@ bool ParsedJson::allocateCapacity(size_t len, size_t maxdepth) {
#else
ret_address = new (std::nothrow) char[maxdepth];
#endif
if ((string_buf == NULL) || (tape == NULL) ||
(containing_scope_offset == NULL) || (ret_address == NULL) || (structural_indexes == NULL)) {
if ((string_buf == nullptr) || (tape == nullptr) ||
(containing_scope_offset == nullptr) || (ret_address == nullptr) || (structural_indexes == nullptr)) {
std::cerr << "Could not allocate memory" << std::endl;
if(ret_address != NULL) delete[] ret_address;
if(containing_scope_offset != NULL) delete[] containing_scope_offset;
if(tape != NULL) delete[] tape;
if(string_buf != NULL) delete[] string_buf;
if(structural_indexes != NULL) delete[] structural_indexes;
delete[] ret_address;
delete[] containing_scope_offset;
delete[] tape;
delete[] string_buf;
delete[] structural_indexes;
return false;
}
@ -86,11 +87,16 @@ void ParsedJson::deallocate() {
depthcapacity = 0;
tapecapacity = 0;
stringcapacity = 0;
if(ret_address != NULL) delete[] ret_address;
if(containing_scope_offset != NULL) delete[] containing_scope_offset;
if(tape != NULL) delete[] tape;
if(string_buf != NULL) delete[] string_buf;
if(structural_indexes != NULL) delete[] structural_indexes;
{delete[] ret_address;
}
{delete[] containing_scope_offset;
}
{delete[] tape;
}
{delete[] string_buf;
}
{delete[] structural_indexes;
}
isvalid = false;
}
@ -102,7 +108,8 @@ void ParsedJson::init() {
WARN_UNUSED
bool ParsedJson::printjson(std::ostream &os) {
if(!isvalid) return false;
if(!isvalid) { return false;
}
size_t tapeidx = 0;
uint64_t tape_val = tape[tapeidx];
uint8_t type = (tape_val >> 56);
@ -120,7 +127,7 @@ bool ParsedJson::printjson(std::ostream &os) {
}
tapeidx++;
bool *inobject = new bool[depthcapacity];
size_t *inobjectidx = new size_t[depthcapacity];
auto *inobjectidx = new size_t[depthcapacity];
int depth = 1; // only root at level 0
inobjectidx[depth] = 0;
inobject[depth] = false;
@ -129,15 +136,18 @@ bool ParsedJson::printjson(std::ostream &os) {
uint64_t payload = tape_val & JSONVALUEMASK;
type = (tape_val >> 56);
if (!inobject[depth]) {
if ((inobjectidx[depth] > 0) && (type != ']'))
if ((inobjectidx[depth] > 0) && (type != ']')) {
os << ",";
}
inobjectidx[depth]++;
} else { // if (inobject) {
if ((inobjectidx[depth] > 0) && ((inobjectidx[depth] & 1) == 0) &&
(type != '}'))
(type != '}')) {
os << ",";
if (((inobjectidx[depth] & 1) == 1))
}
if (((inobjectidx[depth] & 1) == 1)) {
os << ":";
}
inobjectidx[depth]++;
}
switch (type) {
@ -147,13 +157,15 @@ bool ParsedJson::printjson(std::ostream &os) {
os << '"';
break;
case 'l': // we have a long int
if (tapeidx + 1 >= howmany)
if (tapeidx + 1 >= howmany) {
return false;
os << (int64_t)tape[++tapeidx];
}
os << static_cast<int64_t>(tape[++tapeidx]);
break;
case 'd': // we have a double
if (tapeidx + 1 >= howmany)
if (tapeidx + 1 >= howmany) {
return false;
}
double answer;
memcpy(&answer, &tape[++tapeidx], sizeof(answer));
os << answer;
@ -206,7 +218,8 @@ bool ParsedJson::printjson(std::ostream &os) {
WARN_UNUSED
bool ParsedJson::dump_raw_tape(std::ostream &os) {
if(!isvalid) return false;
if(!isvalid) { return false;
}
size_t tapeidx = 0;
uint64_t tape_val = tape[tapeidx];
uint8_t type = (tape_val >> 56);
@ -234,14 +247,16 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) {
os << '\n';
break;
case 'l': // we have a long int
if (tapeidx + 1 >= howmany)
if (tapeidx + 1 >= howmany) {
return false;
os << "integer " << (int64_t)tape[++tapeidx] << "\n";
}
os << "integer " << static_cast<int64_t>(tape[++tapeidx]) << "\n";
break;
case 'd': // we have a double
os << "float ";
if (tapeidx + 1 >= howmany)
if (tapeidx + 1 >= howmany) {
return false;
}
double answer;
memcpy(&answer, &tape[++tapeidx], sizeof(answer));
os << answer << '\n';

View File

@ -1,10 +1,11 @@
#include "simdjson/parsedjson.h"
#include "simdjson/common_defs.h"
ParsedJson::iterator::iterator(ParsedJson &pj_) : pj(pj_), depth(0), location(0), tape_length(0), depthindex(NULL) {
ParsedJson::iterator::iterator(ParsedJson &pj_) : pj(pj_), depth(0), location(0), tape_length(0), depthindex(nullptr) {
if(pj.isValid()) {
depthindex = new scopeindex_t[pj.depthcapacity];
if(depthindex == NULL) return;
if(depthindex == nullptr) { return;
}
depthindex[0].start_of_scope = location;
current_val = pj.tape[location++];
current_type = (current_val >> 56);
@ -29,9 +30,9 @@ ParsedJson::iterator::~iterator() {
ParsedJson::iterator::iterator(const iterator &o):
pj(o.pj), depth(o.depth), location(o.location),
tape_length(o.tape_length), current_type(o.current_type),
current_val(o.current_val), depthindex(NULL) {
current_val(o.current_val), depthindex(nullptr) {
depthindex = new scopeindex_t[pj.depthcapacity];
if(depthindex != NULL) {
if(depthindex != nullptr) {
memcpy(o.depthindex, depthindex, pj.depthcapacity * sizeof(depthindex[0]));
} else {
tape_length = 0;
@ -39,10 +40,10 @@ ParsedJson::iterator::iterator(const iterator &o):
}
// Move constructor: copies the scalar cursor state (depth, location,
// tape_length, current_type, current_val) from `o`, keeps referring to the
// same ParsedJson, and takes over o's depthindex array.
ParsedJson::iterator::iterator(iterator &&o):
    pj(o.pj), depth(o.depth), location(o.location),
    tape_length(o.tape_length), current_type(o.current_type),
    current_val(o.current_val), depthindex(o.depthindex) {
  // We take ownership: clear the source pointer so `o` no longer refers to
  // the array (presumably so its destructor does not free it -- the
  // destructor is not visible here, confirm).
  o.depthindex = nullptr;
}
WARN_UNUSED
@ -106,19 +107,21 @@ uint8_t ParsedJson::iterator::get_type() const {
// Returns the tape word that follows the current location, reinterpreted as
// a signed 64-bit integer. Returns 0 when that word would sit at or beyond
// tape_length. The current element's type is not checked here.
int64_t ParsedJson::iterator::get_integer() const {
  if (location + 1 >= tape_length) {
    return 0; // default value in case of error
  }
  return static_cast<int64_t>(pj.tape[location + 1]);
}
// Returns the tape word that follows the current location, with its bytes
// read back as a double. Returns NAN when that word would sit at or beyond
// tape_length. The current element's type is not checked here.
double ParsedJson::iterator::get_double() const {
  if (location + 1 >= tape_length) {
    return NAN; // default value in case of error
  }
  double answer;
  // Copy the raw bytes rather than converting the integer value: the tape
  // word holds the double's bit pattern.
  memcpy(&answer, &pj.tape[location + 1], sizeof(answer));
  return answer;
}
// Returns a pointer into pj.string_buf at the offset stored in the payload
// bits of the current tape word (current_val & JSONVALUEMASK). The pointer
// is non-owning. The current element's type is not checked here.
const char * ParsedJson::iterator::get_string() const {
  return reinterpret_cast<const char *>(pj.string_buf + (current_val & JSONVALUEMASK));
}
@ -156,7 +159,8 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
assert(is_string());
bool rightkey = (strcmp(get_string(),key)==0);
next();
if(rightkey) return true;
if(rightkey) { return true;
}
} while(next());
assert(up());// not found
}
@ -180,9 +184,10 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
current_val = nextval;
current_type = nexttype;
return true;
} else {
}
size_t increment = (current_type == 'd' || current_type == 'l') ? 2 : 1;
if(location + increment >= tape_length) return false;
if(location + increment >= tape_length) { return false;
}
uint64_t nextval = pj.tape[location + increment];
uint8_t nexttype = (nextval >> 56);
if((nexttype == ']') || (nexttype == '}')) {
@ -192,12 +197,13 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
current_val = nextval;
current_type = nexttype;
return true;
}
}
bool ParsedJson::iterator::prev() {
if(location - 1 < depthindex[depth].start_of_scope) return false;
if(location - 1 < depthindex[depth].start_of_scope) { return false;
}
location -= 1;
current_val = pj.tape[location];
current_type = (current_val >> 56);
@ -230,7 +236,8 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
bool ParsedJson::iterator::down() {
if(location + 1 >= tape_length) return false;
if(location + 1 >= tape_length) { return false;
}
if ((current_type == '[') || (current_type == '{')) {
size_t npos = (current_val & JSONVALUEMASK);
if(npos == location + 2) {
@ -254,7 +261,8 @@ void ParsedJson::iterator::to_start_scope() {
}
bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const {
if(!isOk()) return false;
if(!isOk()) { return false;
}
switch (current_type) {
case '"': // we have a string
os << '"';
@ -284,7 +292,7 @@ bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const {
case '}': // we end an object
case '[': // we start an array
case ']': // we end an array
os << (char) current_type;
os << static_cast<char>(current_type);
break;
default:
return false;

View File

@ -1,7 +1,7 @@
#include "simdjson/portability.h"
#include <cassert>
#include "simdjson/common_defs.h"
#include "simdjson/parsedjson.h"
#include <cassert>
#ifndef SIMDJSON_SKIPUTF8VALIDATION
#define SIMDJSON_UTF8VALIDATE
@ -21,7 +21,7 @@ using namespace std;
really_inline uint64_t cmp_mask_against_input(__m256i input_lo, __m256i input_hi,
__m256i mask) {
__m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
uint64_t res_0 = (uint32_t)_mm256_movemask_epi8(cmp_res_0);
uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
__m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
return res_0 | (res_1 << 32);
@ -38,7 +38,7 @@ WARN_UNUSED
uint32_t base = 0;
#ifdef SIMDJSON_UTF8VALIDATE
__m256i has_error = _mm256_setzero_si256();
struct avx_processed_utf_bytes previous;
struct avx_processed_utf_bytes previous{};
previous.rawbytes = _mm256_setzero_si256();
previous.high_nibbles = _mm256_setzero_si256();
previous.carried_continuations = _mm256_setzero_si256();
@ -66,8 +66,8 @@ WARN_UNUSED
#ifndef _MSC_VER
__builtin_prefetch(buf + idx + 128);
#endif
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buf + idx + 0));
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buf + idx + 32));
__m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 0));
__m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 32));
#ifdef SIMDJSON_UTF8VALIDATE
__m256i highbit = _mm256_set1_epi8(0x80);
if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) {
@ -130,29 +130,29 @@ WARN_UNUSED
uint32_t cnt = hamming(structurals);
uint32_t next_base = base + cnt;
while (structurals) {
base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals);
while (structurals != 0u) {
base_ptr[base + 0] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 1] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 2] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 3] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 4] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 5] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 6] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 7] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base += 8;
}
base = next_base;
quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20, John Regher from Utah U. says this is fine code
prev_iter_inside_quote = static_cast<uint64_t>(static_cast<int64_t>(quote_mask) >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20, John Regher from Utah U. says this is fine code
// How do we build up a user traversable data structure
// first, do a 'shufti' to detect structural JSON characters
@ -190,7 +190,7 @@ WARN_UNUSED
__m256i tmp_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0));
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo);
uint64_t structural_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_lo));
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
structurals = ~(structural_res_0 | (structural_res_1 << 32));
@ -201,7 +201,7 @@ WARN_UNUSED
__m256i tmp_ws_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
// mask off anything inside quotes
@ -244,8 +244,8 @@ WARN_UNUSED
uint8_t tmpbuf[64];
memset(tmpbuf,0x20,64);
memcpy(tmpbuf,buf+idx,len - idx);
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(tmpbuf + 0));
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(tmpbuf + 32));
__m256i input_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(tmpbuf + 0));
__m256i input_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(tmpbuf + 32));
#ifdef SIMDJSON_UTF8VALIDATE
__m256i highbit = _mm256_set1_epi8(0x80);
if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) {
@ -308,22 +308,22 @@ WARN_UNUSED
uint32_t cnt = hamming(structurals);
uint32_t next_base = base + cnt;
while (structurals) {
base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals);
while (structurals != 0u) {
base_ptr[base + 0] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 1] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 2] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 3] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 4] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 5] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 6] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 7] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base += 8;
}
@ -364,7 +364,7 @@ WARN_UNUSED
__m256i tmp_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0));
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo);
uint64_t structural_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_lo));
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
structurals = ~(structural_res_0 | (structural_res_1 << 32));
@ -375,7 +375,7 @@ WARN_UNUSED
__m256i tmp_ws_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
@ -412,22 +412,22 @@ WARN_UNUSED
}
uint32_t cnt = hamming(structurals);
uint32_t next_base = base + cnt;
while (structurals) {
base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals);
while (structurals != 0u) {
base_ptr[base + 0] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 1] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 2] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 3] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 4] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 5] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 6] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals);
base_ptr[base + 7] = static_cast<uint32_t>(idx) - 64 + trailingzeroes(structurals);
structurals = structurals & (structurals - 1);
base += 8;
}
@ -435,7 +435,7 @@ WARN_UNUSED
pj.n_structural_indexes = base;
// a valid JSON file cannot have zero structural indexes - we should have found something
if (!pj.n_structural_indexes) {
if (pj.n_structural_indexes == 0u) {
return false;
}
if(base_ptr[pj.n_structural_indexes-1] > len) {
@ -449,7 +449,7 @@ WARN_UNUSED
base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array
#ifdef SIMDJSON_UTF8VALIDATE
return _mm256_testz_si256(has_error, has_error);
return _mm256_testz_si256(has_error, has_error) != 0;
#else
return true;
#endif

View File

@ -22,7 +22,7 @@ using namespace std;
WARN_UNUSED
really_inline bool is_valid_true_atom(const uint8_t *loc) {
uint64_t tv = *(const uint64_t *)"true ";
uint64_t tv = *reinterpret_cast<const uint64_t *>("true ");
uint64_t mask4 = 0x00000000ffffffff;
uint32_t error = 0;
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
@ -34,7 +34,7 @@ really_inline bool is_valid_true_atom(const uint8_t *loc) {
WARN_UNUSED
really_inline bool is_valid_false_atom(const uint8_t *loc) {
uint64_t fv = *(const uint64_t *)"false ";
uint64_t fv = *reinterpret_cast<const uint64_t *>("false ");
uint64_t mask5 = 0x000000ffffffffff;
uint32_t error = 0;
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
@ -46,7 +46,7 @@ really_inline bool is_valid_false_atom(const uint8_t *loc) {
WARN_UNUSED
really_inline bool is_valid_null_atom(const uint8_t *loc) {
uint64_t nv = *(const uint64_t *)"null ";
uint64_t nv = *reinterpret_cast<const uint64_t *>("null ");
uint64_t mask4 = 0x00000000ffffffff;
uint32_t error = 0;
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
@ -141,11 +141,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// we need to make a copy to make sure that the string is NULL terminated.
// this only applies to the JSON document made solely of the true value.
// this will almost never be called in practice
char * copy = (char *) malloc(len + SIMDJSON_PADDING);
if(copy == NULL) goto fail;
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == nullptr) { goto fail;
}
memcpy(copy, buf, len);
copy[len] = '\0';
if (!is_valid_true_atom((const uint8_t *)copy + idx)) {
if (!is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
free(copy);
goto fail;
}
@ -157,11 +158,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// we need to make a copy to make sure that the string is NULL terminated.
// this only applies to the JSON document made solely of the false value.
// this will almost never be called in practice
char * copy = (char *) malloc(len + SIMDJSON_PADDING);
if(copy == NULL) goto fail;
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == nullptr) { goto fail;
}
memcpy(copy, buf, len);
copy[len] = '\0';
if (!is_valid_false_atom((const uint8_t *)copy + idx)) {
if (!is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
free(copy);
goto fail;
}
@ -173,11 +175,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// we need to make a copy to make sure that the string is NULL terminated.
// this only applies to the JSON document made solely of the null value.
// this will almost never be called in practice
char * copy = (char *) malloc(len + SIMDJSON_PADDING);
if(copy == NULL) goto fail;
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == nullptr) { goto fail;
}
memcpy(copy, buf, len);
copy[len] = '\0';
if (!is_valid_null_atom((const uint8_t *)copy + idx)) {
if (!is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
free(copy);
goto fail;
}
@ -198,11 +201,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// we need to make a copy to make sure that the string is NULL terminated.
// this is done only for JSON documents made of a sole number
// this will almost never be called in practice
char * copy = (char *) malloc(len + SIMDJSON_PADDING);
if(copy == NULL) goto fail;
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == nullptr) { goto fail;
}
memcpy(copy, buf, len);
copy[len] = '\0';
if (!parse_number((const uint8_t *)copy, pj, idx, false)) {
if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, false)) {
free(copy);
goto fail;
}
@ -213,11 +217,12 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
// we need to make a copy to make sure that the string is NULL terminated.
// this is done only for JSON documents made of a sole number
// this will almost never be called in practice
char * copy = (char *) malloc(len + SIMDJSON_PADDING);
if(copy == NULL) goto fail;
char * copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if(copy == nullptr) { goto fail;
}
memcpy(copy, buf, len);
copy[len] = '\0';
if (!parse_number((const uint8_t *)copy, pj, idx, true)) {
if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, true)) {
free(copy);
goto fail;
}

View File

@ -106,18 +106,17 @@ int main(int argc, char *argv[]) {
void *state;
bool ultrajson_correct = ((UJDecode(buffer, p.size(), NULL, &state) == NULL) == false);
jsmntok_t * tokens = new jsmntok_t[p.size()];
auto * tokens = make_unique<jsmntok_t[](p.size());
bool jsmn_correct = false;
if(tokens == NULL) {
if(tokens == nullptr) {
printf("Failed to alloc memory for jsmn\n");
} else {
jsmn_parser parser;
jsmn_init(&parser);
memcpy(buffer, p.data(), p.size());
buffer[p.size()] = '\0';
int r = jsmn_parse(&parser, buffer, p.size(), tokens, p.size());
delete[] tokens;
tokens = NULL;
int r = jsmn_parse(&parser, buffer, p.size(), tokens.get(), p.size());
tokens = nullptr;
jsmn_correct = (r > 0);
}

View File

@ -1,4 +1,4 @@
#include <assert.h>
#include <cassert>
#include <cstring>
#ifndef _MSC_VER
#include <dirent.h>
@ -7,10 +7,10 @@
// Microsoft can't be bothered to provide standard utils.
#include <dirent_portable.h>
#endif
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <cinttypes>
#include <cstdio>
#include <cstdlib>
#include "simdjson/jsonparser.h"
@ -19,7 +19,7 @@
*/
static bool hasExtension(const char *filename, const char *extension) {
  // Compare everything from the last '.' onward, so `extension` must include
  // the dot (e.g. ".json"). A name with no '.' at all never matches.
  const char *ext = strrchr(filename, '.');
  return (ext != nullptr) && (strcmp(ext, extension) == 0);
}
bool startsWith(const char *pre, const char *str) {
@ -28,7 +28,7 @@ bool startsWith(const char *pre, const char *str) {
}
// Returns true when the C string `pre` occurs anywhere inside `str`.
// Note the argument order: the substring to look for comes first.
bool contains(const char *pre, const char *str) {
  return strstr(str, pre) != nullptr;
}
@ -37,7 +37,7 @@ bool validate(const char *dirname) {
const char *extension = ".json";
size_t dirlen = strlen(dirname);
struct dirent **entry_list;
int c = scandir(dirname, &entry_list, 0, alphasort);
int c = scandir(dirname, &entry_list, nullptr, alphasort);
if (c < 0) {
fprintf(stderr, "error accessing %s \n", dirname);
return false;
@ -47,16 +47,17 @@ bool validate(const char *dirname) {
return false;
}
bool * isfileasexpected = new bool[c];
for(int i = 0; i < c; i++) isfileasexpected[i] = true;
for(int i = 0; i < c; i++) { isfileasexpected[i] = true;
}
size_t howmany = 0;
bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
for (int i = 0; i < c; i++) {
const char *name = entry_list[i]->d_name;
if (hasExtension(name, extension)) {
printf("validating: file %s ", name);
fflush(NULL);
fflush(nullptr);
size_t filelen = strlen(name);
char *fullpath = (char *)malloc(dirlen + filelen + 1 + 1);
char *fullpath = static_cast<char *>(malloc(dirlen + filelen + 1 + 1));
strcpy(fullpath, dirname);
if (needsep) {
fullpath[dirlen] = '/';
@ -106,11 +107,13 @@ bool validate(const char *dirname) {
} else {
fprintf(stderr, "There were problems! Consider reviewing the following files:\n");
for(int i = 0; i < c; i++) {
if(!isfileasexpected[i]) fprintf(stderr, "%s \n", entry_list[i]->d_name);
if(!isfileasexpected[i]) { fprintf(stderr, "%s \n", entry_list[i]->d_name);
}
}
}
for (int i = 0; i < c; ++i)
for (int i = 0; i < c; ++i) {
free(entry_list[i]);
}
free(entry_list);
delete[] isfileasexpected;
return everythingfine;

View File

@ -48,7 +48,7 @@ int main(int argc, char *argv[]) {
#ifndef _MSC_VER
int c;
while ((c = getopt(argc, argv, "da")) != -1)
while ((c = getopt(argc, argv, "da")) != -1) {
switch (c) {
case 'd':
rawdump = true;
@ -59,6 +59,7 @@ int main(int argc, char *argv[]) {
default:
abort();
}
}
#else
int optind = 1;
#endif

View File

@ -39,7 +39,7 @@ struct stat_s {
bool valid;
};
// Short alias for struct stat_s.
using stat_t = struct stat_s;
@ -50,8 +50,8 @@ stat_t simdjson_computestats(const std::string_view &p) {
if (!answer.valid) {
return answer;
}
answer.backslash_count = count_backslash((const uint8_t*)p.data(), p.size());
answer.nonasciibyte_count = count_nonasciibytes((const uint8_t*)p.data(), p.size());
answer.backslash_count = count_backslash(reinterpret_cast<const uint8_t*>(p.data()), p.size());
answer.nonasciibyte_count = count_nonasciibytes(reinterpret_cast<const uint8_t*>(p.data()), p.size());
answer.byte_count = p.size();
answer.integer_count = 0;
answer.float_count = 0;

View File

@ -16,7 +16,7 @@ int main(int argc, char *argv[]) {
std::cout << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE;
}
jsonminify(p, (char *)p.data());
jsonminify(p, const_cast<char *>(p.data()));
printf("%s",p.data());
aligned_free((void*)p.data());
}