Lots of changes.
This commit is contained in:
parent
4a8e229566
commit
992116b01f
|
@ -1,7 +1,6 @@
|
|||
cmake_minimum_required(VERSION 2.8)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
set(CMAKE_MACOSX_RPATH OFF)
|
||||
if (NOT CMAKE_BUILD_TYPE)
|
||||
message(STATUS "No build type selected, default to Release")
|
||||
|
@ -11,8 +10,11 @@ endif()
|
|||
project(simdjson)
|
||||
set(SIMDJSON_LIB_NAME simdjson)
|
||||
|
||||
|
||||
if(NOT MSVC)
|
||||
option(SIMDJSON_BUILD_STATIC "Build a static library" OFF) # turning it on disables the production of a dynamic library
|
||||
else()
|
||||
option(SIMDJSON_BUILD_STATIC "Build a static library" ON) # turning it on disables the production of a dynamic library
|
||||
endif()
|
||||
option(SIMDJSON_BUILD_LTO "Build library with Link Time Optimization" OFF)
|
||||
option(SIMDJSON_SANITIZE "Sanitize addresses" OFF)
|
||||
|
||||
|
|
|
@ -80,7 +80,7 @@ of memory allocation with each new JSON document:
|
|||
const char * filename = ... //
|
||||
std::string_view p = get_corpus(filename);
|
||||
ParsedJson pj = build_parsed_json(p); // do the parsing
|
||||
// you no longer need p at this point, can do free((void*)p.data())
|
||||
// you no longer need p at this point, can do aligned_free((void*)p.data())
|
||||
if( ! pj.isValid() ) {
|
||||
// something went wrong
|
||||
}
|
||||
|
|
|
@ -106,7 +106,7 @@ double diff(timespec start, timespec end) {
|
|||
clock_gettime(CLOCK_REALTIME, &time1); \
|
||||
RDTSC_START(cycles_start); \
|
||||
if (test != expected) { \
|
||||
printf("not expected (%d , %d )", (int)test, (int)expected); \
|
||||
fprintf(stderr, "not expected (%d , %d )", (int)test, (int)expected); \
|
||||
break; \
|
||||
} \
|
||||
RDTSC_STOP(cycles_final); \
|
||||
|
|
|
@ -279,5 +279,5 @@ int main(int argc, char *argv[]) {
|
|||
!justdata);
|
||||
BEST_TIME("sasjon ", sasjon_computestats(p).size(), size, , repeat, volume,
|
||||
!justdata);
|
||||
free((void*)p.data());
|
||||
aligned_free((void*)p.data());
|
||||
}
|
||||
|
|
|
@ -137,7 +137,7 @@ int main(int argc, char *argv[]) {
|
|||
ParsedJson pj;
|
||||
bool isallocok = pj.allocateCapacity(p.size(), 1024);
|
||||
if(!isallocok) {
|
||||
printf("failed to allocate memory\n");
|
||||
fprintf(stderr, "failed to allocate memory\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
BEST_TIME("simdjson orig", json_parse((const uint8_t*)buffer, p.size(), pj), true, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
|
||||
|
@ -145,12 +145,12 @@ int main(int argc, char *argv[]) {
|
|||
ParsedJson pj2;
|
||||
bool isallocok2 = pj2.allocateCapacity(p.size(), 1024);
|
||||
if(!isallocok2) {
|
||||
printf("failed to allocate memory\n");
|
||||
fprintf(stderr, "failed to allocate memory\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
BEST_TIME("simdjson despaced", json_parse((const uint8_t*)buffer, minisize, pj2), true, memcpy(buffer, minibuffer, p.size()), repeat, volume, !justdata);
|
||||
free((void*)p.data());
|
||||
aligned_free((void*)p.data());
|
||||
free(buffer);
|
||||
free(ast_buffer);
|
||||
free(minibuffer);
|
||||
|
|
|
@ -285,9 +285,9 @@ int main(int argc, char *argv[]) {
|
|||
if (dump) {
|
||||
isok = isok && pj.dump_raw_tape(std::cout);
|
||||
}
|
||||
free((void *)p.data());
|
||||
aligned_free((void *)p.data());
|
||||
if (!isok) {
|
||||
printf(" Parsing failed. \n ");
|
||||
fprintf(stderr, " Parsing failed. \n ");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
|
|
|
@ -300,5 +300,5 @@ int main(int argc, char *argv[]) {
|
|||
!justdata);
|
||||
BEST_TIME("sasjon ", sasjon_computestats(p).valid, true, , repeat, volume,
|
||||
!justdata);
|
||||
free((void*)p.data());
|
||||
aligned_free((void*)p.data());
|
||||
}
|
||||
|
|
|
@ -158,7 +158,7 @@ int main(int argc, char *argv[]) {
|
|||
if(!justdata) BEST_TIME("memcpy ",
|
||||
(memcpy(buffer, p.data(), p.size()) == buffer), true, , repeat,
|
||||
volume, !justdata);
|
||||
free((void *)p.data());
|
||||
aligned_free((void *)p.data());
|
||||
free(ast_buffer);
|
||||
free(buffer);
|
||||
}
|
||||
|
|
|
@ -113,9 +113,9 @@ stat_t simdjson_computestats(const std::string_view &p) {
|
|||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
int c;
|
||||
#ifndef _MSC_VER
|
||||
while ((c = getopt(argc, argv, "")) != -1)
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "")) != -1)
|
||||
switch (c) {
|
||||
|
||||
default:
|
||||
|
|
|
@ -1,21 +1,22 @@
|
|||
#pragma once
|
||||
#ifndef SIMDJSON_COMMON_DEFS_H
|
||||
#define SIMDJSON_COMMON_DEFS_H
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
// the input buf should be readable up to buf + SIMDJSON_PADDING
|
||||
#define SIMDJSON_PADDING sizeof(__m256i)
|
||||
|
||||
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
/* Microsoft C/C++-compatible compiler */
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
#ifndef _MSC_VER
|
||||
// Implemented using Labels as Values which works in GCC and CLANG (and maybe
|
||||
// also in Intel's compiler), but won't work in MSVC.
|
||||
#define SIMDJSON_USE_COMPUTED_GOTO
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
// Align to N-byte boundary
|
||||
#define ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1))
|
||||
#define ROUNDDOWN_N(a, n) ((a) & ~((n)-1))
|
||||
|
@ -53,4 +54,6 @@
|
|||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif // MSC_VER
|
||||
|
||||
#endif // COMMON_DEFS_H
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#pragma once
|
||||
#ifndef SIMDJSON_JSONCHARUTILS_H
|
||||
#define SIMDJSON_JSONCHARUTILS_H
|
||||
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
@ -96,7 +97,7 @@ uint32_t hex_to_u32_nocheck(const uint8_t *src) {
|
|||
// and clz and table lookups, but JSON documents
|
||||
// have few escaped code points, and the following
|
||||
// function looks cheap.
|
||||
//
|
||||
//
|
||||
// Note: we assume that surrogates are treated separately
|
||||
//
|
||||
inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
|
||||
|
@ -126,3 +127,4 @@ inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
|
|||
return 0; // bad r
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#pragma once
|
||||
#ifndef SIMDJSON_JSONFORMATUTILS_H
|
||||
#define SIMDJSON_JSONFORMATUTILS_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
|
@ -79,7 +80,7 @@ static inline void print_with_escapes(const unsigned char *src, std::ostream &os
|
|||
default:
|
||||
if (*src <= 0x1F) {
|
||||
std::ios::fmtflags f(os.flags());
|
||||
os << std::hex << std::setw(4) << std::setfill('0') << (int) *src;
|
||||
os << std::hex << std::setw(4) << std::setfill('0') << (int) *src;
|
||||
os.flags(f);
|
||||
} else
|
||||
os << *src;
|
||||
|
@ -89,5 +90,7 @@ static inline void print_with_escapes(const unsigned char *src, std::ostream &os
|
|||
}
|
||||
|
||||
static inline void print_with_escapes(const char *src, std::ostream &os) {
|
||||
print_with_escapes((const unsigned char *)src, os);
|
||||
}
|
||||
print_with_escapes((const unsigned char *)src, os);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,12 +1,11 @@
|
|||
#ifndef JSONIOUTIL_H
|
||||
#define JSONIOUTIL_H
|
||||
#ifndef SIMDJSON_JSONIOUTIL_H
|
||||
#define SIMDJSON_JSONIOUTIL_H
|
||||
|
||||
#include <exception>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include "simdjson/common_defs.h"
|
||||
|
||||
|
||||
|
@ -32,7 +31,7 @@ char * allocate_padded_buffer(size_t length);
|
|||
// try {
|
||||
// p = get_corpus(filename);
|
||||
// } catch (const std::exception& e) {
|
||||
// free((void*)p.data());
|
||||
// free((void*)p.data());//use aligned_free if you plan to use VisualStudio
|
||||
// std::cout << "Could not load the file " << filename << std::endl;
|
||||
// }
|
||||
std::string_view get_corpus(std::string filename);
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
#pragma once
|
||||
#ifndef SIMDJSON_JSONMINIFIER_H
|
||||
#define SIMDJSON_JSONMINIFIER_H
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
// Take input from buf and remove useless whitespace, write it to out; buf and
|
||||
// out can be the same pointer. Result is null terminated,
|
||||
// out can be the same pointer. Result is null terminated,
|
||||
// return the string length (minus the null termination).
|
||||
size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out);
|
||||
|
||||
|
@ -16,4 +17,6 @@ static inline size_t jsonminify(const char *buf, size_t len, char *out) {
|
|||
|
||||
static inline size_t jsonminify(const std::string_view & p, char *out) {
|
||||
return jsonminify(p.data(), p.size(), out);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#pragma once
|
||||
#ifndef SIMDJSON_JSONPARSER_H
|
||||
#define SIMDJSON_JSONPARSER_H
|
||||
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/jsonioutil.h"
|
||||
|
@ -11,7 +12,7 @@
|
|||
|
||||
|
||||
// Parse a document found in buf, need to preallocate ParsedJson.
|
||||
// Return false in case of a failure. You can also check validity
|
||||
// Return false in case of a failure. You can also check validity
|
||||
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
//
|
||||
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
|
||||
|
@ -22,7 +23,7 @@ WARN_UNUSED
|
|||
bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded = true);
|
||||
|
||||
// Parse a document found in buf, need to preallocate ParsedJson.
|
||||
// Return false in case of a failure. You can also check validity
|
||||
// Return false in case of a failure. You can also check validity
|
||||
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
//
|
||||
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
|
||||
|
@ -30,12 +31,12 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
|
|||
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// all bytes at and after buf + len are ignored (can be garbage).
|
||||
WARN_UNUSED
|
||||
static inline bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
|
||||
inline bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
|
||||
return json_parse((const uint8_t *) buf, len, pj, reallocifneeded);
|
||||
}
|
||||
|
||||
// Parse a document found in buf, need to preallocate ParsedJson.
|
||||
// Return false in case of a failure. You can also check validity
|
||||
// Return false in case of a failure. You can also check validity
|
||||
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
//
|
||||
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
|
||||
|
@ -43,12 +44,12 @@ static inline bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool
|
|||
// the input s should be readable up to s.data() + s.size() + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// all bytes at and after s.data()+s.size() are ignored (can be garbage).
|
||||
WARN_UNUSED
|
||||
static inline bool json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded = true) {
|
||||
inline bool json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded = true) {
|
||||
return json_parse(s.data(), s.size(), pj, reallocifneeded);
|
||||
}
|
||||
|
||||
|
||||
// Build a ParsedJson object. You can check validity
|
||||
// Build a ParsedJson object. You can check validity
|
||||
// by calling pj.isValid(). This does the memory allocation needed for ParsedJson.
|
||||
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
|
||||
// (a copy of the input string is made).
|
||||
|
@ -59,24 +60,26 @@ WARN_UNUSED
|
|||
ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneeded = true);
|
||||
|
||||
WARN_UNUSED
|
||||
// Build a ParsedJson object. You can check validity
|
||||
// Build a ParsedJson object. You can check validity
|
||||
// by calling pj.isValid(). This does the memory allocation needed for ParsedJson.
|
||||
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
|
||||
// (a copy of the input string is made).
|
||||
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// all bytes at and after buf + len are ignored (can be garbage).
|
||||
static inline ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded = true) {
|
||||
inline ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded = true) {
|
||||
return build_parsed_json((const uint8_t *) buf, len, reallocifneeded);
|
||||
}
|
||||
|
||||
// convenience function
|
||||
WARN_UNUSED
|
||||
// Build a ParsedJson object. You can check validity
|
||||
// Build a ParsedJson object. You can check validity
|
||||
// by calling pj.isValid(). This does the memory allocation needed for ParsedJson.
|
||||
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
|
||||
// (a copy of the input string is made).
|
||||
// The input s should be readable up to s.data() + s.size() + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// all bytes at and after s.data()+s.size() are ignored (can be garbage).
|
||||
static inline ParsedJson build_parsed_json(const std::string_view &s, bool reallocifneeded = true) {
|
||||
inline ParsedJson build_parsed_json(const std::string_view &s, bool reallocifneeded = true) {
|
||||
return build_parsed_json(s.data(), s.size(), reallocifneeded);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,31 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
/* Microsoft C/C++-compatible compiler */
|
||||
#include <intrin.h>
|
||||
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
|
||||
return _addcarry_u64(0, value1, value2, reinterpret_cast<unsigned __int64 *>(result));
|
||||
}
|
||||
# pragma intrinsic(_umul128)
|
||||
static inline bool mul_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
|
||||
uint64_t high;
|
||||
*result = _umul128(value1, value2, &high);
|
||||
return high;
|
||||
}
|
||||
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
|
||||
return __builtin_uaddl_overflow(value1, value2, result);
|
||||
}
|
||||
static inline bool mul_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
|
||||
return __builtin_umulll_overflow(value1, value2, result);
|
||||
}
|
||||
|
||||
#endif // _MSC_VER
|
||||
|
||||
#ifndef SIMDJSON_NUMBERPARSING_H
|
||||
#define SIMDJSON_NUMBERPARSING_H
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/jsoncharutils.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
@ -187,7 +163,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
|
|||
// Note: a redesign could avoid this function entirely.
|
||||
//
|
||||
static never_inline bool
|
||||
parse_float(const uint8_t *const buf,
|
||||
parse_float(const uint8_t *const buf,
|
||||
ParsedJson &pj, const uint32_t offset,
|
||||
bool found_minus) {
|
||||
const char *p = (const char *)(buf + offset);
|
||||
|
@ -352,21 +328,14 @@ static never_inline bool parse_large_integer(const uint8_t *const buf,
|
|||
return is_structural_or_whitespace(*p);
|
||||
}
|
||||
|
||||
#ifndef likely
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
#endif
|
||||
|
||||
#ifndef unlikely
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// parse the number at buf + offset
|
||||
// define JSON_TEST_NUMBERS for unit testing
|
||||
static really_inline bool parse_number(const uint8_t *const buf,
|
||||
ParsedJson &pj,
|
||||
const uint32_t offset,
|
||||
static really_inline bool parse_number(const uint8_t *const buf,
|
||||
ParsedJson &pj,
|
||||
const uint32_t offset,
|
||||
bool found_minus) {
|
||||
#ifdef SIMDJSON_SKIPNUMBERPARSING // for performance analysis, it is sometimes useful to skip parsing
|
||||
pj.write_tape_s64(0); // always write zero
|
||||
|
@ -487,7 +456,7 @@ static really_inline bool parse_number(const uint8_t *const buf,
|
|||
if (unlikely(digitcount >= 19)) { // this is uncommon!!!
|
||||
// this is almost never going to get called!!!
|
||||
// we start anew, going slowly!!!
|
||||
return parse_float(buf, pj, offset,
|
||||
return parse_float(buf, pj, offset,
|
||||
found_minus);
|
||||
}
|
||||
///////////
|
||||
|
@ -516,7 +485,7 @@ static really_inline bool parse_number(const uint8_t *const buf,
|
|||
}
|
||||
} else {
|
||||
if (unlikely(digitcount >= 18)) { // this is uncommon!!!
|
||||
return parse_large_integer(buf, pj, offset,
|
||||
return parse_large_integer(buf, pj, offset,
|
||||
found_minus);
|
||||
}
|
||||
pj.write_tape_s64(i);
|
||||
|
@ -527,3 +496,5 @@ static really_inline bool parse_number(const uint8_t *const buf,
|
|||
return is_structural_or_whitespace(*p);
|
||||
#endif // SIMDJSON_SKIPNUMBERPARSING
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,37 +1,19 @@
|
|||
#pragma once
|
||||
#include <math.h>
|
||||
#ifndef SIMDJSON_PARSEDJSON_H
|
||||
#define SIMDJSON_PARSEDJSON_H
|
||||
|
||||
#include <math.h>
|
||||
#include <inttypes.h>
|
||||
#include <string.h>
|
||||
#ifdef _MSC_VER
|
||||
/* Microsoft C/C++-compatible compiler */
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
#include "simdjson/jsonformatutils.h"
|
||||
|
||||
#define JSONVALUEMASK 0xFFFFFFFFFFFFFF
|
||||
|
||||
#define DEFAULTMAXDEPTH 1024// a JSON document with a depth exceeding 1024 is probably de facto invalid
|
||||
|
||||
// portable version of posix_memalign
|
||||
static inline void *aligned_malloc(size_t alignment, size_t size) {
|
||||
void *p;
|
||||
#ifdef _MSC_VER
|
||||
p = _aligned_malloc(size, alignment);
|
||||
#elif defined(__MINGW32__) || defined(__MINGW64__)
|
||||
p = __mingw_aligned_malloc(size, alignment);
|
||||
#else
|
||||
// somehow, if this is used before including "x86intrin.h", it creates an
|
||||
// implicit defined warning.
|
||||
if (posix_memalign(&p, alignment, size) != 0) return NULL;
|
||||
#endif
|
||||
return p;
|
||||
}
|
||||
|
||||
/************
|
||||
* The JSON is parsed to a tape, see the accompanying tape.md file
|
||||
|
@ -76,17 +58,20 @@ public:
|
|||
string_buf = new uint8_t[localstringcapacity];
|
||||
tape = new uint64_t[localtapecapacity];
|
||||
containing_scope_offset = new uint32_t[maxdepth];
|
||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
||||
ret_address = new void *[maxdepth];
|
||||
|
||||
#else
|
||||
ret_address = new char[maxdepth];
|
||||
#endif
|
||||
if ((string_buf == NULL) || (tape == NULL) ||
|
||||
(containing_scope_offset == NULL) || (ret_address == NULL) || (structural_indexes == NULL)) {
|
||||
std::cerr << "Could not allocate memory" << std::endl;
|
||||
delete[] ret_address;
|
||||
delete[] containing_scope_offset;
|
||||
delete[] tape;
|
||||
delete[] string_buf;
|
||||
delete[] structural_indexes;
|
||||
free(structurals);
|
||||
if(ret_address != NULL) delete[] ret_address;
|
||||
if(containing_scope_offset != NULL) delete[] containing_scope_offset;
|
||||
if(tape != NULL) delete[] tape;
|
||||
if(string_buf != NULL) delete[] string_buf;
|
||||
if(structural_indexes != NULL) delete[] structural_indexes;
|
||||
aligned_free(structurals);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -108,12 +93,12 @@ public:
|
|||
depthcapacity = 0;
|
||||
tapecapacity = 0;
|
||||
stringcapacity = 0;
|
||||
delete[] ret_address;
|
||||
delete[] containing_scope_offset;
|
||||
delete[] tape;
|
||||
delete[] string_buf;
|
||||
delete[] structural_indexes;
|
||||
free(structurals);
|
||||
if(ret_address != NULL) delete[] ret_address;
|
||||
if(containing_scope_offset != NULL) delete[] containing_scope_offset;
|
||||
if(tape != NULL) delete[] tape;
|
||||
if(string_buf != NULL) delete[] string_buf;
|
||||
if(structural_indexes != NULL) delete[] structural_indexes;
|
||||
aligned_free(structurals);
|
||||
isvalid = false;
|
||||
}
|
||||
|
||||
|
@ -139,11 +124,11 @@ public:
|
|||
if (type == 'r') {
|
||||
howmany = tape_val & JSONVALUEMASK;
|
||||
} else {
|
||||
printf("Error: no starting root node?");
|
||||
fprintf(stderr, "Error: no starting root node?");
|
||||
return false;
|
||||
}
|
||||
if (howmany > tapecapacity) {
|
||||
printf(
|
||||
fprintf(stderr,
|
||||
"We may be exceeding the tape capacity. Is this a valid document?\n");
|
||||
return false;
|
||||
}
|
||||
|
@ -207,22 +192,22 @@ public:
|
|||
os << '}';
|
||||
break;
|
||||
case '[': // we start an array
|
||||
os << '[';
|
||||
os << '[';
|
||||
depth++;
|
||||
inobject[depth] = false;
|
||||
inobjectidx[depth] = 0;
|
||||
break;
|
||||
case ']': // we end an array
|
||||
depth--;
|
||||
os << ']';
|
||||
os << ']';
|
||||
break;
|
||||
case 'r': // we start and end with the root node
|
||||
printf("should we be hitting the root node?\n");
|
||||
fprintf(stderr, "should we be hitting the root node?\n");
|
||||
delete[] inobject;
|
||||
delete[] inobjectidx;
|
||||
return false;
|
||||
default:
|
||||
printf("bug %c\n", type);
|
||||
fprintf(stderr, "bug %c\n", type);
|
||||
delete[] inobject;
|
||||
delete[] inobjectidx;
|
||||
return false;
|
||||
|
@ -239,25 +224,25 @@ public:
|
|||
size_t tapeidx = 0;
|
||||
uint64_t tape_val = tape[tapeidx];
|
||||
uint8_t type = (tape_val >> 56);
|
||||
os << tapeidx << " : " << type;
|
||||
os << tapeidx << " : " << type;
|
||||
tapeidx++;
|
||||
size_t howmany = 0;
|
||||
if (type == 'r') {
|
||||
howmany = tape_val & JSONVALUEMASK;
|
||||
} else {
|
||||
printf("Error: no starting root node?");
|
||||
fprintf(stderr, "Error: no starting root node?");
|
||||
return false;
|
||||
}
|
||||
os << "\t// pointing to " << howmany <<" (right after last node)\n";
|
||||
uint64_t payload;
|
||||
for (; tapeidx < howmany; tapeidx++) {
|
||||
os << tapeidx << " : ";
|
||||
os << tapeidx << " : ";
|
||||
tape_val = tape[tapeidx];
|
||||
payload = tape_val & JSONVALUEMASK;
|
||||
type = (tape_val >> 56);
|
||||
switch (type) {
|
||||
case '"': // we have a string
|
||||
os << "string \"";
|
||||
os << "string \"";
|
||||
print_with_escapes((const unsigned char *)(string_buf + payload));
|
||||
os << '"';
|
||||
os << '\n';
|
||||
|
@ -377,9 +362,9 @@ public:
|
|||
delete[] depthindex;
|
||||
}
|
||||
|
||||
iterator(const iterator &o):
|
||||
pj(o.pj), depth(o.depth), location(o.location),
|
||||
tape_length(o.tape_length), current_type(o.current_type),
|
||||
iterator(const iterator &o):
|
||||
pj(o.pj), depth(o.depth), location(o.location),
|
||||
tape_length(o.tape_length), current_type(o.current_type),
|
||||
current_val(o.current_val), depthindex(NULL) {
|
||||
depthindex = new scopeindex_t[pj.depthcapacity];
|
||||
if(depthindex != NULL) {
|
||||
|
@ -389,10 +374,10 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
// Move constructor: adopts o's depthindex array instead of allocating and
// copying a new one.
//
// pj is a reference member (ParsedJson &pj), so it is simply bound to the
// same ParsedJson that o refers to. It must NOT be initialized from
// std::move(o.pj): a non-const lvalue reference cannot bind to an rvalue,
// which standard-conforming compilers reject (MSVC only accepts it as an
// extension). The remaining members are plain scalars, for which moving is
// the same as copying.
iterator(iterator &&o) noexcept :
    pj(o.pj), depth(o.depth), location(o.location),
    tape_length(o.tape_length), current_type(o.current_type),
    current_val(o.current_val), depthindex(o.depthindex) {
    o.depthindex = nullptr; // we take ownership: o must no longer refer to the array
}
|
||||
|
||||
|
@ -400,7 +385,7 @@ public:
|
|||
bool isOk() const {
|
||||
return location < tape_length;
|
||||
}
|
||||
|
||||
|
||||
// useful for debugging purposes
|
||||
size_t get_tape_location() const {
|
||||
return location;
|
||||
|
@ -432,7 +417,7 @@ public:
|
|||
depth++;
|
||||
depthindex[depth].start_of_scope = location;
|
||||
depthindex[depth].scope_type = current_type;
|
||||
}
|
||||
}
|
||||
location = location + 1;
|
||||
current_val = pj.tape[location];
|
||||
current_type = (current_val >> 56);
|
||||
|
@ -471,7 +456,7 @@ public:
|
|||
double answer;
|
||||
memcpy(&answer, & pj.tape[location + 1], sizeof(answer));
|
||||
return answer;
|
||||
}
|
||||
}
|
||||
|
||||
bool is_object_or_array() const {
|
||||
return is_object_or_array(get_type());
|
||||
|
@ -488,15 +473,15 @@ public:
|
|||
bool is_string() const {
|
||||
return get_type() == '"';
|
||||
}
|
||||
|
||||
|
||||
bool is_integer() const {
|
||||
return get_type() == 'l';
|
||||
}
|
||||
|
||||
|
||||
bool is_double() const {
|
||||
return get_type() == 'd';
|
||||
}
|
||||
|
||||
|
||||
static bool is_object_or_array(uint8_t type) {
|
||||
return (type == '[' || (type == '{'));
|
||||
}
|
||||
|
@ -524,7 +509,7 @@ public:
|
|||
really_inline const char * get_string() const {
|
||||
return (const char *)(pj.string_buf + (current_val & JSONVALUEMASK)) ;
|
||||
}
|
||||
|
||||
|
||||
// throughout return true if we can do the navigation, false
|
||||
// otherwise
|
||||
|
||||
|
@ -533,10 +518,10 @@ public:
|
|||
// Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, { and [.
|
||||
// At the object ({) or at the array ([), you can issue a "down" to visit their content.
|
||||
// valid if we're not at the end of a scope (returns true).
|
||||
really_inline bool next() {
|
||||
really_inline bool next() {
|
||||
if ((current_type == '[') || (current_type == '{')){
|
||||
// we need to jump
|
||||
size_t npos = ( current_val & JSONVALUEMASK);
|
||||
size_t npos = ( current_val & JSONVALUEMASK);
|
||||
if(npos >= tape_length) {
|
||||
return false; // shoud never happen unless at the root
|
||||
}
|
||||
|
@ -563,14 +548,14 @@ public:
|
|||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Within a given scope (series of nodes at the same depth within either an
|
||||
// array or an object), we move backward.
|
||||
// Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true when starting at the end
|
||||
// of the scope.
|
||||
// At the object ({) or at the array ([), you can issue a "down" to visit their content.
|
||||
// At the object ({) or at the array ([), you can issue a "down" to visit their content.
|
||||
really_inline bool prev() {
|
||||
if(location - 1 < depthindex[depth].start_of_scope) return false;
|
||||
location -= 1;
|
||||
|
@ -578,9 +563,9 @@ public:
|
|||
current_type = (current_val >> 56);
|
||||
if ((current_type == ']') || (current_type == '}')){
|
||||
// we need to jump
|
||||
size_t new_location = ( current_val & JSONVALUEMASK);
|
||||
size_t new_location = ( current_val & JSONVALUEMASK);
|
||||
if(new_location < depthindex[depth].start_of_scope) {
|
||||
return false; // shoud never happen
|
||||
return false; // shoud never happen
|
||||
}
|
||||
location = new_location;
|
||||
current_val = pj.tape[location];
|
||||
|
@ -589,7 +574,7 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
// Moves back to either the containing array or object (type { or [) from
|
||||
// Moves back to either the containing array or object (type { or [) from
|
||||
// within a contained scope.
|
||||
// Valid unless we are at the first level of the document
|
||||
//
|
||||
|
@ -605,8 +590,8 @@ public:
|
|||
current_type = (current_val >> 56);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Valid if we're at a [ or { and it starts a non-empty scope; moves us to start of
|
||||
// that deeper scope if it not empty.
|
||||
// Thus, given [true, null, {"a":1}, [1,2]], if we are at the { node, we would move to the
|
||||
|
@ -614,7 +599,7 @@ public:
|
|||
really_inline bool down() {
|
||||
if(location + 1 >= tape_length) return false;
|
||||
if ((current_type == '[') || (current_type == '{')) {
|
||||
size_t npos = (current_val & JSONVALUEMASK);
|
||||
size_t npos = (current_val & JSONVALUEMASK);
|
||||
if(npos == location + 2) {
|
||||
return false; // we have an empty scope
|
||||
}
|
||||
|
@ -625,13 +610,13 @@ public:
|
|||
current_val = pj.tape[location];
|
||||
current_type = (current_val >> 56);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// move us to the start of our current scope,
|
||||
// a scope is a series of nodes at the same level
|
||||
void to_start_scope() {
|
||||
void to_start_scope() {
|
||||
location = depthindex[depth].start_of_scope;
|
||||
current_val = pj.tape[location];
|
||||
current_type = (current_val >> 56);
|
||||
|
@ -656,7 +641,7 @@ public:
|
|||
case 'l': // we have a long int
|
||||
os << get_integer();
|
||||
break;
|
||||
case 'd':
|
||||
case 'd':
|
||||
os << get_double();
|
||||
break;
|
||||
case 'n': // we have a null
|
||||
|
@ -689,7 +674,7 @@ private:
|
|||
ParsedJson &pj;
|
||||
size_t depth;
|
||||
size_t location; // our current location on a tape
|
||||
size_t tape_length;
|
||||
size_t tape_length;
|
||||
uint8_t current_type;
|
||||
uint64_t current_val;
|
||||
scopeindex_t *depthindex;
|
||||
|
@ -711,12 +696,40 @@ private:
|
|||
|
||||
uint64_t *tape;
|
||||
uint32_t *containing_scope_offset;
|
||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
||||
void **ret_address;
|
||||
#else
|
||||
char *ret_address;
|
||||
#endif
|
||||
|
||||
uint8_t *string_buf; // should be at least bytecapacity
|
||||
uint8_t *current_string_buf_loc;
|
||||
bool isvalid;
|
||||
ParsedJson(const ParsedJson && p);
|
||||
|
||||
// Move constructor: takes over every buffer owned by p and leaves p empty.
//
// All members are scalars or raw pointers, so they are copied directly
// (std::move on them would be a no-op). After the transfer, p's pointers are
// nulled so that p no longer refers to the buffers now held by *this, its
// capacities are zeroed so that it does not advertise storage it no longer
// has, and it is marked invalid.
ParsedJson(ParsedJson && p) noexcept
    : bytecapacity(p.bytecapacity),
      depthcapacity(p.depthcapacity),
      tapecapacity(p.tapecapacity),
      stringcapacity(p.stringcapacity),
      current_loc(p.current_loc),
      structurals(p.structurals),
      n_structural_indexes(p.n_structural_indexes),
      structural_indexes(p.structural_indexes),
      tape(p.tape),
      containing_scope_offset(p.containing_scope_offset),
      ret_address(p.ret_address),
      string_buf(p.string_buf),
      current_string_buf_loc(p.current_string_buf_loc),
      isvalid(p.isvalid) {
    // Detach p from the buffers we now own.
    p.structurals = nullptr;
    p.structural_indexes = nullptr;
    p.tape = nullptr;
    p.containing_scope_offset = nullptr;
    p.ret_address = nullptr;
    p.string_buf = nullptr;
    p.current_string_buf_loc = nullptr;
    // With no buffers left, p has no capacity.
    p.bytecapacity = 0;
    p.depthcapacity = 0;
    p.tapecapacity = 0;
    p.stringcapacity = 0;
    // isvalid is a bool, so the moved-from state is `false` (was `NULL`).
    p.isvalid = false;
}
|
||||
|
||||
private :
|
||||
|
||||
|
@ -743,3 +756,6 @@ inline void dumpbits32_always(uint32_t v, const std::string &msg) {
|
|||
}
|
||||
std::cout << " " << msg.c_str() << "\n";
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,122 @@
|
|||
#ifndef SIMDJSON_PORTABILITY_H
|
||||
#define SIMDJSON_PORTABILITY_H
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
/* Microsoft C/C++-compatible compiler */
|
||||
#include <intrin.h>
|
||||
#include <iso646.h>
|
||||
#include <cstdint>
|
||||
|
||||
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
|
||||
return _addcarry_u64(0, value1, value2, reinterpret_cast<unsigned __int64 *>(result));
|
||||
}
|
||||
|
||||
# pragma intrinsic(_umul128)
|
||||
static inline bool mul_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
|
||||
uint64_t high;
|
||||
*result = _umul128(value1, value2, &high);
|
||||
return high;
|
||||
}
|
||||
|
||||
static inline int trailingzeroes(uint64_t input_num) {
|
||||
return _tzcnt_u64(input_num);
|
||||
}
|
||||
|
||||
static inline int leadingzeroes(uint64_t input_num) {
|
||||
return _lzcnt_u64(input_num);
|
||||
}
|
||||
|
||||
static inline int hamming(uint64_t input_num) {
|
||||
#ifdef _WIN64 // highly recommended!!!
|
||||
return (int)__popcnt64(input_num);
|
||||
#else // if we must support 32-bit Windows
|
||||
return (int)(__popcnt((uint32_t)input_num) +
|
||||
__popcnt((uint32_t)(input_num >> 32)));
|
||||
#endif
|
||||
}
|
||||
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
#include <cstdint>
|
||||
|
||||
// GCC/Clang flavour: stores the wrapped sum value1 + value2 in *result and
// returns true when the addition overflowed 64 bits.
//
// The type-generic __builtin_add_overflow is used instead of
// __builtin_uaddl_overflow: the latter takes an `unsigned long *`, which does
// not match `uint64_t *` on platforms where uint64_t is `unsigned long long`.
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
  return __builtin_add_overflow(value1, value2, result);
}
|
||||
// GCC/Clang flavour: stores the wrapped product value1 * value2 in *result
// and returns true when the multiplication overflowed 64 bits.
//
// The type-generic __builtin_mul_overflow is used instead of
// __builtin_umulll_overflow: the latter takes an `unsigned long long *`,
// which does not match `uint64_t *` where uint64_t is `unsigned long`
// (a hard error in C++).
static inline bool mul_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
  return __builtin_mul_overflow(value1, value2, result);
}
|
||||
|
||||
/* result might be undefined when input_num is zero */
|
||||
static inline int trailingzeroes(uint64_t input_num) {
|
||||
return __tzcnt_u64(input_num);
|
||||
}
|
||||
|
||||
/* result might be undefined when input_num is zero */
|
||||
static inline int leadingzeroes(uint64_t input_num) {
|
||||
return __lzcnt_u64(input_num);
|
||||
}
|
||||
|
||||
/* result might be undefined when input_num is zero */
|
||||
static inline int hamming(uint64_t input_num) {
|
||||
return _popcnt64(input_num);
|
||||
}
|
||||
|
||||
#endif // _MSC_VER
|
||||
|
||||
|
||||
|
||||
// Portable stand-in for posix_memalign: returns a block of `size` bytes
// obtained from the platform's aligned allocator for the requested
// `alignment`, or NULL when posix_memalign reports an error (the MSVC and
// MinGW branches return whatever their allocator hands back).
static inline void *aligned_malloc(size_t alignment, size_t size) {
#ifdef _MSC_VER
  return _aligned_malloc(size, alignment);
#elif defined(__MINGW32__) || defined(__MINGW64__)
  return __mingw_aligned_malloc(size, alignment);
#else
  // Original author's note: calling this before "x86intrin.h" has been
  // included somehow produces an implicit-declaration warning.
  void *block;
  const int status = posix_memalign(&block, alignment, size);
  if (status != 0) {
    return NULL;
  }
  return block;
#endif
}
|
||||
|
||||
|
||||
#ifndef __clang__
|
||||
#ifndef _MSC_VER
|
||||
// Builds a 256-bit vector from two unaligned 128-bit loads: the value read
// from __addr_lo is cast up to 256 bits and the value read from __addr_hi is
// inserted at 128-bit lane index 1.
static __m256i inline _mm256_loadu2_m128i(__m128i const *__addr_hi,
                                          __m128i const *__addr_lo) {
  const __m128i low_lane = _mm_loadu_si128(__addr_lo);
  const __m256i widened = _mm256_castsi128_si256(low_lane);
  const __m128i high_lane = _mm_loadu_si128(__addr_hi);
  return _mm256_insertf128_si256(widened, high_lane, 1);
}
|
||||
|
||||
// Splits __a into its two 128-bit lanes and writes them with unaligned
// stores: lane 0 goes to __addr_lo first, then lane 1 goes to __addr_hi.
static inline void _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo,
                                        __m256i __a) {
  const __m128i low_lane = _mm256_castsi256_si128(__a);
  _mm_storeu_si128(__addr_lo, low_lane);

  const __m128i high_lane = _mm256_extractf128_si256(__a, 1);
  _mm_storeu_si128(__addr_hi, high_lane);
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
// Releases a block through the deallocator selected for the current
// toolchain (_aligned_free, __mingw_aligned_free or free). A NULL pointer is
// accepted and ignored.
static inline void aligned_free(void *memblock) {
  if (memblock != NULL) {
#ifdef _MSC_VER
    _aligned_free(memblock);
#elif defined(__MINGW32__) || defined(__MINGW64__)
    __mingw_aligned_free(memblock);
#else
    free(memblock);
#endif
  }
}
|
||||
|
||||
#endif /* end of include PORTABILITY_H */
|
|
@ -1,18 +1,8 @@
|
|||
/**
|
||||
* (c) Daniel Lemire
|
||||
* License: Apache License 2.0
|
||||
*/
|
||||
#ifndef SIMDJSON_SIMDPRUNE_TABLES_H
|
||||
#define SIMDJSON_SIMDPRUNE_TABLES_H
|
||||
|
||||
#ifndef SIMDPRUNE_TABLES_H
|
||||
#define SIMDPRUNE_TABLES_H
|
||||
#include "simdjson/portability.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#include <iso646.h>
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
#include <cstdint>
|
||||
#ifdef __AVX__
|
||||
|
||||
static const unsigned char mask128_epi8[] = {
|
||||
|
|
|
@ -1,16 +1,12 @@
|
|||
|
||||
#ifndef SIMDUTF8CHECK_H
|
||||
#define SIMDUTF8CHECK_H
|
||||
#ifndef SIMDJSON_SIMDUTF8CHECK_H
|
||||
#define SIMDJSON_SIMDUTF8CHECK_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#ifdef _MSC_VER
|
||||
/* Microsoft C/C++-compatible compiler */
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
#include <string.h>
|
||||
#include "simdjson/portability.h"
|
||||
/*
|
||||
* legal utf-8 byte sequence
|
||||
* http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
#pragma once
|
||||
#ifndef SIMDJSON_STAGE1_FIND_MARKS_H
|
||||
#define SIMDJSON_STAGE1_FIND_MARKS_H
|
||||
|
||||
#include "common_defs.h"
|
||||
#include "parsedjson.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
||||
WARN_UNUSED
|
||||
bool find_structural_bits(const uint8_t *buf, size_t len, ParsedJson &pj);
|
||||
|
@ -10,3 +11,5 @@ WARN_UNUSED
|
|||
static inline bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
|
||||
return find_structural_bits((const uint8_t *)buf, len, pj);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,7 +1,10 @@
|
|||
#pragma once
|
||||
#ifndef SIMDJSON_STAGE2_FLATTEN_H
|
||||
#define SIMDJSON_STAGE2_FLATTEN_H
|
||||
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
||||
WARN_UNUSED
|
||||
bool flatten_indexes(size_t len, ParsedJson &pj);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#pragma once
|
||||
#ifndef SIMDJSON_STAGE34_UNIFIED_H
|
||||
#define SIMDJSON_STAGE34_UNIFIED_H
|
||||
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
@ -13,3 +14,4 @@ static inline bool unified_machine(const char *buf, size_t len, ParsedJson &pj)
|
|||
return unified_machine((const uint8_t *)buf,len,pj);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#pragma once
|
||||
#ifndef SIMDJSON_STRINGPARSING_H
|
||||
#define SIMDJSON_STRINGPARSING_H
|
||||
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
@ -53,7 +54,7 @@ really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, uint8_t **d
|
|||
(((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000;
|
||||
*src_ptr += 6;
|
||||
}
|
||||
size_t offset = codepoint_to_utf8(code_point, *dst_ptr);
|
||||
size_t offset = codepoint_to_utf8(code_point, *dst_ptr);
|
||||
*dst_ptr += offset;
|
||||
return offset > 0;
|
||||
}
|
||||
|
@ -87,8 +88,8 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
|
|||
__m256i unescaped_vec = _mm256_cmpeq_epi8(_mm256_max_epu8(unitsep,v),unitsep);// could do it with saturated subtraction
|
||||
#endif // CHECKUNESCAPED
|
||||
|
||||
uint32_t quote_dist = __tzcnt_u64(quote_bits);
|
||||
uint32_t bs_dist = __tzcnt_u64(bs_bits);
|
||||
uint32_t quote_dist = trailingzeroes(quote_bits);
|
||||
uint32_t bs_dist = trailingzeroes(bs_bits);
|
||||
// store to dest unconditionally - we can overwrite the bits we don't like
|
||||
// later
|
||||
_mm256_storeu_si256((__m256i *)(dst), v);
|
||||
|
@ -104,7 +105,7 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
|
|||
uint32_t unescaped_bits = (uint32_t)_mm256_movemask_epi8(unescaped_vec);
|
||||
bool is_ok = ((quote_bits - 1) & (~ quote_bits) & unescaped_bits) == 0;
|
||||
#ifdef JSON_TEST_STRINGS // for unit testing
|
||||
if(is_ok) foundString(buf + offset,start_of_string,pj.current_string_buf_loc - 1);
|
||||
if(is_ok) foundString(buf + offset,start_of_string,pj.current_string_buf_loc - 1);
|
||||
else foundBadString(buf + offset);
|
||||
#endif // JSON_TEST_STRINGS
|
||||
return is_ok;
|
||||
|
@ -176,3 +177,4 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
|
|||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -32,7 +32,7 @@ std::string_view get_corpus(std::string filename) {
|
|||
size_t readb = std::fread(buf, 1, len, fp);
|
||||
std::fclose(fp);
|
||||
if(readb != len) {
|
||||
free(buf);
|
||||
aligned_free(buf);
|
||||
throw std::runtime_error("could not read the data");
|
||||
}
|
||||
return std::string_view(buf,len);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#include <cstdint>
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
#ifndef __AVX2__
|
||||
|
||||
|
||||
|
@ -59,41 +59,8 @@ size_t jsonminify(const unsigned char *bytes, size_t howmany,
|
|||
|
||||
#else
|
||||
|
||||
#ifdef _MSC_VER
|
||||
/* Microsoft C/C++-compatible compiler */
|
||||
#include <intrin.h>
|
||||
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
|
||||
return _addcarry_u64(0, value1, value2, reinterpret_cast<unsigned __int64 *>(result));
|
||||
}
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
|
||||
return __builtin_uaddl_overflow(value1, value2, (unsigned long long *)result);
|
||||
}
|
||||
#endif // _MSC_VER
|
||||
|
||||
#include "simdjson/simdprune_tables.h"
|
||||
#include <cstring>
|
||||
#ifndef __clang__
|
||||
#ifndef _MSC_VER
|
||||
static __m256i inline _mm256_loadu2_m128i(__m128i const *__addr_hi,
|
||||
__m128i const *__addr_lo) {
|
||||
__m256i __v256 = _mm256_castsi128_si256(_mm_loadu_si128(__addr_lo));
|
||||
return _mm256_insertf128_si256(__v256, _mm_loadu_si128(__addr_hi), 1);
|
||||
}
|
||||
|
||||
static inline void _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo,
|
||||
__m256i __a) {
|
||||
__m128i __v128;
|
||||
|
||||
__v128 = _mm256_castsi256_si128(__a);
|
||||
_mm_storeu_si128(__addr_lo, __v128);
|
||||
__v128 = _mm256_extractf128_si256(__a, 1);
|
||||
_mm_storeu_si128(__addr_hi, __v128);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
// a straightforward comparison of a mask against input.
|
||||
static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,
|
||||
|
|
|
@ -5,6 +5,14 @@
|
|||
#else
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
|
||||
extern bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded);
|
||||
extern bool json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded);
|
||||
extern ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded);
|
||||
extern ParsedJson build_parsed_json(const std::string_view &s, bool reallocifneeded);
|
||||
|
||||
|
||||
// parse a document found in buf, need to preallocate ParsedJson.
|
||||
WARN_UNUSED
|
||||
bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded) {
|
||||
|
|
|
@ -1,19 +1,5 @@
|
|||
#include <cstdint>
|
||||
#ifdef _MSC_VER
|
||||
/* Microsoft C/C++-compatible compiler */
|
||||
#include <intrin.h>
|
||||
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
|
||||
return _addcarry_u64(0, value1, value2, reinterpret_cast<unsigned __int64 *>(result));
|
||||
}
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
|
||||
return __builtin_uaddl_overflow(value1, value2, result);
|
||||
}
|
||||
#endif // _MSC_VER
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
#include <cassert>
|
||||
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
||||
|
@ -23,7 +9,7 @@ static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *re
|
|||
|
||||
// It seems that many parsers do UTF-8 validation.
|
||||
// RapidJSON does not do it by default, but a flag
|
||||
// allows it.
|
||||
// allows it.
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
#include "simdjson/simdutf8check.h"
|
||||
#endif
|
||||
|
@ -50,9 +36,9 @@ WARN_UNUSED
|
|||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
__m256i has_error = _mm256_setzero_si256();
|
||||
struct avx_processed_utf_bytes previous;
|
||||
previous.rawbytes = _mm256_setzero_si256();
|
||||
previous.high_nibbles = _mm256_setzero_si256();
|
||||
previous.carried_continuations = _mm256_setzero_si256();
|
||||
previous.rawbytes = _mm256_setzero_si256();
|
||||
previous.high_nibbles = _mm256_setzero_si256();
|
||||
previous.carried_continuations = _mm256_setzero_si256();
|
||||
#endif
|
||||
|
||||
// Useful constant masks
|
||||
|
@ -70,7 +56,7 @@ WARN_UNUSED
|
|||
// effectively the very first char is considered to follow "whitespace" for the
|
||||
// purposes of pseudo-structural character detection
|
||||
uint64_t prev_iter_ends_pseudo_pred = 1ULL;
|
||||
size_t lenminus64 = len < 64 ? 0 : len - 64;
|
||||
size_t lenminus64 = len < 64 ? 0 : len - 64;
|
||||
size_t idx = 0;
|
||||
for (; idx < lenminus64; idx += 64) {
|
||||
#ifndef _MSC_VER
|
||||
|
@ -87,7 +73,7 @@ WARN_UNUSED
|
|||
_mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 1)),has_error);
|
||||
|
||||
|
||||
} else {
|
||||
// it is not ascii so we have to do heavy work
|
||||
previous = avxcheckUTF8Bytes(input_lo, &previous, &has_error);
|
||||
|
@ -223,7 +209,7 @@ WARN_UNUSED
|
|||
/// but otherwise the string needs to be properly padded or else we
|
||||
/// risk invalidating the UTF-8 checks.
|
||||
////////////
|
||||
if (idx < len) {
|
||||
if (idx < len) {
|
||||
uint8_t tmpbuf[64];
|
||||
memset(tmpbuf,0x20,64);
|
||||
memcpy(tmpbuf,buf+idx,len - idx);
|
||||
|
@ -238,7 +224,7 @@ WARN_UNUSED
|
|||
_mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 1)),has_error);
|
||||
|
||||
|
||||
} else {
|
||||
// it is not ascii so we have to do heavy work
|
||||
previous = avxcheckUTF8Bytes(input_lo, &previous, &has_error);
|
||||
|
@ -264,7 +250,6 @@ WARN_UNUSED
|
|||
// indicates whether the sense of any edge going to the next iteration
|
||||
// should be flipped
|
||||
//bool iter_ends_odd_backslash =
|
||||
// __builtin_uaddll_overflow(bs_bits, odd_starts, (unsigned long long *) &odd_carries);
|
||||
add_overflow(bs_bits, odd_starts, &odd_carries);
|
||||
|
||||
odd_carries |=
|
||||
|
|
|
@ -1,42 +1,6 @@
|
|||
#ifdef _MSC_VER
|
||||
/* Microsoft C/C++-compatible compiler */
|
||||
#include <intrin.h>
|
||||
|
||||
#ifndef __clang__ // if one compiles with MSVC *with* clang, then these
|
||||
// intrinsics are defined!!!
|
||||
static inline int __builtin_ctzll(unsigned long long input_num) {
|
||||
unsigned long index;
|
||||
#ifdef _WIN64 // highly recommended!!!
|
||||
_BitScanForward64(&index, input_num);
|
||||
#else // if we must support 32-bit Windows
|
||||
if ((uint32_t)input_num != 0) {
|
||||
_BitScanForward(&index, (uint32_t)input_num);
|
||||
}
|
||||
else {
|
||||
_BitScanForward(&index, (uint32_t)(input_num >> 32));
|
||||
index += 32;
|
||||
}
|
||||
#endif
|
||||
return index;
|
||||
}
|
||||
static inline int __builtin_popcountll(unsigned long long input_num) {
|
||||
#ifdef _WIN64 // highly recommended!!!
|
||||
return (int)__popcnt64(input_num);
|
||||
#else // if we must support 32-bit Windows
|
||||
return (int)(__popcnt((uint32_t)input_num) +
|
||||
__popcnt((uint32_t)(input_num >> 32)));
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif// not clang
|
||||
|
||||
#else
|
||||
// we have a normal compiler
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
||||
|
@ -50,7 +14,7 @@ static inline int __builtin_popcountll(unsigned long long input_num) {
|
|||
#endif
|
||||
|
||||
#define SET_BIT(i) \
|
||||
base_ptr[base + i] = (uint32_t)idx + __builtin_ctzll(s); \
|
||||
base_ptr[base + i] = (uint32_t)idx + trailingzeroes(s); \
|
||||
s = s & (s - 1);
|
||||
|
||||
#define SET_BIT1 SET_BIT(0)
|
||||
|
@ -89,7 +53,7 @@ bool flatten_indexes(size_t len, ParsedJson &pj) {
|
|||
counters[k] = 0;
|
||||
for (size_t idx = 0; idx < len; idx += 64) {
|
||||
uint64_t s = *(uint64_t *)(pj.structurals + idx / 8);
|
||||
uint32_t cnt = __builtin_popcountll(s);
|
||||
uint32_t cnt = hamming(s);
|
||||
total++;
|
||||
counters[cnt]++;
|
||||
}
|
||||
|
@ -104,7 +68,7 @@ bool flatten_indexes(size_t len, ParsedJson &pj) {
|
|||
uint64_t s = *(uint64_t *)(pj.structurals + idx / 8);
|
||||
#ifdef SUPPRESS_CHEESY_FLATTEN
|
||||
while (s) {
|
||||
base_ptr[base++] = (uint32_t)idx + __builtin_ctzll(s);
|
||||
base_ptr[base++] = (uint32_t)idx + trailingzeroes(s);
|
||||
s &= s - 1ULL;
|
||||
}
|
||||
#elif defined(NO_PDEP_PLEASE)
|
||||
|
@ -113,14 +77,14 @@ bool flatten_indexes(size_t len, ParsedJson &pj) {
|
|||
while (s) {
|
||||
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
|
||||
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
|
||||
base_ptr[base+i] = (uint32_t)idx + __builtin_ctzll(s);
|
||||
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
|
||||
s = s & (s - 1);
|
||||
}*/
|
||||
base += NO_PDEP_WIDTH;
|
||||
}
|
||||
base = next_base;
|
||||
#else
|
||||
uint32_t cnt = __builtin_popcountll(s);
|
||||
uint32_t cnt = hamming(s);
|
||||
uint32_t next_base = base + cnt;
|
||||
while (s) {
|
||||
// spoil the suspense by reducing dependency chains; actually a win even
|
||||
|
@ -128,18 +92,18 @@ bool flatten_indexes(size_t len, ParsedJson &pj) {
|
|||
uint64_t s3 = _pdep_u64(~0x7ULL, s); // s3 will have bottom 3 1-bits unset
|
||||
uint64_t s5 = _pdep_u64(~0x1fULL, s); // s5 will have bottom 5 1-bits unset
|
||||
|
||||
base_ptr[base + 0] = (uint32_t)idx + __builtin_ctzll(s);
|
||||
base_ptr[base + 0] = (uint32_t)idx + trailingzeroes(s);
|
||||
uint64_t s1 = s & (s - 1ULL);
|
||||
base_ptr[base + 1] = (uint32_t)idx + __builtin_ctzll(s1);
|
||||
base_ptr[base + 1] = (uint32_t)idx + trailingzeroes(s1);
|
||||
uint64_t s2 = s1 & (s1 - 1ULL);
|
||||
base_ptr[base + 2] =
|
||||
(uint32_t)idx + __builtin_ctzll(s2); // uint64_t s3 = s2 & (s2 - 1ULL);
|
||||
base_ptr[base + 3] = (uint32_t)idx + __builtin_ctzll(s3);
|
||||
(uint32_t)idx + trailingzeroes(s2); // uint64_t s3 = s2 & (s2 - 1ULL);
|
||||
base_ptr[base + 3] = (uint32_t)idx + trailingzeroes(s3);
|
||||
uint64_t s4 = s3 & (s3 - 1ULL);
|
||||
|
||||
base_ptr[base + 4] =
|
||||
(uint32_t)idx + __builtin_ctzll(s4); // uint64_t s5 = s4 & (s4 - 1ULL);
|
||||
base_ptr[base + 5] = (uint32_t)idx + __builtin_ctzll(s5);
|
||||
(uint32_t)idx + trailingzeroes(s4); // uint64_t s5 = s4 & (s4 - 1ULL);
|
||||
base_ptr[base + 5] = (uint32_t)idx + trailingzeroes(s5);
|
||||
uint64_t s6 = s5 & (s5 - 1ULL);
|
||||
s = s6;
|
||||
base += 6;
|
||||
|
|
|
@ -61,9 +61,6 @@ really_inline bool is_valid_null_atom(const uint8_t *loc) {
|
|||
* The JSON is parsed to a tape, see the accompanying tape.md file
|
||||
* for documentation.
|
||||
***********/
|
||||
// Implemented using Labels as Values which works in GCC and CLANG (and maybe
|
||||
// also in Intel's compiler), but won't work in MSVC. This would need to be
|
||||
// reimplemented differently if one wants to be standard compliant.
|
||||
WARN_UNUSED
|
||||
bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||
uint32_t i = 0; // index of the structural character (0,1,2,3...)
|
||||
|
@ -73,7 +70,7 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
uint32_t depth = 0; // could have an arbitrary starting depth
|
||||
pj.init();
|
||||
if(pj.bytecapacity < len) {
|
||||
printf("insufficient capacity\n");
|
||||
fprintf(stderr, "insufficient capacity\n");
|
||||
return false;
|
||||
}
|
||||
// this macro reads the next structural character, updating idx, i and c.
|
||||
|
@ -85,7 +82,11 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
|
||||
|
||||
////////////////////////////// START STATE /////////////////////////////
|
||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
||||
pj.ret_address[depth] = &&start_continue;
|
||||
#else
|
||||
pj.ret_address[depth] = 's';
|
||||
#endif
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, 'r'); // r for root, 0 is going to get overwritten
|
||||
// the root is used, if nothing else, to capture the size of the tape
|
||||
|
@ -98,7 +99,11 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
switch (c) {
|
||||
case '{':
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
||||
pj.ret_address[depth] = &&start_continue;
|
||||
#else
|
||||
pj.ret_address[depth] = 's';
|
||||
#endif
|
||||
depth++;
|
||||
if (depth > pj.depthcapacity) {
|
||||
goto fail;
|
||||
|
@ -107,7 +112,11 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
goto object_begin;
|
||||
case '[':
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
||||
pj.ret_address[depth] = &&start_continue;
|
||||
#else
|
||||
pj.ret_address[depth] = 's';
|
||||
#endif
|
||||
depth++;
|
||||
if (depth > pj.depthcapacity) {
|
||||
goto fail;
|
||||
|
@ -299,7 +308,11 @@ object_key_state:
|
|||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, c); // here the compilers knows what c is so this gets optimized
|
||||
// we have not yet encountered } so we need to come back for it
|
||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
||||
pj.ret_address[depth] = &&object_continue;
|
||||
#else
|
||||
pj.ret_address[depth] = 'o';
|
||||
#endif
|
||||
// we found an object inside an object, so we need to increment the depth
|
||||
depth++;
|
||||
if (depth > pj.depthcapacity) {
|
||||
|
@ -312,7 +325,11 @@ object_key_state:
|
|||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, c); // here the compilers knows what c is so this gets optimized
|
||||
// we have not yet encountered } so we need to come back for it
|
||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
||||
pj.ret_address[depth] = &&object_continue;
|
||||
#else
|
||||
pj.ret_address[depth] = 'o';
|
||||
#endif
|
||||
// we found an array inside an object, so we need to increment the depth
|
||||
depth++;
|
||||
if (depth > pj.depthcapacity) {
|
||||
|
@ -352,7 +369,15 @@ scope_end:
|
|||
pj.annotate_previousloc(pj.containing_scope_offset[depth],
|
||||
pj.get_current_loc());
|
||||
// goto saved_state
|
||||
#ifdef SIMDJSON_USE_COMPUTED_GOT
|
||||
goto *pj.ret_address[depth];
|
||||
#else
|
||||
if(pj.ret_address[depth] == 'a') {
|
||||
goto array_continue;
|
||||
} else if (pj.ret_address[depth] == 'o') {
|
||||
goto object_continue;
|
||||
} else goto start_continue;
|
||||
#endif
|
||||
|
||||
////////////////////////////// ARRAY STATES /////////////////////////////
|
||||
array_begin:
|
||||
|
@ -415,7 +440,11 @@ main_array_switch:
|
|||
// we have not yet encountered ] so we need to come back for it
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, c); // here the compilers knows what c is so this gets optimized
|
||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
||||
pj.ret_address[depth] = &&array_continue;
|
||||
#else
|
||||
pj.ret_address[depth] = 'a';
|
||||
#endif
|
||||
// we found an object inside an array, so we need to increment the depth
|
||||
depth++;
|
||||
if (depth > pj.depthcapacity) {
|
||||
|
@ -428,7 +457,11 @@ main_array_switch:
|
|||
// we have not yet encountered ] so we need to come back for it
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, c); // here the compilers knows what c is so this gets optimized
|
||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
||||
pj.ret_address[depth] = &&array_continue;
|
||||
#else
|
||||
pj.ret_address[depth] = 'a';
|
||||
#endif
|
||||
// we found an array inside an array, so we need to increment the depth
|
||||
depth++;
|
||||
if (depth > pj.depthcapacity) {
|
||||
|
@ -457,11 +490,11 @@ array_continue:
|
|||
succeed:
|
||||
depth --;
|
||||
if(depth != 0) {
|
||||
printf("internal bug\n");
|
||||
fprintf(stderr, "internal bug\n");
|
||||
abort();
|
||||
}
|
||||
if(pj.containing_scope_offset[depth] != 0) {
|
||||
printf("internal bug\n");
|
||||
fprintf(stderr, "internal bug\n");
|
||||
abort();
|
||||
}
|
||||
pj.annotate_previousloc(pj.containing_scope_offset[depth],
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
if(MSVC)
|
||||
target_include_directories(${SIMDJSON_LIB_NAME}
|
||||
PUBLIC ${PROJECT_SOURCE_DIR}/windows
|
||||
)
|
||||
endif()
|
||||
|
||||
add_cpp_test(jsoncheck)
|
||||
add_test(jsoncheck jsoncheck)
|
||||
|
|
|
@ -108,7 +108,7 @@ int main(int argc, char *argv[]) {
|
|||
printf("fastjson : %s \n", fastjson_correct ? "correct":"invalid");
|
||||
printf("gason : %s \n", gason_correct ? "correct":"invalid");
|
||||
printf("ultrajson : %s \n", ultrajson_correct ? "correct":"invalid");
|
||||
free((void*)p.data());
|
||||
aligned_free((void*)p.data());
|
||||
free(buffer);
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
#include <unistd.h>
|
||||
#else
|
||||
// Microsoft can't be bothered to provide standard utils.
|
||||
#include <simdjson/dirent_portable.h>
|
||||
#include <dirent_portable.h>
|
||||
#endif
|
||||
#include <inttypes.h>
|
||||
#include <stdbool.h>
|
||||
|
@ -39,7 +39,7 @@ bool validate(const char *dirname) {
|
|||
struct dirent **entry_list;
|
||||
int c = scandir(dirname, &entry_list, 0, alphasort);
|
||||
if (c < 0) {
|
||||
printf("error accessing %s \n", dirname);
|
||||
fprintf(stderr, "error accessing %s \n", dirname);
|
||||
return false;
|
||||
}
|
||||
if (c == 0) {
|
||||
|
@ -67,19 +67,19 @@ bool validate(const char *dirname) {
|
|||
std::string_view p;
|
||||
try {
|
||||
p = get_corpus(fullpath);
|
||||
} catch (const std::exception& e) {
|
||||
std::cout << "Could not load the file " << fullpath << std::endl;
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << "Could not load the file " << fullpath << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
ParsedJson pj;
|
||||
bool allocok = pj.allocateCapacity(p.size(), 1024);
|
||||
if(!allocok) {
|
||||
std::cerr<< "can't allocate memory"<<std::endl;
|
||||
std::cerr << "can't allocate memory"<<std::endl;
|
||||
return false;
|
||||
}
|
||||
++howmany;
|
||||
bool isok = json_parse(p, pj);
|
||||
free((void*)p.data());
|
||||
aligned_free((void*)p.data());
|
||||
printf("%s\n", isok ? "ok" : "invalid");
|
||||
if(contains("EXCLUDE",name)) {
|
||||
// skipping
|
||||
|
@ -96,7 +96,7 @@ bool validate(const char *dirname) {
|
|||
printf("warning: file %s should fail but it passes.\n", name);
|
||||
everythingfine = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
free(fullpath);
|
||||
}
|
||||
}
|
||||
|
@ -104,16 +104,15 @@ bool validate(const char *dirname) {
|
|||
if(everythingfine) {
|
||||
printf("All ok!\n");
|
||||
} else {
|
||||
printf("There were problems! Consider reviewing the following files:\n");
|
||||
fprintf(stderr, "There were problems! Consider reviewing the following files:\n");
|
||||
for(int i = 0; i < c; i++) {
|
||||
if(!isfileasexpected[i]) printf("%s \n", entry_list[i]->d_name);
|
||||
if(!isfileasexpected[i]) fprintf(stderr, "%s \n", entry_list[i]->d_name);
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < c; ++i)
|
||||
free(entry_list[i]);
|
||||
free(entry_list);
|
||||
delete[] isfileasexpected;
|
||||
|
||||
return everythingfine;
|
||||
}
|
||||
|
||||
|
|
|
@ -57,8 +57,8 @@ inline void foundInteger(int64_t result, const uint8_t *buf) {
|
|||
char *endptr;
|
||||
long long expected = strtoll((const char *)buf, &endptr, 10);
|
||||
if ((endptr == (const char *)buf) || (expected != result)) {
|
||||
printf("Error: parsed %" PRId64 " out of %.32s, ", result, buf);
|
||||
printf(" while parsing %s \n", fullpath);
|
||||
fprintf(stderr, "Error: parsed %" PRId64 " out of %.32s, ", result, buf);
|
||||
fprintf(stderr, " while parsing %s \n", fullpath);
|
||||
parse_error |= PARSE_ERROR;
|
||||
}
|
||||
}
|
||||
|
@ -68,23 +68,23 @@ inline void foundFloat(double result, const uint8_t *buf) {
|
|||
float_count++;
|
||||
double expected = strtod((const char *)buf, &endptr);
|
||||
if (endptr == (const char *)buf) {
|
||||
printf("parsed %f from %.32s whereas strtod refuses to parse a float, ",
|
||||
fprintf(stderr, "parsed %f from %.32s whereas strtod refuses to parse a float, ",
|
||||
result, buf);
|
||||
printf(" while parsing %s \n", fullpath);
|
||||
fprintf(stderr, " while parsing %s \n", fullpath);
|
||||
parse_error |= PARSE_ERROR;
|
||||
}
|
||||
if( fpclassify(expected) != fpclassify(result) ) {
|
||||
printf("floats not in the same category expected: %f observed: %f \n", expected, result);
|
||||
printf("%.128s\n", buf);
|
||||
fprintf(stderr, "floats not in the same category expected: %f observed: %f \n", expected, result);
|
||||
fprintf(stderr, "%.128s\n", buf);
|
||||
parse_error |= PARSE_ERROR;
|
||||
}
|
||||
// we want to get some reasonable relative accuracy
|
||||
else if (fabs(expected - result) / fmin(fabs(expected), fabs(result)) >
|
||||
1e-14) {
|
||||
printf("parsed %.128e from \n", result);
|
||||
printf(" %.200s whereas strtod gives\n", buf);
|
||||
printf(" %.128e,", expected);
|
||||
printf(" while parsing %s \n", fullpath);
|
||||
fprintf(stderr, "parsed %.128e from \n", result);
|
||||
fprintf(stderr, " %.200s whereas strtod gives\n", buf);
|
||||
fprintf(stderr, " %.128e,", expected);
|
||||
fprintf(stderr, " while parsing %s \n", fullpath);
|
||||
parse_error |= PARSE_ERROR;
|
||||
}
|
||||
}
|
||||
|
@ -154,7 +154,7 @@ bool validate(const char *dirname) {
|
|||
float_count, invalid_count,
|
||||
int_count + float_count + invalid_count);
|
||||
}
|
||||
free((void*)p.data());
|
||||
aligned_free((void*)p.data());
|
||||
free(fullpath);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -341,7 +341,7 @@ bool validate(const char *dirname) {
|
|||
bigbuffer = (char *) malloc(p.size());
|
||||
if(bigbuffer == NULL) {
|
||||
std::cerr << "can't allocate memory" << std::endl;
|
||||
free((void*)p.data());
|
||||
aligned_free((void*)p.data());
|
||||
return false;
|
||||
}
|
||||
bad_string = 0;
|
||||
|
@ -350,7 +350,7 @@ bool validate(const char *dirname) {
|
|||
empty_string = 0;
|
||||
bool isok = json_parse(p, pj);
|
||||
free(bigbuffer);
|
||||
free((void*)p.data());
|
||||
aligned_free((void*)p.data());
|
||||
if (good_string > 0) {
|
||||
printf("File %40s %s --- bad strings: %10zu \tgood strings: %10zu\t "
|
||||
"empty strings: %10zu "
|
||||
|
@ -368,7 +368,7 @@ bool validate(const char *dirname) {
|
|||
}
|
||||
printf("%zu strings checked.\n", total_strings);
|
||||
if (probable_bug) {
|
||||
printf("STRING PARSING FAILS?\n");
|
||||
fprintf(stderr, "STRING PARSING FAILS?\n");
|
||||
} else {
|
||||
printf("All ok.\n");
|
||||
}
|
||||
|
|
|
@ -18,9 +18,10 @@ endif()
|
|||
if(NOT MSVC)
|
||||
set (OPT_FLAGS "${OPT_FLAGS} -mavx2 -mbmi2 -mpclmul")
|
||||
else()
|
||||
set (OPT_FLAGS "${OPT_FLAGS} /arch:AVX2")
|
||||
set (OPT_FLAGS "${OPT_FLAGS} /arch:AVX2 /std:c++latest")
|
||||
endif()
|
||||
|
||||
|
||||
if(NOT MSVC)
|
||||
set(CXXSTD_FLAGS "-std=c++17 -fPIC")
|
||||
endif()
|
||||
|
|
|
@ -87,7 +87,7 @@ int main(int argc, char *argv[]) {
|
|||
return EXIT_FAILURE;
|
||||
}
|
||||
bool is_ok = json_parse(p, pj); // do the parsing, return false on error
|
||||
free((void *)p.data());
|
||||
aligned_free((void *)p.data());
|
||||
if (!is_ok) {
|
||||
std::cerr << " Parsing failed. " << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
|
|
|
@ -18,5 +18,5 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
jsonminify(p, (char *)p.data());
|
||||
printf("%s",p.data());
|
||||
free((void*)p.data());
|
||||
aligned_free((void*)p.data());
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// only use under Visual Studio
|
||||
// only use under Visual Studio and only for jsoncheck.cpp
|
||||
/*
|
||||
* This file was originally: "dirent for Visual C++" from: http://softagalleria.net/dirent.php (version 1.20.1)
|
||||
* However I've modified it to <dirent_portable.h> by adding:
|
||||
|
@ -1041,5 +1041,3 @@ inline static int alphasort (const struct dirent **e1,const struct dirent **e2)
|
|||
#endif /*DIRENT_H*/
|
||||
|
||||
#endif //#if (!defined(_WIN32) && !defined(_WIN64))
|
||||
|
||||
|
Loading…
Reference in New Issue