Lots of changes.

This commit is contained in:
Daniel Lemire 2018-12-30 18:43:46 -05:00
parent 4a8e229566
commit 992116b01f
39 changed files with 419 additions and 339 deletions

View File

@ -1,7 +1,6 @@
cmake_minimum_required(VERSION 2.8)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_MACOSX_RPATH OFF)
if (NOT CMAKE_BUILD_TYPE)
message(STATUS "No build type selected, default to Release")
@ -11,8 +10,11 @@ endif()
project(simdjson)
set(SIMDJSON_LIB_NAME simdjson)
if(NOT MSVC)
option(SIMDJSON_BUILD_STATIC "Build a static library" OFF) # turning it on disables the production of a dynamic library
else()
option(SIMDJSON_BUILD_STATIC "Build a static library" ON) # turning it on disables the production of a dynamic library
endif()
option(SIMDJSON_BUILD_LTO "Build library with Link Time Optimization" OFF)
option(SIMDJSON_SANITIZE "Sanitize addresses" OFF)

View File

@ -80,7 +80,7 @@ of memory allocation with each new JSON document:
const char * filename = ... //
std::string_view p = get_corpus(filename);
ParsedJson pj = build_parsed_json(p); // do the parsing
// you no longer need p at this point, can do free((void*)p.data())
// you no longer need p at this point, can do aligned_free((void*)p.data())
if( ! pj.isValid() ) {
// something went wrong
}

View File

@ -106,7 +106,7 @@ double diff(timespec start, timespec end) {
clock_gettime(CLOCK_REALTIME, &time1); \
RDTSC_START(cycles_start); \
if (test != expected) { \
printf("not expected (%d , %d )", (int)test, (int)expected); \
fprintf(stderr, "not expected (%d , %d )", (int)test, (int)expected); \
break; \
} \
RDTSC_STOP(cycles_final); \

View File

@ -279,5 +279,5 @@ int main(int argc, char *argv[]) {
!justdata);
BEST_TIME("sasjon ", sasjon_computestats(p).size(), size, , repeat, volume,
!justdata);
free((void*)p.data());
aligned_free((void*)p.data());
}

View File

@ -137,7 +137,7 @@ int main(int argc, char *argv[]) {
ParsedJson pj;
bool isallocok = pj.allocateCapacity(p.size(), 1024);
if(!isallocok) {
printf("failed to allocate memory\n");
fprintf(stderr, "failed to allocate memory\n");
return EXIT_FAILURE;
}
BEST_TIME("simdjson orig", json_parse((const uint8_t*)buffer, p.size(), pj), true, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
@ -145,12 +145,12 @@ int main(int argc, char *argv[]) {
ParsedJson pj2;
bool isallocok2 = pj2.allocateCapacity(p.size(), 1024);
if(!isallocok2) {
printf("failed to allocate memory\n");
fprintf(stderr, "failed to allocate memory\n");
return EXIT_FAILURE;
}
BEST_TIME("simdjson despaced", json_parse((const uint8_t*)buffer, minisize, pj2), true, memcpy(buffer, minibuffer, p.size()), repeat, volume, !justdata);
free((void*)p.data());
aligned_free((void*)p.data());
free(buffer);
free(ast_buffer);
free(minibuffer);

View File

@ -285,9 +285,9 @@ int main(int argc, char *argv[]) {
if (dump) {
isok = isok && pj.dump_raw_tape(std::cout);
}
free((void *)p.data());
aligned_free((void *)p.data());
if (!isok) {
printf(" Parsing failed. \n ");
fprintf(stderr, " Parsing failed. \n ");
return EXIT_FAILURE;
}
return EXIT_SUCCESS;

View File

@ -300,5 +300,5 @@ int main(int argc, char *argv[]) {
!justdata);
BEST_TIME("sasjon ", sasjon_computestats(p).valid, true, , repeat, volume,
!justdata);
free((void*)p.data());
aligned_free((void*)p.data());
}

View File

@ -158,7 +158,7 @@ int main(int argc, char *argv[]) {
if(!justdata) BEST_TIME("memcpy ",
(memcpy(buffer, p.data(), p.size()) == buffer), true, , repeat,
volume, !justdata);
free((void *)p.data());
aligned_free((void *)p.data());
free(ast_buffer);
free(buffer);
}

View File

@ -113,9 +113,9 @@ stat_t simdjson_computestats(const std::string_view &p) {
}
int main(int argc, char *argv[]) {
int c;
#ifndef _MSC_VER
while ((c = getopt(argc, argv, "")) != -1)
int c;
while ((c = getopt(argc, argv, "")) != -1)
switch (c) {
default:

View File

@ -1,21 +1,22 @@
#pragma once
#ifndef SIMDJSON_COMMON_DEFS_H
#define SIMDJSON_COMMON_DEFS_H
#include "simdjson/portability.h"
#include <cassert>
// the input buf should be readable up to buf + SIMDJSON_PADDING
#define SIMDJSON_PADDING sizeof(__m256i)
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#else
#include <x86intrin.h>
#ifndef _MSC_VER
// Implemented using Labels as Values which works in GCC and CLANG (and maybe
// also in Intel's compiler), but won't work in MSVC.
#define SIMDJSON_USE_COMPUTED_GOTO
#endif
// Align to N-byte boundary
#define ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1))
#define ROUNDDOWN_N(a, n) ((a) & ~((n)-1))
@ -53,4 +54,6 @@
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
#endif
#endif // MSC_VER
#endif // COMMON_DEFS_H

View File

@ -1,4 +1,5 @@
#pragma once
#ifndef SIMDJSON_JSONCHARUTILS_H
#define SIMDJSON_JSONCHARUTILS_H
#include "simdjson/common_defs.h"
#include "simdjson/parsedjson.h"
@ -96,7 +97,7 @@ uint32_t hex_to_u32_nocheck(const uint8_t *src) {
// and clz and table lookups, but JSON documents
// have few escaped code points, and the following
// function looks cheap.
//
//
// Note: we assume that surrogates are treated separately
//
inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
@ -126,3 +127,4 @@ inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
return 0; // bad r
}
#endif

View File

@ -1,4 +1,5 @@
#pragma once
#ifndef SIMDJSON_JSONFORMATUTILS_H
#define SIMDJSON_JSONFORMATUTILS_H
#include <stdio.h>
#include <iostream>
@ -79,7 +80,7 @@ static inline void print_with_escapes(const unsigned char *src, std::ostream &os
default:
if (*src <= 0x1F) {
std::ios::fmtflags f(os.flags());
os << std::hex << std::setw(4) << std::setfill('0') << (int) *src;
os << std::hex << std::setw(4) << std::setfill('0') << (int) *src;
os.flags(f);
} else
os << *src;
@ -89,5 +90,7 @@ static inline void print_with_escapes(const unsigned char *src, std::ostream &os
}
static inline void print_with_escapes(const char *src, std::ostream &os) {
print_with_escapes((const unsigned char *)src, os);
}
print_with_escapes((const unsigned char *)src, os);
}
#endif

View File

@ -1,12 +1,11 @@
#ifndef JSONIOUTIL_H
#define JSONIOUTIL_H
#ifndef SIMDJSON_JSONIOUTIL_H
#define SIMDJSON_JSONIOUTIL_H
#include <exception>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <string_view>
#include "simdjson/common_defs.h"
@ -32,7 +31,7 @@ char * allocate_padded_buffer(size_t length);
// try {
// p = get_corpus(filename);
// } catch (const std::exception& e) {
// free((void*)p.data());
// free((void*)p.data()); // use aligned_free instead if you plan to use Visual Studio
// std::cout << "Could not load the file " << filename << std::endl;
// }
std::string_view get_corpus(std::string filename);

View File

@ -1,10 +1,11 @@
#pragma once
#ifndef SIMDJSON_JSONMINIFIER_H
#define SIMDJSON_JSONMINIFIER_H
#include <cstddef>
#include <cstdint>
// Take input from buf and remove useless whitespace, write it to out; buf and
// out can be the same pointer. Result is null terminated,
// out can be the same pointer. Result is null terminated,
// return the string length (minus the null termination).
size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out);
@ -16,4 +17,6 @@ static inline size_t jsonminify(const char *buf, size_t len, char *out) {
static inline size_t jsonminify(const std::string_view & p, char *out) {
return jsonminify(p.data(), p.size(), out);
}
}
#endif

View File

@ -1,4 +1,5 @@
#pragma once
#ifndef SIMDJSON_JSONPARSER_H
#define SIMDJSON_JSONPARSER_H
#include "simdjson/common_defs.h"
#include "simdjson/jsonioutil.h"
@ -11,7 +12,7 @@
// Parse a document found in buf, need to preallocate ParsedJson.
// Return false in case of a failure. You can also check validity
// Return false in case of a failure. You can also check validity
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
//
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
@ -22,7 +23,7 @@ WARN_UNUSED
bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded = true);
// Parse a document found in buf, need to preallocate ParsedJson.
// Return false in case of a failure. You can also check validity
// Return false in case of a failure. You can also check validity
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
//
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
@ -30,12 +31,12 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
// all bytes at and after buf + len are ignored (can be garbage).
WARN_UNUSED
static inline bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
inline bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
  // Same bytes, viewed as unsigned: delegate to the uint8_t overload.
  const uint8_t *bytes = (const uint8_t *) buf;
  return json_parse(bytes, len, pj, reallocifneeded);
}
// Parse a document found in buf, need to preallocate ParsedJson.
// Return false in case of a failure. You can also check validity
// Return false in case of a failure. You can also check validity
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
//
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
@ -43,12 +44,12 @@ static inline bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool
// the input s should be readable up to s.data() + s.size() + SIMDJSON_PADDING if reallocifneeded is false,
// all bytes at and after s.data()+s.size() are ignored (can be garbage).
WARN_UNUSED
static inline bool json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded = true) {
inline bool json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded = true) {
return json_parse(s.data(), s.size(), pj, reallocifneeded);
}
// Build a ParsedJson object. You can check validity
// Build a ParsedJson object. You can check validity
// by calling pj.isValid(). This does the memory allocation needed for ParsedJson.
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
// (a copy of the input string is made).
@ -59,24 +60,26 @@ WARN_UNUSED
ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneeded = true);
WARN_UNUSED
// Build a ParsedJson object. You can check validity
// Build a ParsedJson object. You can check validity
// by calling pj.isValid(). This does the memory allocation needed for ParsedJson.
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
// (a copy of the input string is made).
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
// all bytes at and after buf + len are ignored (can be garbage).
static inline ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded = true) {
inline ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded = true) {
  // Same bytes, viewed as unsigned: delegate to the uint8_t overload.
  const uint8_t *bytes = (const uint8_t *) buf;
  return build_parsed_json(bytes, len, reallocifneeded);
}
// convenience function
WARN_UNUSED
// Build a ParsedJson object. You can check validity
// Build a ParsedJson object. You can check validity
// by calling pj.isValid(). This does the memory allocation needed for ParsedJson.
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
// (a copy of the input string is made).
// The input s should be readable up to s.data() + s.size() + SIMDJSON_PADDING if reallocifneeded is false,
// all bytes at and after s.data()+s.size() are ignored (can be garbage).
static inline ParsedJson build_parsed_json(const std::string_view &s, bool reallocifneeded = true) {
inline ParsedJson build_parsed_json(const std::string_view &s, bool reallocifneeded = true) {
  // Unpack the view and delegate to the (pointer, length) overload.
  const char *text = s.data();
  const size_t text_length = s.size();
  return build_parsed_json(text, text_length, reallocifneeded);
}
}
#endif

View File

@ -1,31 +1,7 @@
#pragma once
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
return _addcarry_u64(0, value1, value2, reinterpret_cast<unsigned __int64 *>(result));
}
# pragma intrinsic(_umul128)
static inline bool mul_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
uint64_t high;
*result = _umul128(value1, value2, &high);
return high;
}
#else
#include <x86intrin.h>
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
return __builtin_uaddl_overflow(value1, value2, result);
}
static inline bool mul_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
return __builtin_umulll_overflow(value1, value2, result);
}
#endif // _MSC_VER
#ifndef SIMDJSON_NUMBERPARSING_H
#define SIMDJSON_NUMBERPARSING_H
#include "simdjson/portability.h"
#include "simdjson/common_defs.h"
#include "simdjson/jsoncharutils.h"
#include "simdjson/parsedjson.h"
@ -187,7 +163,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
// Note: a redesign could avoid this function entirely.
//
static never_inline bool
parse_float(const uint8_t *const buf,
parse_float(const uint8_t *const buf,
ParsedJson &pj, const uint32_t offset,
bool found_minus) {
const char *p = (const char *)(buf + offset);
@ -352,21 +328,14 @@ static never_inline bool parse_large_integer(const uint8_t *const buf,
return is_structural_or_whitespace(*p);
}
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#endif
#ifndef unlikely
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
// parse the number at buf + offset
// define JSON_TEST_NUMBERS for unit testing
static really_inline bool parse_number(const uint8_t *const buf,
ParsedJson &pj,
const uint32_t offset,
static really_inline bool parse_number(const uint8_t *const buf,
ParsedJson &pj,
const uint32_t offset,
bool found_minus) {
#ifdef SIMDJSON_SKIPNUMBERPARSING // for performance analysis, it is sometimes useful to skip parsing
pj.write_tape_s64(0); // always write zero
@ -487,7 +456,7 @@ static really_inline bool parse_number(const uint8_t *const buf,
if (unlikely(digitcount >= 19)) { // this is uncommon!!!
// this is almost never going to get called!!!
// we start anew, going slowly!!!
return parse_float(buf, pj, offset,
return parse_float(buf, pj, offset,
found_minus);
}
///////////
@ -516,7 +485,7 @@ static really_inline bool parse_number(const uint8_t *const buf,
}
} else {
if (unlikely(digitcount >= 18)) { // this is uncommon!!!
return parse_large_integer(buf, pj, offset,
return parse_large_integer(buf, pj, offset,
found_minus);
}
pj.write_tape_s64(i);
@ -527,3 +496,5 @@ static really_inline bool parse_number(const uint8_t *const buf,
return is_structural_or_whitespace(*p);
#endif // SIMDJSON_SKIPNUMBERPARSING
}
#endif

View File

@ -1,37 +1,19 @@
#pragma once
#include <math.h>
#ifndef SIMDJSON_PARSEDJSON_H
#define SIMDJSON_PARSEDJSON_H
#include <math.h>
#include <inttypes.h>
#include <string.h>
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#else
#include <x86intrin.h>
#endif
#include <iomanip>
#include <iostream>
#include "simdjson/portability.h"
#include "simdjson/jsonformatutils.h"
#define JSONVALUEMASK 0xFFFFFFFFFFFFFF
#define DEFAULTMAXDEPTH 1024// a JSON document with a depth exceeding 1024 is probably de facto invalid
// portable version of posix_memalign
static inline void *aligned_malloc(size_t alignment, size_t size) {
void *p;
#ifdef _MSC_VER
p = _aligned_malloc(size, alignment);
#elif defined(__MINGW32__) || defined(__MINGW64__)
p = __mingw_aligned_malloc(size, alignment);
#else
// somehow, if this is used before including "x86intrin.h", it creates an
// implicit defined warning.
if (posix_memalign(&p, alignment, size) != 0) return NULL;
#endif
return p;
}
/************
* The JSON is parsed to a tape, see the accompanying tape.md file
@ -76,17 +58,20 @@ public:
string_buf = new uint8_t[localstringcapacity];
tape = new uint64_t[localtapecapacity];
containing_scope_offset = new uint32_t[maxdepth];
#ifdef SIMDJSON_USE_COMPUTED_GOTO
ret_address = new void *[maxdepth];
#else
ret_address = new char[maxdepth];
#endif
if ((string_buf == NULL) || (tape == NULL) ||
(containing_scope_offset == NULL) || (ret_address == NULL) || (structural_indexes == NULL)) {
std::cerr << "Could not allocate memory" << std::endl;
delete[] ret_address;
delete[] containing_scope_offset;
delete[] tape;
delete[] string_buf;
delete[] structural_indexes;
free(structurals);
if(ret_address != NULL) delete[] ret_address;
if(containing_scope_offset != NULL) delete[] containing_scope_offset;
if(tape != NULL) delete[] tape;
if(string_buf != NULL) delete[] string_buf;
if(structural_indexes != NULL) delete[] structural_indexes;
aligned_free(structurals);
return false;
}
@ -108,12 +93,12 @@ public:
depthcapacity = 0;
tapecapacity = 0;
stringcapacity = 0;
delete[] ret_address;
delete[] containing_scope_offset;
delete[] tape;
delete[] string_buf;
delete[] structural_indexes;
free(structurals);
if(ret_address != NULL) delete[] ret_address;
if(containing_scope_offset != NULL) delete[] containing_scope_offset;
if(tape != NULL) delete[] tape;
if(string_buf != NULL) delete[] string_buf;
if(structural_indexes != NULL) delete[] structural_indexes;
aligned_free(structurals);
isvalid = false;
}
@ -139,11 +124,11 @@ public:
if (type == 'r') {
howmany = tape_val & JSONVALUEMASK;
} else {
printf("Error: no starting root node?");
fprintf(stderr, "Error: no starting root node?");
return false;
}
if (howmany > tapecapacity) {
printf(
fprintf(stderr,
"We may be exceeding the tape capacity. Is this a valid document?\n");
return false;
}
@ -207,22 +192,22 @@ public:
os << '}';
break;
case '[': // we start an array
os << '[';
os << '[';
depth++;
inobject[depth] = false;
inobjectidx[depth] = 0;
break;
case ']': // we end an array
depth--;
os << ']';
os << ']';
break;
case 'r': // we start and end with the root node
printf("should we be hitting the root node?\n");
fprintf(stderr, "should we be hitting the root node?\n");
delete[] inobject;
delete[] inobjectidx;
return false;
default:
printf("bug %c\n", type);
fprintf(stderr, "bug %c\n", type);
delete[] inobject;
delete[] inobjectidx;
return false;
@ -239,25 +224,25 @@ public:
size_t tapeidx = 0;
uint64_t tape_val = tape[tapeidx];
uint8_t type = (tape_val >> 56);
os << tapeidx << " : " << type;
os << tapeidx << " : " << type;
tapeidx++;
size_t howmany = 0;
if (type == 'r') {
howmany = tape_val & JSONVALUEMASK;
} else {
printf("Error: no starting root node?");
fprintf(stderr, "Error: no starting root node?");
return false;
}
os << "\t// pointing to " << howmany <<" (right after last node)\n";
uint64_t payload;
for (; tapeidx < howmany; tapeidx++) {
os << tapeidx << " : ";
os << tapeidx << " : ";
tape_val = tape[tapeidx];
payload = tape_val & JSONVALUEMASK;
type = (tape_val >> 56);
switch (type) {
case '"': // we have a string
os << "string \"";
os << "string \"";
print_with_escapes((const unsigned char *)(string_buf + payload));
os << '"';
os << '\n';
@ -377,9 +362,9 @@ public:
delete[] depthindex;
}
iterator(const iterator &o):
pj(o.pj), depth(o.depth), location(o.location),
tape_length(o.tape_length), current_type(o.current_type),
iterator(const iterator &o):
pj(o.pj), depth(o.depth), location(o.location),
tape_length(o.tape_length), current_type(o.current_type),
current_val(o.current_val), depthindex(NULL) {
depthindex = new scopeindex_t[pj.depthcapacity];
if(depthindex != NULL) {
@ -389,10 +374,10 @@ public:
}
}
iterator(iterator &&o):
pj(o.pj), depth(o.depth), location(o.location),
tape_length(o.tape_length), current_type(o.current_type),
current_val(o.current_val), depthindex(o.depthindex) {
// Move constructor: copies the cursor state of o and takes over its
// depthindex array instead of allocating a new one.
//
// pj is a reference member (ParsedJson &), so it must be bound to the very
// same object o refers to; binding it to std::move(o.pj) would try to bind a
// non-const lvalue reference to an rvalue, which standard C++ rejects.
// The remaining members are plain scalars/pointers, for which a move is just
// a copy, so they are copied directly.
iterator(iterator &&o):
  pj(o.pj), depth(o.depth), location(o.location),
  tape_length(o.tape_length), current_type(o.current_type),
  current_val(o.current_val), depthindex(o.depthindex) {
  o.depthindex = NULL; // we take ownership: o must no longer refer to the array
}
@ -400,7 +385,7 @@ public:
// True as long as the current location is still before tape_length.
bool isOk() const {
  return tape_length > location;
}
// Useful for debugging purposes: reports the current location on the tape.
size_t get_tape_location() const {
  const size_t current_location = location;
  return current_location;
}
depth++;
depthindex[depth].start_of_scope = location;
depthindex[depth].scope_type = current_type;
}
}
location = location + 1;
current_val = pj.tape[location];
current_type = (current_val >> 56);
@ -471,7 +456,7 @@ public:
double answer;
memcpy(&answer, & pj.tape[location + 1], sizeof(answer));
return answer;
}
}
bool is_object_or_array() const {
return is_object_or_array(get_type());
@ -488,15 +473,15 @@ public:
// True when the current node's type tag is the string marker ('"').
bool is_string() const {
  const auto node_type = get_type();
  return node_type == '"';
}
// True when the current node's type tag is 'l'.
bool is_integer() const {
  const auto node_type = get_type();
  return node_type == 'l';
}
// True when the current node's type tag is 'd'.
bool is_double() const {
  const auto node_type = get_type();
  return node_type == 'd';
}
// True when the given type tag opens a scope: '[' or '{'.
static bool is_object_or_array(uint8_t type) {
  switch (type) {
  case '[':
  case '{':
    return true;
  default:
    return false;
  }
}
@ -524,7 +509,7 @@ public:
// Returns a pointer into pj.string_buf at the offset stored in the low bits
// (JSONVALUEMASK) of the current tape value.
really_inline const char * get_string() const {
  const uint64_t string_offset = current_val & JSONVALUEMASK;
  return (const char *)(pj.string_buf + string_offset);
}
// throughout return true if we can do the navigation, false
// otherwise
@ -533,10 +518,10 @@ public:
// Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, { and [.
// At the object ({) or at the array ([), you can issue a "down" to visit their content.
// valid if we're not at the end of a scope (returns true).
really_inline bool next() {
really_inline bool next() {
if ((current_type == '[') || (current_type == '{')){
// we need to jump
size_t npos = ( current_val & JSONVALUEMASK);
size_t npos = ( current_val & JSONVALUEMASK);
if(npos >= tape_length) {
return false; // shoud never happen unless at the root
}
@ -563,14 +548,14 @@ public:
return true;
}
}
// Within a given scope (series of nodes at the same depth within either an
// array or an object), we move backward.
// Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true when starting at the end
// of the scope.
// At the object ({) or at the array ([), you can issue a "down" to visit their content.
// At the object ({) or at the array ([), you can issue a "down" to visit their content.
really_inline bool prev() {
if(location - 1 < depthindex[depth].start_of_scope) return false;
location -= 1;
@ -578,9 +563,9 @@ public:
current_type = (current_val >> 56);
if ((current_type == ']') || (current_type == '}')){
// we need to jump
size_t new_location = ( current_val & JSONVALUEMASK);
size_t new_location = ( current_val & JSONVALUEMASK);
if(new_location < depthindex[depth].start_of_scope) {
return false; // shoud never happen
return false; // shoud never happen
}
location = new_location;
current_val = pj.tape[location];
@ -589,7 +574,7 @@ public:
return true;
}
// Moves back to either the containing array or object (type { or [) from
// Moves back to either the containing array or object (type { or [) from
// within a contained scope.
// Valid unless we are at the first level of the document
//
@ -605,8 +590,8 @@ public:
current_type = (current_val >> 56);
return true;
}
// Valid if we're at a [ or { and it starts a non-empty scope; moves us to start of
// that deeper scope if it is not empty.
// Thus, given [true, null, {"a":1}, [1,2]], if we are at the { node, we would move to the
@ -614,7 +599,7 @@ public:
really_inline bool down() {
if(location + 1 >= tape_length) return false;
if ((current_type == '[') || (current_type == '{')) {
size_t npos = (current_val & JSONVALUEMASK);
size_t npos = (current_val & JSONVALUEMASK);
if(npos == location + 2) {
return false; // we have an empty scope
}
@ -625,13 +610,13 @@ public:
current_val = pj.tape[location];
current_type = (current_val >> 56);
return true;
}
}
return false;
}
// move us to the start of our current scope,
// a scope is a series of nodes at the same level
void to_start_scope() {
void to_start_scope() {
location = depthindex[depth].start_of_scope;
current_val = pj.tape[location];
current_type = (current_val >> 56);
@ -656,7 +641,7 @@ public:
case 'l': // we have a long int
os << get_integer();
break;
case 'd':
case 'd':
os << get_double();
break;
case 'n': // we have a null
@ -689,7 +674,7 @@ private:
ParsedJson &pj;
size_t depth;
size_t location; // our current location on a tape
size_t tape_length;
size_t tape_length;
uint8_t current_type;
uint64_t current_val;
scopeindex_t *depthindex;
@ -711,12 +696,40 @@ private:
uint64_t *tape;
uint32_t *containing_scope_offset;
#ifdef SIMDJSON_USE_COMPUTED_GOTO
void **ret_address;
#else
char *ret_address;
#endif
uint8_t *string_buf; // should be at least bytecapacity
uint8_t *current_string_buf_loc;
bool isvalid;
ParsedJson(const ParsedJson && p);
// Move constructor: takes over every buffer pointer held by p and copies its
// bookkeeping fields; p is left holding no buffers.
//
// All members are scalars or raw pointers, so they are copied directly and
// the source is then reset by hand.
ParsedJson(ParsedJson && p)
    : bytecapacity(p.bytecapacity),
      depthcapacity(p.depthcapacity),
      tapecapacity(p.tapecapacity),
      stringcapacity(p.stringcapacity),
      current_loc(p.current_loc),
      structurals(p.structurals),
      n_structural_indexes(p.n_structural_indexes),
      structural_indexes(p.structural_indexes),
      tape(p.tape),
      containing_scope_offset(p.containing_scope_offset),
      ret_address(p.ret_address),
      string_buf(p.string_buf),
      current_string_buf_loc(p.current_string_buf_loc),
      isvalid(p.isvalid) {
  // Detach the buffers from the source so that each one has a single owner.
  p.structurals = NULL;
  p.structural_indexes = NULL;
  p.tape = NULL;
  p.containing_scope_offset = NULL;
  p.ret_address = NULL;
  p.string_buf = NULL;
  p.current_string_buf_loc = NULL;
  // The source no longer has any storage, so it must not keep advertising
  // capacity it cannot back with memory.
  p.bytecapacity = 0;
  p.depthcapacity = 0;
  p.tapecapacity = 0;
  p.stringcapacity = 0;
  // isvalid is a bool, not a pointer: reset it with false rather than NULL.
  p.isvalid = false;
}
private :
@ -743,3 +756,6 @@ inline void dumpbits32_always(uint32_t v, const std::string &msg) {
}
std::cout << " " << msg.c_str() << "\n";
}
#endif

View File

@ -0,0 +1,122 @@
#ifndef SIMDJSON_PORTABILITY_H
#define SIMDJSON_PORTABILITY_H
#if defined(_MSC_VER)
#include <intrin.h>
#else
#include <x86intrin.h>
#endif
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#include <iso646.h>
#include <cstdint>
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
return _addcarry_u64(0, value1, value2, reinterpret_cast<unsigned __int64 *>(result));
}
# pragma intrinsic(_umul128)
// MSVC flavour: stores the low 64 bits of value1 * value2 into *result and
// returns true when the high 64 bits of the product are non-zero.
static inline bool mul_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
  uint64_t upper_half;
  const uint64_t lower_half = _umul128(value1, value2, &upper_half);
  *result = lower_half;
  return upper_half != 0;
}
// MSVC flavour: trailing zero count of input_num via _tzcnt_u64, as an int.
static inline int trailingzeroes(uint64_t input_num) {
  const auto zero_count = _tzcnt_u64(input_num);
  return static_cast<int>(zero_count);
}
// MSVC flavour: leading zero count of input_num via _lzcnt_u64, as an int.
static inline int leadingzeroes(uint64_t input_num) {
  const auto zero_count = _lzcnt_u64(input_num);
  return static_cast<int>(zero_count);
}
// MSVC flavour: number of set bits in input_num.
static inline int hamming(uint64_t input_num) {
#ifdef _WIN64 // highly recommended!!!
  const auto set_bits = __popcnt64(input_num);
  return (int)set_bits;
#else // if we must support 32-bit Windows
  // Count each 32-bit half on its own and add the two counts.
  const uint32_t low_half = (uint32_t)input_num;
  const uint32_t high_half = (uint32_t)(input_num >> 32);
  return (int)(__popcnt(low_half) + __popcnt(high_half));
#endif
}
#else
#include <x86intrin.h>
#include <cstdint>
// Stores the (wrapped) sum value1 + value2 into *result and returns true when
// the exact sum does not fit in 64 bits.
//
// Uses the type-generic builtin instead of __builtin_uaddl_overflow: the
// latter is declared for `unsigned long`, which is not the same type as
// uint64_t on every target (it may be narrower, or uint64_t may be
// `unsigned long long`), whereas the generic form works on the actual
// argument types.
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
  return __builtin_add_overflow(value1, value2, result);
}
// Stores the (wrapped) product value1 * value2 into *result and returns true
// when the exact product does not fit in 64 bits.
//
// Uses the type-generic builtin instead of __builtin_umulll_overflow: the
// latter expects an `unsigned long long *`, and a uint64_t* does not convert
// to that on targets where uint64_t is `unsigned long`.
static inline bool mul_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
  return __builtin_mul_overflow(value1, value2, result);
}
/* Number of consecutive zero bits starting at the least significant bit.
 * Returns 64 when input_num is zero (the value the TZCNT instruction
 * produces), so the result is well defined for every input.
 * Written with the compiler builtin so that it does not depend on the BMI
 * intrinsic header or on BMI target flags; __builtin_ctzll itself is
 * undefined for zero, hence the explicit guard. */
static inline int trailingzeroes(uint64_t input_num) {
  return input_num ? __builtin_ctzll(input_num) : 64;
}
/* Number of consecutive zero bits starting at the most significant bit.
 * Returns 64 when input_num is zero (the value the LZCNT instruction
 * produces), so the result is well defined for every input.
 * Written with the compiler builtin so that it does not depend on the LZCNT
 * intrinsic header or target flags; __builtin_clzll itself is undefined for
 * zero, hence the explicit guard. */
static inline int leadingzeroes(uint64_t input_num) {
  return input_num ? __builtin_clzll(input_num) : 64;
}
/* Population count: the number of bits set in input_num.
 * Well defined for every input, including zero (which yields 0).
 * Written with the compiler builtin so that it does not depend on an
 * intrinsic header or on popcnt target flags. */
static inline int hamming(uint64_t input_num) {
  return __builtin_popcountll(input_num);
}
#endif // _MSC_VER
// Portable stand-in for posix_memalign: asks the platform allocator for
// `size` bytes aligned on `alignment` and returns the block. On the
// posix_memalign path a failed allocation is reported as NULL.
// Note the argument order: alignment first, then size.
static inline void *aligned_malloc(size_t alignment, size_t size) {
#ifdef _MSC_VER
  return _aligned_malloc(size, alignment);
#elif defined(__MINGW32__) || defined(__MINGW64__)
  return __mingw_aligned_malloc(size, alignment);
#else
  // somehow, if posix_memalign is used before including "x86intrin.h", it
  // triggers an implicit-declaration warning.
  void *block;
  const int status = posix_memalign(&block, alignment, size);
  return (status == 0) ? block : NULL;
#endif
}
#ifndef __clang__
#ifndef _MSC_VER
// Fallback for compilers that lack this intrinsic: builds a 256-bit vector
// from two unaligned 128-bit loads, *__addr_lo in the low lane and
// *__addr_hi in the high lane.
// NOTE(review): newer GCC releases may ship this intrinsic themselves, in
// which case this definition would clash -- confirm against the supported
// compiler range.
static __m256i inline _mm256_loadu2_m128i(__m128i const *__addr_hi,
                                          __m128i const *__addr_lo) {
  const __m128i __lo128 = _mm_loadu_si128(__addr_lo);
  __m256i __v256 = _mm256_castsi128_si256(__lo128);
  const __m128i __hi128 = _mm_loadu_si128(__addr_hi);
  return _mm256_insertf128_si256(__v256, __hi128, 1);
}
// Fallback for compilers that lack this intrinsic: splits __a into its two
// 128-bit lanes and writes them with unaligned stores, the low lane to
// *__addr_lo first and then the high lane to *__addr_hi.
// NOTE(review): newer GCC releases may ship this intrinsic themselves, in
// which case this definition would clash -- confirm against the supported
// compiler range.
static inline void _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo,
                                        __m256i __a) {
  const __m128i __lo128 = _mm256_castsi256_si128(__a);
  const __m128i __hi128 = _mm256_extractf128_si256(__a, 1);
  _mm_storeu_si128(__addr_lo, __lo128);
  _mm_storeu_si128(__addr_hi, __hi128);
}
#endif
#endif
// Releases a block through the platform's matching deallocator
// (_aligned_free, __mingw_aligned_free or free). A NULL pointer is accepted
// and ignored.
static inline void aligned_free(void *memblock) {
  if (memblock != NULL) {
#ifdef _MSC_VER
    _aligned_free(memblock);
#elif defined(__MINGW32__) || defined(__MINGW64__)
    __mingw_aligned_free(memblock);
#else
    free(memblock);
#endif
  }
}
#endif /* end of include PORTABILITY_H */

View File

@ -1,18 +1,8 @@
/**
* (c) Daniel Lemire
* License: Apache License 2.0
*/
#ifndef SIMDJSON_SIMDPRUNE_TABLES_H
#define SIMDJSON_SIMDPRUNE_TABLES_H
#ifndef SIMDPRUNE_TABLES_H
#define SIMDPRUNE_TABLES_H
#include "simdjson/portability.h"
#if defined(_MSC_VER)
#include <intrin.h>
#include <iso646.h>
#else
#include <x86intrin.h>
#endif
#include <cstdint>
#ifdef __AVX__
static const unsigned char mask128_epi8[] = {

View File

@ -1,16 +1,12 @@
#ifndef SIMDUTF8CHECK_H
#define SIMDUTF8CHECK_H
#ifndef SIMDJSON_SIMDUTF8CHECK_H
#define SIMDJSON_SIMDUTF8CHECK_H
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#else
#include <x86intrin.h>
#endif
#include <string.h>
#include "simdjson/portability.h"
/*
* legal utf-8 byte sequence
* http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94

View File

@ -1,7 +1,8 @@
#pragma once
#ifndef SIMDJSON_STAGE1_FIND_MARKS_H
#define SIMDJSON_STAGE1_FIND_MARKS_H
#include "common_defs.h"
#include "parsedjson.h"
#include "simdjson/common_defs.h"
#include "simdjson/parsedjson.h"
WARN_UNUSED
bool find_structural_bits(const uint8_t *buf, size_t len, ParsedJson &pj);
@ -10,3 +11,5 @@ WARN_UNUSED
static inline bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
  // Same bytes, viewed as unsigned: delegate to the uint8_t overload.
  const uint8_t *bytes = (const uint8_t *)buf;
  return find_structural_bits(bytes, len, pj);
}
#endif

View File

@ -1,7 +1,10 @@
#pragma once
#ifndef SIMDJSON_STAGE2_FLATTEN_H
#define SIMDJSON_STAGE2_FLATTEN_H
#include "simdjson/common_defs.h"
#include "simdjson/parsedjson.h"
WARN_UNUSED
bool flatten_indexes(size_t len, ParsedJson &pj);
#endif

View File

@ -1,4 +1,5 @@
#pragma once
#ifndef SIMDJSON_STAGE34_UNIFIED_H
#define SIMDJSON_STAGE34_UNIFIED_H
#include "simdjson/common_defs.h"
#include "simdjson/parsedjson.h"
@ -13,3 +14,4 @@ static inline bool unified_machine(const char *buf, size_t len, ParsedJson &pj)
return unified_machine((const uint8_t *)buf,len,pj);
}
#endif

View File

@ -1,4 +1,5 @@
#pragma once
#ifndef SIMDJSON_STRINGPARSING_H
#define SIMDJSON_STRINGPARSING_H
#include "simdjson/common_defs.h"
#include "simdjson/parsedjson.h"
@ -53,7 +54,7 @@ really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, uint8_t **d
(((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000;
*src_ptr += 6;
}
size_t offset = codepoint_to_utf8(code_point, *dst_ptr);
size_t offset = codepoint_to_utf8(code_point, *dst_ptr);
*dst_ptr += offset;
return offset > 0;
}
@ -87,8 +88,8 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
__m256i unescaped_vec = _mm256_cmpeq_epi8(_mm256_max_epu8(unitsep,v),unitsep);// could do it with saturated subtraction
#endif // CHECKUNESCAPED
uint32_t quote_dist = __tzcnt_u64(quote_bits);
uint32_t bs_dist = __tzcnt_u64(bs_bits);
uint32_t quote_dist = trailingzeroes(quote_bits);
uint32_t bs_dist = trailingzeroes(bs_bits);
// store to dest unconditionally - we can overwrite the bits we don't like
// later
_mm256_storeu_si256((__m256i *)(dst), v);
@ -104,7 +105,7 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
uint32_t unescaped_bits = (uint32_t)_mm256_movemask_epi8(unescaped_vec);
bool is_ok = ((quote_bits - 1) & (~ quote_bits) & unescaped_bits) == 0;
#ifdef JSON_TEST_STRINGS // for unit testing
if(is_ok) foundString(buf + offset,start_of_string,pj.current_string_buf_loc - 1);
if(is_ok) foundString(buf + offset,start_of_string,pj.current_string_buf_loc - 1);
else foundBadString(buf + offset);
#endif // JSON_TEST_STRINGS
return is_ok;
@ -176,3 +177,4 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
}
#endif

View File

@ -32,7 +32,7 @@ std::string_view get_corpus(std::string filename) {
size_t readb = std::fread(buf, 1, len, fp);
std::fclose(fp);
if(readb != len) {
free(buf);
aligned_free(buf);
throw std::runtime_error("could not read the data");
}
return std::string_view(buf,len);

View File

@ -1,5 +1,5 @@
#include <cstdint>
#include "simdjson/portability.h"
#ifndef __AVX2__
@ -59,41 +59,8 @@ size_t jsonminify(const unsigned char *bytes, size_t howmany,
#else
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
return _addcarry_u64(0, value1, value2, reinterpret_cast<unsigned __int64 *>(result));
}
#else
#include <x86intrin.h>
// Adds value1 and value2, stores the (wrapped) 64-bit sum in *result and
// returns true when the addition overflowed 64 bits.
//
// Uses the type-generic __builtin_add_overflow rather than
// __builtin_uaddl_overflow: the latter works on `unsigned long`, which is not
// the same type as uint64_t / unsigned long long on every platform (pointer
// type mismatch on LP64 when cast to unsigned long long*, 32-bit arithmetic
// where long is 32 bits).
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
  return __builtin_add_overflow(value1, value2, result);
}
#endif // _MSC_VER
#include "simdjson/simdprune_tables.h"
#include <cstring>
#ifndef __clang__
#ifndef _MSC_VER
// Fallback for compilers that do not ship this helper (the surrounding guards
// exclude clang and MSVC). Builds one 256-bit vector from two unaligned
// 128-bit loads: *__addr_lo becomes the low half, *__addr_hi the high half.
static __m256i inline _mm256_loadu2_m128i(__m128i const *__addr_hi,
                                          __m128i const *__addr_lo) {
  const __m128i lower_lane = _mm_loadu_si128(__addr_lo);
  const __m128i upper_lane = _mm_loadu_si128(__addr_hi);
  // widen the low lane to 256 bits, then drop the high lane into position 1
  return _mm256_insertf128_si256(_mm256_castsi128_si256(lower_lane), upper_lane, 1);
}
// Fallback for compilers that do not ship this helper (the surrounding guards
// exclude clang and MSVC). Splits the 256-bit vector __a and writes it with
// two unaligned 128-bit stores: the low half to *__addr_lo first, then the
// high half to *__addr_hi.
static inline void _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo,
                                        __m256i __a) {
  _mm_storeu_si128(__addr_lo, _mm256_castsi256_si128(__a));
  _mm_storeu_si128(__addr_hi, _mm256_extractf128_si256(__a, 1));
}
#endif
#endif
// a straightforward comparison of a mask against input.
static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,

View File

@ -5,6 +5,14 @@
#else
#include <unistd.h>
#endif
// Forward declarations of the char-buffer and std::string_view overloads of
// json_parse / build_parsed_json. Each takes an explicit reallocifneeded flag
// (no default argument is given here).
// NOTE(review): presumably these mirror declarations in the public header -
// confirm the two stay in sync.
extern bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded);
extern bool json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded);
extern ParsedJson build_parsed_json(const char * buf, size_t len, bool reallocifneeded);
extern ParsedJson build_parsed_json(const std::string_view &s, bool reallocifneeded);
// parse a document found in buf, need to preallocate ParsedJson.
WARN_UNUSED
bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded) {

View File

@ -1,19 +1,5 @@
#include <cstdint>
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
return _addcarry_u64(0, value1, value2, reinterpret_cast<unsigned __int64 *>(result));
}
#else
#include <x86intrin.h>
// Adds value1 and value2, stores the (wrapped) 64-bit sum in *result and
// returns true when the addition overflowed 64 bits.
//
// Uses the type-generic __builtin_add_overflow rather than
// __builtin_uaddl_overflow: the latter operates on `unsigned long`, so it only
// accepts a uint64_t* where uint64_t happens to be unsigned long, and would
// compute in 32 bits where long is 32 bits wide.
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
  return __builtin_add_overflow(value1, value2, result);
}
#endif // _MSC_VER
#include "simdjson/portability.h"
#include <cassert>
#include "simdjson/common_defs.h"
#include "simdjson/parsedjson.h"
@ -23,7 +9,7 @@ static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *re
// It seems that many parsers do UTF-8 validation.
// RapidJSON does not do it by default, but a flag
// allows it.
// allows it.
#ifdef SIMDJSON_UTF8VALIDATE
#include "simdjson/simdutf8check.h"
#endif
@ -50,9 +36,9 @@ WARN_UNUSED
#ifdef SIMDJSON_UTF8VALIDATE
__m256i has_error = _mm256_setzero_si256();
struct avx_processed_utf_bytes previous;
previous.rawbytes = _mm256_setzero_si256();
previous.high_nibbles = _mm256_setzero_si256();
previous.carried_continuations = _mm256_setzero_si256();
previous.rawbytes = _mm256_setzero_si256();
previous.high_nibbles = _mm256_setzero_si256();
previous.carried_continuations = _mm256_setzero_si256();
#endif
// Useful constant masks
@ -70,7 +56,7 @@ WARN_UNUSED
// effectively the very first char is considered to follow "whitespace" for the
// purposes of pseudo-structural character detection
uint64_t prev_iter_ends_pseudo_pred = 1ULL;
size_t lenminus64 = len < 64 ? 0 : len - 64;
size_t lenminus64 = len < 64 ? 0 : len - 64;
size_t idx = 0;
for (; idx < lenminus64; idx += 64) {
#ifndef _MSC_VER
@ -87,7 +73,7 @@ WARN_UNUSED
_mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 1)),has_error);
} else {
// it is not ascii so we have to do heavy work
previous = avxcheckUTF8Bytes(input_lo, &previous, &has_error);
@ -223,7 +209,7 @@ WARN_UNUSED
/// but otherwise the string needs to be properly padded or else we
/// risk invalidating the UTF-8 checks.
////////////
if (idx < len) {
if (idx < len) {
uint8_t tmpbuf[64];
memset(tmpbuf,0x20,64);
memcpy(tmpbuf,buf+idx,len - idx);
@ -238,7 +224,7 @@ WARN_UNUSED
_mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 1)),has_error);
} else {
// it is not ascii so we have to do heavy work
previous = avxcheckUTF8Bytes(input_lo, &previous, &has_error);
@ -264,7 +250,6 @@ WARN_UNUSED
// indicates whether the sense of any edge going to the next iteration
// should be flipped
//bool iter_ends_odd_backslash =
// __builtin_uaddll_overflow(bs_bits, odd_starts, (unsigned long long *) &odd_carries);
add_overflow(bs_bits, odd_starts, &odd_carries);
odd_carries |=

View File

@ -1,42 +1,6 @@
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#ifndef __clang__ // if one compiles with MSVC *with* clang, then these
// intrinsics are defined!!!
// MSVC stand-in for the GCC/clang builtin of the same name: returns the
// zero-based position of the lowest set bit of input_num, using the
// _BitScanForward family. As written, the result is not initialised when
// input_num is 0 (the scan finds no bit), so callers must pass a non-zero value.
static inline int __builtin_ctzll(unsigned long long input_num) {
  unsigned long bit_pos;
#ifdef _WIN64 // highly recommended!!!
  _BitScanForward64(&bit_pos, input_num);
#else // if we must support 32-bit Windows
  const uint32_t low_word = (uint32_t)input_num;
  if (low_word == 0) {
    // nothing set in the low half: scan the high half and offset by 32
    _BitScanForward(&bit_pos, (uint32_t)(input_num >> 32));
    bit_pos += 32;
  } else {
    _BitScanForward(&bit_pos, low_word);
  }
#endif
  return bit_pos;
}
// MSVC stand-in for the GCC/clang builtin of the same name: returns the number
// of set bits in input_num. 64-bit Windows uses __popcnt64 directly; 32-bit
// Windows counts the two 32-bit halves separately and adds them.
static inline int __builtin_popcountll(unsigned long long input_num) {
#ifdef _WIN64 // highly recommended!!!
  return (int)__popcnt64(input_num);
#else // if we must support 32-bit Windows
  const uint32_t low_half = (uint32_t)input_num;
  const uint32_t high_half = (uint32_t)(input_num >> 32);
  return (int)(__popcnt(low_half) + __popcnt(high_half));
#endif
}
#endif// not clang
#else
// we have a normal compiler
#include <x86intrin.h>
#endif
#include <cassert>
#include "simdjson/portability.h"
#include "simdjson/common_defs.h"
#include "simdjson/parsedjson.h"
@ -50,7 +14,7 @@ static inline int __builtin_popcountll(unsigned long long input_num) {
#endif
#define SET_BIT(i) \
base_ptr[base + i] = (uint32_t)idx + __builtin_ctzll(s); \
base_ptr[base + i] = (uint32_t)idx + trailingzeroes(s); \
s = s & (s - 1);
#define SET_BIT1 SET_BIT(0)
@ -89,7 +53,7 @@ bool flatten_indexes(size_t len, ParsedJson &pj) {
counters[k] = 0;
for (size_t idx = 0; idx < len; idx += 64) {
uint64_t s = *(uint64_t *)(pj.structurals + idx / 8);
uint32_t cnt = __builtin_popcountll(s);
uint32_t cnt = hamming(s);
total++;
counters[cnt]++;
}
@ -104,7 +68,7 @@ bool flatten_indexes(size_t len, ParsedJson &pj) {
uint64_t s = *(uint64_t *)(pj.structurals + idx / 8);
#ifdef SUPPRESS_CHEESY_FLATTEN
while (s) {
base_ptr[base++] = (uint32_t)idx + __builtin_ctzll(s);
base_ptr[base++] = (uint32_t)idx + trailingzeroes(s);
s &= s - 1ULL;
}
#elif defined(NO_PDEP_PLEASE)
@ -113,14 +77,14 @@ bool flatten_indexes(size_t len, ParsedJson &pj) {
while (s) {
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
base_ptr[base+i] = (uint32_t)idx + __builtin_ctzll(s);
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
s = s & (s - 1);
}*/
base += NO_PDEP_WIDTH;
}
base = next_base;
#else
uint32_t cnt = __builtin_popcountll(s);
uint32_t cnt = hamming(s);
uint32_t next_base = base + cnt;
while (s) {
// spoil the suspense by reducing dependency chains; actually a win even
@ -128,18 +92,18 @@ bool flatten_indexes(size_t len, ParsedJson &pj) {
uint64_t s3 = _pdep_u64(~0x7ULL, s); // s3 will have bottom 3 1-bits unset
uint64_t s5 = _pdep_u64(~0x1fULL, s); // s5 will have bottom 5 1-bits unset
base_ptr[base + 0] = (uint32_t)idx + __builtin_ctzll(s);
base_ptr[base + 0] = (uint32_t)idx + trailingzeroes(s);
uint64_t s1 = s & (s - 1ULL);
base_ptr[base + 1] = (uint32_t)idx + __builtin_ctzll(s1);
base_ptr[base + 1] = (uint32_t)idx + trailingzeroes(s1);
uint64_t s2 = s1 & (s1 - 1ULL);
base_ptr[base + 2] =
(uint32_t)idx + __builtin_ctzll(s2); // uint64_t s3 = s2 & (s2 - 1ULL);
base_ptr[base + 3] = (uint32_t)idx + __builtin_ctzll(s3);
(uint32_t)idx + trailingzeroes(s2); // uint64_t s3 = s2 & (s2 - 1ULL);
base_ptr[base + 3] = (uint32_t)idx + trailingzeroes(s3);
uint64_t s4 = s3 & (s3 - 1ULL);
base_ptr[base + 4] =
(uint32_t)idx + __builtin_ctzll(s4); // uint64_t s5 = s4 & (s4 - 1ULL);
base_ptr[base + 5] = (uint32_t)idx + __builtin_ctzll(s5);
(uint32_t)idx + trailingzeroes(s4); // uint64_t s5 = s4 & (s4 - 1ULL);
base_ptr[base + 5] = (uint32_t)idx + trailingzeroes(s5);
uint64_t s6 = s5 & (s5 - 1ULL);
s = s6;
base += 6;

View File

@ -61,9 +61,6 @@ really_inline bool is_valid_null_atom(const uint8_t *loc) {
* The JSON is parsed to a tape, see the accompanying tape.md file
* for documentation.
***********/
// Implemented using Labels as Values which works in GCC and CLANG (and maybe
// also in Intel's compiler), but won't work in MSVC. This would need to be
// reimplemented differently if one wants to be standard compliant.
WARN_UNUSED
bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
uint32_t i = 0; // index of the structural character (0,1,2,3...)
@ -73,7 +70,7 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
uint32_t depth = 0; // could have an arbitrary starting depth
pj.init();
if(pj.bytecapacity < len) {
printf("insufficient capacity\n");
fprintf(stderr, "insufficient capacity\n");
return false;
}
// this macro reads the next structural character, updating idx, i and c.
@ -85,7 +82,11 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
////////////////////////////// START STATE /////////////////////////////
#ifdef SIMDJSON_USE_COMPUTED_GOTO
pj.ret_address[depth] = &&start_continue;
#else
pj.ret_address[depth] = 's';
#endif
pj.containing_scope_offset[depth] = pj.get_current_loc();
pj.write_tape(0, 'r'); // r for root, 0 is going to get overwritten
// the root is used, if nothing else, to capture the size of the tape
@ -98,7 +99,11 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
switch (c) {
case '{':
pj.containing_scope_offset[depth] = pj.get_current_loc();
#ifdef SIMDJSON_USE_COMPUTED_GOTO
pj.ret_address[depth] = &&start_continue;
#else
pj.ret_address[depth] = 's';
#endif
depth++;
if (depth > pj.depthcapacity) {
goto fail;
@ -107,7 +112,11 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
goto object_begin;
case '[':
pj.containing_scope_offset[depth] = pj.get_current_loc();
#ifdef SIMDJSON_USE_COMPUTED_GOTO
pj.ret_address[depth] = &&start_continue;
#else
pj.ret_address[depth] = 's';
#endif
depth++;
if (depth > pj.depthcapacity) {
goto fail;
@ -299,7 +308,11 @@ object_key_state:
pj.containing_scope_offset[depth] = pj.get_current_loc();
pj.write_tape(0, c); // here the compilers knows what c is so this gets optimized
// we have not yet encountered } so we need to come back for it
#ifdef SIMDJSON_USE_COMPUTED_GOTO
pj.ret_address[depth] = &&object_continue;
#else
pj.ret_address[depth] = 'o';
#endif
// we found an object inside an object, so we need to increment the depth
depth++;
if (depth > pj.depthcapacity) {
@ -312,7 +325,11 @@ object_key_state:
pj.containing_scope_offset[depth] = pj.get_current_loc();
pj.write_tape(0, c); // here the compilers knows what c is so this gets optimized
// we have not yet encountered } so we need to come back for it
#ifdef SIMDJSON_USE_COMPUTED_GOTO
pj.ret_address[depth] = &&object_continue;
#else
pj.ret_address[depth] = 'o';
#endif
// we found an array inside an object, so we need to increment the depth
depth++;
if (depth > pj.depthcapacity) {
@ -352,7 +369,15 @@ scope_end:
pj.annotate_previousloc(pj.containing_scope_offset[depth],
pj.get_current_loc());
// goto saved_state
// Resume the state that was saved in pj.ret_address[depth] when this scope was
// opened. The guard must be spelled SIMDJSON_USE_COMPUTED_GOTO, exactly as at
// the sites that store into ret_address; otherwise the character-tag branch is
// compiled even when label addresses were stored.
#ifdef SIMDJSON_USE_COMPUTED_GOTO
  // ret_address holds a label address (&&label): jump straight to it
  goto *pj.ret_address[depth];
#else
  // ret_address holds a one-character tag: 'a' = array_continue,
  // 'o' = object_continue, anything else ('s') = start_continue
  if (pj.ret_address[depth] == 'a') {
    goto array_continue;
  } else if (pj.ret_address[depth] == 'o') {
    goto object_continue;
  } else {
    goto start_continue;
  }
#endif
////////////////////////////// ARRAY STATES /////////////////////////////
array_begin:
@ -415,7 +440,11 @@ main_array_switch:
// we have not yet encountered ] so we need to come back for it
pj.containing_scope_offset[depth] = pj.get_current_loc();
pj.write_tape(0, c); // here the compilers knows what c is so this gets optimized
#ifdef SIMDJSON_USE_COMPUTED_GOTO
pj.ret_address[depth] = &&array_continue;
#else
pj.ret_address[depth] = 'a';
#endif
// we found an object inside an array, so we need to increment the depth
depth++;
if (depth > pj.depthcapacity) {
@ -428,7 +457,11 @@ main_array_switch:
// we have not yet encountered ] so we need to come back for it
pj.containing_scope_offset[depth] = pj.get_current_loc();
pj.write_tape(0, c); // here the compilers knows what c is so this gets optimized
#ifdef SIMDJSON_USE_COMPUTED_GOTO
pj.ret_address[depth] = &&array_continue;
#else
pj.ret_address[depth] = 'a';
#endif
// we found an array inside an array, so we need to increment the depth
depth++;
if (depth > pj.depthcapacity) {
@ -457,11 +490,11 @@ array_continue:
succeed:
depth --;
if(depth != 0) {
printf("internal bug\n");
fprintf(stderr, "internal bug\n");
abort();
}
if(pj.containing_scope_offset[depth] != 0) {
printf("internal bug\n");
fprintf(stderr, "internal bug\n");
abort();
}
pj.annotate_previousloc(pj.containing_scope_offset[depth],

View File

@ -1,3 +1,8 @@
# MSVC only: add ${PROJECT_SOURCE_DIR}/windows to the library's PUBLIC include
# path, so every target linking against it can include headers kept there.
# NOTE(review): presumably this is what lets jsoncheck find
# <dirent_portable.h> on Windows - confirm the header lives in windows/.
if(MSVC)
target_include_directories(${SIMDJSON_LIB_NAME}
PUBLIC ${PROJECT_SOURCE_DIR}/windows
)
endif()
add_cpp_test(jsoncheck)
add_test(jsoncheck jsoncheck)

View File

@ -108,7 +108,7 @@ int main(int argc, char *argv[]) {
printf("fastjson : %s \n", fastjson_correct ? "correct":"invalid");
printf("gason : %s \n", gason_correct ? "correct":"invalid");
printf("ultrajson : %s \n", ultrajson_correct ? "correct":"invalid");
free((void*)p.data());
aligned_free((void*)p.data());
free(buffer);
return EXIT_SUCCESS;
}

View File

@ -5,7 +5,7 @@
#include <unistd.h>
#else
// Microsoft can't be bothered to provide standard utils.
#include <simdjson/dirent_portable.h>
#include <dirent_portable.h>
#endif
#include <inttypes.h>
#include <stdbool.h>
@ -39,7 +39,7 @@ bool validate(const char *dirname) {
struct dirent **entry_list;
int c = scandir(dirname, &entry_list, 0, alphasort);
if (c < 0) {
printf("error accessing %s \n", dirname);
fprintf(stderr, "error accessing %s \n", dirname);
return false;
}
if (c == 0) {
@ -67,19 +67,19 @@ bool validate(const char *dirname) {
std::string_view p;
try {
p = get_corpus(fullpath);
} catch (const std::exception& e) {
std::cout << "Could not load the file " << fullpath << std::endl;
} catch (const std::exception& e) {
std::cerr << "Could not load the file " << fullpath << std::endl;
return EXIT_FAILURE;
}
ParsedJson pj;
bool allocok = pj.allocateCapacity(p.size(), 1024);
if(!allocok) {
std::cerr<< "can't allocate memory"<<std::endl;
std::cerr << "can't allocate memory"<<std::endl;
return false;
}
++howmany;
bool isok = json_parse(p, pj);
free((void*)p.data());
aligned_free((void*)p.data());
printf("%s\n", isok ? "ok" : "invalid");
if(contains("EXCLUDE",name)) {
// skipping
@ -96,7 +96,7 @@ bool validate(const char *dirname) {
printf("warning: file %s should fail but it passes.\n", name);
everythingfine = false;
}
}
}
free(fullpath);
}
}
@ -104,16 +104,15 @@ bool validate(const char *dirname) {
if(everythingfine) {
printf("All ok!\n");
} else {
printf("There were problems! Consider reviewing the following files:\n");
fprintf(stderr, "There were problems! Consider reviewing the following files:\n");
for(int i = 0; i < c; i++) {
if(!isfileasexpected[i]) printf("%s \n", entry_list[i]->d_name);
if(!isfileasexpected[i]) fprintf(stderr, "%s \n", entry_list[i]->d_name);
}
}
for (int i = 0; i < c; ++i)
free(entry_list[i]);
free(entry_list);
delete[] isfileasexpected;
return everythingfine;
}

View File

@ -57,8 +57,8 @@ inline void foundInteger(int64_t result, const uint8_t *buf) {
char *endptr;
long long expected = strtoll((const char *)buf, &endptr, 10);
if ((endptr == (const char *)buf) || (expected != result)) {
printf("Error: parsed %" PRId64 " out of %.32s, ", result, buf);
printf(" while parsing %s \n", fullpath);
fprintf(stderr, "Error: parsed %" PRId64 " out of %.32s, ", result, buf);
fprintf(stderr, " while parsing %s \n", fullpath);
parse_error |= PARSE_ERROR;
}
}
@ -68,23 +68,23 @@ inline void foundFloat(double result, const uint8_t *buf) {
float_count++;
double expected = strtod((const char *)buf, &endptr);
if (endptr == (const char *)buf) {
printf("parsed %f from %.32s whereas strtod refuses to parse a float, ",
fprintf(stderr, "parsed %f from %.32s whereas strtod refuses to parse a float, ",
result, buf);
printf(" while parsing %s \n", fullpath);
fprintf(stderr, " while parsing %s \n", fullpath);
parse_error |= PARSE_ERROR;
}
if( fpclassify(expected) != fpclassify(result) ) {
printf("floats not in the same category expected: %f observed: %f \n", expected, result);
printf("%.128s\n", buf);
fprintf(stderr, "floats not in the same category expected: %f observed: %f \n", expected, result);
fprintf(stderr, "%.128s\n", buf);
parse_error |= PARSE_ERROR;
}
// we want to get some reasonable relative accuracy
else if (fabs(expected - result) / fmin(fabs(expected), fabs(result)) >
1e-14) {
printf("parsed %.128e from \n", result);
printf(" %.200s whereas strtod gives\n", buf);
printf(" %.128e,", expected);
printf(" while parsing %s \n", fullpath);
fprintf(stderr, "parsed %.128e from \n", result);
fprintf(stderr, " %.200s whereas strtod gives\n", buf);
fprintf(stderr, " %.128e,", expected);
fprintf(stderr, " while parsing %s \n", fullpath);
parse_error |= PARSE_ERROR;
}
}
@ -154,7 +154,7 @@ bool validate(const char *dirname) {
float_count, invalid_count,
int_count + float_count + invalid_count);
}
free((void*)p.data());
aligned_free((void*)p.data());
free(fullpath);
}
}

View File

@ -341,7 +341,7 @@ bool validate(const char *dirname) {
bigbuffer = (char *) malloc(p.size());
if(bigbuffer == NULL) {
std::cerr << "can't allocate memory" << std::endl;
free((void*)p.data());
aligned_free((void*)p.data());
return false;
}
bad_string = 0;
@ -350,7 +350,7 @@ bool validate(const char *dirname) {
empty_string = 0;
bool isok = json_parse(p, pj);
free(bigbuffer);
free((void*)p.data());
aligned_free((void*)p.data());
if (good_string > 0) {
printf("File %40s %s --- bad strings: %10zu \tgood strings: %10zu\t "
"empty strings: %10zu "
@ -368,7 +368,7 @@ bool validate(const char *dirname) {
}
printf("%zu strings checked.\n", total_strings);
if (probable_bug) {
printf("STRING PARSING FAILS?\n");
fprintf(stderr, "STRING PARSING FAILS?\n");
} else {
printf("All ok.\n");
}

View File

@ -18,9 +18,10 @@ endif()
if(NOT MSVC)
set (OPT_FLAGS "${OPT_FLAGS} -mavx2 -mbmi2 -mpclmul")
else()
set (OPT_FLAGS "${OPT_FLAGS} /arch:AVX2")
set (OPT_FLAGS "${OPT_FLAGS} /arch:AVX2 /std:c++latest")
endif()
if(NOT MSVC)
set(CXXSTD_FLAGS "-std=c++17 -fPIC")
endif()

View File

@ -87,7 +87,7 @@ int main(int argc, char *argv[]) {
return EXIT_FAILURE;
}
bool is_ok = json_parse(p, pj); // do the parsing, return false on error
free((void *)p.data());
aligned_free((void *)p.data());
if (!is_ok) {
std::cerr << " Parsing failed. " << std::endl;
return EXIT_FAILURE;

View File

@ -18,5 +18,5 @@ int main(int argc, char *argv[]) {
}
jsonminify(p, (char *)p.data());
printf("%s",p.data());
free((void*)p.data());
aligned_free((void*)p.data());
}

View File

@ -1,4 +1,4 @@
// only use under Visual Studio
// only use under Visual Studio and only for jsoncheck.cpp
/*
* This file was originally: "dirent for Visual C++" from: http://softagalleria.net/dirent.php (version 1.20.1)
* However I've modified it to <dirent_portable.h> by adding:
@ -1041,5 +1041,3 @@ inline static int alphasort (const struct dirent **e1,const struct dirent **e2)
#endif /*DIRENT_H*/
#endif //#if (!defined(_WIN32) && !defined(_WIN64))