Merge branch 'master' of https://github.com/lemire/simdjson into Multiple_implementation_refactoring_stage2

This commit is contained in:
ioioioio 2019-07-03 10:34:58 -04:00
commit 036f9d5a45
42 changed files with 1781 additions and 1270 deletions

View File

@ -19,5 +19,6 @@ Reini Urban
Tom Dyson
Ihor Dotsenko
Alexey Milovidov
Chang Liu
# if you have contributed to the project and your name does not
# appear in this list, please let us know!

View File

@ -100,6 +100,7 @@ cat <<< '
#include <iostream>
#include "simdjson.h"
#include "simdjson.cpp"
using namespace simdjson;
int main(int argc, char *argv[]) {
const char * filename = argv[1];
padded_string p = get_corpus(filename);

View File

@ -30,7 +30,7 @@ void print_vec(const std::vector<int64_t> &v) {
std::cout << std::endl;
}
void simdjson_scan(std::vector<int64_t> &answer, ParsedJson::iterator &i) {
void simdjson_scan(std::vector<int64_t> &answer, simdjson::ParsedJson::iterator &i) {
while(i.move_forward()) {
if(i.get_scope_type() == '{') {
bool founduser = (i.get_string_length() == 4) && (memcmp(i.get_string(), "user", 4) == 0);
@ -48,30 +48,30 @@ void simdjson_scan(std::vector<int64_t> &answer, ParsedJson::iterator &i) {
}
__attribute__ ((noinline))
std::vector<int64_t> simdjson_justdom(ParsedJson &pj) {
std::vector<int64_t> simdjson_justdom(simdjson::ParsedJson &pj) {
std::vector<int64_t> answer;
ParsedJson::iterator i(pj);
simdjson::ParsedJson::iterator i(pj);
simdjson_scan(answer,i);
remove_duplicates(answer);
return answer;
}
__attribute__ ((noinline))
std::vector<int64_t> simdjson_computestats(const padded_string &p) {
std::vector<int64_t> simdjson_computestats(const simdjson::padded_string &p) {
std::vector<int64_t> answer;
ParsedJson pj = build_parsed_json(p);
simdjson::ParsedJson pj = simdjson::build_parsed_json(p);
if (!pj.isValid()) {
return answer;
}
ParsedJson::iterator i(pj);
simdjson::ParsedJson::iterator i(pj);
simdjson_scan(answer,i);
remove_duplicates(answer);
return answer;
}
__attribute__ ((noinline))
bool simdjson_justparse(const padded_string &p) {
ParsedJson pj = build_parsed_json(p);
bool simdjson_justparse(const simdjson::padded_string &p) {
simdjson::ParsedJson pj = simdjson::build_parsed_json(p);
bool answer = !pj.isValid();
return answer;
}
@ -135,7 +135,7 @@ std::vector<int64_t> sasjon_justdom(sajson::document & d) {
}
__attribute__ ((noinline))
std::vector<int64_t> sasjon_computestats(const padded_string &p) {
std::vector<int64_t> sasjon_computestats(const simdjson::padded_string &p) {
std::vector<int64_t> answer;
char *buffer = (char *)malloc(p.size());
memcpy(buffer, p.data(), p.size());
@ -152,7 +152,7 @@ std::vector<int64_t> sasjon_computestats(const padded_string &p) {
}
__attribute__ ((noinline))
bool sasjon_justparse(const padded_string &p) {
bool sasjon_justparse(const simdjson::padded_string &p) {
char *buffer = (char *)malloc(p.size());
memcpy(buffer, p.data(), p.size());
auto d = sajson::parse(sajson::dynamic_allocation(),
@ -210,7 +210,7 @@ std::vector<int64_t> rapid_justdom(rapidjson::Document &d) {
}
__attribute__ ((noinline))
std::vector<int64_t> rapid_computestats(const padded_string &p) {
std::vector<int64_t> rapid_computestats(const simdjson::padded_string &p) {
std::vector<int64_t> answer;
char *buffer = (char *)malloc(p.size() + 1);
memcpy(buffer, p.data(), p.size());
@ -228,7 +228,7 @@ std::vector<int64_t> rapid_computestats(const padded_string &p) {
}
__attribute__ ((noinline))
bool rapid_justparse(const padded_string &p) {
bool rapid_justparse(const simdjson::padded_string &p) {
char *buffer = (char *)malloc(p.size() + 1);
memcpy(buffer, p.data(), p.size());
buffer[p.size()] = '\0';
@ -267,9 +267,9 @@ int main(int argc, char *argv[]) {
if (optind + 1 < argc) {
std::cerr << "warning: ignoring everything after " << argv[optind + 1] << std::endl;
}
padded_string p;
simdjson::padded_string p;
try {
get_corpus(filename).swap(p);
simdjson::get_corpus(filename).swap(p);
} catch (const std::exception &e) { // caught by reference to base
std::cout << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE;
@ -321,7 +321,7 @@ int main(int argc, char *argv[]) {
!justdata);
BEST_TIME("sasjon (just parse) ", sasjon_justparse(p), false, , repeat, volume,
!justdata);
ParsedJson dsimdjson = build_parsed_json(p);
simdjson::ParsedJson dsimdjson = simdjson::build_parsed_json(p);
BEST_TIME("simdjson (just dom) ", simdjson_justdom(dsimdjson).size(), size, , repeat,
volume, !justdata);
char *buffer = (char *)malloc(p.size());

View File

@ -65,9 +65,9 @@ int main(int argc, char *argv[]) {
exit(1);
}
const char * filename = argv[optind];
padded_string p;
simdjson::padded_string p;
try {
get_corpus(filename).swap(p);
simdjson::get_corpus(filename).swap(p);
} catch (const std::exception& e) { // caught by reference to base
std::cout << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE;

View File

@ -78,9 +78,9 @@ int main(int argc, char *argv[]) {
if (verbose) {
std::cout << "[verbose] loading " << filename << std::endl;
}
padded_string p;
simdjson::padded_string p;
try {
get_corpus(filename).swap(p);
simdjson::get_corpus(filename).swap(p);
} catch (const std::exception &e) { // caught by reference to base
std::cout << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE;
@ -128,7 +128,7 @@ int main(int argc, char *argv[]) {
std::cout << "[verbose] iteration # " << i << std::endl;
}
unified.start();
ParsedJson pj;
simdjson::ParsedJson pj;
bool allocok = pj.allocateCapacity(p.size());
if (!allocok) {
std::cerr << "failed to allocate memory" << std::endl;
@ -145,7 +145,7 @@ int main(int argc, char *argv[]) {
}
unified.start();
// The default template is simdjson::instruction_set::native.
isok = (find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
isok = (simdjson::find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
unified.end(results);
cy1 += results[0];
cl1 += results[1];
@ -158,7 +158,7 @@ int main(int argc, char *argv[]) {
}
unified.start();
// The default template is simdjson::instruction_set::native.
isok = isok && (simdjson::SUCCESS == unified_machine<>(p.data(), p.size(), pj));
isok = isok && (simdjson::SUCCESS == simdjson::unified_machine<>(p.data(), p.size(), pj));
unified.end(results);
cy2 += results[0];
cl2 += results[1];
@ -176,7 +176,7 @@ int main(int argc, char *argv[]) {
if (verbose) {
std::cout << "[verbose] iteration # " << i << std::endl;
}
ParsedJson pj;
simdjson::ParsedJson pj;
bool allocok = pj.allocateCapacity(p.size());
if (!allocok) {
std::cerr << "failed to allocate memory" << std::endl;
@ -188,8 +188,8 @@ int main(int argc, char *argv[]) {
auto start = std::chrono::steady_clock::now();
// The default template is simdjson::instruction_set::native.
isok = (find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
isok = isok && (simdjson::SUCCESS == unified_machine<>(p.data(), p.size(), pj));
isok = (simdjson::find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
isok = isok && (simdjson::SUCCESS == simdjson::unified_machine<>(p.data(), p.size(), pj));
auto end = std::chrono::steady_clock::now();
std::chrono::duration<double> secs = end - start;
res[i] = secs.count();
@ -199,7 +199,7 @@ int main(int argc, char *argv[]) {
return EXIT_FAILURE;
}
}
ParsedJson pj = build_parsed_json(p); // do the parsing again to get the stats
simdjson::ParsedJson pj = build_parsed_json(p); // do the parsing again to get the stats
if (!pj.isValid()) {
std::cerr << pj.getErrorMsg() << std::endl;
std::cerr << "Could not parse. " << std::endl;

View File

@ -146,7 +146,7 @@ void sajson_traverse(stat_t &stats, const sajson::value &node) {
}
__attribute__ ((noinline))
stat_t sasjon_computestats(const padded_string &p) {
stat_t sasjon_computestats(const simdjson::padded_string &p) {
stat_t answer;
char *buffer = (char *)malloc(p.size());
memcpy(buffer, p.data(), p.size());
@ -204,7 +204,7 @@ void rapid_traverse(stat_t &stats, const rapidjson::Value &v) {
}
__attribute__ ((noinline))
stat_t rapid_computestats(const padded_string &p) {
stat_t rapid_computestats(const simdjson::padded_string &p) {
stat_t answer;
char *buffer = (char *)malloc(p.size() + 1);
memcpy(buffer, p.data(), p.size());
@ -253,9 +253,9 @@ int main(int argc, char *argv[]) {
if (optind + 1 < argc) {
std::cerr << "warning: ignoring everything after " << argv[optind + 1] << std::endl;
}
padded_string p;
simdjson::padded_string p;
try {
get_corpus(filename).swap(p);
simdjson::get_corpus(filename).swap(p);
} catch (const std::exception &e) { // caught by reference to base
std::cout << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE;

View File

@ -83,9 +83,9 @@ int main(int argc, char *argv[]) {
std::cerr << "warning: ignoring everything after " << argv[optind + 1]
<< std::endl;
}
padded_string p;
simdjson::padded_string p;
try {
get_corpus(filename).swap(p);
simdjson::get_corpus(filename).swap(p);
} catch (const std::exception &e) { // caught by reference to base
std::cout << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE;
@ -101,7 +101,7 @@ int main(int argc, char *argv[]) {
std::cout << p.size() << " B ";
std::cout << std::endl;
}
ParsedJson pj;
simdjson::ParsedJson pj;
bool allocok = pj.allocateCapacity(p.size(), 1024);
if (!allocok) {

View File

@ -42,9 +42,9 @@ struct stat_s {
using stat_t = struct stat_s;
stat_t simdjson_computestats(const padded_string &p) {
stat_t simdjson_computestats(const simdjson::padded_string &p) {
stat_t answer;
ParsedJson pj = build_parsed_json(p);
simdjson::ParsedJson pj = simdjson::build_parsed_json(p);
answer.valid = pj.isValid();
if (!answer.valid) {
return answer;
@ -134,9 +134,9 @@ int main(int argc, char *argv[]) {
std::cerr << "warning: ignoring everything after " << argv[optind + 1]
<< std::endl;
}
padded_string p;
simdjson::padded_string p;
try {
get_corpus(filename).swap(p);
simdjson::get_corpus(filename).swap(p);
} catch (const std::exception &e) { // caught by reference to base
std::cerr << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE;
@ -163,7 +163,7 @@ int main(int argc, char *argv[]) {
s.object_count, s.array_count, s.null_count, s.true_count,
s.false_count, s.byte_count, s.structural_indexes_count);
#ifdef __linux__
ParsedJson pj;
simdjson::ParsedJson pj;
bool allocok = pj.allocateCapacity(p.size());
if (!allocok) {
std::cerr << "failed to allocate memory" << std::endl;
@ -181,7 +181,7 @@ int main(int argc, char *argv[]) {
for (uint32_t i = 0; i < iterations; i++) {
unified.start();
// The default template is simdjson::instruction_set::native.
bool isok = (find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
bool isok = (simdjson::find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
unified.end(results);
cy1 += results[0];

View File

@ -4,6 +4,7 @@
#include "simdjson/common_defs.h"
#include "simdjson/parsedjson.h"
namespace simdjson {
// structural chars here are
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL)
// we are also interested in the four whitespace characters
@ -293,5 +294,6 @@ inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
// will return 0 when the code point was too large.
return 0; // bad r
}
}
#endif

View File

@ -5,6 +5,7 @@
#include <iomanip>
#include <iostream>
namespace simdjson {
// ends with zero char
static inline void print_with_escapes(const unsigned char *src) {
while (*src) {
@ -195,6 +196,7 @@ static inline void print_with_escapes(const char *src, std::ostream &os,
size_t len) {
print_with_escapes(reinterpret_cast<const unsigned char *>(src), os, len);
}
}
#
#endif

View File

@ -12,7 +12,7 @@
#include "simdjson/padded_string.h"
namespace simdjson {
// load a file in memory...
// get a corpus; pad out to cache line so we can always use SIMD
@ -29,6 +29,6 @@
// std::cout << "Could not load the file " << filename << std::endl;
// }
padded_string get_corpus(const std::string& filename);
}
#endif

View File

@ -4,6 +4,7 @@
#include <cstddef>
#include <cstdint>
namespace simdjson {
// Take input from buf and remove useless whitespace, write it to out; buf and
// out can be the same pointer. Result is null terminated,
// return the string length (minus the null termination).
@ -22,5 +23,5 @@ static inline size_t jsonminify(const std::string_view & p, char *out) {
// Convenience overload for padded_string input: forwards p.data() and p.size()
// to the pointer-and-length jsonminify overload, writing the minified text to
// `out`, and returns whatever that overload returns.
static inline size_t jsonminify(const padded_string & p, char *out) {
return jsonminify(p.data(), p.size(), out);
}
}
#endif

View File

@ -15,6 +15,7 @@
#include <unistd.h>
#endif
namespace simdjson {
// The function that users are expected to call is json_parse.
// We have more than one such function because we want to support several
// instruction sets.
@ -26,9 +27,9 @@ using json_parse_functype = int (const uint8_t *buf, size_t len, ParsedJson &pj,
extern json_parse_functype *json_parse_ptr;
// json_parse_implementation is the generic function, it is specialized for various
// SIMD instruction sets, e.g., as json_parse_implementation<simdjson::instruction_set::avx2>
// or json_parse_implementation<simdjson::instruction_set::neon>
template<simdjson::instruction_set T>
// SIMD instruction sets, e.g., as json_parse_implementation<instruction_set::avx2>
// or json_parse_implementation<instruction_set::neon>
template<instruction_set T>
int json_parse_implementation(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
if (pj.bytecapacity < len) {
return simdjson::CAPACITY;
@ -199,7 +200,5 @@ WARN_UNUSED
// Convenience overload for padded_string input: forwards s.data() and
// s.length() to the pointer-and-length build_parsed_json overload and returns
// the ParsedJson it produces.
// NOTE(review): the trailing `false` is presumably the reallocifneeded flag of
// that overload (the input is already padded) — confirm against its signature.
inline ParsedJson build_parsed_json(const padded_string &s) {
return build_parsed_json(s.data(), s.length(), false);
}
}
#endif

View File

@ -12,7 +12,7 @@ void foundInteger(int64_t result, const uint8_t *buf);
void foundFloat(double result, const uint8_t *buf);
#endif
namespace simdjson {
// Allowable floating-point values range from std::numeric_limits<double>::lowest()
// to std::numeric_limits<double>::max(), so from
// -1.7976e308 all the way to 1.7975e308 in binary64. The lowest non-zero
@ -557,5 +557,5 @@ static really_inline bool parse_number(const uint8_t *const buf,
return is_structural_or_whitespace(*p);
#endif // SIMDJSON_SKIPNUMBERPARSING
}
}
#endif

View File

@ -3,6 +3,8 @@
#include "simdjson/portability.h"
#include <memory>
#include <cstring>
namespace simdjson {
// low-level function to allocate memory with padding so we can read past the
// "length" bytes safely. if you must provide a pointer to some data, create it
// with this function: length is the max. size in bytes of the string caller is
@ -63,5 +65,6 @@ private:
size_t viable_size;
char *data_ptr;
};
}
#endif

View File

@ -15,7 +15,7 @@
#define DEFAULTMAXDEPTH 1024// a JSON document with a depth exceeding 1024 is probably de facto invalid
namespace simdjson {
/************
* The JSON is parsed to a tape, see the accompanying tape.md file
* for documentation.
@ -499,4 +499,5 @@ bool ParsedJson::iterator::next() {
current_type = nexttype;
return true;
}
}
#endif

View File

@ -7,6 +7,7 @@
#include <iso646.h>
#include <cstdint>
namespace simdjson {
// Visual Studio variant: adds value1 and value2 with a carry-in of 0 through
// the _addcarry_u64 intrinsic and stores the 64-bit sum in *result.
// Returns the carry-out, i.e. true when the unsigned addition overflowed.
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
return _addcarry_u64(0, value1, value2, reinterpret_cast<unsigned __int64 *>(result));
}
@ -34,7 +35,7 @@ static inline int hamming(uint64_t input_num) {
__popcnt((uint32_t)(input_num >> 32)));
#endif
}
}
#else
#include <cstdint>
#include <cstdlib>
@ -42,7 +43,7 @@ static inline int hamming(uint64_t input_num) {
#if defined(__BMI2__) || defined(__POPCOUNT__) || defined(__AVX2__)
#include <x86intrin.h>
#endif
namespace simdjson {
// Non-MSVC variant: computes value1 + value2 into *result with the
// __builtin_uaddll_overflow compiler builtin. Returns true when the unsigned
// addition overflowed (*result then holds the wrapped sum), false otherwise.
// The pointer cast bridges uint64_t* to the unsigned long long* the builtin takes.
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
return __builtin_uaddll_overflow(value1, value2, (unsigned long long*)result);
}
@ -76,10 +77,11 @@ static inline int hamming(uint64_t input_num) {
return __builtin_popcountll(input_num);
#endif
}
}
#endif // _MSC_VER
namespace simdjson {
// portable version of posix_memalign
static inline void *aligned_malloc(size_t alignment, size_t size) {
void *p;
@ -139,5 +141,6 @@ static inline void aligned_free(void *memblock) {
// char* convenience wrapper: casts memblock to void* and releases it through
// aligned_free.
static inline void aligned_free_char(char *memblock) {
aligned_free((void*)memblock);
}
}
#endif // SIMDJSON_PORTABILITY_H

View File

@ -3,46 +3,45 @@
#include <string>
struct simdjson {
enum class instruction_set {
avx2,
sse4_2,
neon,
none,
namespace simdjson {
enum class instruction_set {
avx2,
sse4_2,
neon,
none,
// the 'native' enum class value should point at a good default on the current machine
#ifdef __AVX2__
native = avx2
native = avx2
#elif defined(__ARM_NEON)
native = neon
native = neon
#else
// Let us assume that we have an old x64 processor, but one that has SSE (i.e., something
// that came out in the second decade of the XXIst century).
// It would be nicer to check explicitly, but there may not be a good way to do so
// that is cross-platform.
// Under Visual Studio, there is no way to check for SSE4.2 support at compile-time.
native = sse4_2
// Let us assume that we have an old x64 processor, but one that has SSE (i.e., something
// that came out in the second decade of the XXIst century).
// It would be nicer to check explicitly, but there may not be a good way to do so
// that is cross-platform.
// Under Visual Studio, there is no way to check for SSE4.2 support at compile-time.
native = sse4_2
#endif
};
enum errorValues {
SUCCESS = 0,
CAPACITY, // This ParsedJson can't support a document that big
MEMALLOC, // Error allocating memory, most likely out of memory
TAPE_ERROR, // Something went wrong while writing to the tape (stage 2), this is a generic error
DEPTH_ERROR, // Your document exceeds the user-specified depth limitation
STRING_ERROR, // Problem while parsing a string
T_ATOM_ERROR, // Problem while parsing an atom starting with the letter 't'
F_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'f'
N_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'n'
NUMBER_ERROR, // Problem while parsing a number
UTF8_ERROR, // the input is not valid UTF-8
UNITIALIZED, // unknown error, or uninitialized document
EMPTY, // no structural document found
UNESCAPED_CHARS, // found unescaped characters in a string.
UNCLOSED_STRING, // missing quote at the end
UNEXPECTED_ERROR // indicative of a bug in simdjson
};
static const std::string& errorMsg(const int);
};
enum errorValues {
SUCCESS = 0,
CAPACITY, // This ParsedJson can't support a document that big
MEMALLOC, // Error allocating memory, most likely out of memory
TAPE_ERROR, // Something went wrong while writing to the tape (stage 2), this is a generic error
DEPTH_ERROR, // Your document exceeds the user-specified depth limitation
STRING_ERROR, // Problem while parsing a string
T_ATOM_ERROR, // Problem while parsing an atom starting with the letter 't'
F_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'f'
N_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'n'
NUMBER_ERROR, // Problem while parsing a number
UTF8_ERROR, // the input is not valid UTF-8
UNITIALIZED, // unknown error, or uninitialized document
EMPTY, // no structural document found
UNESCAPED_CHARS, // found unescaped characters in a string.
UNCLOSED_STRING, // missing quote at the end
UNEXPECTED_ERROR // indicative of a bug in simdjson
};
const std::string& errorMsg(const int);
}
#endif

View File

@ -2,9 +2,11 @@
#ifndef SIMDJSON_INCLUDE_SIMDJSON_VERSION
#define SIMDJSON_INCLUDE_SIMDJSON_VERSION
#define SIMDJSON_VERSION 0.1.2
namespace simdjson {
// Numeric components of the library version (major.minor.revision). The values
// here match the SIMDJSON_VERSION macro defined in this header (0.1.2).
enum {
SIMDJSON_VERSION_MAJOR = 0,
SIMDJSON_VERSION_MINOR = 1,
SIMDJSON_VERSION_REVISION = 2
};
}
#endif // SIMDJSON_INCLUDE_SIMDJSON_VERSION

View File

@ -3,8 +3,9 @@
#include "simdjson/portability.h"
#ifdef __AVX__
namespace simdjson {
#ifdef __AVX__
static const unsigned char mask128_epi8[] = {
0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe,
0xf, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe,
@ -34984,13 +34985,13 @@ static const unsigned char mask128_epi32[] = {
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff,
};
}
#endif //__SSE3__
#ifdef __AVX2__
#include <cstdint>
namespace simdjson {
static const uint32_t mask256_epi32[] = {
0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 7, 0, 2, 3, 4, 5, 6, 7, 7, 2,
3, 4, 5, 6, 7, 7, 7, 0, 1, 3, 4, 5, 6, 7, 7, 1, 3, 4, 5, 6, 7, 7, 7, 0, 3,
@ -35074,6 +35075,7 @@ static const uint32_t mask256_epi32[] = {
3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2,
0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
}
#endif //__AVX2__
#endif

View File

@ -26,9 +26,8 @@
// all byte values must be no larger than 0xF4
namespace simdjson {
#ifdef __AVX2__
/*****************************/
// Returns the 32-byte vector made of the last byte of `a` followed by the
// first 31 bytes of `b`, i.e. `b` moved up by one byte position with a's final
// byte filling the vacated first position.
// _mm256_permute2x128_si256(a, b, 0x21) builds [high 128 bits of a | low 128
// bits of b]; the per-lane _mm256_alignr_epi8 by 15 then places byte 15 of
// that lane in front of the first 15 bytes of the matching lane of b.
static inline __m256i push_last_byte_of_a_to_b(__m256i a, __m256i b) {
return _mm256_alignr_epi8(b, _mm256_permute2x128_si256(a, b, 0x21), 15);
}
#else // __AVX2__
#warning "We require AVX2 support!"
#endif // __AVX2__
}
#endif

View File

@ -32,6 +32,7 @@
#define TRANSPOSE
namespace simdjson {
template<simdjson::instruction_set>
struct simd_input;
#ifdef __AVX2__
@ -853,5 +854,5 @@ WARN_UNUSED
int find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
return find_structural_bits<T>(reinterpret_cast<const uint8_t *>(buf), len, pj);
}
}
#endif

View File

@ -14,6 +14,7 @@
#define PATH_SEP '/'
namespace simdjson {
void init_state_machine();
WARN_UNUSED
@ -582,5 +583,6 @@ template<simdjson::instruction_set T = simdjson::instruction_set::native>
int unified_machine(const char *buf, size_t len, ParsedJson &pj) {
return unified_machine<T>(reinterpret_cast<const uint8_t*>(buf), len, pj);
}
}
#endif

View File

@ -10,7 +10,7 @@ void foundString(const uint8_t *buf, const uint8_t *parsed_begin, const uint8_t
void foundBadString(const uint8_t *buf);
#endif
namespace simdjson {
// begin copypasta
// These chars yield themselves: " \ /
// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab
@ -234,6 +234,6 @@ bool parse_string(UNUSED const uint8_t *buf, UNUSED size_t len,
return true;
#endif // SIMDJSON_SKIPSTRINGPARSING
}
}
#endif

View File

@ -1,8 +1,9 @@
/* auto-generated on Thu May 9 20:55:13 EDT 2019. Do not edit! */
/* auto-generated on Tue 02 Jul 2019 04:34:44 PM EDT. Do not edit! */
#include <iostream>
#include "simdjson.h"
#include "simdjson.cpp"
using namespace simdjson;
int main(int argc, char *argv[]) {
const char * filename = argv[1];
padded_string p = get_corpus(filename);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -2,6 +2,7 @@
#include <cstring>
#include <cstdlib>
namespace simdjson {
char * allocate_padded_buffer(size_t length) {
// we could do a simple malloc
//return (char *) malloc(length + SIMDJSON_PADDING);
@ -31,3 +32,4 @@ padded_string get_corpus(const std::string& filename) {
}
throw std::runtime_error("could not load corpus");
}
}

View File

@ -1,8 +1,9 @@
#include "simdjson/portability.h"
#include <cstdint>
#ifndef __AVX2__
namespace simdjson {
static uint8_t jump_table[256 * 3] = {
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1,
@ -56,12 +57,12 @@ size_t jsonminify(const unsigned char *bytes, size_t howmany,
}
return pos;
}
}
#else
#include "simdjson/simdprune_tables.h"
#include <cstring>
namespace simdjson {
// a straightforward comparison of a mask against input.
static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,
__m256i mask) {
@ -247,5 +248,5 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
*out = '\0';// NULL termination
return out - initout;
}
}
#endif

View File

@ -7,44 +7,44 @@
#endif
#include "simdjson/simdjson.h"
namespace simdjson {
// Responsible to select the best json_parse implementation
int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded) {
// Versions for each implementation
#ifdef __AVX2__
json_parse_functype* avx_implementation = &json_parse_implementation<simdjson::instruction_set::avx2>;
json_parse_functype* avx_implementation = &json_parse_implementation<instruction_set::avx2>;
#endif
#ifdef __SSE4_2__
// json_parse_functype* sse4_2_implementation = &json_parse_implementation<simdjson::instruction_set::sse4_2>; // not implemented yet
// json_parse_functype* sse4_2_implementation = &json_parse_implementation<instruction_set::sse4_2>; // not implemented yet
#endif
#ifdef __ARM_NEON
json_parse_functype* neon_implementation = &json_parse_implementation<simdjson::instruction_set::neon>;
json_parse_functype* neon_implementation = &json_parse_implementation<instruction_set::neon>;
#endif
// Determining which implementation is the more suitable
// Should be done at runtime. Does not make any sense on preprocessor.
#ifdef __AVX2__
simdjson::instruction_set best_implementation = simdjson::instruction_set::avx2;
instruction_set best_implementation = instruction_set::avx2;
#elif defined (__SSE4_2__)
simdjson::instruction_set best_implementation = simdjson::instruction_set::sse4_2;
instruction_set best_implementation = instruction_set::sse4_2;
#elif defined (__ARM_NEON)
simdjson::instruction_set best_implementation = simdjson::instruction_set::neon;
instruction_set best_implementation = instruction_set::neon;
#else
simdjson::instruction_set best_implementation = simdjson::instruction_set::none;
instruction_set best_implementation = instruction_set::none;
#endif
// Selecting the best implementation
switch (best_implementation) {
#ifdef __AVX2__
case simdjson::instruction_set::avx2 :
case instruction_set::avx2 :
json_parse_ptr = avx_implementation;
break;
#elif defined (__SSE4_2__)
/*case simdjson::instruction_set::sse4_2 :
/*case instruction_set::sse4_2 :
json_parse_ptr = sse4_2_implementation;
break;*/
#elif defined (__ARM_NEON)
case simdjson::instruction_set::neon :
case instruction_set::neon :
json_parse_ptr = neon_implementation;
break;
#endif
@ -69,3 +69,4 @@ ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneede
}
return pj;
}
}

View File

@ -1,5 +1,6 @@
#include "simdjson/parsedjson.h"
namespace simdjson {
ParsedJson::ParsedJson() :
structural_indexes(nullptr), tape(nullptr), containing_scope_offset(nullptr),
ret_address(nullptr), string_buf(nullptr), current_string_buf_loc(nullptr) {}
@ -97,7 +98,7 @@ int ParsedJson::getErrorCode() const {
}
std::string ParsedJson::getErrorMsg() const {
return simdjson::errorMsg(errorcode);
return errorMsg(errorcode);
}
void ParsedJson::deallocate() {
@ -318,3 +319,4 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) {
os << tapeidx << " : "<< type <<"\t// pointing to " << payload <<" (start root)\n";
return true;
}
}

View File

@ -2,6 +2,7 @@
#include "simdjson/common_defs.h"
#include <iterator>
namespace simdjson {
ParsedJson::iterator::iterator(ParsedJson &pj_) : pj(pj_), depth(0), location(0), tape_length(0), depthindex(nullptr) {
if(!pj.isValid()) {
throw InvalidJSON();
@ -92,3 +93,4 @@ bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const {
}
return true;
}
}

View File

@ -1,23 +1,25 @@
#include <map>
#include "simdjson/simdjson.h"
namespace simdjson {
const std::map<int, const std::string> errorStrings = {
{simdjson::SUCCESS, "No errors"},
{simdjson::CAPACITY, "This ParsedJson can't support a document that big"},
{simdjson::MEMALLOC, "Error allocating memory, we're most likely out of memory"},
{simdjson::TAPE_ERROR, "Something went wrong while writing to the tape"},
{simdjson::STRING_ERROR, "Problem while parsing a string"},
{simdjson::T_ATOM_ERROR, "Problem while parsing an atom starting with the letter 't'"},
{simdjson::F_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'f'"},
{simdjson::N_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'n'"},
{simdjson::NUMBER_ERROR, "Problem while parsing a number"},
{simdjson::UTF8_ERROR, "The input is not valid UTF-8"},
{simdjson::UNITIALIZED, "Unitialized"},
{simdjson::EMPTY, "Empty"},
{simdjson::UNESCAPED_CHARS, "Within strings, some characters must be escapted, we found unescapted characters"},
{simdjson::UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson"},
{SUCCESS, "No errors"},
{CAPACITY, "This ParsedJson can't support a document that big"},
{MEMALLOC, "Error allocating memory, we're most likely out of memory"},
{TAPE_ERROR, "Something went wrong while writing to the tape"},
{STRING_ERROR, "Problem while parsing a string"},
{T_ATOM_ERROR, "Problem while parsing an atom starting with the letter 't'"},
{F_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'f'"},
{N_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'n'"},
{NUMBER_ERROR, "Problem while parsing a number"},
{UTF8_ERROR, "The input is not valid UTF-8"},
{UNITIALIZED, "Unitialized"},
{EMPTY, "Empty"},
{UNESCAPED_CHARS, "Within strings, some characters must be escapted, we found unescapted characters"},
{UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson"},
};
const std::string& simdjson::errorMsg(const int errorCode) {
const std::string& errorMsg(const int errorCode) {
return errorStrings.at(errorCode);
}
}
}

View File

@ -59,9 +59,9 @@ int main(int argc, char *argv[]) {
exit(1);
}
const char *filename = argv[optind];
padded_string p;
simdjson::padded_string p;
try {
get_corpus(filename).swap(p);
simdjson::get_corpus(filename).swap(p);
} catch (const std::exception &e) { // caught by reference to base
std::cout << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE;
@ -76,7 +76,7 @@ int main(int argc, char *argv[]) {
std::cout << p.size() << " B ";
std::cout << std::endl;
}
ParsedJson pj;
simdjson::ParsedJson pj;
size_t maxdepth = 1024 * 4;
bool allocok = pj.allocateCapacity(p.size(), maxdepth);
if (!allocok) {

View File

@ -39,7 +39,7 @@ bool skyprophet_test() {
if (maxsize < s.size())
maxsize = s.size();
}
ParsedJson pj;
simdjson::ParsedJson pj;
if (!pj.allocateCapacity(maxsize)) {
printf("allocation failure in skyprophet_test\n");
return false;

View File

@ -65,14 +65,14 @@ bool validate(const char *dirname) {
} else {
strcpy(fullpath + dirlen, name);
}
padded_string p;
simdjson::padded_string p;
try {
get_corpus(fullpath).swap(p);
simdjson::get_corpus(fullpath).swap(p);
} catch (const std::exception &e) {
std::cerr << "Could not load the file " << fullpath << std::endl;
return EXIT_FAILURE;
}
ParsedJson pj;
simdjson::ParsedJson pj;
bool allocok = pj.allocateCapacity(p.size(), 1024);
if (!allocok) {
std::cerr << "can't allocate memory" << std::endl;

View File

@ -132,15 +132,15 @@ bool validate(const char *dirname) {
} else {
strcpy(fullpath + dirlen, name);
}
padded_string p;
simdjson::padded_string p;
try {
get_corpus(fullpath).swap(p);
simdjson::get_corpus(fullpath).swap(p);
} catch (const std::exception &e) {
std::cout << "Could not load the file " << fullpath << std::endl;
return EXIT_FAILURE;
}
// terrible hack but just to get it working
ParsedJson pj;
simdjson::ParsedJson pj;
bool allocok = pj.allocateCapacity(p.size(), 1024);
if (!allocok) {
std::cerr << "can't allocate memory" << std::endl;

View File

@ -1,6 +1,8 @@
#include "../singleheader/simdjson.h"
#include <iostream>
using namespace simdjson;
int main() {
const char *filename = JSON_TEST_PATH;
padded_string p = get_corpus(filename);
@ -13,7 +15,7 @@ int main() {
}
const int res = json_parse(p, pj);
if (res) {
std::cerr << simdjson::errorMsg(res) << std::endl;
std::cerr << errorMsg(res) << std::endl;
return EXIT_FAILURE;
}
return EXIT_SUCCESS;

View File

@ -325,14 +325,14 @@ bool validate(const char *dirname) {
} else {
strcpy(fullpath + dirlen, name);
}
padded_string p;
simdjson::padded_string p;
try {
get_corpus(fullpath).swap(p);
simdjson::get_corpus(fullpath).swap(p);
} catch (const std::exception &e) {
std::cout << "Could not load the file " << fullpath << std::endl;
return EXIT_FAILURE;
}
ParsedJson pj;
simdjson::ParsedJson pj;
bool allocok = pj.allocateCapacity(p.size(), 1024);
if (!allocok) {
std::cerr << "can't allocate memory" << std::endl;

View File

@ -5,7 +5,7 @@
#include "simdjson/jsonioutil.h"
#include "simdjson/jsonparser.h"
void compute_dump(ParsedJson::iterator &pjh) {
void compute_dump(simdjson::ParsedJson::iterator &pjh) {
if (pjh.is_object()) {
std::cout << "{";
if (pjh.down()) {
@ -72,26 +72,26 @@ int main(int argc, char *argv[]) {
if (optind + 1 < argc) {
std::cerr << "warning: ignoring everything after " << argv[optind + 1] << std::endl;
}
padded_string p;
simdjson::padded_string p;
try {
get_corpus(filename).swap(p);
simdjson::get_corpus(filename).swap(p);
} catch (const std::exception &e) { // caught by reference to base
std::cout << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE;
}
ParsedJson pj;
simdjson::ParsedJson pj;
bool allocok = pj.allocateCapacity(p.size(), 1024);
if (!allocok) {
std::cerr << "failed to allocate memory" << std::endl;
return EXIT_FAILURE;
}
int res = json_parse(p, pj); // do the parsing, return false on error
int res = simdjson::json_parse(p, pj); // do the parsing, return false on error
if (res) {
std::cerr << " Parsing failed. " << std::endl;
return EXIT_FAILURE;
}
if (apidump) {
ParsedJson::iterator pjh(pj);
simdjson::ParsedJson::iterator pjh(pj);
if (!pjh.isOk()) {
std::cerr << " Could not iterate parsed result. " << std::endl;
return EXIT_FAILURE;

View File

@ -41,9 +41,9 @@ using stat_t = struct stat_s;
stat_t simdjson_computestats(const padded_string &p) {
stat_t simdjson_computestats(const simdjson::padded_string &p) {
stat_t answer;
ParsedJson pj = build_parsed_json(p);
simdjson::ParsedJson pj = simdjson::build_parsed_json(p);
answer.valid = pj.isValid();
if (!answer.valid) {
return answer;
@ -125,9 +125,9 @@ int main(int argc, char *argv[]) {
if (myoptind + 1 < argc) {
std::cerr << "warning: ignoring everything after " << argv[myoptind + 1] << std::endl;
}
padded_string p;
simdjson::padded_string p;
try {
get_corpus(filename).swap(p);
simdjson::get_corpus(filename).swap(p);
} catch (const std::exception &e) { // caught by reference to base
std::cerr << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE;

View File

@ -8,14 +8,14 @@ int main(int argc, char *argv[]) {
std::cerr << "Usage: " << argv[0] << " <jsonfile>\n";
exit(1);
}
padded_string p;
simdjson::padded_string p;
std::string filename = argv[argc - 1];
try{
get_corpus(filename).swap(p);
simdjson::get_corpus(filename).swap(p);
} catch (const std::exception& e) {
std::cout << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE;
}
jsonminify(p, p.data());
simdjson::jsonminify(p, p.data());
printf("%s",p.data());
}