still not done

still not done.
This commit is contained in:
Daniel Lemire 2018-12-29 15:01:11 -05:00
parent 737b515110
commit 4a8e229566
14 changed files with 1185 additions and 45 deletions

View File

@ -1,13 +1,17 @@
#include <assert.h>
#include <ctype.h>
#ifndef _MSC_VER
#include <unistd.h>
#include <x86intrin.h>
#include <dirent.h>
#else
#include <intrin.h>
#endif
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <x86intrin.h>
#include <algorithm>
#include <chrono>
@ -41,7 +45,7 @@ int main(int argc, char *argv[]) {
bool jsonoutput = false;
bool forceoneiteration = false;
bool justdata = false;
#ifndef _MSC_VER
int c;
while ((c = getopt(argc, argv, "1vdt")) != -1)
@ -64,6 +68,9 @@ int main(int argc, char *argv[]) {
default:
abort();
}
#else
int optind = 1;
#endif
if (optind >= argc) {
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
exit(1);

View File

@ -1,6 +1,7 @@
#include <iostream>
#ifndef _MSC_VER
#include <unistd.h>
#endif
#include "simdjson/jsonioutil.h"
#include "simdjson/jsonparser.h"
#ifdef __linux__
@ -113,13 +114,16 @@ stat_t simdjson_computestats(const std::string_view &p) {
int main(int argc, char *argv[]) {
int c;
#ifndef _MSC_VER
while ((c = getopt(argc, argv, "")) != -1)
switch (c) {
default:
abort();
}
#else
int optind = 1;
#endif
if (optind >= argc) {
cerr << "Reads json, prints stats. " << endl;
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;

View File

@ -3,7 +3,7 @@
#include <cassert>
// the input buf should be readable up to buf + SIMDJSON_PADDING
#define SIMDJSON_PADDING sizeof(__m256i)
#define SIMDJSON_PADDING sizeof(__m256i)
@ -25,17 +25,17 @@
#ifdef _MSC_VER
#define really_inline inline
#define never_inline inline
#define really_inline inline
#define never_inline inline
#define UNUSED
#define WARN_UNUSED
#define UNUSED
#define WARN_UNUSED
#ifndef likely
// No branch-prediction hint is used in this branch: expand to the condition
// itself. The parentheses keep the argument intact when the macro is used
// inside a larger expression (e.g. likely(a || b) && c).
#define likely(x) (x)
#endif
#ifndef unlikely
// No branch-prediction hint is used in this branch: expand to the condition
// itself. The parentheses keep the argument intact when the macro is used
// inside a larger expression (e.g. unlikely(a || b) && c).
#define unlikely(x) (x)
#endif
#else
@ -53,4 +53,4 @@
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,31 @@
#pragma once
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
// Adds value1 and value2 and stores the 64-bit sum (wrapped modulo 2^64) in
// *result. Returns true when the addition carried out of 64 bits.
// Uses plain unsigned arithmetic instead of _addcarry_u64 so that the helper
// does not depend on an intrinsic that is documented for x64 targets only.
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
  const uint64_t sum = value1 + value2; // unsigned addition wraps, never UB
  *result = sum;
  return sum < value1; // the sum wrapped iff it is smaller than an operand
}
# pragma intrinsic(_umul128)
// Multiplies value1 by value2 with _umul128. The low 64 bits of the product
// are stored in *result; the return value is true when the upper 64 bits are
// non-zero, i.e. when the product does not fit in 64 bits.
static inline bool mul_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
  uint64_t upper_half;
  const uint64_t lower_half = _umul128(value1, value2, &upper_half);
  *result = lower_half;
  return upper_half != 0;
}
#else
#include <x86intrin.h>
// Adds value1 and value2 and stores the 64-bit sum (wrapped modulo 2^64) in
// *result. Returns true when the addition overflowed.
// The type-generic builtin is used so the code does not depend on uint64_t
// being the same type (or width) as unsigned long.
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
  return __builtin_add_overflow(value1, value2, result);
}
// Multiplies value1 by value2 and stores the low 64 bits of the product in
// *result. Returns true when the product does not fit in 64 bits.
// The type-generic builtin accepts a uint64_t pointer directly, so no pointer
// cast between unsigned long and unsigned long long is needed.
static inline bool mul_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
  return __builtin_mul_overflow(value1, value2, result);
}
#endif // _MSC_VER
#include "simdjson/common_defs.h"
#include "simdjson/jsoncharutils.h"
#include "simdjson/parsedjson.h"
@ -105,10 +131,11 @@ is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) {
#ifdef SWAR_NUMBER_PARSING
#ifdef _MSC_VER
// check quickly whether the next 8 chars are made of digits
// at a glance, it looks better than Mula's
// http://0x80.pl/articles/swar-digits-validate.html
/*static inline bool is_made_of_eight_digits_fast(const char *chars) {
static inline bool is_made_of_eight_digits_fast(const char *chars) {
uint64_t val;
memcpy(&val, chars, 8);
// a branchy method might be faster:
@ -118,8 +145,8 @@ is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) {
return (((val & 0xF0F0F0F0F0F0F0F0) |
(((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) ==
0x3333333333333333);
}*/
}
#else
// this is more efficient apparently than the scalar code above (fewer instructions)
static inline bool is_made_of_eight_digits_fast(const char *chars) {
__m64 val;
@ -128,6 +155,7 @@ static inline bool is_made_of_eight_digits_fast(const char *chars) {
__m64 basecmp = _mm_subs_pu8(base,_mm_set1_pi8(9));
return _mm_cvtm64_si64(basecmp) == 0;
}
#endif
static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
// this actually computes *16* values so we are being wasteful.
@ -284,13 +312,13 @@ static never_inline bool parse_large_integer(const uint8_t *const buf,
// we rarely see large integer parts like 123456789
while (is_integer(*p)) {
digit = *p - '0';
if (__builtin_umulll_overflow(i, 10, (unsigned long long *)&i)) {
if (mul_overflow(i, 10, &i)) {
#ifdef JSON_TEST_NUMBERS // for unit testing
foundInvalidNumber(buf + offset);
#endif
return false; // overflow
}
if (__builtin_uaddll_overflow(i, digit, (unsigned long long *)&i)) {
if (add_overflow(i, digit, &i)) {
#ifdef JSON_TEST_NUMBERS // for unit testing
foundInvalidNumber(buf + offset);
#endif

View File

@ -13,7 +13,7 @@
#include <x86intrin.h>
#endif
#include <cstdint>
#ifdef __SSE3__
#ifdef __AVX__
static const unsigned char mask128_epi8[] = {
0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe,

View File

@ -9,7 +9,7 @@ char * allocate_padded_buffer(size_t length) {
char *padded_buffer;
size_t totalpaddedlength = length + SIMDJSON_PADDING;
#ifdef _MSC_VER
padded_buffer = (uint8_t*) _aligned_malloc(totalpaddedlength, 64);
padded_buffer = (char*) _aligned_malloc(totalpaddedlength, 64);
#elif defined(__MINGW32__) || defined(__MINGW64__)
padded_buffer = __mingw_aligned_malloc(totalpaddedlength, 64);
#else

View File

@ -1,7 +1,7 @@
#include <cstdint>
#ifndef __AVX2__
#include <cstdint>
static uint8_t jump_table[256 * 3] = {
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
@ -62,9 +62,14 @@ size_t jsonminify(const unsigned char *bytes, size_t howmany,
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
// Adds value1 and value2 and stores the 64-bit sum (wrapped modulo 2^64) in
// *result. Returns true when the addition carried out of 64 bits.
// Uses plain unsigned arithmetic instead of _addcarry_u64 so that the helper
// does not depend on an intrinsic that is documented for x64 targets only.
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
  const uint64_t sum = value1 + value2; // unsigned addition wraps, never UB
  *result = sum;
  return sum < value1; // the sum wrapped iff it is smaller than an operand
}
#else
#include <immintrin.h>
#include <x86intrin.h>
// Adds value1 and value2 and stores the 64-bit sum (wrapped modulo 2^64) in
// *result. Returns true when the addition overflowed.
// The type-generic builtin takes the uint64_t pointer as-is; the previous
// form cast it to unsigned long long * while calling the unsigned long
// variant of the builtin, which mismatches wherever the two types differ.
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
  return __builtin_add_overflow(value1, value2, result);
}
#endif // _MSC_VER
#include "simdjson/simdprune_tables.h"
@ -125,7 +130,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
uint64_t odd_starts = start_edges & ~even_start_mask;
uint64_t even_carries = bs_bits + even_starts;
uint64_t odd_carries;
bool iter_ends_odd_backslash = _addcarry_u64(
bool iter_ends_odd_backslash = add_overflow(
bs_bits, odd_starts, (unsigned long long *)&odd_carries);
odd_carries |= prev_iter_ends_odd_backslash;
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
@ -209,7 +214,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
uint64_t even_carries = bs_bits + even_starts;
uint64_t odd_carries;
//bool iter_ends_odd_backslash =
__builtin_uaddll_overflow( bs_bits, odd_starts, (unsigned long long *)&odd_carries);
add_overflow( bs_bits, odd_starts, &odd_carries);
odd_carries |= prev_iter_ends_odd_backslash;
//prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL; // we never use it
uint64_t even_carry_ends = even_carries & ~bs_bits;

View File

@ -1,6 +1,10 @@
#include "simdjson/jsonparser.h"
#ifdef _MSC_VER
#include <windows.h>
#include <sysinfoapi.h>
#else
#include <unistd.h>
#endif
// parse a document found in buf, need to preallocate ParsedJson.
WARN_UNUSED
bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded) {
@ -12,8 +16,14 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
bool reallocated = false;
if(reallocifneeded) {
// realloc is needed if the end of the memory crosses a page
long pagesize = sysconf (_SC_PAGESIZE); // on windows this should be SYSTEM_INFO sysInfo; GetSystemInfo(&sysInfo); sysInfo.dwPageSize
if ( (reinterpret_cast<uintptr_t>(buf + len - 1) % pagesize ) < SIMDJSON_PADDING ) {
#ifdef _MSC_VER
SYSTEM_INFO sysInfo;
GetSystemInfo(&sysInfo);
long pagesize = sysInfo.dwPageSize;
#else
long pagesize = sysconf (_SC_PAGESIZE);
#endif
if ( (reinterpret_cast<uintptr_t>(buf + len - 1) % pagesize ) < SIMDJSON_PADDING ) {
const uint8_t *tmpbuf = buf;
buf = (uint8_t *) allocate_padded_buffer(len);
if(buf == NULL) return false;

View File

@ -1,9 +1,16 @@
#include <cstdint>
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
// Adds value1 and value2 and stores the 64-bit sum (wrapped modulo 2^64) in
// *result. Returns true when the addition carried out of 64 bits.
// Uses plain unsigned arithmetic instead of _addcarry_u64 so that the helper
// does not depend on an intrinsic that is documented for x64 targets only.
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
  const uint64_t sum = value1 + value2; // unsigned addition wraps, never UB
  *result = sum;
  return sum < value1; // the sum wrapped iff it is smaller than an operand
}
#else
#include <x86intrin.h>
#endif
// Adds value1 and value2 and stores the 64-bit sum (wrapped modulo 2^64) in
// *result. Returns true when the addition overflowed.
// The type-generic builtin is used so the code does not depend on uint64_t
// being the same type (or width) as unsigned long.
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
  return __builtin_add_overflow(value1, value2, result);
}
#endif // _MSC_VER
#include <cassert>
@ -106,7 +113,7 @@ WARN_UNUSED
// indicates whether the sense of any edge going to the next iteration
// should be flipped
bool iter_ends_odd_backslash =
addcarry_u64(bs_bits, odd_starts, (unsigned long long *) &odd_carries);
add_overflow(bs_bits, odd_starts, (unsigned long long *) &odd_carries);
odd_carries |=
prev_iter_ends_odd_backslash; // push in bit zero as a potential end
@ -257,7 +264,8 @@ WARN_UNUSED
// indicates whether the sense of any edge going to the next iteration
// should be flipped
//bool iter_ends_odd_backslash =
__builtin_uaddll_overflow(bs_bits, odd_starts, (unsigned long long *) &odd_carries);
// __builtin_uaddll_overflow(bs_bits, odd_starts, (unsigned long long *) &odd_carries);
add_overflow(bs_bits, odd_starts, &odd_carries);
odd_carries |=
prev_iter_ends_odd_backslash; // push in bit zero as a potential end

View File

@ -1,7 +1,37 @@
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#ifndef __clang__ // if one compiles with MSVC *with* clang, then these
// intrinsics are defined!!!
// Stand-in for the GCC/Clang builtin of the same name: returns the number of
// trailing zero bits of input_num, obtained from the bit-scan intrinsics.
// As written, the result is not meaningful when input_num is zero (the scan
// finds no set bit).
static inline int __builtin_ctzll(unsigned long long input_num) {
  unsigned long bit_pos;
#ifdef _WIN64 // highly recommended!!!
  _BitScanForward64(&bit_pos, input_num);
#else // if we must support 32-bit Windows
  const uint32_t low_half = (uint32_t)input_num;
  if (low_half != 0) {
    // the lowest set bit lives in the low 32 bits
    _BitScanForward(&bit_pos, low_half);
  } else {
    // otherwise scan the high 32 bits and shift the answer accordingly
    const uint32_t high_half = (uint32_t)(input_num >> 32);
    _BitScanForward(&bit_pos, high_half);
    bit_pos += 32;
  }
#endif
  return (int)bit_pos;
}
// Stand-in for the GCC/Clang builtin of the same name: returns the number of
// set bits in input_num using the __popcnt family of intrinsics.
static inline int __builtin_popcountll(unsigned long long input_num) {
#ifdef _WIN64 // highly recommended!!!
  return (int)__popcnt64(input_num);
#else // if we must support 32-bit Windows
  // count each 32-bit half separately and add the two counts
  const uint32_t low_half = (uint32_t)input_num;
  const uint32_t high_half = (uint32_t)(input_num >> 32);
  return (int)(__popcnt(low_half) + __popcnt(high_half));
#endif
}
#endif// not clang
#else
// we have a normal compiler
#include <x86intrin.h>
#endif
@ -20,7 +50,7 @@
#endif
// Stores idx plus the index of the lowest set bit of s (its trailing-zero
// count) into base_ptr[base + i], then clears that bit from s.
// NOTE(review): the expansion is a bare sequence of statements that reads and
// writes the caller's base_ptr, base, idx and s; it is not wrapped in
// do { } while (0), so use it only where a statement sequence is valid.
#define SET_BIT(i) \
base_ptr[base + i] = (uint32_t)idx + __tzcnt_u64(s); \
base_ptr[base + i] = (uint32_t)idx + __builtin_ctzll(s); \
s = s & (s - 1);
#define SET_BIT1 SET_BIT(0)
@ -78,7 +108,7 @@ bool flatten_indexes(size_t len, ParsedJson &pj) {
s &= s - 1ULL;
}
#elif defined(NO_PDEP_PLEASE)
uint32_t cnt = __builtin_popcountll(s);
uint32_t cnt = _mm_popcnt_u64(s);
uint32_t next_base = base + cnt;
while (s) {
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)

View File

@ -1,11 +1,16 @@
#include <assert.h>
#include <cstring>
#ifndef _MSC_VER
#include <dirent.h>
#include <unistd.h>
#else
// MSVC does not ship the POSIX <dirent.h> header; use the bundled portable replacement.
#include <simdjson/dirent_portable.h>
#endif
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "simdjson/jsonparser.h"

View File

@ -1,6 +1,7 @@
#include <iostream>
#ifndef _MSC_VER
#include <unistd.h>
#endif
#include "simdjson/jsonioutil.h"
#include "simdjson/jsonparser.h"
@ -41,9 +42,11 @@ void compute_dump(ParsedJson::iterator &pjh) {
}
int main(int argc, char *argv[]) {
bool rawdump = false;
bool apidump = false;
#ifndef _MSC_VER
int c;
bool rawdump = false;
bool apidump = false;
while ((c = getopt(argc, argv, "da")) != -1)
switch (c) {
@ -56,6 +59,9 @@ int main(int argc, char *argv[]) {
default:
abort();
}
#else
int optind = 1;
#endif
if (optind >= argc) {
cerr << "Reads json in, out the result of the parsing. " << endl;
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;

View File

@ -1,5 +1,4 @@
#include <iostream>
#include <unistd.h>
#include "simdjson/jsonioutil.h"
#include "simdjson/jsonparser.h"
@ -118,14 +117,7 @@ stat_t simdjson_computestats(const std::string_view &p) {
int main(int argc, char *argv[]) {
int c;
while ((c = getopt(argc, argv, "")) != -1)
switch (c) {
default:
abort();
}
int optind = 1;
if (optind >= argc) {
cerr << "Reads json, prints stats. " << endl;
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;