Major surgery.

This commit is contained in:
Daniel Lemire 2018-08-20 17:27:25 -04:00
parent 726eb5a030
commit fb65be64bb
26 changed files with 37172 additions and 1273 deletions

View File

@ -6,32 +6,48 @@
.PHONY: clean cleandist
CXXFLAGS = -std=c++11 -O2 -march=native -Wall -Wextra -Wshadow -Iinclude -Iinclude/linux -Idependencies/double-conversion -Ldependencies/double-conversion/release
CXXFLAGS = -std=c++11 -O2 -march=native -Wall -Wextra -Wshadow -Iinclude -Ibenchmark/linux -Idependencies/double-conversion -Idependencies/rapidjson/include -Ldependencies/double-conversion/release
LIBFLAGS = -ldouble-conversion
EXECUTABLES=parse jsoncheck
HEADERS=include/common_defs.h include/jsonioutil.h include/linux/linux-perf-events.h include/simdjson_internal.h include/stage1_find_marks.h include/stage2_flatten.h include/stage3_ape_machine.h include/stage4_shovel_machine.h
LIBFILES=src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage3_ape_machine.cpp src/stage4_shovel_machine.cpp
EXECUTABLES=parse jsoncheck minifiercompetition parsingcompetition
HEADERS=include/jsonparser.h include/common_defs.h include/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdprune_tables.h include/simdjson_internal.h include/stage1_find_marks.h include/stage2_flatten.h include/stage3_ape_machine.h include/stage4_shovel_machine.h include/jsonminifier.h
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage3_ape_machine.cpp src/stage4_shovel_machine.cpp src/jsonminifier.cpp
EXTRA_EXECUTABLES=parsenocheesy parsenodep8
LIDDOUBLE:=dependencies/double-conversion/release/libdouble-conversion.a
LIBDOUBLE:=dependencies/double-conversion/release/libdouble-conversion.a
RAPIDJSON_INCLUDE:=dependencies/rapidjson/include
LIBS=$(LIDDOUBLE)
LIBS=$(RAPIDJSON_INCLUDE) $(LIBDOUBLE)
all: $(LIBS) $(EXECUTABLES)
test: jsoncheck
./jsoncheck
$(LIDDOUBLE) : dependencies/double-conversion/README.md
$(RAPIDJSON_INCLUDE):
git submodule update --init --recursive
$(LIBDOUBLE) : dependencies/double-conversion/README.md
cd dependencies/double-conversion/ && mkdir -p release && cd release && cmake .. && make
bench: benchmarks/bench.cpp $(RAPIDJSON_INCLUDE) $(HEADERS)
$(CXX) -std=c++11 -O3 -o $@ benchmarks/bench.cpp -I$(RAPIDJSON_INCLUDE) -Iinclude -march=native -lm -Wall -Wextra -Wno-narrowing
parse: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parse $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
jsoncheck:tests/jsoncheck.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o jsoncheck $(LIBFILES) tests/jsoncheck.cpp -I. $(LIBFLAGS)
minifiercompetition: benchmark/minifiercompetition.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o minifiercompetition $(LIBFILES) benchmark/minifiercompetition.cpp -I. $(LIBFLAGS)
parsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parsingcompetition $(LIBFILES) benchmark/parsingcompetition.cpp -I. $(LIBFLAGS)
parsehisto: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parsehisto benchmark/parse.cpp $(LIBFILES) $(LIBFLAGS) -DBUILDHISTOGRAM

View File

@ -0,0 +1,88 @@
// https://github.com/WojciechMula/toys/blob/master/000helpers/linux-perf-events.h
#pragma once
#ifdef __linux__
#include <asm/unistd.h> // for __NR_perf_event_open
#include <linux/perf_event.h> // for perf event constants
#include <sys/ioctl.h> // for ioctl
#include <unistd.h> // for syscall
#include <cerrno> // for errno
#include <cstring> // for memset
#include <stdexcept>
#include <vector>
template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
int fd;
perf_event_attr attribs;
int num_events;
std::vector<uint64_t> temp_result_vec;
std::vector<uint64_t> ids;
public:
LinuxEvents(std::vector<int> config_vec) : fd(0) {
memset(&attribs, 0, sizeof(attribs));
attribs.type = TYPE;
attribs.size = sizeof(attribs);
attribs.disabled = 1;
attribs.exclude_kernel = 1;
attribs.exclude_hv = 1;
attribs.sample_period = 0;
attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
const int pid = 0; // the current process
const int cpu = -1; // all CPUs
const unsigned long flags = 0;
int group = -1; // no group
num_events = config_vec.size();
u32 i = 0;
for (auto config : config_vec) {
attribs.config = config;
fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags);
if (fd == -1) {
report_error("perf_event_open");
}
ioctl(fd, PERF_EVENT_IOC_ID, &ids[i++]);
if (group == -1) {
group = fd;
}
}
temp_result_vec.resize(num_events * 2 + 1);
}
~LinuxEvents() { close(fd); }
really_inline void start() {
if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
report_error("ioctl(PERF_EVENT_IOC_RESET)");
}
if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
}
}
really_inline void end(std::vector<unsigned long long> &results) {
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
}
if (read(fd, &temp_result_vec[0], temp_result_vec.size() * 8) == -1) {
report_error("read");
}
// our actual results are in slots 1,3,5, ... of this structure
// we really should be checking our ids obtained earlier to be safe
for (u32 i = 1; i < temp_result_vec.size(); i += 2) {
results[i / 2] = temp_result_vec[i];
}
}
private:
void report_error(const std::string &context) {
throw std::runtime_error(context + ": " + std::string(strerror(errno)));
}
};
#endif

View File

@ -1,27 +1,26 @@
#include "common_defs.h"
#include "double-conversion/double-conversion.h"
#include "linux-perf-events.h"
#include <algorithm>
#include <assert.h>
#include <chrono>
#include <cstring>
#include <dirent.h>
#include <fstream>
#include <inttypes.h>
#include <iomanip>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <iostream>
#include <iomanip>
#include <chrono>
#include <fstream>
#include <sstream>
#include <string>
#include <cstring>
#include <unistd.h>
#include <vector>
#include <set>
#include <map>
#include <algorithm>
#include <x86intrin.h>
#include <assert.h>
#include "double-conversion/double-conversion.h"
#include "common_defs.h"
#include "linux-perf-events.h"
/// Fixme: enable doube conv
// #define DOUBLECONV
@ -34,45 +33,58 @@ using namespace double_conversion;
//#define DEBUG
#include "jsonioutil.h"
#include "simdjson_internal.h"
#include "stage1_find_marks.h"
#include "stage2_flatten.h"
#include "stage3_ape_machine.h"
#include "stage4_shovel_machine.h"
#include "jsonioutil.h"
using namespace std;
// https://stackoverflow.com/questions/2616906/how-do-i-output-coloured-text-to-a-linux-terminal
namespace Color {
enum Code {
FG_DEFAULT = 39, FG_BLACK = 30, FG_RED = 31, FG_GREEN = 32,
FG_YELLOW = 33, FG_BLUE = 34, FG_MAGENTA = 35, FG_CYAN = 36,
FG_LIGHT_GRAY = 37, FG_DARK_GRAY = 90, FG_LIGHT_RED = 91,
FG_LIGHT_GREEN = 92, FG_LIGHT_YELLOW = 93, FG_LIGHT_BLUE = 94,
FG_LIGHT_MAGENTA = 95, FG_LIGHT_CYAN = 96, FG_WHITE = 97,
BG_RED = 41, BG_GREEN = 42, BG_BLUE = 44, BG_DEFAULT = 49
};
class Modifier {
enum Code {
FG_DEFAULT = 39,
FG_BLACK = 30,
FG_RED = 31,
FG_GREEN = 32,
FG_YELLOW = 33,
FG_BLUE = 34,
FG_MAGENTA = 35,
FG_CYAN = 36,
FG_LIGHT_GRAY = 37,
FG_DARK_GRAY = 90,
FG_LIGHT_RED = 91,
FG_LIGHT_GREEN = 92,
FG_LIGHT_YELLOW = 93,
FG_LIGHT_BLUE = 94,
FG_LIGHT_MAGENTA = 95,
FG_LIGHT_CYAN = 96,
FG_WHITE = 97,
BG_RED = 41,
BG_GREEN = 42,
BG_BLUE = 44,
BG_DEFAULT = 49
};
class Modifier {
Code code;
public:
public:
Modifier(Code pCode) : code(pCode) {}
friend std::ostream&
operator<<(std::ostream& os, const Modifier& mod) {
friend std::ostream &operator<<(std::ostream &os, const Modifier &mod) {
return os << "\033[" << mod.code << "m";
}
};
}
};
} // namespace Color
void colorfuldisplay(ParsedJson & pj, const u8 * buf) {
void colorfuldisplay(ParsedJson &pj, const u8 *buf) {
Color::Modifier greenfg(Color::FG_GREEN);
Color::Modifier yellowfg(Color::FG_YELLOW);
Color::Modifier deffg(Color::FG_DEFAULT);
size_t i = 0;
// skip initial fluff
while((i+1< pj.n_structural_indexes) && (pj.structural_indexes[i]==pj.structural_indexes[i+1])){
while ((i + 1 < pj.n_structural_indexes) &&
(pj.structural_indexes[i] == pj.structural_indexes[i + 1])) {
i++;
}
for (; i < pj.n_structural_indexes; i++) {
@ -85,9 +97,9 @@ void colorfuldisplay(ParsedJson & pj, const u8 * buf) {
} else {
std::cout << yellowfg << buf[idx] << deffg;
}
if(i + 1 < pj.n_structural_indexes) {
if (i + 1 < pj.n_structural_indexes) {
u32 nextidx = pj.structural_indexes[i + 1];
for(u32 pos = idx + 1 ; pos < nextidx; pos++) {
for (u32 pos = idx + 1; pos < nextidx; pos++) {
std::cout << buf[pos];
}
}
@ -95,20 +107,17 @@ void colorfuldisplay(ParsedJson & pj, const u8 * buf) {
std::cout << std::endl;
}
int main(int argc, char * argv[]) {
int main(int argc, char *argv[]) {
if (argc != 2) {
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
exit(1);
}
pair<u8 *, size_t> p = get_corpus(argv[1]);
ParsedJson * pj_ptr = new ParsedJson;
ParsedJson & pj(*pj_ptr);
ParsedJson *pj_ptr = new ParsedJson;
ParsedJson &pj(*pj_ptr);
if (posix_memalign( (void **)&pj.structurals, 8, ROUNDUP_N(p.second, 64)/8)) {
if (posix_memalign((void **)&pj.structurals, 8,
ROUNDUP_N(p.second, 64) / 8)) {
cerr << "Could not allocate memory" << endl;
exit(1);
};
@ -159,70 +168,93 @@ int main(int argc, char * argv[]) {
isok = find_structural_bits(p.first, p.second, pj);
#ifndef SQUASH_COUNTERS
unified.end(results);
cy1 += results[0]; cl1 += results[1];
if(! isok ) break;
cy1 += results[0];
cl1 += results[1];
if (!isok)
break;
unified.start();
#endif
isok = flatten_indexes(p.second, pj);
#ifndef SQUASH_COUNTERS
unified.end(results);
cy2 += results[0]; cl2 += results[1];
if(! isok ) break;
cy2 += results[0];
cl2 += results[1];
if (!isok)
break;
unified.start();
#endif
isok = ape_machine(p.first, p.second, pj);
#ifndef SQUASH_COUNTERS
unified.end(results);
cy3 += results[0]; cl3 += results[1];
if(! isok ) break;
cy3 += results[0];
cl3 += results[1];
if (!isok)
break;
unified.start();
#endif
isok = shovel_machine(p.first, p.second, pj);
#ifndef SQUASH_COUNTERS
unified.end(results);
cy4 += results[0]; cl4 += results[1];
cy4 += results[0];
cl4 += results[1];
#endif
if(! isok ) break;
if (!isok)
break;
auto end = std::chrono::steady_clock::now();
std::chrono::duration<double> secs = end - start;
res[i] = secs.count();
}
#ifndef SQUASH_COUNTERS
printf("number of bytes %ld number of structural chars %d ratio %.3f\n", p.second, pj.n_structural_indexes,
(double) pj.n_structural_indexes / p.second);
printf("number of bytes %ld number of structural chars %d ratio %.3f\n",
p.second, pj.n_structural_indexes,
(double)pj.n_structural_indexes / p.second);
unsigned long total = cy1 + cy2 + cy3 + cy4;
printf("stage 1 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
cl1, cy1, 100. * cy1 / total, (double) cl1 / cy1);
printf(" stage 1 runs at %.2f cycles per input byte.\n", (double) cy1 / (iterations * p.second));
printf(
"stage 1 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
cl1, cy1, 100. * cy1 / total, (double)cl1 / cy1);
printf(" stage 1 runs at %.2f cycles per input byte.\n",
(double)cy1 / (iterations * p.second));
printf("stage 2 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
cl2, cy2, 100. * cy2 / total, (double) cl2 / cy2);
printf(" stage 2 runs at %.2f cycles per input byte and ", (double) cy2 / (iterations * p.second));
printf("%.2f cycles per structural character.\n", (double) cy2 / (iterations * pj.n_structural_indexes));
printf(
"stage 2 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
cl2, cy2, 100. * cy2 / total, (double)cl2 / cy2);
printf(" stage 2 runs at %.2f cycles per input byte and ",
(double)cy2 / (iterations * p.second));
printf("%.2f cycles per structural character.\n",
(double)cy2 / (iterations * pj.n_structural_indexes));
printf("stage 3 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
cl3, cy3, 100. * cy3 / total, (double) cl3 / cy3);
printf(" stage 3 runs at %.2f cycles per input byte and ", (double) cy3 / (iterations * p.second));
printf("%.2f cycles per structural character.\n", (double) cy3 / (iterations * pj.n_structural_indexes));
printf(
"stage 3 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
cl3, cy3, 100. * cy3 / total, (double)cl3 / cy3);
printf(" stage 3 runs at %.2f cycles per input byte and ",
(double)cy3 / (iterations * p.second));
printf("%.2f cycles per structural character.\n",
(double)cy3 / (iterations * pj.n_structural_indexes));
printf("stage 4 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
cl4, cy4, 100. * cy4 / total, (double) cl4 / cy4);
printf(" stage 4 runs at %.2f cycles per input byte and ", (double) cy4 / (iterations * p.second));
printf("%.2f cycles per structural character.\n", (double) cy4 / (iterations * pj.n_structural_indexes));
printf(
"stage 4 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
cl4, cy4, 100. * cy4 / total, (double)cl4 / cy4);
printf(" stage 4 runs at %.2f cycles per input byte and ",
(double)cy4 / (iterations * p.second));
printf("%.2f cycles per structural character.\n",
(double)cy4 / (iterations * pj.n_structural_indexes));
printf(" all stages: %.2f cycles per input byte.\n", (double) total / (iterations * p.second));
printf(" all stages: %.2f cycles per input byte.\n",
(double)total / (iterations * p.second));
#endif
// colorfuldisplay(pj, p.first);
// colorfuldisplay(pj, p.first);
double min_result = *min_element(res.begin(), res.end());
cout << "Min: " << min_result << " bytes read: " << p.second << " Gigabytes/second: " << (p.second) / (min_result * 1000000000.0) << "\n";
cout << "Min: " << min_result << " bytes read: " << p.second
<< " Gigabytes/second: " << (p.second) / (min_result * 1000000000.0)
<< "\n";
free(pj.structurals);
free(p.first);
delete[] pj.structural_indexes;
delete pj_ptr;
if(! isok ) {
if (!isok) {
printf(" Parsing failed. \n ");
return EXIT_FAILURE;
}

View File

@ -1,5 +1,7 @@
#pragma once
#include <cassert>
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
@ -9,34 +11,36 @@ typedef signed short s16;
typedef signed int s32;
typedef signed long long s64;
#include <x86intrin.h>
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#else
#include <immintrin.h>
#include <x86intrin.h>
#endif
typedef __m128i m128;
typedef __m256i m256;
// Snippets from Hyperscan
// Align to N-byte boundary
#define ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1))
#define ROUNDDOWN_N(a, n) ((a) & ~((n)-1))
#define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n) - 1)) == 0)
#define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0)
#define really_inline inline __attribute__ ((always_inline, unused))
#define never_inline inline __attribute__ ((noinline, unused))
#define really_inline inline __attribute__((always_inline, unused))
#define never_inline inline __attribute__((noinline, unused))
#define UNUSED __attribute__ ((unused))
#define UNUSED __attribute__((unused))
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#define likely(x) __builtin_expect(!!(x), 1)
#endif
#ifndef unlikely
#define unlikely(x) __builtin_expect(!!(x), 0)
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
static inline
u32 ctz64(u64 x) {
static inline u32 ctz64(u64 x) {
assert(x); // behaviour not defined for x == 0
#if defined(_WIN64)
unsigned long r;

View File

@ -7,25 +7,10 @@
#include <sstream>
#include <string>
#include "common_defs.h"
// get a corpus; pad out to cache line so we can always use SIMD
// throws exceptions in case of failure
std::pair<u8 *, size_t> get_corpus(std::string filename) {
std::ifstream is(filename, std::ios::binary);
if (is) {
std::stringstream buffer;
buffer << is.rdbuf();
size_t length = buffer.str().size();
char *aligned_buffer;
if (posix_memalign((void **)&aligned_buffer, 64, ROUNDUP_N(length, 64))) {
throw std::runtime_error("Could not allocate sufficient memory");
};
memset(aligned_buffer, 0x20, ROUNDUP_N(length, 64));
memcpy(aligned_buffer, buffer.str().c_str(), length);
is.close();
return std::make_pair((u8 *)aligned_buffer, length);
}
throw std::runtime_error("could not load corpus");
return std::make_pair((u8 *)0, (size_t)0);
}
std::pair<u8 *, size_t> get_corpus(std::string filename);
#endif

8
include/jsonminifier.h Normal file
View File

@ -0,0 +1,8 @@
#pragma once
#include <cstddef>
#include <cstdint>
// Take input from buf and remove useless whitespace, write it to out; buf and
// out can be the same pointer.
size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out);

23
include/jsonparser.h Normal file
View File

@ -0,0 +1,23 @@
#pragma once
#include "common_defs.h"
#include "jsonioutil.h"
#include "simdjson_internal.h"
#include "stage1_find_marks.h"
#include "stage2_flatten.h"
#include "stage3_ape_machine.h"
#include "stage4_shovel_machine.h"
// Allocate a ParsedJson structure that can support document
// up to len bytes.
// Return NULL if memory cannot be allocated.
// This structure is meant to be reused from document to document, as needed.
// you can use deallocate_ParsedJson to deallocate the memory.
ParsedJson *allocate_ParsedJson(size_t len);
// deallocate a ParsedJson struct (see allocate_ParsedJson)
void deallocate_ParsedJson(ParsedJson *pj_ptr);
// Parse a document found in buf, need to preallocate ParsedJson.
// Return false in case of a failure.
bool json_parse(const u8 *buf, size_t len, ParsedJson &pj);

View File

@ -1,93 +0,0 @@
// https://github.com/WojciechMula/toys/blob/master/000helpers/linux-perf-events.h
#pragma once
#ifdef __linux__
#include <unistd.h> // for syscall
#include <sys/ioctl.h> // for ioctl
#include <asm/unistd.h> // for __NR_perf_event_open
#include <linux/perf_event.h> // for perf event constants
#include <cerrno> // for errno
#include <cstring> // for memset
#include <stdexcept>
#include <vector>
template <int TYPE = PERF_TYPE_HARDWARE>
class LinuxEvents {
int fd;
perf_event_attr attribs;
int num_events;
std::vector<uint64_t> temp_result_vec;
std::vector<uint64_t> ids;
public:
LinuxEvents(std::vector<int> config_vec) : fd(0) {
memset(&attribs, 0, sizeof(attribs));
attribs.type = TYPE;
attribs.size = sizeof(attribs);
attribs.disabled = 1;
attribs.exclude_kernel = 1;
attribs.exclude_hv = 1;
attribs.sample_period = 0;
attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
const int pid = 0; // the current process
const int cpu = -1; // all CPUs
const unsigned long flags = 0;
int group = -1; // no group
num_events = config_vec.size();
u32 i = 0;
for (auto config: config_vec) {
attribs.config = config;
fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags);
if (fd == -1) {
report_error("perf_event_open");
}
ioctl(fd, PERF_EVENT_IOC_ID, &ids[i++]);
if (group == -1) {
group = fd;
}
}
temp_result_vec.resize(num_events*2 + 1);
}
~LinuxEvents() {
close(fd);
}
really_inline void start() {
if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
report_error("ioctl(PERF_EVENT_IOC_RESET)");
}
if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
}
}
really_inline void end(std::vector<unsigned long long> & results) {
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
}
if (read(fd, &temp_result_vec[0], temp_result_vec.size() * 8) == -1) {
report_error("read");
}
// our actual results are in slots 1,3,5, ... of this structure
// we really should be checking our ids obtained earlier to be safe
for (u32 i = 1; i < temp_result_vec.size(); i+=2) {
results[i/2] = temp_result_vec[i];
}
}
private:
void report_error(const std::string& context) {
throw std::runtime_error(context + ": " + std::string(strerror(errno)));
}
};
#endif

View File

@ -1,32 +1,39 @@
#pragma once
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#else
#include <immintrin.h>
#include <x86intrin.h>
#endif
#include <iostream>
const u32 MAX_DEPTH = 256;
const u32 DEPTH_SAFETY_MARGIN = 32; // should be power-of-2 as we check this with a modulo in our
// hot stage 3 loop
const u32 DEPTH_SAFETY_MARGIN = 32; // should be power-of-2 as we check this
// with a modulo in our hot stage 3 loop
const u32 START_DEPTH = DEPTH_SAFETY_MARGIN;
const u32 REDLINE_DEPTH = MAX_DEPTH - DEPTH_SAFETY_MARGIN;
const size_t MAX_TAPE_ENTRIES = 127*1024;
const size_t MAX_TAPE_ENTRIES = 127 * 1024;
const size_t MAX_TAPE = MAX_DEPTH * MAX_TAPE_ENTRIES;
struct ParsedJson {
u8 * structurals;
size_t bytecapacity; // indicates how many bits are meant to be supported by
// structurals
u8 *structurals;
u32 n_structural_indexes;
u32 * structural_indexes;
u32 *structural_indexes;
// grossly overprovisioned
u64 tape[MAX_TAPE];
u32 tape_locs[MAX_DEPTH];
u8 string_buf[512*1024];
u8 * current_string_buf_loc;
u8 number_buf[512*1024]; // holds either doubles or longs, really
u8 * current_number_buf_loc;
u8 string_buf[512 * 1024];
u8 *current_string_buf_loc;
u8 number_buf[512 * 1024]; // holds either doubles or longs, really
u8 *current_number_buf_loc;
};
// all of this stuff needs to get moved somewhere reasonable
// like our ParsedJson structure
/*
@ -38,14 +45,13 @@ extern u8 number_buf[512*1024]; // holds either doubles or longs, really
extern u8 * current_number_buf_loc;
*/
#ifdef DEBUG
inline void dump256(m256 d, string msg) {
for (u32 i = 0; i < 32; i++) {
std::cout << setw(3) << (int)*(((u8 *)(&d)) + i);
if (!((i+1)%8))
if (!((i + 1) % 8))
std::cout << "|";
else if (!((i+1)%4))
else if (!((i + 1) % 4))
std::cout << ":";
else
std::cout << " ";
@ -56,19 +62,19 @@ inline void dump256(m256 d, string msg) {
// dump bits low to high
void dumpbits(u64 v, string msg) {
for (u32 i = 0; i < 64; i++) {
std::cout << (((v>>(u64)i) & 0x1ULL) ? "1" : "_");
std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_");
}
std::cout << " " << msg << "\n";
}
void dumpbits32(u32 v, string msg) {
for (u32 i = 0; i < 32; i++) {
std::cout << (((v>>(u32)i) & 0x1ULL) ? "1" : "_");
std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_");
}
std::cout << " " << msg << "\n";
}
#else
#define dump256(a,b) ;
#define dumpbits(a,b) ;
#define dumpbits32(a,b) ;
#define dump256(a, b) ;
#define dumpbits(a, b) ;
#define dumpbits32(a, b) ;
#endif

35179
include/simdprune_tables.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -3,4 +3,4 @@
#include "common_defs.h"
#include "simdjson_internal.h"
bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & pj);
bool find_structural_bits(const u8 *buf, size_t len, ParsedJson &pj);

View File

@ -3,5 +3,4 @@
#include "common_defs.h"
#include "simdjson_internal.h"
bool flatten_indexes(size_t len, ParsedJson & pj);
bool flatten_indexes(size_t len, ParsedJson &pj);

View File

@ -4,4 +4,4 @@
#include "simdjson_internal.h"
void init_state_machine();
bool ape_machine(const u8 * buf, size_t len, ParsedJson & pj);
bool ape_machine(const u8 *buf, size_t len, ParsedJson &pj);

View File

@ -3,5 +3,4 @@
#include "common_defs.h"
#include "simdjson_internal.h"
bool shovel_machine(const u8 * buf, size_t len, ParsedJson & pj);
bool shovel_machine(const u8 *buf, size_t len, ParsedJson &pj);

182
include/transitions.h Normal file
View File

@ -0,0 +1,182 @@
// automatically generated by generatetransitions.cpp
u32 trans[MAX_STATES][256] = {
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0,
0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 7, 0, 0, 0,
0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 11, 0, 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 11, 0, 0, 0, 0, 0, 0,
0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 11, 0, 0, 0,
0, 0, 0, 2, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 11, 0, 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0,
0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 11, 0, 0, 0,
0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};

View File

@ -0,0 +1,10 @@
all:../../include/transitions.h
../../include/transitions.h: generatetransitions
./generatetransitions > ../../include/transitions.h
generatetransitions: generatetransitions.cpp
$(CXX) -o generatetransitions generatetransitions.cpp -I../../include
clean:
rm -f generatetransitions

View File

@ -0,0 +1,20 @@
#include "../src/stage3_ape_machine.cpp"
int main() {
init_state_machine();
std::cout << "// automatically generated by generatetransitions.cpp"
<< std::endl;
std::cout << " u32 trans[MAX_STATES][256] = {" << std::endl;
for (int k = 0; k < MAX_STATES; k++) {
std::cout << "{";
for (int z = 0; z < 255; z++) {
std::cout << trans[k][z] << ",";
}
std::cout << trans[k][255];
std::cout << "}";
if (k + 1 < MAX_STATES)
std::cout << ",";
std::cout << std::endl;
}
std::cout << "};" << std::endl;
}

20
src/jsonioutil.cpp Normal file
View File

@ -0,0 +1,20 @@
#include "jsonioutil.h"
std::pair<u8 *, size_t> get_corpus(std::string filename) {
std::ifstream is(filename, std::ios::binary);
if (is) {
std::stringstream buffer;
buffer << is.rdbuf();
size_t length = buffer.str().size();
char *aligned_buffer;
if (posix_memalign((void **)&aligned_buffer, 64, ROUNDUP_N(length, 64))) {
throw std::runtime_error("Could not allocate sufficient memory");
};
memset(aligned_buffer, 0x20, ROUNDUP_N(length, 64));
memcpy(aligned_buffer, buffer.str().c_str(), length);
is.close();
return std::make_pair((u8 *)aligned_buffer, length);
}
throw std::runtime_error("could not load corpus");
return std::make_pair((u8 *)0, (size_t)0);
}

275
src/jsonminifier.cpp Normal file
View File

@ -0,0 +1,275 @@
#ifndef __AVX2__
#include <cstdint>
static uint8_t jump_table[256 * 3] = {
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0,
1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
};
size_t jsonminify(const unsigned char *bytes, size_t howmany,
unsigned char *out) {
size_t i = 0, pos = 0;
uint8_t quote = 0;
uint8_t nonescape = 1;
while (i < howmany) {
unsigned char c = bytes[i];
uint8_t *meta = jump_table + 3 * c;
quote = quote ^ (meta[0] & nonescape);
out[pos] = c;
pos += meta[2] | quote;
i += 1;
nonescape = (~nonescape) | (meta[1]);
}
return pos;
}
#else
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#else
#include <immintrin.h>
#include <x86intrin.h>
#endif // _MSC_VER
#include "simdprune_tables.h"
#include <cstring>
#ifndef __clang__
static inline __m256i _mm256_loadu2_m128i(__m128i const *__addr_hi,
__m128i const *__addr_lo) {
__m256i __v256 = _mm256_castsi128_si256(_mm_loadu_si128(__addr_lo));
return _mm256_insertf128_si256(__v256, _mm_loadu_si128(__addr_hi), 1);
}
static inline void _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo,
__m256i __a) {
__m128i __v128;
__v128 = _mm256_castsi256_si128(__a);
_mm_storeu_si128(__addr_lo, __v128);
__v128 = _mm256_extractf128_si256(__a, 1);
_mm_storeu_si128(__addr_hi, __v128);
}
#endif
// a straightforward comparison of a mask against input.
static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,
__m256i mask) {
__m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
uint64_t res_0 = (uint32_t)_mm256_movemask_epi8(cmp_res_0);
__m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
return res_0 | (res_1 << 32);
}
// take input from buf and remove useless whitespace, input and output can be
// the same
size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
// Useful constant masks
const uint64_t even_bits = 0x5555555555555555ULL;
const uint64_t odd_bits = ~even_bits;
uint8_t *initout(out);
uint64_t prev_iter_ends_odd_backslash =
0ULL; // either 0 or 1, but a 64-bit value
uint64_t prev_iter_inside_quote = 0ULL; // either all zeros or all ones
size_t idx = 0;
if (len >= 64) {
size_t avxlen = len - 63;
for (; idx < avxlen; idx += 64) {
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buf + idx + 0));
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buf + idx + 32));
uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi,
_mm256_set1_epi8('\\'));
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
uint64_t even_starts = start_edges & even_start_mask;
uint64_t odd_starts = start_edges & ~even_start_mask;
uint64_t even_carries = bs_bits + even_starts;
uint64_t odd_carries;
bool iter_ends_odd_backslash = __builtin_uaddll_overflow(
bs_bits, odd_starts, (unsigned long long *)&odd_carries);
odd_carries |= prev_iter_ends_odd_backslash;
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
uint64_t even_carry_ends = even_carries & ~bs_bits;
uint64_t odd_carry_ends = odd_carries & ~bs_bits;
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
uint64_t quote_bits = cmp_mask_against_input_mini(input_lo, input_hi,
_mm256_set1_epi8('"'));
quote_bits = quote_bits & ~odd_ends;
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);
const __m256i low_nibble_mask = _mm256_setr_epi8(
// 0 9 a b c d
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
const __m256i high_nibble_mask = _mm256_setr_epi8(
// 0 2 3 5 7
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0,
1, 0, 0, 0, 3, 2, 1, 0, 0);
__m256i whitespace_shufti_mask = _mm256_set1_epi8(0x18);
__m256i v_lo = _mm256_and_si256(
_mm256_shuffle_epi8(low_nibble_mask, input_lo),
_mm256_shuffle_epi8(high_nibble_mask,
_mm256_and_si256(_mm256_srli_epi32(input_lo, 4),
_mm256_set1_epi8(0x7f))));
__m256i v_hi = _mm256_and_si256(
_mm256_shuffle_epi8(low_nibble_mask, input_hi),
_mm256_shuffle_epi8(high_nibble_mask,
_mm256_and_si256(_mm256_srli_epi32(input_hi, 4),
_mm256_set1_epi8(0x7f))));
__m256i tmp_ws_lo = _mm256_cmpeq_epi8(
_mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0));
__m256i tmp_ws_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
whitespace &= ~quote_mask;
int mask1 = whitespace & 0xFFFF;
int mask2 = (whitespace >> 16) & 0xFFFF;
int mask3 = (whitespace >> 32) & 0xFFFF;
int mask4 = (whitespace >> 48) & 0xFFFF;
int pop1 = _popcnt64((~whitespace) & 0xFFFF);
int pop2 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFF));
int pop3 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
int pop4 = _popcnt64((~whitespace));
__m256i vmask1 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF));
__m256i vmask2 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF));
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
_mm256_storeu2_m128i((__m128i *)(out + pop1), (__m128i *)out, result1);
_mm256_storeu2_m128i((__m128i *)(out + pop3), (__m128i *)(out + pop2),
result2);
out += pop4;
}
}
// we finish off the job... copying and pasting the code is not ideal here,
// but it gets the job done.
if (idx < len) {
uint8_t buffer[64];
memset(buffer, 0, 64);
memcpy(buffer, buf + idx, len - idx);
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buffer));
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buffer + 32));
uint64_t bs_bits =
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\'));
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
uint64_t even_starts = start_edges & even_start_mask;
uint64_t odd_starts = start_edges & ~even_start_mask;
uint64_t even_carries = bs_bits + even_starts;
uint64_t odd_carries;
bool iter_ends_odd_backslash = __builtin_uaddll_overflow(
bs_bits, odd_starts, (unsigned long long *)&odd_carries);
odd_carries |= prev_iter_ends_odd_backslash;
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
uint64_t even_carry_ends = even_carries & ~bs_bits;
uint64_t odd_carry_ends = odd_carries & ~bs_bits;
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
uint64_t quote_bits =
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('"'));
quote_bits = quote_bits & ~odd_ends;
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);
__m256i mask_20 = _mm256_set1_epi8(0x20); // c==32
__m256i mask_70 =
_mm256_set1_epi8(0x70); // adding 0x70 does not check low 4-bits
// but moves any value >= 16 above 128
__m256i lut_cntrl = _mm256_setr_epi8(
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00,
0x00, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00);
__m256i tmp_ws_lo = _mm256_or_si256(
_mm256_cmpeq_epi8(mask_20, input_lo),
_mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_lo)));
__m256i tmp_ws_hi = _mm256_or_si256(
_mm256_cmpeq_epi8(mask_20, input_hi),
_mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_hi)));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32));
whitespace &= ~quote_mask;
if (len - idx < 64) {
whitespace |= UINT64_C(0xFFFFFFFFFFFFFFFF) << (len - idx);
}
int mask1 = whitespace & 0xFFFF;
int mask2 = (whitespace >> 16) & 0xFFFF;
int mask3 = (whitespace >> 32) & 0xFFFF;
int mask4 = (whitespace >> 48) & 0xFFFF;
int pop1 = _popcnt64((~whitespace) & 0xFFFF);
int pop2 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFF));
int pop3 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
int pop4 = _popcnt64((~whitespace));
__m256i vmask1 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF));
__m256i vmask2 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF));
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
_mm256_storeu2_m128i((__m128i *)(buffer + pop1), (__m128i *)buffer,
result1);
_mm256_storeu2_m128i((__m128i *)(buffer + pop3), (__m128i *)(buffer + pop2),
result2);
memcpy(out, buffer, pop4);
out += pop4;
}
return out - initout;
}
#endif

65
src/jsonparser.cpp Normal file
View File

@ -0,0 +1,65 @@
#include "jsonparser.h"
// allocate a ParsedJson structure that can support document
// up to len bytes.
// returns NULL if memory cannot be allocated
// This structure is meant to be reused from document to document, as needed.
// you can use deallocate_ParsedJson to deallocate the memory.
ParsedJson *allocate_ParsedJson(size_t len) {
if (len > 0xffffff) {
std::cerr << "Currently only support JSON files < 16MB, requested length: "
<< len << std::endl;
return NULL;
}
ParsedJson *pj_ptr = new ParsedJson;
if (pj_ptr == NULL) {
std::cerr << "Could not allocate memory for core struct." << std::endl;
return NULL;
}
ParsedJson &pj(*pj_ptr);
pj.bytecapacity = len;
if (posix_memalign((void **)&pj.structurals, 8, ROUNDUP_N(len, 64) / 8)) {
std::cerr << "Could not allocate memory for structurals" << std::endl;
delete pj_ptr;
return NULL;
};
pj.n_structural_indexes = 0;
u32 max_structures = ROUNDUP_N(len, 64) + 2 + 7;
pj.structural_indexes = new u32[max_structures];
if (pj.structural_indexes == NULL) {
std::cerr << "Could not allocate memory for structural_indexes"
<< std::endl;
delete[] pj.structurals;
delete pj_ptr;
return NULL;
}
return pj_ptr;
}
void deallocate_ParsedJson(ParsedJson *pj_ptr) {
if (pj_ptr == NULL)
return;
delete[] pj_ptr->structural_indexes;
delete[] pj_ptr->structurals;
delete pj_ptr;
}
// parse a document found in buf, need to preallocate ParsedJson.
bool json_parse(const u8 *buf, size_t len, ParsedJson &pj) {
if (pj.bytecapacity < len) {
std::cerr << "Your ParsedJson cannot support documents that big: " << len
<< std::endl;
return false;
}
bool isok = find_structural_bits(buf, len, pj);
if (isok) {
isok = flatten_indexes(len, pj);
}
if (isok) {
isok = ape_machine(buf, len, pj);
}
if (isok) {
isok = shovel_machine(buf, len, pj);
}
return isok;
}

View File

@ -1,13 +1,22 @@
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#else
#include <immintrin.h>
#include <x86intrin.h>
#include <assert.h>
#endif
#include <cassert>
#include "common_defs.h"
#include "simdjson_internal.h"
using namespace std;
// a straightforward comparison of a mask against input. 5 uops; would be cheaper in AVX512.
really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask) {
// a straightforward comparison of a mask against input. 5 uops; would be
// cheaper in AVX512.
really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi,
m256 mask) {
m256 cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
u64 res_0 = (u32)_mm256_movemask_epi8(cmp_res_0);
m256 cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
@ -15,7 +24,8 @@ really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask
return res_0 | (res_1 << 32);
}
/*never_inline*/ bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & pj) {
/*never_inline*/ bool find_structural_bits(const u8 *buf, size_t len,
ParsedJson &pj) {
if (len > 0xffffff) {
cerr << "Currently only support JSON files < 16MB\n";
return false;
@ -25,19 +35,20 @@ really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask
const u64 odd_bits = ~even_bits;
// for now, just work in 64-byte chunks
// we have padded the input out to 64 byte multiple with the remainder being zeros
// we have padded the input out to 64 byte multiple with the remainder being
// zeros
// persistent state across loop
u64 prev_iter_ends_odd_backslash = 0ULL; // either 0 or 1, but a 64-bit value
u64 prev_iter_inside_quote = 0ULL; // either all zeros or all ones
u64 prev_iter_ends_pseudo_pred = 0ULL;
for (size_t idx = 0; idx < len; idx+=64) {
for (size_t idx = 0; idx < len; idx += 64) {
__builtin_prefetch(buf + idx + 128);
#ifdef DEBUG
cout << "Idx is " << idx << "\n";
for (u32 j = 0; j < 64; j++) {
char c = *(buf+idx+j);
char c = *(buf + idx + j);
if (isprint(c)) {
cout << c;
} else {
@ -53,12 +64,14 @@ really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask
// Step 1: detect odd sequences of backslashes
////////////////////////////////////////////////////////////////////////////////////////////
u64 bs_bits = cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('\\'));
u64 bs_bits =
cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('\\'));
dumpbits(bs_bits, "backslash bits");
u64 start_edges = bs_bits & ~(bs_bits << 1);
dumpbits(start_edges, "start_edges");
// flip lowest if we have an odd-length run at the end of the prior iteration
// flip lowest if we have an odd-length run at the end of the prior
// iteration
u64 even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
u64 even_starts = start_edges & even_start_mask;
u64 odd_starts = start_edges & ~even_start_mask;
@ -69,13 +82,16 @@ really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask
u64 even_carries = bs_bits + even_starts;
u64 odd_carries;
// must record the carry-out of our odd-carries out of bit 63; this indicates whether the
// sense of any edge going to the next iteration should be flipped
bool iter_ends_odd_backslash = __builtin_uaddll_overflow(bs_bits, odd_starts, &odd_carries);
// must record the carry-out of our odd-carries out of bit 63; this
// indicates whether the sense of any edge going to the next iteration
// should be flipped
bool iter_ends_odd_backslash =
__builtin_uaddll_overflow(bs_bits, odd_starts, &odd_carries);
odd_carries |= prev_iter_ends_odd_backslash; // push in bit zero as a potential end
// if we had an odd-numbered run at the end of
// the previous iteration
odd_carries |=
prev_iter_ends_odd_backslash; // push in bit zero as a potential end
// if we had an odd-numbered run at the
// end of the previous iteration
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
dumpbits(even_carries, "even_carries");
@ -98,13 +114,14 @@ really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask
// Step 2: detect insides of quote pairs
////////////////////////////////////////////////////////////////////////////////////////////
u64 quote_bits = cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('"'));
u64 quote_bits =
cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('"'));
quote_bits = quote_bits & ~odd_ends;
dumpbits(quote_bits, "quote_bits");
u64 quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(_mm_set_epi64x(0ULL, quote_bits),
_mm_set1_epi8(0xFF), 0));
u64 quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (u64)((s64)quote_mask>>63);
prev_iter_inside_quote = (u64)((s64)quote_mask >> 63);
dumpbits(quote_mask, "quote_mask");
// How do we build up a user traversable data structure
@ -117,14 +134,12 @@ really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask
// these go into the next 2 buckets of the comparison (8/16)
const m256 low_nibble_mask = _mm256_setr_epi8(
// 0 9 a b c d
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0,
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0
);
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 0,
0, 0, 8, 12, 1, 2, 9, 0, 0);
const m256 high_nibble_mask = _mm256_setr_epi8(
// 0 2 3 5 7
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0,
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0
);
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0,
1, 0, 0, 0, 3, 2, 1, 0, 0);
m256 structural_shufti_mask = _mm256_set1_epi8(0x7);
m256 whitespace_shufti_mask = _mm256_set1_epi8(0x18);
@ -132,26 +147,29 @@ really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask
m256 v_lo = _mm256_and_si256(
_mm256_shuffle_epi8(low_nibble_mask, input_lo),
_mm256_shuffle_epi8(high_nibble_mask,
_mm256_and_si256(_mm256_srli_epi32(input_lo, 4), _mm256_set1_epi8(0x7f))));
_mm256_and_si256(_mm256_srli_epi32(input_lo, 4),
_mm256_set1_epi8(0x7f))));
m256 v_hi = _mm256_and_si256(
_mm256_shuffle_epi8(low_nibble_mask, input_hi),
_mm256_shuffle_epi8(high_nibble_mask,
_mm256_and_si256(_mm256_srli_epi32(input_hi, 4), _mm256_set1_epi8(0x7f))));
m256 tmp_lo = _mm256_cmpeq_epi8(_mm256_and_si256(v_lo, structural_shufti_mask),
_mm256_set1_epi8(0));
m256 tmp_hi = _mm256_cmpeq_epi8(_mm256_and_si256(v_hi, structural_shufti_mask),
_mm256_set1_epi8(0));
_mm256_and_si256(_mm256_srli_epi32(input_hi, 4),
_mm256_set1_epi8(0x7f))));
m256 tmp_lo = _mm256_cmpeq_epi8(
_mm256_and_si256(v_lo, structural_shufti_mask), _mm256_set1_epi8(0));
m256 tmp_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0));
u64 structural_res_0 = (u32)_mm256_movemask_epi8(tmp_lo);
u64 structural_res_1 = _mm256_movemask_epi8(tmp_hi);
u64 structurals = ~(structural_res_0 | (structural_res_1 << 32));
// this additional mask and transfer is non-trivially expensive, unfortunately
m256 tmp_ws_lo = _mm256_cmpeq_epi8(_mm256_and_si256(v_lo, whitespace_shufti_mask),
_mm256_set1_epi8(0));
m256 tmp_ws_hi = _mm256_cmpeq_epi8(_mm256_and_si256(v_hi, whitespace_shufti_mask),
_mm256_set1_epi8(0));
// this additional mask and transfer is non-trivially expensive,
// unfortunately
m256 tmp_ws_lo = _mm256_cmpeq_epi8(
_mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0));
m256 tmp_ws_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
u64 ws_res_0 = (u32)_mm256_movemask_epi8(tmp_ws_lo);
u64 ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
@ -167,11 +185,13 @@ really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask
// quickly traverse the strings we've spent all this trouble gathering
structurals |= quote_bits;
// Now, establish "pseudo-structural characters". These are non-whitespace characters
// that are (a) outside quotes and (b) have a predecessor that's either whitespace or a structural
// character. This means that subsequent passes will get a chance to encounter the first character
// of every string of non-whitespace and, if we're parsing an atom like true/false/null or a number
// we can stop at the first whitespace or structural character following it.
// Now, establish "pseudo-structural characters". These are non-whitespace
// characters that are (a) outside quotes and (b) have a predecessor that's
// either whitespace or a structural character. This means that subsequent
// passes will get a chance to encounter the first character of every string
// of non-whitespace and, if we're parsing an atom like true/false/null or a
// number we can stop at the first whitespace or structural character
// following it.
// a qualified predecessor is something that can happen 1 position before an
// psuedo-structural character
@ -180,7 +200,8 @@ really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask
u64 shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
dumpbits(shifted_pseudo_pred, "shifted_pseudo_pred");
prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
u64 pseudo_structurals = shifted_pseudo_pred & (~whitespace) & (~quote_mask);
u64 pseudo_structurals =
shifted_pseudo_pred & (~whitespace) & (~quote_mask);
dumpbits(pseudo_structurals, "pseudo_structurals");
dumpbits(structurals, "final structurals without pseudos");
structurals |= pseudo_structurals;
@ -189,8 +210,10 @@ really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask
// now, we've used our close quotes all we need to. So let's switch them off
// they will be off in the quote mask and on in quote bits.
structurals &= ~(quote_bits & ~quote_mask);
dumpbits(structurals, "final structurals and pseudo structurals after close quote removal");
*(u64 *)(pj.structurals + idx/8) = structurals;
dumpbits(
structurals,
"final structurals and pseudo structurals after close quote removal");
*(u64 *)(pj.structurals + idx / 8) = structurals;
}
return true;
}

View File

@ -1,5 +1,12 @@
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#else
#include <immintrin.h>
#include <x86intrin.h>
#include <assert.h>
#endif
#include <cassert>
#include "common_defs.h"
#include "simdjson_internal.h"
@ -8,8 +15,9 @@
#define NO_PDEP_WIDTH 8
#endif
#define SET_BIT(i) base_ptr[base+i] = (u32)idx + __builtin_ctzll(s);\
s = s & (s - 1);
#define SET_BIT(i) \
base_ptr[base + i] = (u32)idx + __builtin_ctzll(s); \
s = s & (s - 1);
#define SET_BIT1 SET_BIT(0)
#define SET_BIT2 SET_BIT1 SET_BIT(1)
@ -32,40 +40,43 @@ s = s & (s - 1);
#define SET_BITLOOPN(n) SET_BIT##n
// just transform the bitmask to a big list of 32-bit integers for now
// that's all; the type of character the offset points to will
// tell us exactly what we need to know. Naive but straightforward implementation
bool flatten_indexes(size_t len, ParsedJson & pj) {
u32 * base_ptr = pj.structural_indexes;
// tell us exactly what we need to know. Naive but straightforward
// implementation
bool flatten_indexes(size_t len, ParsedJson &pj) {
u32 *base_ptr = pj.structural_indexes;
u32 base = 0;
#ifdef BUILDHISTOGRAM
uint32_t counters [65];
uint32_t counters[65];
uint32_t total = 0;
for(int k = 0; k < 66; k++) counters[k] = 0;
for (size_t idx = 0; idx < len; idx+=64) {
u64 s = *(u64 *)(pj.structurals + idx/8);
for (int k = 0; k < 66; k++)
counters[k] = 0;
for (size_t idx = 0; idx < len; idx += 64) {
u64 s = *(u64 *)(pj.structurals + idx / 8);
u32 cnt = __builtin_popcountll(s);
total ++;
total++;
counters[cnt]++;
}
printf("\n histogram:\n");
for(int k = 0; k < 66; k++) {
if(counters[k]>0)printf("%10d %10.u %10.3f \n", k, counters[k], counters[k] * 1.0 / total);
for (int k = 0; k < 66; k++) {
if (counters[k] > 0)
printf("%10d %10.u %10.3f \n", k, counters[k], counters[k] * 1.0 / total);
}
printf("\n\n");
#endif
for (size_t idx = 0; idx < len; idx+=64) {
u64 s = *(u64 *)(pj.structurals + idx/8);
for (size_t idx = 0; idx < len; idx += 64) {
u64 s = *(u64 *)(pj.structurals + idx / 8);
#ifdef SUPPRESS_CHEESY_FLATTEN
while (s) {
base_ptr[base++] = (u32)idx + __builtin_ctzll(s); s &= s - 1ULL;
base_ptr[base++] = (u32)idx + __builtin_ctzll(s);
s &= s - 1ULL;
}
#elif defined(NO_PDEP_PLEASE)
u32 cnt = __builtin_popcountll(s);
u32 next_base = base + cnt;
while (s) {
CALL(SET_BITLOOPN,NO_PDEP_WIDTH)
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
base_ptr[base+i] = (u32)idx + __builtin_ctzll(s);
s = s & (s - 1);
@ -77,17 +88,24 @@ bool flatten_indexes(size_t len, ParsedJson & pj) {
u32 cnt = __builtin_popcountll(s);
u32 next_base = base + cnt;
while (s) {
// spoil the suspense by reducing dependency chains; actually a win even with cost of pdep
// spoil the suspense by reducing dependency chains; actually a win even
// with cost of pdep
u64 s3 = _pdep_u64(~0x7ULL, s); // s3 will have bottom 3 1-bits unset
u64 s5 = _pdep_u64(~0x1fULL, s); // s5 will have bottom 5 1-bits unset
base_ptr[base+0] = (u32)idx + __builtin_ctzll(s); u64 s1 = s & (s - 1ULL);
base_ptr[base+1] = (u32)idx + __builtin_ctzll(s1); u64 s2 = s1 & (s1 - 1ULL);
base_ptr[base+2] = (u32)idx + __builtin_ctzll(s2); //u64 s3 = s2 & (s2 - 1ULL);
base_ptr[base+3] = (u32)idx + __builtin_ctzll(s3); u64 s4 = s3 & (s3 - 1ULL);
base_ptr[base + 0] = (u32)idx + __builtin_ctzll(s);
u64 s1 = s & (s - 1ULL);
base_ptr[base + 1] = (u32)idx + __builtin_ctzll(s1);
u64 s2 = s1 & (s1 - 1ULL);
base_ptr[base + 2] =
(u32)idx + __builtin_ctzll(s2); // u64 s3 = s2 & (s2 - 1ULL);
base_ptr[base + 3] = (u32)idx + __builtin_ctzll(s3);
u64 s4 = s3 & (s3 - 1ULL);
base_ptr[base+4] = (u32)idx + __builtin_ctzll(s4); //u64 s5 = s4 & (s4 - 1ULL);
base_ptr[base+5] = (u32)idx + __builtin_ctzll(s5); u64 s6 = s5 & (s5 - 1ULL);
base_ptr[base + 4] =
(u32)idx + __builtin_ctzll(s4); // u64 s5 = s4 & (s4 - 1ULL);
base_ptr[base + 5] = (u32)idx + __builtin_ctzll(s5);
u64 s6 = s5 & (s5 - 1ULL);
s = s6;
base += 6;
}
@ -95,9 +113,7 @@ bool flatten_indexes(size_t len, ParsedJson & pj) {
#endif
}
pj.n_structural_indexes = base;
base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array
base_ptr[pj.n_structural_indexes] =
0; // make it safe to dereference one beyond this array
return true;
}

View File

@ -1,5 +1,12 @@
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#else
#include <immintrin.h>
#include <x86intrin.h>
#include <assert.h>
#endif
#include <cassert>
#include <cstring>
#include "common_defs.h"
@ -8,22 +15,25 @@
// the ape machine consists of two parts:
//
// 1) The "state machine", which is a multiple channel per-level state machine
// It is a conventional DFA except in that it 'changes track' on {}[] characters
// It is a conventional DFA except in that it 'changes track' on {}[]
// characters
//
// 2) The "tape machine": this records offsets of various structures as they go by
// These structures are either u32 offsets of other tapes or u32 offsets into our input
// or structures.
// 2) The "tape machine": this records offsets of various structures as they go
// by
// These structures are either u32 offsets of other tapes or u32 offsets into
// our input or structures.
//
// The state machine doesn't record ouput.
// The tape machine doesn't validate.
//
// The output of the tape machine is meaningful only if the state machine is in non-error states.
// The output of the tape machine is meaningful only if the state machine is in
// non-error states.
// depth adjustment is strictly based on whether we are {[ or }]
// depth adjustment is a pre-increment which, in effect, means that a {[ contained in an object
// is in the level one deeper, while the corresponding }] is at the level
// depth adjustment is a pre-increment which, in effect, means that a {[
// contained in an object is in the level one deeper, while the corresponding }]
// is at the level
// TAPE MACHINE DEFINITIONS
@ -43,37 +53,44 @@ inline size_t get_write_size(u32 control) { return control & 0xff; }
const u32 char_control[256] = {
// nothing interesting from 0x00-0x20
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF,
// " is 0x22, - is 0x2d
CDF,CDF,C04,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,C04,CDF,CDF,
CDF, CDF, C04, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, C04, CDF,
CDF,
// numbers are 0x30-0x39
C04,C04,C04,C04, C04,C04,C04,C04, C04,C04,CDF,CDF, CDF,CDF,CDF,CDF,
C04, C04, C04, C04, C04, C04, C04, C04, C04, C04, CDF, CDF, CDF, CDF, CDF,
CDF,
// nothing interesting from 0x40-0x49
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF,
// 0x5b/5d are []
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CP4, CDF,CM4,CDF,CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CP4, CDF, CM4, CDF,
CDF,
// f is 0x66 n is 0x6e
CDF,CDF,CDF,CDF, CDF,CDF,C04,CDF, CDF,CDF,CDF,CDF, CDF,CDF,C04,CDF,
CDF, CDF, CDF, CDF, CDF, CDF, C04, CDF, CDF, CDF, CDF, CDF, CDF, CDF, C04,
CDF,
// 0x7b/7d are {}, 74 is t
CDF,CDF,CDF,CDF, C04,CDF,CDF,CDF, CDF,CDF,CDF,CP4, CDF,CM4,CDF,CDF,
CDF, CDF, CDF, CDF, C04, CDF, CDF, CDF, CDF, CDF, CDF, CP4, CDF, CM4, CDF,
CDF,
// nothing interesting from 0x80-0xff
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF
};
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF};
// all of this stuff needs to get moved somewhere reasonable
// like our ParsedJson structure
@ -88,7 +105,14 @@ u8 * current_number_buf_loc;
// STATE MACHINE DECLARATIONS
const u32 MAX_STATES = 16;
u32 trans[MAX_STATES][256];
/**
* It is annoying to have to call init_state_machine each time.
* Better to precompute the (small) result into a header file.
*/
// u32 trans[MAX_STATES][256];
#include "transitions.h"
u32 states[MAX_DEPTH];
const int START_STATE = 1;
@ -98,7 +122,8 @@ u32 valid_end_states[MAX_STATES] = {
1, // state 2: we've seen an { - if we left this level it's ok
0, // state 3 is abolished, we shouldn't be in it
0, // state 4 means we saw a string inside an object. We can't end like this!
0, // state 4 means we saw a string inside an object. We can't end like
// this!
0, // similarly state 5 means we saw a string followed by a colon.
0, // state 6 is abolished
1, // it's ok to finish on 7
@ -109,8 +134,10 @@ u32 valid_end_states[MAX_STATES] = {
1, // state 11 is ok to finish on, we just saw a unary inside a array
0, // state 12 we've just seen a comma inside an array - can't finish
0, // state 13 is our weird start state. I think we shouldn't end on it as we need to see something
1, // state 14 is ok. Its an error to see something *more* here but not to be in this state
0, // state 13 is our weird start state. I think we shouldn't end on it as
// we need to see something
1, // state 14 is ok. Its an error to see something *more* here but not to
// be in this state
0, // we don't use state 15
};
@ -131,22 +158,22 @@ const int ANYTHING_IS_ERROR_STATE = 14;
void init_state_machine() {
// states 10 and 6 eliminated
trans[ 1]['{'] = 2;
trans[ 2]['"'] = 4;
trans[ 4][':'] = 5;
trans[1][(int)'{'] = 2;
trans[2][(int)'"'] = 4;
trans[4][(int)':'] = 5;
// 5->7 on all values ftn0123456789-"
trans[ 7][','] = 8;
trans[ 8]['"'] = 4;
trans[7][(int)','] = 8;
trans[8][(int)'"'] = 4;
trans[ 1]['['] = 9;
trans[1][(int)'['] = 9;
// 9->11 on all values ftn0123456789-"
trans[11][','] = 12;
trans[11][(int)','] = 12;
// 12->11 on all values ftn0123456789-"
const char * UNARIES = "}]ftn0123456789-\"";
const char *UNARIES = "}]ftn0123456789-\"";
for (u32 i = 0; i < strlen(UNARIES); i++) {
trans[ 5][(u32)UNARIES[i]] = 7;
trans[ 9][(u32)UNARIES[i]] = 11;
trans[5][(u32)UNARIES[i]] = 7;
trans[9][(u32)UNARIES[i]] = 11;
trans[12][(u32)UNARIES[i]] = 11;
#ifdef PERMIT_RANDOM_UNARIES_AT_TOP_LEVEL
// NOTE: if we permit JSON documents that
@ -161,40 +188,42 @@ void init_state_machine() {
// that contain a single number or string, we must
// make sure we accept the top-level closing braces
// that are delivered to the start depth only
trans[13]['}'] = 14;
trans[13][']'] = 14;
trans[13][(int)'}'] = 14;
trans[13][(int)']'] = 14;
#endif
// back transitions when new things are open
trans[2]['{'] = 2;
trans[7]['{'] = 2;
trans[9]['{'] = 2;
trans[11]['{'] = 2;
trans[2]['['] = 9;
trans[7]['['] = 9;
trans[9]['['] = 9;
trans[11]['['] = 9;
trans[2][(int)'{'] = 2;
trans[7][(int)'{'] = 2;
trans[9][(int)'{'] = 2;
trans[11][(int)'{'] = 2;
trans[2][(int)'['] = 9;
trans[7][(int)'['] = 9;
trans[9][(int)'['] = 9;
trans[11][(int)'['] = 9;
}
bool ape_machine(const u8 * buf, UNUSED size_t len, ParsedJson & pj) {
bool ape_machine(const u8 *buf, UNUSED size_t len, ParsedJson &pj) {
// NOTE - our depth is used by both the tape machine and the state machine
// Further, in production we will set it to a largish value in a generous buffer as a rogue input
// could consist of many {[ characters or many }] characters. We aren't busily checking errors
// (and in fact, a aggressive sequence of [ characters is actually valid input!) so something that
// blows out maximum depth will need to be periodically checked for, as will something that tries
// to set depth very low. If we set our starting depth, say, to 256, we can tolerate 256 bogus close brace
// characters without aggressively going wrong and writing to bad memory
// Note that any specious depth can have a specious tape associated with and all these specious depths
// can share a region of the tape - it's harmless. Since tape is one-way, any movement in a specious tape
// is an error (so we can detect max_depth violations by making sure that specious tape locations haven't
// Further, in production we will set it to a largish value in a generous
// buffer as a rogue input could consist of many {[ characters or many }]
// characters. We aren't busily checking errors (and in fact, a aggressive
// sequence of [ characters is actually valid input!) so something that blows
// out maximum depth will need to be periodically checked for, as will
// something that tries to set depth very low. If we set our starting depth,
// say, to 256, we can tolerate 256 bogus close brace characters without
// aggressively going wrong and writing to bad memory Note that any specious
// depth can have a specious tape associated with and all these specious
// depths can share a region of the tape - it's harmless. Since tape is
// one-way, any movement in a specious tape is an error (so we can detect
// max_depth violations by making sure that specious tape locations haven't
// moved from their starting values)
u32 depth = START_DEPTH;
for (u32 i = 0; i < MAX_DEPTH; i++) {
pj.tape_locs[i] = i*MAX_TAPE_ENTRIES;
pj.tape_locs[i] = i * MAX_TAPE_ENTRIES;
if (i == START_DEPTH) {
states[i] = START_DEPTH_START_STATE;
} else if ((i < START_DEPTH) || (i >= REDLINE_DEPTH)) {
@ -220,7 +249,7 @@ bool ape_machine(const u8 * buf, UNUSED size_t len, ParsedJson & pj) {
// haven't been in our dangerous zones above or below our normal
// depths. It ONLY checks to be sure that we don't manage to leave
// these zones and write completely off our tape.
if (!(i%DEPTH_SAFETY_MARGIN)) {
if (!(i % DEPTH_SAFETY_MARGIN)) {
if (depth < START_DEPTH || depth >= REDLINE_DEPTH) {
error_sump |= 1;
break;
@ -231,7 +260,7 @@ bool ape_machine(const u8 * buf, UNUSED size_t len, ParsedJson & pj) {
u8 c = next_c;
u32 control = next_control;
next_idx = pj.structural_indexes[i+1];
next_idx = pj.structural_indexes[i + 1];
next_c = buf[next_idx];
next_control = char_control[next_c];
@ -242,11 +271,12 @@ bool ape_machine(const u8 * buf, UNUSED size_t len, ParsedJson & pj) {
depth += depth_adjust;
#ifdef DEBUG
cout << "i: " << i << " idx: " << idx << " c " << c << "\n";
cout << "TAPE MACHINE: depth change " << (s32)depth_adjust
<< " write_size " << (u32)write_size << " current_depth: " << depth << "\n";
cout << "TAPE MACHINE: depth change " << (s32)depth_adjust << " write_size "
<< (u32)write_size << " current_depth: " << depth << "\n";
#endif
// STATE MACHINE - hoisted here to fill in during the tape machine's latencies
// STATE MACHINE - hoisted here to fill in during the tape machine's
// latencies
#ifdef DEBUG
cout << "STATE MACHINE: state[depth] pre " << states[depth] << " ";
#endif
@ -260,8 +290,9 @@ bool ape_machine(const u8 * buf, UNUSED size_t len, ParsedJson & pj) {
}
if (depth != START_DEPTH) {
// We haven't returned to our start depth, so our braces can't possibly match
// Note this doesn't exclude the possibility that we have improperly matched { } or [] pairs
// We haven't returned to our start depth, so our braces can't possibly
// match Note this doesn't exclude the possibility that we have improperly
// matched { } or [] pairs
return false;
}
@ -277,7 +308,7 @@ bool ape_machine(const u8 * buf, UNUSED size_t len, ParsedJson & pj) {
#define DUMP_TAPES
#ifdef DEBUG
for (u32 i = 0; i < MAX_DEPTH; i++) {
u32 start_loc = i*MAX_TAPE_ENTRIES;
u32 start_loc = i * MAX_TAPE_ENTRIES;
cout << " tape section i " << i;
if (i == START_DEPTH) {
cout << " (START) ";
@ -287,15 +318,14 @@ bool ape_machine(const u8 * buf, UNUSED size_t len, ParsedJson & pj) {
cout << " (NORMAL) ";
}
cout << " from: " << start_loc
<< " to: " << tape_locs[i] << " "
<< " size: " << (tape_locs[i]-start_loc) << "\n";
cout << " from: " << start_loc << " to: " << tape_locs[i] << " "
<< " size: " << (tape_locs[i] - start_loc) << "\n";
cout << " state: " << states[i] << "\n";
#ifdef DUMP_TAPES
for (u32 j = start_loc; j < tape_locs[i]; j++) {
if (tape[j]) {
cout << "j: " << j << " tape[j] char " << (char)(tape[j]>>56)
<< " tape[j][0..55]: " << (tape[j]&0xffffffffffffffULL ) << "\n";
cout << "j: " << j << " tape[j] char " << (char)(tape[j] >> 56)
<< " tape[j][0..55]: " << (tape[j] & 0xffffffffffffffULL) << "\n";
}
}
#endif
@ -306,5 +336,3 @@ bool ape_machine(const u8 * buf, UNUSED size_t len, ParsedJson & pj) {
}
return true;
}

View File

@ -1,5 +1,12 @@
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#else
#include <immintrin.h>
#include <x86intrin.h>
#include <assert.h>
#endif
#include <cassert>
#include <cstring>
#include "common_defs.h"
@ -12,26 +19,21 @@
// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
const u32 structural_or_whitespace_negated[256] = {
1,1,1,1, 1,1,1,1, 1,0,0,1, 1,0,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
0,1,1,1, 1,1,1,1, 1,1,1,1, 0,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,0,1, 1,1,1,1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,0, 1,0,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,0, 1,0,1,1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1
};
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
// return non-zero if not a structural or whitespace char
// zero otherwise
@ -43,52 +45,39 @@ really_inline u32 is_not_structural_or_whitespace(u8 c) {
// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab
// u not handled in this table as it's complex
const u8 escape_map[256] = {
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, //0x0.
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
0,0,0x22,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x2f,
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0.
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, //0x4.
0,0,0,0, 0,0,0,0, 0,0,0,0, 0x5c,0,0,0, //0x5.
0,0,0x08,0, 0,0,0x12,0, 0,0,0,0, 0,0,0x0a,0, //0x6.
0,0,0x0d,0, 0x09,0,0,0, 0,0,0,0, 0,0,0,0, //0x7.
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4.
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5.
0, 0, 0x08, 0, 0, 0, 0x12, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6.
0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7.
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
const u32 leading_zeros_to_utf_bytes[33] = {
1,
1, 1, 1, 1, 1, 1, 1, // 7 bits for first one
1, 1, 1, 1, 1, 1, 1, 1, // 7 bits for first one
2, 2, 2, 2, // 11 bits for next
3, 3, 3, 3, 3, // 16 bits for next
4, 4, 4, 4, 4, // 21 bits for next
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; // error
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; // error
const u32 UTF_PDEP_MASK[5] = {0x00, // error
0x7f, 0x1f3f, 0x0f3f3f, 0x073f3f3f};
const u32 UTF_PDEP_MASK[5] = {
0x00, // error
0x7f,
0x1f3f,
0x0f3f3f,
0x073f3f3f
};
const u32 UTF_OR_MASK[5] = {
0x00, // error
0x00,
0xc080,
0xe08080,
0xf0808080
};
const u32 UTF_OR_MASK[5] = {0x00, // error
0x00, 0xc080, 0xe08080, 0xf0808080};
bool is_hex_digit(u8 v) {
if (v >= '0' && v <= '9')
@ -106,15 +95,17 @@ u8 digit_to_val(u8 v) {
return v - 'A' + 10;
}
bool hex_to_u32(const u8 * src, u32 * res) {
bool hex_to_u32(const u8 *src, u32 *res) {
u8 v1 = src[0];
u8 v2 = src[1];
u8 v3 = src[2];
u8 v4 = src[3];
if (!is_hex_digit(v1) || !is_hex_digit(v2) || !is_hex_digit(v3) || !is_hex_digit(v4)) {
if (!is_hex_digit(v1) || !is_hex_digit(v2) || !is_hex_digit(v3) ||
!is_hex_digit(v4)) {
return false;
}
*res = digit_to_val(v1) << 24 | digit_to_val(v2) << 16 | digit_to_val(v3) << 8 | digit_to_val(v4);
*res = digit_to_val(v1) << 24 | digit_to_val(v2) << 16 |
digit_to_val(v3) << 8 | digit_to_val(v4);
return true;
}
@ -124,16 +115,19 @@ bool hex_to_u32(const u8 * src, u32 * res) {
// dest will advance a variable amount (return via pointer)
// return true if the unicode codepoint was valid
// We work in little-endian then swap at write time
really_inline bool handle_unicode_codepoint(const u8 ** src_ptr, u8 ** dst_ptr) {
u32 code_point = 0; // read the hex, potentially reading another \u beyond if it's a // wacky one
really_inline bool handle_unicode_codepoint(const u8 **src_ptr, u8 **dst_ptr) {
u32 code_point = 0; // read the hex, potentially reading another \u beyond if
// it's a // wacky one
if (!hex_to_u32(*src_ptr + 2, &code_point)) {
return false;
}
*src_ptr += 6;
// check for the weirdo double-UTF-16 nonsense for things outside Basic Multilingual Plane.
// check for the weirdo double-UTF-16 nonsense for things outside Basic
// Multilingual Plane.
if (code_point >= 0xd800 && code_point < 0xdc00) {
// TODO: sanity check and clean up; snippeted from RapidJSON and poorly understood at the moment
if (( (*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') {
// TODO: sanity check and clean up; snippeted from RapidJSON and poorly
// understood at the moment
if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') {
return false;
}
u32 code_point_2 = 0;
@ -143,26 +137,31 @@ really_inline bool handle_unicode_codepoint(const u8 ** src_ptr, u8 ** dst_ptr)
if (code_point_2 < 0xdc00 || code_point_2 > 0xdfff) {
return false;
}
code_point = (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000;
code_point =
(((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000;
*src_ptr += 6;
}
// TODO: check to see whether the below code is nonsense (it's really only a sketch at this point)
// TODO: check to see whether the below code is nonsense (it's really only a
// sketch at this point)
u32 lz = __builtin_clz(code_point);
u32 utf_bytes = leading_zeros_to_utf_bytes[lz];
u32 tmp = _pdep_u32(code_point, UTF_PDEP_MASK[utf_bytes]) | UTF_OR_MASK[utf_bytes];
u32 tmp =
_pdep_u32(code_point, UTF_PDEP_MASK[utf_bytes]) | UTF_OR_MASK[utf_bytes];
// swap and move to the other side of the register
tmp = __builtin_bswap32(tmp);
tmp >>= ((4 - utf_bytes) * 8) & 31;// if utf_bytes, this could become a shift by 32, hence the mask with 31
tmp >>= ((4 - utf_bytes) * 8) & 31; // if utf_bytes, this could become a shift
// by 32, hence the mask with 31
// use memcpy to avoid undefined behavior:
std::memcpy(*(u32 **)dst_ptr,&tmp,sizeof(u32)); //**(u32 **)dst_ptr = tmp;
std::memcpy(*(u32 **)dst_ptr, &tmp, sizeof(u32)); //**(u32 **)dst_ptr = tmp;
*dst_ptr += utf_bytes;
return true;
}
really_inline bool parse_string(const u8 * buf, UNUSED size_t len, ParsedJson & pj, u32 tape_loc) {
really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
ParsedJson &pj, u32 tape_loc) {
u32 offset = pj.tape[tape_loc] & 0xffffff;
const u8 * src = &buf[offset+1]; // we know that buf at offset is a "
u8 * dst = pj.current_string_buf_loc;
const u8 *src = &buf[offset + 1]; // we know that buf at offset is a "
u8 *dst = pj.current_string_buf_loc;
#ifdef DEBUG
cout << "Entering parse string with offset " << offset << "\n";
#endif
@ -170,7 +169,7 @@ really_inline bool parse_string(const u8 * buf, UNUSED size_t len, ParsedJson &
while (1) {
#ifdef DEBUG
for (u32 j = 0; j < 32; j++) {
char c = *(src+j);
char c = *(src + j);
if (isprint(c)) {
cout << c;
} else {
@ -180,13 +179,16 @@ really_inline bool parse_string(const u8 * buf, UNUSED size_t len, ParsedJson &
cout << "| ... string handling input\n";
#endif
m256 v = _mm256_loadu_si256((const m256 *)(src));
u32 bs_bits = (u32)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\')));
u32 bs_bits =
(u32)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\')));
dumpbits32(bs_bits, "backslash bits 2");
u32 quote_bits = (u32)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('"')));
u32 quote_bits =
(u32)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('"')));
dumpbits32(quote_bits, "quote_bits");
u32 quote_dist = __builtin_ctz(quote_bits);
u32 bs_dist = __builtin_ctz(bs_bits);
// store to dest unconditionally - we can overwrite the bits we don't like later
// store to dest unconditionally - we can overwrite the bits we don't like
// later
_mm256_storeu_si256((m256 *)(dst), v);
#ifdef DEBUG
cout << "quote dist: " << quote_dist << " bs dist: " << bs_dist << "\n";
@ -199,10 +201,13 @@ really_inline bool parse_string(const u8 * buf, UNUSED size_t len, ParsedJson &
// we encountered quotes first. Move dst to point to quotes and exit
dst[quote_dist] = 0; // null terminate and get out
pj.current_string_buf_loc = dst + quote_dist + 1;
pj.tape[tape_loc] = ((u32)'"') << 24 | (pj.current_string_buf_loc - pj.string_buf); // assume 2^24 will hold all strings for now
pj.tape[tape_loc] =
((u32)'"') << 24 |
(pj.current_string_buf_loc -
pj.string_buf); // assume 2^24 will hold all strings for now
return true;
} else if (quote_dist > bs_dist) {
u8 escape_char = src[bs_dist+1];
u8 escape_char = src[bs_dist + 1];
#ifdef DEBUG
cout << "Found escape char: " << escape_char << "\n";
#endif
@ -219,65 +224,74 @@ really_inline bool parse_string(const u8 * buf, UNUSED size_t len, ParsedJson &
} else {
// simple 1:1 conversion. Will eat bs_dist+2 characters in input and
// write bs_dist+1 characters to output
// note this may reach beyond the part of the buffer we've actually seen.
// I think this is ok
// note this may reach beyond the part of the buffer we've actually
// seen. I think this is ok
u8 escape_result = escape_map[escape_char];
if (!escape_result)
return false; // bogus escape value is an error
dst[bs_dist] = escape_result;
src += bs_dist+2;
dst += bs_dist+1;
src += bs_dist + 2;
dst += bs_dist + 1;
}
} else {
// they are the same. Since they can't co-occur, it means we encountered neither.
src+=32;
dst+=32;
// they are the same. Since they can't co-occur, it means we encountered
// neither.
src += 32;
dst += 32;
}
return true;
}
// later extensions -
// if \\ we could detect whether it's a substantial run of \ or just eat 2 chars and write 1
// handle anything short of \u or \\\ (as a prefix) with clever PSHUFB stuff and don't leave SIMD
// if \\ we could detect whether it's a substantial run of \ or just eat 2
// chars and write 1 handle anything short of \u or \\\ (as a prefix) with
// clever PSHUFB stuff and don't leave SIMD
return true;
}
#ifdef DOUBLECONV
static StringToDoubleConverter converter(StringToDoubleConverter::ALLOW_TRAILING_JUNK, 2000000.0, Double::NaN(), NULL, NULL);
static StringToDoubleConverter
converter(StringToDoubleConverter::ALLOW_TRAILING_JUNK, 2000000.0,
Double::NaN(), NULL, NULL);
#endif
// put a parsed version of number (either as a double or a signed long) into the number buffer,
// put a 'tag' indicating which type and where it is back onto the tape at that location
// return false if we can't parse the number which means either
// (a) the number isn't valid, or (b) the number is followed by something that isn't whitespace, comma or a close }] character
// which are the only things that should follow a number at this stage
// bools to detect what we found in our initial character already here - we are already
// switching on 0 vs 1-9 vs - so we may as well keep separate paths where that's useful
// put a parsed version of number (either as a double or a signed long) into the
// number buffer, put a 'tag' indicating which type and where it is back onto
// the tape at that location return false if we can't parse the number which
// means either (a) the number isn't valid, or (b) the number is followed by
// something that isn't whitespace, comma or a close }] character which are the
// only things that should follow a number at this stage bools to detect what we
// found in our initial character already here - we are already switching on 0
// vs 1-9 vs - so we may as well keep separate paths where that's useful
// TODO: see if we really need a separate number_buf or whether we should just
// have a generic scratch - would need to align before using for this
really_inline bool parse_number(const u8 * buf, UNUSED size_t len, UNUSED ParsedJson & pj, u32 tape_loc, UNUSED bool found_zero, bool found_minus) {
really_inline bool parse_number(const u8 *buf, UNUSED size_t len,
UNUSED ParsedJson &pj, u32 tape_loc,
UNUSED bool found_zero, bool found_minus) {
u32 offset = pj.tape[tape_loc] & 0xffffff;
////////////////
// This is temporary... but it illustrates how one could use Google's double conv.
// This is temporary... but it illustrates how one could use Google's double
// conv.
///
#ifdef DOUBLECONV
int processed_characters_count;
double result_double_conv = converter.StringToDouble((const char*)( buf+offset), 10, &processed_characters_count);
printf("number is %f and used %d chars \n", result_double_conv, processed_characters_count);
double result_double_conv = converter.StringToDouble(
(const char *)(buf + offset), 10, &processed_characters_count);
printf("number is %f and used %d chars \n", result_double_conv,
processed_characters_count);
#endif
////////////////
// end of double conv temporary stuff.
////////////////
////////////////
// end of double conv temporary stuff.
////////////////
if (found_minus) {
offset++;
}
const u8 * src = &buf[offset];
const u8 *src = &buf[offset];
m256 v = _mm256_loadu_si256((const m256 *)(src));
u64 error_sump = 0;
#ifdef DEBUG
for (u32 j = 0; j < 32; j++) {
char c = *(src+j);
char c = *(src + j);
if (isprint(c)) {
cout << c;
} else {
@ -290,7 +304,8 @@ really_inline bool parse_number(const u8 * buf, UNUSED size_t len, UNUSED Parsed
// categories to extract
// Digits:
// 0 (0x30) - bucket 0
// 1-9 (never any distinction except if we didn't get the free kick at 0 due to the leading minus) (0x31-0x39) - bucket 1
// 1-9 (never any distinction except if we didn't get the free kick at 0 due
// to the leading minus) (0x31-0x39) - bucket 1
// . (0x2e) - bucket 2
// E or e - no distinction (0x45/0x65) - bucket 3
// + (0x2b) - bucket 4
@ -302,18 +317,17 @@ really_inline bool parse_number(const u8 * buf, UNUSED size_t len, UNUSED Parsed
// Another shufti - also a bit hand-hacked. Need to make a better construction
const m256 low_nibble_mask = _mm256_setr_epi8(
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
33, 2, 2, 2, 2, 10, 2, 2, 2, 66, 64, 16, 32,208, 4, 0,
33, 2, 2, 2, 2, 10, 2, 2, 2, 66, 64, 16, 32,208, 4, 0
);
33, 2, 2, 2, 2, 10, 2, 2, 2, 66, 64, 16, 32, 0xd0, 4, 0, 33, 2, 2, 2, 2,
10, 2, 2, 2, 66, 64, 16, 32, 0xd0, 4, 0);
const m256 high_nibble_mask = _mm256_setr_epi8(
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
64, 0, 52, 3, 8,128, 8,128, 0, 0, 0, 0, 0, 0, 0, 0,
64, 0, 52, 3, 8,128, 8,128, 0, 0, 0, 0, 0, 0, 0, 0
);
64, 0, 52, 3, 8, -128, 8, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 52, 3, 8,
-128, 8, 0x80, 0, 0, 0, 0, 0, 0, 0, 0);
m256 tmp = _mm256_and_si256(
_mm256_shuffle_epi8(low_nibble_mask, v),
_mm256_shuffle_epi8(high_nibble_mask,
_mm256_shuffle_epi8(
high_nibble_mask,
_mm256_and_si256(_mm256_srli_epi32(v, 4), _mm256_set1_epi8(0x7f))));
m256 enders_mask = _mm256_set1_epi8(0xe0);
@ -327,12 +341,12 @@ really_inline bool parse_number(const u8 * buf, UNUSED size_t len, UNUSED Parsed
// a heroically long number string or some garbage
}
// TODO: make a mask that indicates where our digits are
u32 number_mask = ~enders & (enders-1);
u32 number_mask = ~enders & (enders - 1);
dumpbits32(number_mask, "number mask");
m256 n_mask = _mm256_set1_epi8(0x1f);
m256 tmp_n = _mm256_cmpeq_epi8(_mm256_and_si256(tmp, n_mask),
_mm256_set1_epi8(0));
m256 tmp_n =
_mm256_cmpeq_epi8(_mm256_and_si256(tmp, n_mask), _mm256_set1_epi8(0));
u32 number_characters = ~(u32)_mm256_movemask_epi8(tmp_n);
// put something into our error sump if we have something
@ -343,29 +357,29 @@ really_inline bool parse_number(const u8 * buf, UNUSED size_t len, UNUSED Parsed
dumpbits32(number_characters, "number characters");
m256 d_mask = _mm256_set1_epi8(0x03);
m256 tmp_d = _mm256_cmpeq_epi8(_mm256_and_si256(tmp, d_mask),
_mm256_set1_epi8(0));
m256 tmp_d =
_mm256_cmpeq_epi8(_mm256_and_si256(tmp, d_mask), _mm256_set1_epi8(0));
u32 digit_characters = ~(u32)_mm256_movemask_epi8(tmp_d);
digit_characters &= number_mask;
dumpbits32(digit_characters, "digit characters");
m256 p_mask = _mm256_set1_epi8(0x04);
m256 tmp_p = _mm256_cmpeq_epi8(_mm256_and_si256(tmp, p_mask),
_mm256_set1_epi8(0));
m256 tmp_p =
_mm256_cmpeq_epi8(_mm256_and_si256(tmp, p_mask), _mm256_set1_epi8(0));
u32 decimal_characters = ~(u32)_mm256_movemask_epi8(tmp_p);
decimal_characters &= number_mask;
dumpbits32(decimal_characters, "decimal characters");
m256 e_mask = _mm256_set1_epi8(0x08);
m256 tmp_e = _mm256_cmpeq_epi8(_mm256_and_si256(tmp, e_mask),
_mm256_set1_epi8(0));
m256 tmp_e =
_mm256_cmpeq_epi8(_mm256_and_si256(tmp, e_mask), _mm256_set1_epi8(0));
u32 exponent_characters = ~(u32)_mm256_movemask_epi8(tmp_e);
exponent_characters &= number_mask;
dumpbits32(exponent_characters, "exponent characters");
m256 s_mask = _mm256_set1_epi8(0x10);
m256 tmp_s = _mm256_cmpeq_epi8(_mm256_and_si256(tmp, s_mask),
_mm256_set1_epi8(0));
m256 tmp_s =
_mm256_cmpeq_epi8(_mm256_and_si256(tmp, s_mask), _mm256_set1_epi8(0));
u32 sign_characters = ~(u32)_mm256_movemask_epi8(tmp_s);
sign_characters &= number_mask;
dumpbits32(sign_characters, "sign characters");
@ -375,7 +389,9 @@ really_inline bool parse_number(const u8 * buf, UNUSED size_t len, UNUSED Parsed
// check that we have 1-3 'edges' only
u32 t = digit_edges;
t &= t-1; t &= t-1; t &= t-1;
t &= t - 1;
t &= t - 1;
t &= t - 1;
error_sump |= t;
// check that we start with a digit
@ -390,7 +406,7 @@ really_inline bool parse_number(const u8 * buf, UNUSED size_t len, UNUSED Parsed
// spot in the buffer.
if (__builtin_popcount(digit_edges) == 1) {
// try a strtoll
char * end;
char *end;
u64 result = strtoll((const char *)src, &end, 10);
if ((errno != 0) || (end == (const char *)src)) {
error_sump |= 1;
@ -403,11 +419,14 @@ really_inline bool parse_number(const u8 * buf, UNUSED size_t len, UNUSED Parsed
cout << "Found number " << result << "\n";
#endif
*((u64 *)pj.current_number_buf_loc) = result;
pj.tape[tape_loc] = ((u32)'l') << 24 | (pj.current_number_buf_loc - pj.number_buf); // assume 2^24 will hold all numbers for now
pj.tape[tape_loc] =
((u32)'l') << 24 |
(pj.current_number_buf_loc -
pj.number_buf); // assume 2^24 will hold all numbers for now
pj.current_number_buf_loc += 8;
} else {
// try a strtod
char * end;
char *end;
double result = strtod((const char *)src, &end);
if ((errno != 0) || (end == (const char *)src)) {
error_sump |= 1;
@ -420,7 +439,10 @@ really_inline bool parse_number(const u8 * buf, UNUSED size_t len, UNUSED Parsed
cout << "Found number " << result << "\n";
#endif
*((double *)pj.current_number_buf_loc) = result;
pj.tape[tape_loc] = ((u32)'d') << 24 | (pj.current_number_buf_loc - pj.number_buf); // assume 2^24 will hold all numbers for now
pj.tape[tape_loc] =
((u32)'d') << 24 |
(pj.current_number_buf_loc -
pj.number_buf); // assume 2^24 will hold all numbers for now
pj.current_number_buf_loc += 8;
}
// TODO: check the MSB element is a digit
@ -456,13 +478,13 @@ really_inline bool parse_number(const u8 * buf, UNUSED size_t len, UNUSED Parsed
return true;
}
bool tape_disturbed(u32 i, ParsedJson & pj) {
u32 start_loc = i*MAX_TAPE_ENTRIES;
bool tape_disturbed(u32 i, ParsedJson &pj) {
u32 start_loc = i * MAX_TAPE_ENTRIES;
u32 end_loc = pj.tape_locs[i];
return start_loc != end_loc;
}
bool shovel_machine(const u8 * buf, size_t len, ParsedJson & pj) {
bool shovel_machine(const u8 *buf, size_t len, ParsedJson &pj) {
// fixup the mess made by the ape_machine
// as such it does a bunch of miscellaneous things on the tapes
u32 error_sump = 0;
@ -476,40 +498,51 @@ bool shovel_machine(const u8 * buf, size_t len, ParsedJson & pj) {
// zone we need to quit. Note that our periodic checks to see that we're
// inside our safe zone in stage 3 don't guarantee that the system did
// not get into the danger area briefly.
if (tape_disturbed(START_DEPTH - 1, pj) || tape_disturbed(REDLINE_DEPTH, pj)) {
if (tape_disturbed(START_DEPTH - 1, pj) ||
tape_disturbed(REDLINE_DEPTH, pj)) {
return false;
}
// walk over each tape
for (u32 i = START_DEPTH; i < MAX_DEPTH; i++) {
u32 start_loc = i*MAX_TAPE_ENTRIES;
u32 start_loc = i * MAX_TAPE_ENTRIES;
u32 end_loc = pj.tape_locs[i];
if (start_loc == end_loc) {
break;
}
for (u32 j = start_loc; j < end_loc; j++) {
switch (pj.tape[j]>>56) {
case '{': case '[': {
switch (pj.tape[j] >> 56) {
case '{':
case '[': {
// pivot our tapes
// point the enclosing structural char (}]) to the head marker ({[) and
// put the end of the sequence on the tape at the head marker
// we start with head marker pointing at the enclosing structural char
// and the enclosing structural char pointing at the end. Just swap them.
// also check the balanced-{} or [] property here
// and the enclosing structural char pointing at the end. Just swap
// them. also check the balanced-{} or [] property here
u8 head_marker_c = pj.tape[j] >> 56;
u32 head_marker_loc = pj.tape[j] & 0xffffffffffffffULL;
u64 tape_enclosing = pj.tape[head_marker_loc];
u8 enclosing_c = tape_enclosing >> 56;
pj.tape[head_marker_loc] = pj.tape[j];
pj.tape[j] = tape_enclosing;
error_sump |= (enclosing_c - head_marker_c - 2); // [] and {} only differ by 2 chars
error_sump |= (enclosing_c - head_marker_c -
2); // [] and {} only differ by 2 chars
break;
}
case '"': {
error_sump |= !parse_string(buf, len, pj, j);
break;
}
case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
error_sump |= !parse_number(buf, len, pj, j, false, false);
break;
case '0':
@ -520,27 +553,30 @@ bool shovel_machine(const u8 * buf, size_t len, ParsedJson & pj) {
break;
case 't': {
u32 offset = pj.tape[j] & 0xffffffffffffffULL;
const u8 * loc = buf + offset;
u64 locval;// we want to avoid unaligned 64-bit loads (undefined in C/C++)
std::memcpy(&locval,loc,sizeof(u64));
const u8 *loc = buf + offset;
u64 locval; // we want to avoid unaligned 64-bit loads (undefined in
// C/C++)
std::memcpy(&locval, loc, sizeof(u64));
error_sump |= (locval & mask4) ^ tv;
error_sump |= is_not_structural_or_whitespace(loc[4]);
break;
}
case 'f': {
u32 offset = pj.tape[j] & 0xffffffffffffffULL;
const u8 * loc = buf + offset;
u64 locval;// we want to avoid unaligned 64-bit loads (undefined in C/C++)
std::memcpy(&locval,loc,sizeof(u64));
const u8 *loc = buf + offset;
u64 locval; // we want to avoid unaligned 64-bit loads (undefined in
// C/C++)
std::memcpy(&locval, loc, sizeof(u64));
error_sump |= (locval & mask5) ^ fv;
error_sump |= is_not_structural_or_whitespace(loc[5]);
break;
}
case 'n': {
u32 offset = pj.tape[j] & 0xffffffffffffffULL;
const u8 * loc = buf + offset;
u64 locval;// we want to avoid unaligned 64-bit loads (undefined in C/C++)
std::memcpy(&locval,loc,sizeof(u64));
const u8 *loc = buf + offset;
u64 locval; // we want to avoid unaligned 64-bit loads (undefined in
// C/C++)
std::memcpy(&locval, loc, sizeof(u64));
error_sump |= (locval & mask4) ^ nv;
error_sump |= is_not_structural_or_whitespace(loc[4]);
break;
@ -551,10 +587,8 @@ bool shovel_machine(const u8 * buf, size_t len, ParsedJson & pj) {
}
}
if (error_sump) {
// cerr << "Ugh!\n";
// cerr << "Ugh!\n";
return false;
}
return true;
}

View File

@ -6,13 +6,7 @@
#include <stdio.h>
#include <stdlib.h>
#include "common_defs.h"
#include "jsonioutil.h"
#include "simdjson_internal.h"
#include "stage1_find_marks.h"
#include "stage2_flatten.h"
#include "stage3_ape_machine.h"
#include "stage4_shovel_machine.h"
#include "jsonparser.h"
/**
* Does the file filename ends with the given extension.
@ -29,7 +23,7 @@ bool startsWith(const char *pre, const char *str) {
bool validate(const char *dirname) {
bool everythingfine = true;
init_state_machine(); // to be safe
// init_state_machine(); // no longer necessary
const char *extension = ".json";
size_t dirlen = strlen(dirname);
struct dirent **entry_list;
@ -58,26 +52,13 @@ bool validate(const char *dirname) {
}
std::pair<u8 *, size_t> p = get_corpus(fullpath);
// terrible hack but just to get it working
ParsedJson *pj_ptr = new ParsedJson;
ParsedJson &pj(*pj_ptr);
if (posix_memalign((void **)&pj.structurals, 8,
ROUNDUP_N(p.second, 64) / 8)) {
std::cerr << "Could not allocate memory" << std::endl;
ParsedJson *pj_ptr = allocate_ParsedJson(p.second);
if(pj_ptr == NULL) {
std::cerr<< "can't allocate memory"<<std::endl;
return false;
};
pj.n_structural_indexes = 0;
u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7;
pj.structural_indexes = new u32[max_structures];
bool isok = find_structural_bits(p.first, p.second, pj);
if (isok) {
isok = flatten_indexes(p.second, pj);
}
if (isok) {
isok = ape_machine(p.first, p.second, pj);
}
if (isok) {
isok = shovel_machine(p.first, p.second, pj);
}
ParsedJson &pj(*pj_ptr);
bool isok = json_parse(p.first, p.second, pj);
if (startsWith("pass", name)) {
if (!isok) {
printf("warning: file %s should pass but it fails.\n", name);
@ -92,10 +73,9 @@ bool validate(const char *dirname) {
printf("File %s %s.\n", name,
isok ? " is valid JSON " : " is not valid JSON");
}
free(pj.structurals);
free(p.first);
delete[] pj.structural_indexes;
free(fullpath);
deallocate_ParsedJson(pj_ptr);
}
}
for (int i = 0; i < c; ++i)