Major surgery.
This commit is contained in:
parent
726eb5a030
commit
fb65be64bb
30
Makefile
30
Makefile
|
@ -6,32 +6,48 @@
|
||||||
|
|
||||||
.PHONY: clean cleandist
|
.PHONY: clean cleandist
|
||||||
|
|
||||||
CXXFLAGS = -std=c++11 -O2 -march=native -Wall -Wextra -Wshadow -Iinclude -Iinclude/linux -Idependencies/double-conversion -Ldependencies/double-conversion/release
|
CXXFLAGS = -std=c++11 -O2 -march=native -Wall -Wextra -Wshadow -Iinclude -Ibenchmark/linux -Idependencies/double-conversion -Idependencies/rapidjson/include -Ldependencies/double-conversion/release
|
||||||
LIBFLAGS = -ldouble-conversion
|
LIBFLAGS = -ldouble-conversion
|
||||||
|
|
||||||
EXECUTABLES=parse jsoncheck
|
EXECUTABLES=parse jsoncheck minifiercompetition parsingcompetition
|
||||||
HEADERS=include/common_defs.h include/jsonioutil.h include/linux/linux-perf-events.h include/simdjson_internal.h include/stage1_find_marks.h include/stage2_flatten.h include/stage3_ape_machine.h include/stage4_shovel_machine.h
|
HEADERS=include/jsonparser.h include/common_defs.h include/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdprune_tables.h include/simdjson_internal.h include/stage1_find_marks.h include/stage2_flatten.h include/stage3_ape_machine.h include/stage4_shovel_machine.h include/jsonminifier.h
|
||||||
LIBFILES=src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage3_ape_machine.cpp src/stage4_shovel_machine.cpp
|
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage3_ape_machine.cpp src/stage4_shovel_machine.cpp src/jsonminifier.cpp
|
||||||
EXTRA_EXECUTABLES=parsenocheesy parsenodep8
|
EXTRA_EXECUTABLES=parsenocheesy parsenodep8
|
||||||
|
|
||||||
LIDDOUBLE:=dependencies/double-conversion/release/libdouble-conversion.a
|
LIBDOUBLE:=dependencies/double-conversion/release/libdouble-conversion.a
|
||||||
|
RAPIDJSON_INCLUDE:=dependencies/rapidjson/include
|
||||||
|
|
||||||
LIBS=$(LIDDOUBLE)
|
LIBS=$(RAPIDJSON_INCLUDE) $(LIBDOUBLE)
|
||||||
|
|
||||||
all: $(LIBS) $(EXECUTABLES)
|
all: $(LIBS) $(EXECUTABLES)
|
||||||
|
|
||||||
test: jsoncheck
|
test: jsoncheck
|
||||||
./jsoncheck
|
./jsoncheck
|
||||||
|
|
||||||
$(LIDDOUBLE) : dependencies/double-conversion/README.md
|
$(RAPIDJSON_INCLUDE):
|
||||||
|
git submodule update --init --recursive
|
||||||
|
|
||||||
|
$(LIBDOUBLE) : dependencies/double-conversion/README.md
|
||||||
cd dependencies/double-conversion/ && mkdir -p release && cd release && cmake .. && make
|
cd dependencies/double-conversion/ && mkdir -p release && cd release && cmake .. && make
|
||||||
|
|
||||||
|
|
||||||
|
bench: benchmarks/bench.cpp $(RAPIDJSON_INCLUDE) $(HEADERS)
|
||||||
|
$(CXX) -std=c++11 -O3 -o $@ benchmarks/bench.cpp -I$(RAPIDJSON_INCLUDE) -Iinclude -march=native -lm -Wall -Wextra -Wno-narrowing
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
parse: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
|
parse: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
|
||||||
$(CXX) $(CXXFLAGS) -o parse $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
|
$(CXX) $(CXXFLAGS) -o parse $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
|
||||||
|
|
||||||
jsoncheck:tests/jsoncheck.cpp $(HEADERS) $(LIBFILES)
|
jsoncheck:tests/jsoncheck.cpp $(HEADERS) $(LIBFILES)
|
||||||
$(CXX) $(CXXFLAGS) -o jsoncheck $(LIBFILES) tests/jsoncheck.cpp -I. $(LIBFLAGS)
|
$(CXX) $(CXXFLAGS) -o jsoncheck $(LIBFILES) tests/jsoncheck.cpp -I. $(LIBFLAGS)
|
||||||
|
|
||||||
|
minifiercompetition: benchmark/minifiercompetition.cpp $(HEADERS) $(LIBFILES)
|
||||||
|
$(CXX) $(CXXFLAGS) -o minifiercompetition $(LIBFILES) benchmark/minifiercompetition.cpp -I. $(LIBFLAGS)
|
||||||
|
|
||||||
|
parsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES)
|
||||||
|
$(CXX) $(CXXFLAGS) -o parsingcompetition $(LIBFILES) benchmark/parsingcompetition.cpp -I. $(LIBFLAGS)
|
||||||
|
|
||||||
|
|
||||||
parsehisto: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
|
parsehisto: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
|
||||||
$(CXX) $(CXXFLAGS) -o parsehisto benchmark/parse.cpp $(LIBFILES) $(LIBFLAGS) -DBUILDHISTOGRAM
|
$(CXX) $(CXXFLAGS) -o parsehisto benchmark/parse.cpp $(LIBFILES) $(LIBFLAGS) -DBUILDHISTOGRAM
|
||||||
|
|
|
@ -17,7 +17,7 @@ const char *unitname = "cycles";
|
||||||
: \
|
: \
|
||||||
: /* no read only */ \
|
: /* no read only */ \
|
||||||
"%rax", "%rbx", "%rcx", "%rdx" /* clobbers */ \
|
"%rax", "%rbx", "%rcx", "%rdx" /* clobbers */ \
|
||||||
); \
|
); \
|
||||||
(cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
|
(cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
@ -31,7 +31,7 @@ const char *unitname = "cycles";
|
||||||
: "=r"(cyc_high), "=r"(cyc_low) \
|
: "=r"(cyc_high), "=r"(cyc_low) \
|
||||||
: /* no read only registers */ \
|
: /* no read only registers */ \
|
||||||
: "%rax", "%rbx", "%rcx", "%rdx" /* clobbers */ \
|
: "%rax", "%rbx", "%rcx", "%rdx" /* clobbers */ \
|
||||||
); \
|
); \
|
||||||
(cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
|
(cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
@ -106,9 +106,9 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
|
||||||
float cycle_per_op = (min_diff) / (double)S; \
|
float cycle_per_op = (min_diff) / (double)S; \
|
||||||
float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \
|
float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \
|
||||||
if (verbose) \
|
if (verbose) \
|
||||||
printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
|
printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
|
||||||
if (verbose) \
|
if (verbose) \
|
||||||
printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
|
printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
|
||||||
if (verbose) \
|
if (verbose) \
|
||||||
printf("\n"); \
|
printf("\n"); \
|
||||||
if (!verbose) \
|
if (!verbose) \
|
||||||
|
@ -117,7 +117,7 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
// like BEST_TIME, but no check
|
// like BEST_TIME, but no check
|
||||||
#define BEST_TIME_NOCHECK(test, pre, repeat, size, verbose) \
|
#define BEST_TIME_NOCHECK(test, pre, repeat, size, verbose) \
|
||||||
do { \
|
do { \
|
||||||
if (global_rdtsc_overhead == UINT64_MAX) { \
|
if (global_rdtsc_overhead == UINT64_MAX) { \
|
||||||
RDTSC_SET_OVERHEAD(rdtsc_overhead_func(1), repeat); \
|
RDTSC_SET_OVERHEAD(rdtsc_overhead_func(1), repeat); \
|
||||||
|
@ -143,10 +143,10 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
|
||||||
float cycle_per_op = (min_diff) / (double)S; \
|
float cycle_per_op = (min_diff) / (double)S; \
|
||||||
float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \
|
float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \
|
||||||
if (verbose) \
|
if (verbose) \
|
||||||
printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
|
printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
|
||||||
|
if (verbose) \
|
||||||
|
printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
|
||||||
if (verbose) \
|
if (verbose) \
|
||||||
printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
|
|
||||||
if (verbose) \
|
|
||||||
printf("\n"); \
|
printf("\n"); \
|
||||||
if (!verbose) \
|
if (!verbose) \
|
||||||
printf(" %.3f ", cycle_per_op); \
|
printf(" %.3f ", cycle_per_op); \
|
|
@ -0,0 +1,88 @@
|
||||||
|
// https://github.com/WojciechMula/toys/blob/master/000helpers/linux-perf-events.h
|
||||||
|
#pragma once
|
||||||
|
#ifdef __linux__
|
||||||
|
|
||||||
|
#include <asm/unistd.h> // for __NR_perf_event_open
|
||||||
|
#include <linux/perf_event.h> // for perf event constants
|
||||||
|
#include <sys/ioctl.h> // for ioctl
|
||||||
|
#include <unistd.h> // for syscall
|
||||||
|
|
||||||
|
#include <cerrno> // for errno
|
||||||
|
#include <cstring> // for memset
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
|
||||||
|
int fd;
|
||||||
|
perf_event_attr attribs;
|
||||||
|
int num_events;
|
||||||
|
std::vector<uint64_t> temp_result_vec;
|
||||||
|
std::vector<uint64_t> ids;
|
||||||
|
|
||||||
|
public:
|
||||||
|
LinuxEvents(std::vector<int> config_vec) : fd(0) {
|
||||||
|
memset(&attribs, 0, sizeof(attribs));
|
||||||
|
attribs.type = TYPE;
|
||||||
|
attribs.size = sizeof(attribs);
|
||||||
|
attribs.disabled = 1;
|
||||||
|
attribs.exclude_kernel = 1;
|
||||||
|
attribs.exclude_hv = 1;
|
||||||
|
|
||||||
|
attribs.sample_period = 0;
|
||||||
|
attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
|
||||||
|
const int pid = 0; // the current process
|
||||||
|
const int cpu = -1; // all CPUs
|
||||||
|
const unsigned long flags = 0;
|
||||||
|
|
||||||
|
int group = -1; // no group
|
||||||
|
num_events = config_vec.size();
|
||||||
|
u32 i = 0;
|
||||||
|
for (auto config : config_vec) {
|
||||||
|
attribs.config = config;
|
||||||
|
fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags);
|
||||||
|
if (fd == -1) {
|
||||||
|
report_error("perf_event_open");
|
||||||
|
}
|
||||||
|
ioctl(fd, PERF_EVENT_IOC_ID, &ids[i++]);
|
||||||
|
if (group == -1) {
|
||||||
|
group = fd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
temp_result_vec.resize(num_events * 2 + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
~LinuxEvents() { close(fd); }
|
||||||
|
|
||||||
|
really_inline void start() {
|
||||||
|
if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
|
||||||
|
report_error("ioctl(PERF_EVENT_IOC_RESET)");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
|
||||||
|
report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
really_inline void end(std::vector<unsigned long long> &results) {
|
||||||
|
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
|
||||||
|
report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (read(fd, &temp_result_vec[0], temp_result_vec.size() * 8) == -1) {
|
||||||
|
report_error("read");
|
||||||
|
}
|
||||||
|
// our actual results are in slots 1,3,5, ... of this structure
|
||||||
|
// we really should be checking our ids obtained earlier to be safe
|
||||||
|
for (u32 i = 1; i < temp_result_vec.size(); i += 2) {
|
||||||
|
results[i / 2] = temp_result_vec[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void report_error(const std::string &context) {
|
||||||
|
throw std::runtime_error(context + ": " + std::string(strerror(errno)));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
#endif
|
|
@ -1,27 +1,26 @@
|
||||||
|
#include "common_defs.h"
|
||||||
|
#include "double-conversion/double-conversion.h"
|
||||||
|
#include "linux-perf-events.h"
|
||||||
|
#include <algorithm>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <chrono>
|
||||||
|
#include <cstring>
|
||||||
#include <dirent.h>
|
#include <dirent.h>
|
||||||
|
#include <fstream>
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <iostream>
|
||||||
|
#include <map>
|
||||||
|
#include <set>
|
||||||
|
#include <sstream>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <unistd.h>
|
|
||||||
#include <iostream>
|
|
||||||
#include <iomanip>
|
|
||||||
#include <chrono>
|
|
||||||
#include <fstream>
|
|
||||||
#include <sstream>
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <cstring>
|
#include <unistd.h>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <set>
|
|
||||||
#include <map>
|
|
||||||
#include <algorithm>
|
|
||||||
#include <x86intrin.h>
|
#include <x86intrin.h>
|
||||||
#include <assert.h>
|
|
||||||
#include "double-conversion/double-conversion.h"
|
|
||||||
#include "common_defs.h"
|
|
||||||
#include "linux-perf-events.h"
|
|
||||||
|
|
||||||
|
|
||||||
/// Fixme: enable double conv
|
/// Fixme: enable double conv
|
||||||
// #define DOUBLECONV
|
// #define DOUBLECONV
|
||||||
|
@ -34,197 +33,230 @@ using namespace double_conversion;
|
||||||
|
|
||||||
//#define DEBUG
|
//#define DEBUG
|
||||||
|
|
||||||
|
#include "jsonioutil.h"
|
||||||
#include "simdjson_internal.h"
|
#include "simdjson_internal.h"
|
||||||
#include "stage1_find_marks.h"
|
#include "stage1_find_marks.h"
|
||||||
#include "stage2_flatten.h"
|
#include "stage2_flatten.h"
|
||||||
#include "stage3_ape_machine.h"
|
#include "stage3_ape_machine.h"
|
||||||
#include "stage4_shovel_machine.h"
|
#include "stage4_shovel_machine.h"
|
||||||
#include "jsonioutil.h"
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// https://stackoverflow.com/questions/2616906/how-do-i-output-coloured-text-to-a-linux-terminal
|
// https://stackoverflow.com/questions/2616906/how-do-i-output-coloured-text-to-a-linux-terminal
|
||||||
namespace Color {
|
namespace Color {
|
||||||
enum Code {
|
enum Code {
|
||||||
FG_DEFAULT = 39, FG_BLACK = 30, FG_RED = 31, FG_GREEN = 32,
|
FG_DEFAULT = 39,
|
||||||
FG_YELLOW = 33, FG_BLUE = 34, FG_MAGENTA = 35, FG_CYAN = 36,
|
FG_BLACK = 30,
|
||||||
FG_LIGHT_GRAY = 37, FG_DARK_GRAY = 90, FG_LIGHT_RED = 91,
|
FG_RED = 31,
|
||||||
FG_LIGHT_GREEN = 92, FG_LIGHT_YELLOW = 93, FG_LIGHT_BLUE = 94,
|
FG_GREEN = 32,
|
||||||
FG_LIGHT_MAGENTA = 95, FG_LIGHT_CYAN = 96, FG_WHITE = 97,
|
FG_YELLOW = 33,
|
||||||
BG_RED = 41, BG_GREEN = 42, BG_BLUE = 44, BG_DEFAULT = 49
|
FG_BLUE = 34,
|
||||||
};
|
FG_MAGENTA = 35,
|
||||||
class Modifier {
|
FG_CYAN = 36,
|
||||||
Code code;
|
FG_LIGHT_GRAY = 37,
|
||||||
public:
|
FG_DARK_GRAY = 90,
|
||||||
Modifier(Code pCode) : code(pCode) {}
|
FG_LIGHT_RED = 91,
|
||||||
friend std::ostream&
|
FG_LIGHT_GREEN = 92,
|
||||||
operator<<(std::ostream& os, const Modifier& mod) {
|
FG_LIGHT_YELLOW = 93,
|
||||||
return os << "\033[" << mod.code << "m";
|
FG_LIGHT_BLUE = 94,
|
||||||
}
|
FG_LIGHT_MAGENTA = 95,
|
||||||
};
|
FG_LIGHT_CYAN = 96,
|
||||||
|
FG_WHITE = 97,
|
||||||
|
BG_RED = 41,
|
||||||
|
BG_GREEN = 42,
|
||||||
|
BG_BLUE = 44,
|
||||||
|
BG_DEFAULT = 49
|
||||||
|
};
|
||||||
|
class Modifier {
|
||||||
|
Code code;
|
||||||
|
|
||||||
|
public:
|
||||||
|
Modifier(Code pCode) : code(pCode) {}
|
||||||
|
friend std::ostream &operator<<(std::ostream &os, const Modifier &mod) {
|
||||||
|
return os << "\033[" << mod.code << "m";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace Color
|
||||||
|
|
||||||
|
void colorfuldisplay(ParsedJson &pj, const u8 *buf) {
|
||||||
|
Color::Modifier greenfg(Color::FG_GREEN);
|
||||||
|
Color::Modifier yellowfg(Color::FG_YELLOW);
|
||||||
|
Color::Modifier deffg(Color::FG_DEFAULT);
|
||||||
|
size_t i = 0;
|
||||||
|
// skip initial fluff
|
||||||
|
while ((i + 1 < pj.n_structural_indexes) &&
|
||||||
|
(pj.structural_indexes[i] == pj.structural_indexes[i + 1])) {
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
for (; i < pj.n_structural_indexes; i++) {
|
||||||
|
u32 idx = pj.structural_indexes[i];
|
||||||
|
u8 c = buf[idx];
|
||||||
|
if (((c & 0xdf) == 0x5b)) { // meaning 7b or 5b, { or [
|
||||||
|
std::cout << greenfg << buf[idx] << deffg;
|
||||||
|
} else if (((c & 0xdf) == 0x5d)) { // meaning 7d or 5d, } or ]
|
||||||
|
std::cout << greenfg << buf[idx] << deffg;
|
||||||
|
} else {
|
||||||
|
std::cout << yellowfg << buf[idx] << deffg;
|
||||||
|
}
|
||||||
|
if (i + 1 < pj.n_structural_indexes) {
|
||||||
|
u32 nextidx = pj.structural_indexes[i + 1];
|
||||||
|
for (u32 pos = idx + 1; pos < nextidx; pos++) {
|
||||||
|
std::cout << buf[pos];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void colorfuldisplay(ParsedJson & pj, const u8 * buf) {
|
int main(int argc, char *argv[]) {
|
||||||
Color::Modifier greenfg(Color::FG_GREEN);
|
if (argc != 2) {
|
||||||
Color::Modifier yellowfg(Color::FG_YELLOW);
|
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
|
||||||
Color::Modifier deffg(Color::FG_DEFAULT);
|
exit(1);
|
||||||
size_t i = 0;
|
}
|
||||||
// skip initial fluff
|
pair<u8 *, size_t> p = get_corpus(argv[1]);
|
||||||
while((i+1< pj.n_structural_indexes) && (pj.structural_indexes[i]==pj.structural_indexes[i+1])){
|
ParsedJson *pj_ptr = new ParsedJson;
|
||||||
i++;
|
ParsedJson &pj(*pj_ptr);
|
||||||
}
|
|
||||||
for (; i < pj.n_structural_indexes; i++) {
|
|
||||||
u32 idx = pj.structural_indexes[i];
|
|
||||||
u8 c = buf[idx];
|
|
||||||
if (((c & 0xdf) == 0x5b)) { // meaning 7b or 5b, { or [
|
|
||||||
std::cout << greenfg << buf[idx] << deffg;
|
|
||||||
} else if (((c & 0xdf) == 0x5d)) { // meaning 7d or 5d, } or ]
|
|
||||||
std::cout << greenfg << buf[idx] << deffg;
|
|
||||||
} else {
|
|
||||||
std::cout << yellowfg << buf[idx] << deffg;
|
|
||||||
}
|
|
||||||
if(i + 1 < pj.n_structural_indexes) {
|
|
||||||
u32 nextidx = pj.structural_indexes[i + 1];
|
|
||||||
for(u32 pos = idx + 1 ; pos < nextidx; pos++) {
|
|
||||||
std::cout << buf[pos];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
if (posix_memalign((void **)&pj.structurals, 8,
|
||||||
|
ROUNDUP_N(p.second, 64) / 8)) {
|
||||||
|
cerr << "Could not allocate memory" << endl;
|
||||||
|
exit(1);
|
||||||
|
};
|
||||||
|
|
||||||
|
if (p.second > 0xffffff) {
|
||||||
|
cerr << "Currently only support JSON files < 16MB\n";
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
init_state_machine();
|
||||||
|
|
||||||
|
pj.n_structural_indexes = 0;
|
||||||
|
// we have potentially 1 structure per byte of input
|
||||||
int main(int argc, char * argv[]) {
|
// as well as a dummy structure and a root structure
|
||||||
if (argc != 2) {
|
// we also potentially write up to 7 iterations beyond
|
||||||
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
|
// in our 'cheesy flatten', so make some worst-case
|
||||||
exit(1);
|
// space for that too
|
||||||
}
|
u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7;
|
||||||
pair<u8 *, size_t> p = get_corpus(argv[1]);
|
pj.structural_indexes = new u32[max_structures];
|
||||||
ParsedJson * pj_ptr = new ParsedJson;
|
|
||||||
ParsedJson & pj(*pj_ptr);
|
|
||||||
|
|
||||||
if (posix_memalign( (void **)&pj.structurals, 8, ROUNDUP_N(p.second, 64)/8)) {
|
|
||||||
cerr << "Could not allocate memory" << endl;
|
|
||||||
exit(1);
|
|
||||||
};
|
|
||||||
|
|
||||||
if (p.second > 0xffffff) {
|
|
||||||
cerr << "Currently only support JSON files < 16MB\n";
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
init_state_machine();
|
|
||||||
|
|
||||||
pj.n_structural_indexes = 0;
|
|
||||||
// we have potentially 1 structure per byte of input
|
|
||||||
// as well as a dummy structure and a root structure
|
|
||||||
// we also potentially write up to 7 iterations beyond
|
|
||||||
// in our 'cheesy flatten', so make some worst-case
|
|
||||||
// space for that too
|
|
||||||
u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7;
|
|
||||||
pj.structural_indexes = new u32[max_structures];
|
|
||||||
|
|
||||||
#if defined(DEBUG)
|
#if defined(DEBUG)
|
||||||
const u32 iterations = 1;
|
const u32 iterations = 1;
|
||||||
#else
|
#else
|
||||||
const u32 iterations = 1000;
|
const u32 iterations = 1000;
|
||||||
#endif
|
#endif
|
||||||
vector<double> res;
|
vector<double> res;
|
||||||
res.resize(iterations);
|
res.resize(iterations);
|
||||||
|
|
||||||
#if !defined(__linux__)
|
#if !defined(__linux__)
|
||||||
#define SQUASH_COUNTERS
|
#define SQUASH_COUNTERS
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef SQUASH_COUNTERS
|
#ifndef SQUASH_COUNTERS
|
||||||
vector<int> evts;
|
vector<int> evts;
|
||||||
evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
|
evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
|
||||||
evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
|
evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
|
||||||
LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
|
LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
|
||||||
vector<u64> results;
|
vector<u64> results;
|
||||||
results.resize(evts.size());
|
results.resize(evts.size());
|
||||||
unsigned long cy1 = 0, cy2 = 0, cy3 = 0, cy4 = 0;
|
unsigned long cy1 = 0, cy2 = 0, cy3 = 0, cy4 = 0;
|
||||||
unsigned long cl1 = 0, cl2 = 0, cl3 = 0, cl4 = 0;
|
unsigned long cl1 = 0, cl2 = 0, cl3 = 0, cl4 = 0;
|
||||||
#endif
|
#endif
|
||||||
bool isok = true;
|
bool isok = true;
|
||||||
for (u32 i = 0; i < iterations; i++) {
|
for (u32 i = 0; i < iterations; i++) {
|
||||||
auto start = std::chrono::steady_clock::now();
|
auto start = std::chrono::steady_clock::now();
|
||||||
#ifndef SQUASH_COUNTERS
|
#ifndef SQUASH_COUNTERS
|
||||||
unified.start();
|
unified.start();
|
||||||
#endif
|
#endif
|
||||||
isok = find_structural_bits(p.first, p.second, pj);
|
isok = find_structural_bits(p.first, p.second, pj);
|
||||||
#ifndef SQUASH_COUNTERS
|
#ifndef SQUASH_COUNTERS
|
||||||
unified.end(results);
|
unified.end(results);
|
||||||
cy1 += results[0]; cl1 += results[1];
|
cy1 += results[0];
|
||||||
if(! isok ) break;
|
cl1 += results[1];
|
||||||
unified.start();
|
if (!isok)
|
||||||
|
break;
|
||||||
|
unified.start();
|
||||||
#endif
|
#endif
|
||||||
isok = flatten_indexes(p.second, pj);
|
isok = flatten_indexes(p.second, pj);
|
||||||
#ifndef SQUASH_COUNTERS
|
#ifndef SQUASH_COUNTERS
|
||||||
unified.end(results);
|
unified.end(results);
|
||||||
cy2 += results[0]; cl2 += results[1];
|
cy2 += results[0];
|
||||||
if(! isok ) break;
|
cl2 += results[1];
|
||||||
unified.start();
|
if (!isok)
|
||||||
|
break;
|
||||||
|
unified.start();
|
||||||
#endif
|
#endif
|
||||||
isok = ape_machine(p.first, p.second, pj);
|
isok = ape_machine(p.first, p.second, pj);
|
||||||
#ifndef SQUASH_COUNTERS
|
#ifndef SQUASH_COUNTERS
|
||||||
unified.end(results);
|
unified.end(results);
|
||||||
cy3 += results[0]; cl3 += results[1];
|
cy3 += results[0];
|
||||||
if(! isok ) break;
|
cl3 += results[1];
|
||||||
unified.start();
|
if (!isok)
|
||||||
|
break;
|
||||||
|
unified.start();
|
||||||
#endif
|
#endif
|
||||||
isok = shovel_machine(p.first, p.second, pj);
|
isok = shovel_machine(p.first, p.second, pj);
|
||||||
#ifndef SQUASH_COUNTERS
|
#ifndef SQUASH_COUNTERS
|
||||||
unified.end(results);
|
unified.end(results);
|
||||||
cy4 += results[0]; cl4 += results[1];
|
cy4 += results[0];
|
||||||
|
cl4 += results[1];
|
||||||
#endif
|
#endif
|
||||||
if(! isok ) break;
|
if (!isok)
|
||||||
auto end = std::chrono::steady_clock::now();
|
break;
|
||||||
std::chrono::duration<double> secs = end - start;
|
auto end = std::chrono::steady_clock::now();
|
||||||
res[i] = secs.count();
|
std::chrono::duration<double> secs = end - start;
|
||||||
}
|
res[i] = secs.count();
|
||||||
|
}
|
||||||
|
|
||||||
#ifndef SQUASH_COUNTERS
|
#ifndef SQUASH_COUNTERS
|
||||||
printf("number of bytes %ld number of structural chars %d ratio %.3f\n", p.second, pj.n_structural_indexes,
|
printf("number of bytes %ld number of structural chars %d ratio %.3f\n",
|
||||||
(double) pj.n_structural_indexes / p.second);
|
p.second, pj.n_structural_indexes,
|
||||||
unsigned long total = cy1 + cy2 + cy3 + cy4;
|
(double)pj.n_structural_indexes / p.second);
|
||||||
|
unsigned long total = cy1 + cy2 + cy3 + cy4;
|
||||||
|
|
||||||
printf("stage 1 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
|
printf(
|
||||||
cl1, cy1, 100. * cy1 / total, (double) cl1 / cy1);
|
"stage 1 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
|
||||||
printf(" stage 1 runs at %.2f cycles per input byte.\n", (double) cy1 / (iterations * p.second));
|
cl1, cy1, 100. * cy1 / total, (double)cl1 / cy1);
|
||||||
|
printf(" stage 1 runs at %.2f cycles per input byte.\n",
|
||||||
|
(double)cy1 / (iterations * p.second));
|
||||||
|
|
||||||
printf("stage 2 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
|
printf(
|
||||||
cl2, cy2, 100. * cy2 / total, (double) cl2 / cy2);
|
"stage 2 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
|
||||||
printf(" stage 2 runs at %.2f cycles per input byte and ", (double) cy2 / (iterations * p.second));
|
cl2, cy2, 100. * cy2 / total, (double)cl2 / cy2);
|
||||||
printf("%.2f cycles per structural character.\n", (double) cy2 / (iterations * pj.n_structural_indexes));
|
printf(" stage 2 runs at %.2f cycles per input byte and ",
|
||||||
|
(double)cy2 / (iterations * p.second));
|
||||||
|
printf("%.2f cycles per structural character.\n",
|
||||||
|
(double)cy2 / (iterations * pj.n_structural_indexes));
|
||||||
|
|
||||||
printf("stage 3 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
|
printf(
|
||||||
cl3, cy3, 100. * cy3 / total, (double) cl3 / cy3);
|
"stage 3 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
|
||||||
printf(" stage 3 runs at %.2f cycles per input byte and ", (double) cy3 / (iterations * p.second));
|
cl3, cy3, 100. * cy3 / total, (double)cl3 / cy3);
|
||||||
printf("%.2f cycles per structural character.\n", (double) cy3 / (iterations * pj.n_structural_indexes));
|
printf(" stage 3 runs at %.2f cycles per input byte and ",
|
||||||
|
(double)cy3 / (iterations * p.second));
|
||||||
|
printf("%.2f cycles per structural character.\n",
|
||||||
|
(double)cy3 / (iterations * pj.n_structural_indexes));
|
||||||
|
|
||||||
printf("stage 4 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
|
printf(
|
||||||
cl4, cy4, 100. * cy4 / total, (double) cl4 / cy4);
|
"stage 4 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
|
||||||
printf(" stage 4 runs at %.2f cycles per input byte and ", (double) cy4 / (iterations * p.second));
|
cl4, cy4, 100. * cy4 / total, (double)cl4 / cy4);
|
||||||
printf("%.2f cycles per structural character.\n", (double) cy4 / (iterations * pj.n_structural_indexes));
|
printf(" stage 4 runs at %.2f cycles per input byte and ",
|
||||||
|
(double)cy4 / (iterations * p.second));
|
||||||
|
printf("%.2f cycles per structural character.\n",
|
||||||
|
(double)cy4 / (iterations * pj.n_structural_indexes));
|
||||||
|
|
||||||
printf(" all stages: %.2f cycles per input byte.\n", (double) total / (iterations * p.second));
|
printf(" all stages: %.2f cycles per input byte.\n",
|
||||||
|
(double)total / (iterations * p.second));
|
||||||
#endif
|
#endif
|
||||||
// colorfuldisplay(pj, p.first);
|
// colorfuldisplay(pj, p.first);
|
||||||
double min_result = *min_element(res.begin(), res.end());
|
double min_result = *min_element(res.begin(), res.end());
|
||||||
cout << "Min: " << min_result << " bytes read: " << p.second << " Gigabytes/second: " << (p.second) / (min_result * 1000000000.0) << "\n";
|
cout << "Min: " << min_result << " bytes read: " << p.second
|
||||||
|
<< " Gigabytes/second: " << (p.second) / (min_result * 1000000000.0)
|
||||||
|
<< "\n";
|
||||||
|
|
||||||
free(pj.structurals);
|
free(pj.structurals);
|
||||||
free(p.first);
|
free(p.first);
|
||||||
delete[] pj.structural_indexes;
|
delete[] pj.structural_indexes;
|
||||||
delete pj_ptr;
|
delete pj_ptr;
|
||||||
if(! isok ) {
|
if (!isok) {
|
||||||
printf(" Parsing failed. \n ");
|
printf(" Parsing failed. \n ");
|
||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
typedef unsigned char u8;
|
typedef unsigned char u8;
|
||||||
typedef unsigned short u16;
|
typedef unsigned short u16;
|
||||||
typedef unsigned int u32;
|
typedef unsigned int u32;
|
||||||
|
@ -9,47 +11,49 @@ typedef signed short s16;
|
||||||
typedef signed int s32;
|
typedef signed int s32;
|
||||||
typedef signed long long s64;
|
typedef signed long long s64;
|
||||||
|
|
||||||
#include <x86intrin.h>
|
#ifdef _MSC_VER
|
||||||
|
/* Microsoft C/C++-compatible compiler */
|
||||||
|
#include <intrin.h>
|
||||||
|
#else
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
|
#include <x86intrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef __m128i m128;
|
typedef __m128i m128;
|
||||||
typedef __m256i m256;
|
typedef __m256i m256;
|
||||||
|
|
||||||
// Snippets from Hyperscan
|
|
||||||
|
|
||||||
// Align to N-byte boundary
|
// Align to N-byte boundary
|
||||||
#define ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1))
|
#define ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1))
|
||||||
#define ROUNDDOWN_N(a, n) ((a) & ~((n)-1))
|
#define ROUNDDOWN_N(a, n) ((a) & ~((n)-1))
|
||||||
|
|
||||||
#define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n) - 1)) == 0)
|
#define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0)
|
||||||
|
|
||||||
#define really_inline inline __attribute__ ((always_inline, unused))
|
#define really_inline inline __attribute__((always_inline, unused))
|
||||||
#define never_inline inline __attribute__ ((noinline, unused))
|
#define never_inline inline __attribute__((noinline, unused))
|
||||||
|
|
||||||
#define UNUSED __attribute__ ((unused))
|
#define UNUSED __attribute__((unused))
|
||||||
|
|
||||||
#ifndef likely
|
#ifndef likely
|
||||||
#define likely(x) __builtin_expect(!!(x), 1)
|
#define likely(x) __builtin_expect(!!(x), 1)
|
||||||
#endif
|
#endif
|
||||||
#ifndef unlikely
|
#ifndef unlikely
|
||||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static inline
|
static inline u32 ctz64(u64 x) {
|
||||||
u32 ctz64(u64 x) {
|
assert(x); // behaviour not defined for x == 0
|
||||||
assert(x); // behaviour not defined for x == 0
|
|
||||||
#if defined(_WIN64)
|
#if defined(_WIN64)
|
||||||
unsigned long r;
|
unsigned long r;
|
||||||
_BitScanForward64(&r, x);
|
_BitScanForward64(&r, x);
|
||||||
return r;
|
return r;
|
||||||
#elif defined(_WIN32)
|
#elif defined(_WIN32)
|
||||||
unsigned long r;
|
unsigned long r;
|
||||||
if (_BitScanForward(&r, (u32)x)) {
|
if (_BitScanForward(&r, (u32)x)) {
|
||||||
return (u32)r;
|
return (u32)r;
|
||||||
}
|
}
|
||||||
_BitScanForward(&r, x >> 32);
|
_BitScanForward(&r, x >> 32);
|
||||||
return (u32)(r + 32);
|
return (u32)(r + 32);
|
||||||
#else
|
#else
|
||||||
return (u32)__builtin_ctzll(x);
|
return (u32)__builtin_ctzll(x);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,25 +7,10 @@
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#include "common_defs.h"
|
||||||
|
|
||||||
// get a corpus; pad out to cache line so we can always use SIMD
|
// get a corpus; pad out to cache line so we can always use SIMD
|
||||||
// throws exceptions in case of failure
|
// throws exceptions in case of failure
|
||||||
std::pair<u8 *, size_t> get_corpus(std::string filename) {
|
std::pair<u8 *, size_t> get_corpus(std::string filename);
|
||||||
std::ifstream is(filename, std::ios::binary);
|
|
||||||
if (is) {
|
|
||||||
std::stringstream buffer;
|
|
||||||
buffer << is.rdbuf();
|
|
||||||
size_t length = buffer.str().size();
|
|
||||||
char *aligned_buffer;
|
|
||||||
if (posix_memalign((void **)&aligned_buffer, 64, ROUNDUP_N(length, 64))) {
|
|
||||||
throw std::runtime_error("Could not allocate sufficient memory");
|
|
||||||
};
|
|
||||||
memset(aligned_buffer, 0x20, ROUNDUP_N(length, 64));
|
|
||||||
memcpy(aligned_buffer, buffer.str().c_str(), length);
|
|
||||||
is.close();
|
|
||||||
return std::make_pair((u8 *)aligned_buffer, length);
|
|
||||||
}
|
|
||||||
throw std::runtime_error("could not load corpus");
|
|
||||||
return std::make_pair((u8 *)0, (size_t)0);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
|
// Take input from buf and remove useless whitespace, write it to out; buf and
|
||||||
|
// out can be the same pointer.
|
||||||
|
size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out);
|
|
@ -0,0 +1,23 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "common_defs.h"
|
||||||
|
#include "jsonioutil.h"
|
||||||
|
#include "simdjson_internal.h"
|
||||||
|
#include "stage1_find_marks.h"
|
||||||
|
#include "stage2_flatten.h"
|
||||||
|
#include "stage3_ape_machine.h"
|
||||||
|
#include "stage4_shovel_machine.h"
|
||||||
|
|
||||||
|
// Allocate a ParsedJson structure that can support a document of
|
||||||
|
// up to len bytes.
|
||||||
|
// Return NULL if memory cannot be allocated.
|
||||||
|
// This structure is meant to be reused from document to document, as needed.
|
||||||
|
// Use deallocate_ParsedJson to release the memory.
|
||||||
|
ParsedJson *allocate_ParsedJson(size_t len);
|
||||||
|
|
||||||
|
// deallocate a ParsedJson struct (see allocate_ParsedJson)
|
||||||
|
void deallocate_ParsedJson(ParsedJson *pj_ptr);
|
||||||
|
|
||||||
|
// Parse a document found in buf, need to preallocate ParsedJson.
|
||||||
|
// Return false in case of a failure.
|
||||||
|
bool json_parse(const u8 *buf, size_t len, ParsedJson &pj);
|
|
@ -1,93 +0,0 @@
|
||||||
// https://github.com/WojciechMula/toys/blob/master/000helpers/linux-perf-events.h
|
|
||||||
#pragma once
|
|
||||||
#ifdef __linux__
|
|
||||||
|
|
||||||
#include <unistd.h> // for syscall
|
|
||||||
#include <sys/ioctl.h> // for ioctl
|
|
||||||
#include <asm/unistd.h> // for __NR_perf_event_open
|
|
||||||
#include <linux/perf_event.h> // for perf event constants
|
|
||||||
|
|
||||||
#include <cerrno> // for errno
|
|
||||||
#include <cstring> // for memset
|
|
||||||
#include <stdexcept>
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
template <int TYPE = PERF_TYPE_HARDWARE>
|
|
||||||
class LinuxEvents {
|
|
||||||
int fd;
|
|
||||||
perf_event_attr attribs;
|
|
||||||
int num_events;
|
|
||||||
std::vector<uint64_t> temp_result_vec;
|
|
||||||
std::vector<uint64_t> ids;
|
|
||||||
public:
|
|
||||||
LinuxEvents(std::vector<int> config_vec) : fd(0) {
|
|
||||||
memset(&attribs, 0, sizeof(attribs));
|
|
||||||
attribs.type = TYPE;
|
|
||||||
attribs.size = sizeof(attribs);
|
|
||||||
attribs.disabled = 1;
|
|
||||||
attribs.exclude_kernel = 1;
|
|
||||||
attribs.exclude_hv = 1;
|
|
||||||
|
|
||||||
attribs.sample_period = 0;
|
|
||||||
attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
|
|
||||||
const int pid = 0; // the current process
|
|
||||||
const int cpu = -1; // all CPUs
|
|
||||||
const unsigned long flags = 0;
|
|
||||||
|
|
||||||
int group = -1; // no group
|
|
||||||
num_events = config_vec.size();
|
|
||||||
u32 i = 0;
|
|
||||||
for (auto config: config_vec) {
|
|
||||||
attribs.config = config;
|
|
||||||
fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags);
|
|
||||||
if (fd == -1) {
|
|
||||||
report_error("perf_event_open");
|
|
||||||
}
|
|
||||||
ioctl(fd, PERF_EVENT_IOC_ID, &ids[i++]);
|
|
||||||
if (group == -1) {
|
|
||||||
group = fd;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
temp_result_vec.resize(num_events*2 + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
~LinuxEvents() {
|
|
||||||
close(fd);
|
|
||||||
}
|
|
||||||
|
|
||||||
really_inline void start() {
|
|
||||||
if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
|
|
||||||
report_error("ioctl(PERF_EVENT_IOC_RESET)");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
|
|
||||||
report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
really_inline void end(std::vector<unsigned long long> & results) {
|
|
||||||
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
|
|
||||||
report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (read(fd, &temp_result_vec[0], temp_result_vec.size() * 8) == -1) {
|
|
||||||
report_error("read");
|
|
||||||
}
|
|
||||||
// our actual results are in slots 1,3,5, ... of this structure
|
|
||||||
// we really should be checking our ids obtained earlier to be safe
|
|
||||||
for (u32 i = 1; i < temp_result_vec.size(); i+=2) {
|
|
||||||
results[i/2] = temp_result_vec[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
void report_error(const std::string& context) {
|
|
||||||
throw std::runtime_error(context + ": " + std::string(strerror(errno)));
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
|
@ -1,32 +1,39 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
/* Microsoft C/C++-compatible compiler */
|
||||||
|
#include <intrin.h>
|
||||||
|
#else
|
||||||
|
#include <immintrin.h>
|
||||||
#include <x86intrin.h>
|
#include <x86intrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
const u32 MAX_DEPTH = 256;
|
const u32 MAX_DEPTH = 256;
|
||||||
const u32 DEPTH_SAFETY_MARGIN = 32; // should be power-of-2 as we check this with a modulo in our
|
const u32 DEPTH_SAFETY_MARGIN = 32; // should be power-of-2 as we check this
|
||||||
// hot stage 3 loop
|
// with a modulo in our hot stage 3 loop
|
||||||
const u32 START_DEPTH = DEPTH_SAFETY_MARGIN;
|
const u32 START_DEPTH = DEPTH_SAFETY_MARGIN;
|
||||||
const u32 REDLINE_DEPTH = MAX_DEPTH - DEPTH_SAFETY_MARGIN;
|
const u32 REDLINE_DEPTH = MAX_DEPTH - DEPTH_SAFETY_MARGIN;
|
||||||
const size_t MAX_TAPE_ENTRIES = 127*1024;
|
const size_t MAX_TAPE_ENTRIES = 127 * 1024;
|
||||||
const size_t MAX_TAPE = MAX_DEPTH * MAX_TAPE_ENTRIES;
|
const size_t MAX_TAPE = MAX_DEPTH * MAX_TAPE_ENTRIES;
|
||||||
|
|
||||||
struct ParsedJson {
|
struct ParsedJson {
|
||||||
u8 * structurals;
|
size_t bytecapacity; // indicates how many bits are meant to be supported by
|
||||||
u32 n_structural_indexes;
|
// structurals
|
||||||
u32 * structural_indexes;
|
u8 *structurals;
|
||||||
|
u32 n_structural_indexes;
|
||||||
|
u32 *structural_indexes;
|
||||||
|
|
||||||
// grossly overprovisioned
|
// grossly overprovisioned
|
||||||
u64 tape[MAX_TAPE];
|
u64 tape[MAX_TAPE];
|
||||||
u32 tape_locs[MAX_DEPTH];
|
u32 tape_locs[MAX_DEPTH];
|
||||||
u8 string_buf[512*1024];
|
u8 string_buf[512 * 1024];
|
||||||
u8 * current_string_buf_loc;
|
u8 *current_string_buf_loc;
|
||||||
u8 number_buf[512*1024]; // holds either doubles or longs, really
|
u8 number_buf[512 * 1024]; // holds either doubles or longs, really
|
||||||
u8 * current_number_buf_loc;
|
u8 *current_number_buf_loc;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// all of this stuff needs to get moved somewhere reasonable
|
// all of this stuff needs to get moved somewhere reasonable
|
||||||
// like our ParsedJson structure
|
// like our ParsedJson structure
|
||||||
/*
|
/*
|
||||||
|
@ -38,37 +45,36 @@ extern u8 number_buf[512*1024]; // holds either doubles or longs, really
|
||||||
extern u8 * current_number_buf_loc;
|
extern u8 * current_number_buf_loc;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
inline void dump256(m256 d, string msg) {
|
inline void dump256(m256 d, string msg) {
|
||||||
for (u32 i = 0; i < 32; i++) {
|
for (u32 i = 0; i < 32; i++) {
|
||||||
std::cout << setw(3) << (int)*(((u8 *)(&d)) + i);
|
std::cout << setw(3) << (int)*(((u8 *)(&d)) + i);
|
||||||
if (!((i+1)%8))
|
if (!((i + 1) % 8))
|
||||||
std::cout << "|";
|
std::cout << "|";
|
||||||
else if (!((i+1)%4))
|
else if (!((i + 1) % 4))
|
||||||
std::cout << ":";
|
std::cout << ":";
|
||||||
else
|
else
|
||||||
std::cout << " ";
|
std::cout << " ";
|
||||||
}
|
}
|
||||||
std::cout << " " << msg << "\n";
|
std::cout << " " << msg << "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
// dump bits low to high
|
// dump bits low to high
|
||||||
void dumpbits(u64 v, string msg) {
|
void dumpbits(u64 v, string msg) {
|
||||||
for (u32 i = 0; i < 64; i++) {
|
for (u32 i = 0; i < 64; i++) {
|
||||||
std::cout << (((v>>(u64)i) & 0x1ULL) ? "1" : "_");
|
std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_");
|
||||||
}
|
}
|
||||||
std::cout << " " << msg << "\n";
|
std::cout << " " << msg << "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
void dumpbits32(u32 v, string msg) {
|
void dumpbits32(u32 v, string msg) {
|
||||||
for (u32 i = 0; i < 32; i++) {
|
for (u32 i = 0; i < 32; i++) {
|
||||||
std::cout << (((v>>(u32)i) & 0x1ULL) ? "1" : "_");
|
std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_");
|
||||||
}
|
}
|
||||||
std::cout << " " << msg << "\n";
|
std::cout << " " << msg << "\n";
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
#define dump256(a,b) ;
|
#define dump256(a, b) ;
|
||||||
#define dumpbits(a,b) ;
|
#define dumpbits(a, b) ;
|
||||||
#define dumpbits32(a,b) ;
|
#define dumpbits32(a, b) ;
|
||||||
#endif
|
#endif
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -3,4 +3,4 @@
|
||||||
#include "common_defs.h"
|
#include "common_defs.h"
|
||||||
#include "simdjson_internal.h"
|
#include "simdjson_internal.h"
|
||||||
|
|
||||||
bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & pj);
|
bool find_structural_bits(const u8 *buf, size_t len, ParsedJson &pj);
|
||||||
|
|
|
@ -3,5 +3,4 @@
|
||||||
#include "common_defs.h"
|
#include "common_defs.h"
|
||||||
#include "simdjson_internal.h"
|
#include "simdjson_internal.h"
|
||||||
|
|
||||||
bool flatten_indexes(size_t len, ParsedJson & pj);
|
bool flatten_indexes(size_t len, ParsedJson &pj);
|
||||||
|
|
||||||
|
|
|
@ -4,4 +4,4 @@
|
||||||
#include "simdjson_internal.h"
|
#include "simdjson_internal.h"
|
||||||
|
|
||||||
void init_state_machine();
|
void init_state_machine();
|
||||||
bool ape_machine(const u8 * buf, size_t len, ParsedJson & pj);
|
bool ape_machine(const u8 *buf, size_t len, ParsedJson &pj);
|
||||||
|
|
|
@ -3,5 +3,4 @@
|
||||||
#include "common_defs.h"
|
#include "common_defs.h"
|
||||||
#include "simdjson_internal.h"
|
#include "simdjson_internal.h"
|
||||||
|
|
||||||
bool shovel_machine(const u8 * buf, size_t len, ParsedJson & pj);
|
bool shovel_machine(const u8 *buf, size_t len, ParsedJson &pj);
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,182 @@
|
||||||
|
// automatically generated by generatetransitions.cpp
|
||||||
|
u32 trans[MAX_STATES][256] = {
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0,
|
||||||
|
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 7, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 11, 0, 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 11, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 11, 0, 0, 0,
|
||||||
|
0, 0, 0, 2, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 11, 0, 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 11, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};
|
|
@ -0,0 +1,10 @@
|
||||||
|
# Regenerates ../../include/transitions.h by building and running the
# generatetransitions program.

# `all` and `clean` name actions, not files: declare them phony so a stray
# file called "all" or "clean" cannot make them look up to date.
.PHONY: all clean

all: ../../include/transitions.h

# The header is the captured standard output of the generator.
../../include/transitions.h: generatetransitions
	./generatetransitions > ../../include/transitions.h

generatetransitions: generatetransitions.cpp
	$(CXX) -o generatetransitions generatetransitions.cpp -I../../include

# Removes only the generator binary; the generated header is left in place.
clean:
	rm -f generatetransitions
|
|
@ -0,0 +1,20 @@
|
||||||
|
#include "../src/stage3_ape_machine.cpp"
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
init_state_machine();
|
||||||
|
std::cout << "// automatically generated by generatetransitions.cpp"
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << " u32 trans[MAX_STATES][256] = {" << std::endl;
|
||||||
|
for (int k = 0; k < MAX_STATES; k++) {
|
||||||
|
std::cout << "{";
|
||||||
|
for (int z = 0; z < 255; z++) {
|
||||||
|
std::cout << trans[k][z] << ",";
|
||||||
|
}
|
||||||
|
std::cout << trans[k][255];
|
||||||
|
std::cout << "}";
|
||||||
|
if (k + 1 < MAX_STATES)
|
||||||
|
std::cout << ",";
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
std::cout << "};" << std::endl;
|
||||||
|
}
|
|
@ -0,0 +1,20 @@
|
||||||
|
#include "jsonioutil.h"
|
||||||
|
|
||||||
|
std::pair<u8 *, size_t> get_corpus(std::string filename) {
|
||||||
|
std::ifstream is(filename, std::ios::binary);
|
||||||
|
if (is) {
|
||||||
|
std::stringstream buffer;
|
||||||
|
buffer << is.rdbuf();
|
||||||
|
size_t length = buffer.str().size();
|
||||||
|
char *aligned_buffer;
|
||||||
|
if (posix_memalign((void **)&aligned_buffer, 64, ROUNDUP_N(length, 64))) {
|
||||||
|
throw std::runtime_error("Could not allocate sufficient memory");
|
||||||
|
};
|
||||||
|
memset(aligned_buffer, 0x20, ROUNDUP_N(length, 64));
|
||||||
|
memcpy(aligned_buffer, buffer.str().c_str(), length);
|
||||||
|
is.close();
|
||||||
|
return std::make_pair((u8 *)aligned_buffer, length);
|
||||||
|
}
|
||||||
|
throw std::runtime_error("could not load corpus");
|
||||||
|
return std::make_pair((u8 *)0, (size_t)0);
|
||||||
|
}
|
|
@ -0,0 +1,275 @@
|
||||||
|
|
||||||
|
#ifndef __AVX2__
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
|
// Per-byte classification table used by the scalar jsonminify below.
// Each input byte value c owns three consecutive entries starting at 3 * c:
//   [0] 1 if c is a double quote ('"'), else 0
//   [1] 0 if c is a backslash ('\\'), else 1
//   [2] 0 if c is tab, line feed, carriage return or space, else 1
// The table is only ever read, so it is const.
static const uint8_t jump_table[256 * 3] = {
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0,
    1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
};

// Scalar (non-AVX2) JSON minifier.
//
// Copies `howmany` bytes from `bytes` to `out`, dropping tab, line feed,
// carriage return and space bytes that lie outside double-quoted strings.
// Bytes inside a string are always kept, and a quote preceded by an escaping
// backslash does not open or close a string.
//
// Returns the number of bytes written. `out` must have room for `howmany`
// bytes. `out` may be the same pointer as `bytes`: the write position never
// runs ahead of the read position, and each byte is read before it is
// overwritten.
size_t jsonminify(const unsigned char *bytes, size_t howmany,
                  unsigned char *out) {
  size_t i = 0, pos = 0;
  uint8_t quote = 0;     // 1 while inside a string, 0 otherwise
  uint8_t nonescape = 1; // low bit is 0 only right after an escaping backslash

  while (i < howmany) {
    const unsigned char c = bytes[i];
    const uint8_t *meta = jump_table + 3 * c;

    // Toggle the in-string state on a quote unless the quote is escaped.
    quote = quote ^ (meta[0] & nonescape);
    // Always store the byte; it only "sticks" if pos advances below.
    out[pos] = c;
    // Keep the byte if it is not whitespace, or if we are inside a string.
    pos += meta[2] | quote;

    i += 1;
    // After a backslash that was itself escaped (low bit 0) the next byte is
    // unescaped again; otherwise the low bit becomes 0 exactly when c is a
    // backslash.
    nonescape = (~nonescape) | (meta[1]);
  }
  return pos;
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
/* Microsoft C/C++-compatible compiler */
|
||||||
|
#include <intrin.h>
|
||||||
|
#else
|
||||||
|
#include <immintrin.h>
|
||||||
|
#include <x86intrin.h>
|
||||||
|
#endif // _MSC_VER
|
||||||
|
|
||||||
|
#include "simdprune_tables.h"
|
||||||
|
#include <cstring>
|
||||||
|
#ifndef __clang__
|
||||||
|
// Builds a 256-bit vector from two independent unaligned 128-bit loads:
// *__addr_lo becomes the low half and *__addr_hi the high half.
// Only compiled when __clang__ is not defined (see the enclosing #ifndef),
// presumably because clang's headers already provide this intrinsic.
static inline __m256i _mm256_loadu2_m128i(__m128i const *__addr_hi,
                                          __m128i const *__addr_lo) {
  const __m128i lo_lane = _mm_loadu_si128(__addr_lo);
  const __m128i hi_lane = _mm_loadu_si128(__addr_hi);
  // Widen the low lane, then drop the high lane into the upper 128 bits.
  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo_lane), hi_lane, 1);
}
|
||||||
|
|
||||||
|
// Splits a 256-bit vector into its two 128-bit halves and writes each with
// an unaligned store: the low half to *__addr_lo first, then the high half
// to *__addr_hi.
static inline void _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo,
                                        __m256i __a) {
  _mm_storeu_si128(__addr_lo, _mm256_castsi256_si128(__a));
  _mm_storeu_si128(__addr_hi, _mm256_extractf128_si256(__a, 1));
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// a straightforward comparison of a mask against input.
|
||||||
|
static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,
|
||||||
|
__m256i mask) {
|
||||||
|
__m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
|
||||||
|
uint64_t res_0 = (uint32_t)_mm256_movemask_epi8(cmp_res_0);
|
||||||
|
__m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
|
||||||
|
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
|
||||||
|
return res_0 | (res_1 << 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
// take input from buf and remove useless whitespace, input and output can be
|
||||||
|
// the same
|
||||||
|
size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
||||||
|
// Useful constant masks
|
||||||
|
const uint64_t even_bits = 0x5555555555555555ULL;
|
||||||
|
const uint64_t odd_bits = ~even_bits;
|
||||||
|
uint8_t *initout(out);
|
||||||
|
uint64_t prev_iter_ends_odd_backslash =
|
||||||
|
0ULL; // either 0 or 1, but a 64-bit value
|
||||||
|
uint64_t prev_iter_inside_quote = 0ULL; // either all zeros or all ones
|
||||||
|
size_t idx = 0;
|
||||||
|
if (len >= 64) {
|
||||||
|
size_t avxlen = len - 63;
|
||||||
|
|
||||||
|
for (; idx < avxlen; idx += 64) {
|
||||||
|
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buf + idx + 0));
|
||||||
|
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buf + idx + 32));
|
||||||
|
uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi,
|
||||||
|
_mm256_set1_epi8('\\'));
|
||||||
|
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
|
||||||
|
uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
|
||||||
|
uint64_t even_starts = start_edges & even_start_mask;
|
||||||
|
uint64_t odd_starts = start_edges & ~even_start_mask;
|
||||||
|
uint64_t even_carries = bs_bits + even_starts;
|
||||||
|
uint64_t odd_carries;
|
||||||
|
bool iter_ends_odd_backslash = __builtin_uaddll_overflow(
|
||||||
|
bs_bits, odd_starts, (unsigned long long *)&odd_carries);
|
||||||
|
odd_carries |= prev_iter_ends_odd_backslash;
|
||||||
|
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
|
||||||
|
uint64_t even_carry_ends = even_carries & ~bs_bits;
|
||||||
|
uint64_t odd_carry_ends = odd_carries & ~bs_bits;
|
||||||
|
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
|
||||||
|
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
|
||||||
|
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
|
||||||
|
uint64_t quote_bits = cmp_mask_against_input_mini(input_lo, input_hi,
|
||||||
|
_mm256_set1_epi8('"'));
|
||||||
|
quote_bits = quote_bits & ~odd_ends;
|
||||||
|
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
||||||
|
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
||||||
|
quote_mask ^= prev_iter_inside_quote;
|
||||||
|
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);
|
||||||
|
const __m256i low_nibble_mask = _mm256_setr_epi8(
|
||||||
|
// 0 9 a b c d
|
||||||
|
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
|
||||||
|
const __m256i high_nibble_mask = _mm256_setr_epi8(
|
||||||
|
// 0 2 3 5 7
|
||||||
|
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0,
|
||||||
|
1, 0, 0, 0, 3, 2, 1, 0, 0);
|
||||||
|
__m256i whitespace_shufti_mask = _mm256_set1_epi8(0x18);
|
||||||
|
__m256i v_lo = _mm256_and_si256(
|
||||||
|
_mm256_shuffle_epi8(low_nibble_mask, input_lo),
|
||||||
|
_mm256_shuffle_epi8(high_nibble_mask,
|
||||||
|
_mm256_and_si256(_mm256_srli_epi32(input_lo, 4),
|
||||||
|
_mm256_set1_epi8(0x7f))));
|
||||||
|
|
||||||
|
__m256i v_hi = _mm256_and_si256(
|
||||||
|
_mm256_shuffle_epi8(low_nibble_mask, input_hi),
|
||||||
|
_mm256_shuffle_epi8(high_nibble_mask,
|
||||||
|
_mm256_and_si256(_mm256_srli_epi32(input_hi, 4),
|
||||||
|
_mm256_set1_epi8(0x7f))));
|
||||||
|
__m256i tmp_ws_lo = _mm256_cmpeq_epi8(
|
||||||
|
_mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0));
|
||||||
|
__m256i tmp_ws_hi = _mm256_cmpeq_epi8(
|
||||||
|
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
|
||||||
|
|
||||||
|
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
|
||||||
|
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
||||||
|
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
|
||||||
|
whitespace &= ~quote_mask;
|
||||||
|
int mask1 = whitespace & 0xFFFF;
|
||||||
|
int mask2 = (whitespace >> 16) & 0xFFFF;
|
||||||
|
int mask3 = (whitespace >> 32) & 0xFFFF;
|
||||||
|
int mask4 = (whitespace >> 48) & 0xFFFF;
|
||||||
|
int pop1 = _popcnt64((~whitespace) & 0xFFFF);
|
||||||
|
int pop2 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFF));
|
||||||
|
int pop3 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
|
||||||
|
int pop4 = _popcnt64((~whitespace));
|
||||||
|
__m256i vmask1 =
|
||||||
|
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF),
|
||||||
|
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF));
|
||||||
|
__m256i vmask2 =
|
||||||
|
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF),
|
||||||
|
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF));
|
||||||
|
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
|
||||||
|
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
|
||||||
|
_mm256_storeu2_m128i((__m128i *)(out + pop1), (__m128i *)out, result1);
|
||||||
|
_mm256_storeu2_m128i((__m128i *)(out + pop3), (__m128i *)(out + pop2),
|
||||||
|
result2);
|
||||||
|
out += pop4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// we finish off the job... copying and pasting the code is not ideal here,
|
||||||
|
// but it gets the job done.
|
||||||
|
if (idx < len) {
|
||||||
|
uint8_t buffer[64];
|
||||||
|
memset(buffer, 0, 64);
|
||||||
|
memcpy(buffer, buf + idx, len - idx);
|
||||||
|
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buffer));
|
||||||
|
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buffer + 32));
|
||||||
|
uint64_t bs_bits =
|
||||||
|
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\'));
|
||||||
|
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
|
||||||
|
uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
|
||||||
|
uint64_t even_starts = start_edges & even_start_mask;
|
||||||
|
uint64_t odd_starts = start_edges & ~even_start_mask;
|
||||||
|
uint64_t even_carries = bs_bits + even_starts;
|
||||||
|
uint64_t odd_carries;
|
||||||
|
bool iter_ends_odd_backslash = __builtin_uaddll_overflow(
|
||||||
|
bs_bits, odd_starts, (unsigned long long *)&odd_carries);
|
||||||
|
odd_carries |= prev_iter_ends_odd_backslash;
|
||||||
|
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
|
||||||
|
uint64_t even_carry_ends = even_carries & ~bs_bits;
|
||||||
|
uint64_t odd_carry_ends = odd_carries & ~bs_bits;
|
||||||
|
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
|
||||||
|
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
|
||||||
|
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
|
||||||
|
uint64_t quote_bits =
|
||||||
|
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('"'));
|
||||||
|
quote_bits = quote_bits & ~odd_ends;
|
||||||
|
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
||||||
|
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
||||||
|
quote_mask ^= prev_iter_inside_quote;
|
||||||
|
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);
|
||||||
|
|
||||||
|
__m256i mask_20 = _mm256_set1_epi8(0x20); // c==32
|
||||||
|
__m256i mask_70 =
|
||||||
|
_mm256_set1_epi8(0x70); // adding 0x70 does not check low 4-bits
|
||||||
|
// but moves any value >= 16 above 128
|
||||||
|
|
||||||
|
__m256i lut_cntrl = _mm256_setr_epi8(
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00,
|
||||||
|
0x00, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00);
|
||||||
|
|
||||||
|
__m256i tmp_ws_lo = _mm256_or_si256(
|
||||||
|
_mm256_cmpeq_epi8(mask_20, input_lo),
|
||||||
|
_mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_lo)));
|
||||||
|
__m256i tmp_ws_hi = _mm256_or_si256(
|
||||||
|
_mm256_cmpeq_epi8(mask_20, input_hi),
|
||||||
|
_mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_hi)));
|
||||||
|
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
|
||||||
|
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
||||||
|
uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32));
|
||||||
|
whitespace &= ~quote_mask;
|
||||||
|
|
||||||
|
if (len - idx < 64) {
|
||||||
|
whitespace |= UINT64_C(0xFFFFFFFFFFFFFFFF) << (len - idx);
|
||||||
|
}
|
||||||
|
int mask1 = whitespace & 0xFFFF;
|
||||||
|
int mask2 = (whitespace >> 16) & 0xFFFF;
|
||||||
|
int mask3 = (whitespace >> 32) & 0xFFFF;
|
||||||
|
int mask4 = (whitespace >> 48) & 0xFFFF;
|
||||||
|
int pop1 = _popcnt64((~whitespace) & 0xFFFF);
|
||||||
|
int pop2 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFF));
|
||||||
|
int pop3 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
|
||||||
|
int pop4 = _popcnt64((~whitespace));
|
||||||
|
__m256i vmask1 =
|
||||||
|
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF),
|
||||||
|
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF));
|
||||||
|
__m256i vmask2 =
|
||||||
|
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF),
|
||||||
|
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF));
|
||||||
|
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
|
||||||
|
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
|
||||||
|
_mm256_storeu2_m128i((__m128i *)(buffer + pop1), (__m128i *)buffer,
|
||||||
|
result1);
|
||||||
|
_mm256_storeu2_m128i((__m128i *)(buffer + pop3), (__m128i *)(buffer + pop2),
|
||||||
|
result2);
|
||||||
|
memcpy(out, buffer, pop4);
|
||||||
|
out += pop4;
|
||||||
|
}
|
||||||
|
return out - initout;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,65 @@
|
||||||
|
#include "jsonparser.h"

#include <cstdlib>
#include <new>
|
||||||
|
|
||||||
|
// allocate a ParsedJson structure that can support document
|
||||||
|
// up to len bytes.
|
||||||
|
// returns NULL if memory cannot be allocated
|
||||||
|
// This structure is meant to be reused from document to document, as needed.
|
||||||
|
// you can use deallocate_ParsedJson to deallocate the memory.
|
||||||
|
ParsedJson *allocate_ParsedJson(size_t len) {
|
||||||
|
if (len > 0xffffff) {
|
||||||
|
std::cerr << "Currently only support JSON files < 16MB, requested length: "
|
||||||
|
<< len << std::endl;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
ParsedJson *pj_ptr = new ParsedJson;
|
||||||
|
if (pj_ptr == NULL) {
|
||||||
|
std::cerr << "Could not allocate memory for core struct." << std::endl;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
ParsedJson &pj(*pj_ptr);
|
||||||
|
pj.bytecapacity = len;
|
||||||
|
if (posix_memalign((void **)&pj.structurals, 8, ROUNDUP_N(len, 64) / 8)) {
|
||||||
|
std::cerr << "Could not allocate memory for structurals" << std::endl;
|
||||||
|
delete pj_ptr;
|
||||||
|
return NULL;
|
||||||
|
};
|
||||||
|
pj.n_structural_indexes = 0;
|
||||||
|
u32 max_structures = ROUNDUP_N(len, 64) + 2 + 7;
|
||||||
|
pj.structural_indexes = new u32[max_structures];
|
||||||
|
if (pj.structural_indexes == NULL) {
|
||||||
|
std::cerr << "Could not allocate memory for structural_indexes"
|
||||||
|
<< std::endl;
|
||||||
|
delete[] pj.structurals;
|
||||||
|
delete pj_ptr;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return pj_ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Releases a ParsedJson obtained from allocate_ParsedJson together with the
// buffers it owns. Passing NULL is allowed and does nothing.
void deallocate_ParsedJson(ParsedJson *pj_ptr) {
  if (pj_ptr == nullptr) {
    return;
  }
  // structural_indexes is created with new[] in allocate_ParsedJson.
  delete[] pj_ptr->structural_indexes;
  // structurals is created with posix_memalign in allocate_ParsedJson, so it
  // has to be returned with free(); handing it to delete[] is undefined
  // behaviour.
  free(pj_ptr->structurals);
  delete pj_ptr;
}
|
||||||
|
|
||||||
|
// parse a document found in buf, need to preallocate ParsedJson.
|
||||||
|
bool json_parse(const u8 *buf, size_t len, ParsedJson &pj) {
|
||||||
|
if (pj.bytecapacity < len) {
|
||||||
|
std::cerr << "Your ParsedJson cannot support documents that big: " << len
|
||||||
|
<< std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
bool isok = find_structural_bits(buf, len, pj);
|
||||||
|
if (isok) {
|
||||||
|
isok = flatten_indexes(len, pj);
|
||||||
|
}
|
||||||
|
if (isok) {
|
||||||
|
isok = ape_machine(buf, len, pj);
|
||||||
|
}
|
||||||
|
if (isok) {
|
||||||
|
isok = shovel_machine(buf, len, pj);
|
||||||
|
}
|
||||||
|
return isok;
|
||||||
|
}
|
|
@ -1,196 +1,219 @@
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
/* Microsoft C/C++-compatible compiler */
|
||||||
|
#include <intrin.h>
|
||||||
|
#else
|
||||||
|
#include <immintrin.h>
|
||||||
#include <x86intrin.h>
|
#include <x86intrin.h>
|
||||||
#include <assert.h>
|
#endif
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
#include "common_defs.h"
|
#include "common_defs.h"
|
||||||
#include "simdjson_internal.h"
|
#include "simdjson_internal.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
// a straightforward comparison of a mask against input. 5 uops; would be cheaper in AVX512.
|
// a straightforward comparison of a mask against input. 5 uops; would be
|
||||||
really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask) {
|
// cheaper in AVX512.
|
||||||
m256 cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
|
really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi,
|
||||||
u64 res_0 = (u32)_mm256_movemask_epi8(cmp_res_0);
|
m256 mask) {
|
||||||
m256 cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
|
m256 cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
|
||||||
u64 res_1 = _mm256_movemask_epi8(cmp_res_1);
|
u64 res_0 = (u32)_mm256_movemask_epi8(cmp_res_0);
|
||||||
return res_0 | (res_1 << 32);
|
m256 cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
|
||||||
|
u64 res_1 = _mm256_movemask_epi8(cmp_res_1);
|
||||||
|
return res_0 | (res_1 << 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*never_inline*/ bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & pj) {
|
/*never_inline*/ bool find_structural_bits(const u8 *buf, size_t len,
|
||||||
if (len > 0xffffff) {
|
ParsedJson &pj) {
|
||||||
cerr << "Currently only support JSON files < 16MB\n";
|
if (len > 0xffffff) {
|
||||||
return false;
|
cerr << "Currently only support JSON files < 16MB\n";
|
||||||
}
|
return false;
|
||||||
// Useful constant masks
|
}
|
||||||
const u64 even_bits = 0x5555555555555555ULL;
|
// Useful constant masks
|
||||||
const u64 odd_bits = ~even_bits;
|
const u64 even_bits = 0x5555555555555555ULL;
|
||||||
|
const u64 odd_bits = ~even_bits;
|
||||||
|
|
||||||
// for now, just work in 64-byte chunks
|
// for now, just work in 64-byte chunks
|
||||||
// we have padded the input out to 64 byte multiple with the remainder being zeros
|
// we have padded the input out to 64 byte multiple with the remainder being
|
||||||
|
// zeros
|
||||||
|
|
||||||
// persistent state across loop
|
// persistent state across loop
|
||||||
u64 prev_iter_ends_odd_backslash = 0ULL; // either 0 or 1, but a 64-bit value
|
u64 prev_iter_ends_odd_backslash = 0ULL; // either 0 or 1, but a 64-bit value
|
||||||
u64 prev_iter_inside_quote = 0ULL; // either all zeros or all ones
|
u64 prev_iter_inside_quote = 0ULL; // either all zeros or all ones
|
||||||
u64 prev_iter_ends_pseudo_pred = 0ULL;
|
u64 prev_iter_ends_pseudo_pred = 0ULL;
|
||||||
|
|
||||||
for (size_t idx = 0; idx < len; idx+=64) {
|
for (size_t idx = 0; idx < len; idx += 64) {
|
||||||
__builtin_prefetch(buf + idx + 128);
|
__builtin_prefetch(buf + idx + 128);
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
cout << "Idx is " << idx << "\n";
|
cout << "Idx is " << idx << "\n";
|
||||||
for (u32 j = 0; j < 64; j++) {
|
for (u32 j = 0; j < 64; j++) {
|
||||||
char c = *(buf+idx+j);
|
char c = *(buf + idx + j);
|
||||||
if (isprint(c)) {
|
if (isprint(c)) {
|
||||||
cout << c;
|
cout << c;
|
||||||
} else {
|
} else {
|
||||||
cout << '_';
|
cout << '_';
|
||||||
}
|
}
|
||||||
}
|
|
||||||
cout << "| ... input\n";
|
|
||||||
#endif
|
|
||||||
m256 input_lo = _mm256_load_si256((const m256 *)(buf + idx + 0));
|
|
||||||
m256 input_hi = _mm256_load_si256((const m256 *)(buf + idx + 32));
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Step 1: detect odd sequences of backslashes
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
u64 bs_bits = cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('\\'));
|
|
||||||
dumpbits(bs_bits, "backslash bits");
|
|
||||||
u64 start_edges = bs_bits & ~(bs_bits << 1);
|
|
||||||
dumpbits(start_edges, "start_edges");
|
|
||||||
|
|
||||||
// flip lowest if we have an odd-length run at the end of the prior iteration
|
|
||||||
u64 even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
|
|
||||||
u64 even_starts = start_edges & even_start_mask;
|
|
||||||
u64 odd_starts = start_edges & ~even_start_mask;
|
|
||||||
|
|
||||||
dumpbits(even_starts, "even_starts");
|
|
||||||
dumpbits(odd_starts, "odd_starts");
|
|
||||||
|
|
||||||
u64 even_carries = bs_bits + even_starts;
|
|
||||||
|
|
||||||
u64 odd_carries;
|
|
||||||
// must record the carry-out of our odd-carries out of bit 63; this indicates whether the
|
|
||||||
// sense of any edge going to the next iteration should be flipped
|
|
||||||
bool iter_ends_odd_backslash = __builtin_uaddll_overflow(bs_bits, odd_starts, &odd_carries);
|
|
||||||
|
|
||||||
odd_carries |= prev_iter_ends_odd_backslash; // push in bit zero as a potential end
|
|
||||||
// if we had an odd-numbered run at the end of
|
|
||||||
// the previous iteration
|
|
||||||
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
|
|
||||||
|
|
||||||
dumpbits(even_carries, "even_carries");
|
|
||||||
dumpbits(odd_carries, "odd_carries");
|
|
||||||
|
|
||||||
u64 even_carry_ends = even_carries & ~bs_bits;
|
|
||||||
u64 odd_carry_ends = odd_carries & ~bs_bits;
|
|
||||||
dumpbits(even_carry_ends, "even_carry_ends");
|
|
||||||
dumpbits(odd_carry_ends, "odd_carry_ends");
|
|
||||||
|
|
||||||
u64 even_start_odd_end = even_carry_ends & odd_bits;
|
|
||||||
u64 odd_start_even_end = odd_carry_ends & even_bits;
|
|
||||||
dumpbits(even_start_odd_end, "esoe");
|
|
||||||
dumpbits(odd_start_even_end, "osee");
|
|
||||||
|
|
||||||
u64 odd_ends = even_start_odd_end | odd_start_even_end;
|
|
||||||
dumpbits(odd_ends, "odd_ends");
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Step 2: detect insides of quote pairs
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
u64 quote_bits = cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('"'));
|
|
||||||
quote_bits = quote_bits & ~odd_ends;
|
|
||||||
dumpbits(quote_bits, "quote_bits");
|
|
||||||
u64 quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(_mm_set_epi64x(0ULL, quote_bits),
|
|
||||||
_mm_set1_epi8(0xFF), 0));
|
|
||||||
quote_mask ^= prev_iter_inside_quote;
|
|
||||||
prev_iter_inside_quote = (u64)((s64)quote_mask>>63);
|
|
||||||
dumpbits(quote_mask, "quote_mask");
|
|
||||||
|
|
||||||
// How do we build up a user traversable data structure
|
|
||||||
// first, do a 'shufti' to detect structural JSON characters
|
|
||||||
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
|
|
||||||
// these go into the first 3 buckets of the comparison (1/2/4)
|
|
||||||
|
|
||||||
// we are also interested in the four whitespace characters
|
|
||||||
// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
|
|
||||||
// these go into the next 2 buckets of the comparison (8/16)
|
|
||||||
const m256 low_nibble_mask = _mm256_setr_epi8(
|
|
||||||
// 0 9 a b c d
|
|
||||||
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0,
|
|
||||||
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0
|
|
||||||
);
|
|
||||||
const m256 high_nibble_mask = _mm256_setr_epi8(
|
|
||||||
// 0 2 3 5 7
|
|
||||||
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0,
|
|
||||||
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0
|
|
||||||
);
|
|
||||||
|
|
||||||
m256 structural_shufti_mask = _mm256_set1_epi8(0x7);
|
|
||||||
m256 whitespace_shufti_mask = _mm256_set1_epi8(0x18);
|
|
||||||
|
|
||||||
m256 v_lo = _mm256_and_si256(
|
|
||||||
_mm256_shuffle_epi8(low_nibble_mask, input_lo),
|
|
||||||
_mm256_shuffle_epi8(high_nibble_mask,
|
|
||||||
_mm256_and_si256(_mm256_srli_epi32(input_lo, 4), _mm256_set1_epi8(0x7f))));
|
|
||||||
|
|
||||||
m256 v_hi = _mm256_and_si256(
|
|
||||||
_mm256_shuffle_epi8(low_nibble_mask, input_hi),
|
|
||||||
_mm256_shuffle_epi8(high_nibble_mask,
|
|
||||||
_mm256_and_si256(_mm256_srli_epi32(input_hi, 4), _mm256_set1_epi8(0x7f))));
|
|
||||||
m256 tmp_lo = _mm256_cmpeq_epi8(_mm256_and_si256(v_lo, structural_shufti_mask),
|
|
||||||
_mm256_set1_epi8(0));
|
|
||||||
m256 tmp_hi = _mm256_cmpeq_epi8(_mm256_and_si256(v_hi, structural_shufti_mask),
|
|
||||||
_mm256_set1_epi8(0));
|
|
||||||
|
|
||||||
u64 structural_res_0 = (u32)_mm256_movemask_epi8(tmp_lo);
|
|
||||||
u64 structural_res_1 = _mm256_movemask_epi8(tmp_hi);
|
|
||||||
u64 structurals = ~(structural_res_0 | (structural_res_1 << 32));
|
|
||||||
|
|
||||||
// this additional mask and transfer is non-trivially expensive, unfortunately
|
|
||||||
m256 tmp_ws_lo = _mm256_cmpeq_epi8(_mm256_and_si256(v_lo, whitespace_shufti_mask),
|
|
||||||
_mm256_set1_epi8(0));
|
|
||||||
m256 tmp_ws_hi = _mm256_cmpeq_epi8(_mm256_and_si256(v_hi, whitespace_shufti_mask),
|
|
||||||
_mm256_set1_epi8(0));
|
|
||||||
|
|
||||||
u64 ws_res_0 = (u32)_mm256_movemask_epi8(tmp_ws_lo);
|
|
||||||
u64 ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
|
||||||
u64 whitespace = ~(ws_res_0 | (ws_res_1 << 32));
|
|
||||||
|
|
||||||
dumpbits(structurals, "structurals");
|
|
||||||
dumpbits(whitespace, "whitespace");
|
|
||||||
|
|
||||||
// mask off anything inside quotes
|
|
||||||
structurals &= ~quote_mask;
|
|
||||||
|
|
||||||
// add the real quote bits back into our bitmask as well, so we can
|
|
||||||
// quickly traverse the strings we've spent all this trouble gathering
|
|
||||||
structurals |= quote_bits;
|
|
||||||
|
|
||||||
// Now, establish "pseudo-structural characters". These are non-whitespace characters
|
|
||||||
// that are (a) outside quotes and (b) have a predecessor that's either whitespace or a structural
|
|
||||||
// character. This means that subsequent passes will get a chance to encounter the first character
|
|
||||||
// of every string of non-whitespace and, if we're parsing an atom like true/false/null or a number
|
|
||||||
// we can stop at the first whitespace or structural character following it.
|
|
||||||
|
|
||||||
// a qualified predecessor is something that can happen 1 position before an
|
|
||||||
// pseudo-structural character
|
|
||||||
u64 pseudo_pred = structurals | whitespace;
|
|
||||||
dumpbits(pseudo_pred, "pseudo_pred");
|
|
||||||
u64 shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
|
|
||||||
dumpbits(shifted_pseudo_pred, "shifted_pseudo_pred");
|
|
||||||
prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
|
|
||||||
u64 pseudo_structurals = shifted_pseudo_pred & (~whitespace) & (~quote_mask);
|
|
||||||
dumpbits(pseudo_structurals, "pseudo_structurals");
|
|
||||||
dumpbits(structurals, "final structurals without pseudos");
|
|
||||||
structurals |= pseudo_structurals;
|
|
||||||
dumpbits(structurals, "final structurals and pseudo structurals");
|
|
||||||
|
|
||||||
// now, we've used our close quotes all we need to. So let's switch them off
|
|
||||||
// they will be off in the quote mask and on in quote bits.
|
|
||||||
structurals &= ~(quote_bits & ~quote_mask);
|
|
||||||
dumpbits(structurals, "final structurals and pseudo structurals after close quote removal");
|
|
||||||
*(u64 *)(pj.structurals + idx/8) = structurals;
|
|
||||||
}
|
}
|
||||||
return true;
|
cout << "| ... input\n";
|
||||||
|
#endif
|
||||||
|
m256 input_lo = _mm256_load_si256((const m256 *)(buf + idx + 0));
|
||||||
|
m256 input_hi = _mm256_load_si256((const m256 *)(buf + idx + 32));
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Step 1: detect odd sequences of backslashes
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
u64 bs_bits =
|
||||||
|
cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('\\'));
|
||||||
|
dumpbits(bs_bits, "backslash bits");
|
||||||
|
u64 start_edges = bs_bits & ~(bs_bits << 1);
|
||||||
|
dumpbits(start_edges, "start_edges");
|
||||||
|
|
||||||
|
// flip lowest if we have an odd-length run at the end of the prior
|
||||||
|
// iteration
|
||||||
|
u64 even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
|
||||||
|
u64 even_starts = start_edges & even_start_mask;
|
||||||
|
u64 odd_starts = start_edges & ~even_start_mask;
|
||||||
|
|
||||||
|
dumpbits(even_starts, "even_starts");
|
||||||
|
dumpbits(odd_starts, "odd_starts");
|
||||||
|
|
||||||
|
u64 even_carries = bs_bits + even_starts;
|
||||||
|
|
||||||
|
u64 odd_carries;
|
||||||
|
// must record the carry-out of our odd-carries out of bit 63; this
|
||||||
|
// indicates whether the sense of any edge going to the next iteration
|
||||||
|
// should be flipped
|
||||||
|
bool iter_ends_odd_backslash =
|
||||||
|
__builtin_uaddll_overflow(bs_bits, odd_starts, &odd_carries);
|
||||||
|
|
||||||
|
odd_carries |=
|
||||||
|
prev_iter_ends_odd_backslash; // push in bit zero as a potential end
|
||||||
|
// if we had an odd-numbered run at the
|
||||||
|
// end of the previous iteration
|
||||||
|
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
|
||||||
|
|
||||||
|
dumpbits(even_carries, "even_carries");
|
||||||
|
dumpbits(odd_carries, "odd_carries");
|
||||||
|
|
||||||
|
u64 even_carry_ends = even_carries & ~bs_bits;
|
||||||
|
u64 odd_carry_ends = odd_carries & ~bs_bits;
|
||||||
|
dumpbits(even_carry_ends, "even_carry_ends");
|
||||||
|
dumpbits(odd_carry_ends, "odd_carry_ends");
|
||||||
|
|
||||||
|
u64 even_start_odd_end = even_carry_ends & odd_bits;
|
||||||
|
u64 odd_start_even_end = odd_carry_ends & even_bits;
|
||||||
|
dumpbits(even_start_odd_end, "esoe");
|
||||||
|
dumpbits(odd_start_even_end, "osee");
|
||||||
|
|
||||||
|
u64 odd_ends = even_start_odd_end | odd_start_even_end;
|
||||||
|
dumpbits(odd_ends, "odd_ends");
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Step 2: detect insides of quote pairs
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
u64 quote_bits =
|
||||||
|
cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('"'));
|
||||||
|
quote_bits = quote_bits & ~odd_ends;
|
||||||
|
dumpbits(quote_bits, "quote_bits");
|
||||||
|
u64 quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
||||||
|
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
||||||
|
quote_mask ^= prev_iter_inside_quote;
|
||||||
|
prev_iter_inside_quote = (u64)((s64)quote_mask >> 63);
|
||||||
|
dumpbits(quote_mask, "quote_mask");
|
||||||
|
|
||||||
|
// How do we build up a user traversable data structure
|
||||||
|
// first, do a 'shufti' to detect structural JSON characters
|
||||||
|
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
|
||||||
|
// these go into the first 3 buckets of the comparison (1/2/4)
|
||||||
|
|
||||||
|
// we are also interested in the four whitespace characters
|
||||||
|
// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
|
||||||
|
// these go into the next 2 buckets of the comparison (8/16)
|
||||||
|
const m256 low_nibble_mask = _mm256_setr_epi8(
|
||||||
|
// 0 9 a b c d
|
||||||
|
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 8, 12, 1, 2, 9, 0, 0);
|
||||||
|
const m256 high_nibble_mask = _mm256_setr_epi8(
|
||||||
|
// 0 2 3 5 7
|
||||||
|
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0,
|
||||||
|
1, 0, 0, 0, 3, 2, 1, 0, 0);
|
||||||
|
|
||||||
|
m256 structural_shufti_mask = _mm256_set1_epi8(0x7);
|
||||||
|
m256 whitespace_shufti_mask = _mm256_set1_epi8(0x18);
|
||||||
|
|
||||||
|
m256 v_lo = _mm256_and_si256(
|
||||||
|
_mm256_shuffle_epi8(low_nibble_mask, input_lo),
|
||||||
|
_mm256_shuffle_epi8(high_nibble_mask,
|
||||||
|
_mm256_and_si256(_mm256_srli_epi32(input_lo, 4),
|
||||||
|
_mm256_set1_epi8(0x7f))));
|
||||||
|
|
||||||
|
m256 v_hi = _mm256_and_si256(
|
||||||
|
_mm256_shuffle_epi8(low_nibble_mask, input_hi),
|
||||||
|
_mm256_shuffle_epi8(high_nibble_mask,
|
||||||
|
_mm256_and_si256(_mm256_srli_epi32(input_hi, 4),
|
||||||
|
_mm256_set1_epi8(0x7f))));
|
||||||
|
m256 tmp_lo = _mm256_cmpeq_epi8(
|
||||||
|
_mm256_and_si256(v_lo, structural_shufti_mask), _mm256_set1_epi8(0));
|
||||||
|
m256 tmp_hi = _mm256_cmpeq_epi8(
|
||||||
|
_mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0));
|
||||||
|
|
||||||
|
u64 structural_res_0 = (u32)_mm256_movemask_epi8(tmp_lo);
|
||||||
|
u64 structural_res_1 = _mm256_movemask_epi8(tmp_hi);
|
||||||
|
u64 structurals = ~(structural_res_0 | (structural_res_1 << 32));
|
||||||
|
|
||||||
|
// this additional mask and transfer is non-trivially expensive,
|
||||||
|
// unfortunately
|
||||||
|
m256 tmp_ws_lo = _mm256_cmpeq_epi8(
|
||||||
|
_mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0));
|
||||||
|
m256 tmp_ws_hi = _mm256_cmpeq_epi8(
|
||||||
|
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
|
||||||
|
|
||||||
|
u64 ws_res_0 = (u32)_mm256_movemask_epi8(tmp_ws_lo);
|
||||||
|
u64 ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
||||||
|
u64 whitespace = ~(ws_res_0 | (ws_res_1 << 32));
|
||||||
|
|
||||||
|
dumpbits(structurals, "structurals");
|
||||||
|
dumpbits(whitespace, "whitespace");
|
||||||
|
|
||||||
|
// mask off anything inside quotes
|
||||||
|
structurals &= ~quote_mask;
|
||||||
|
|
||||||
|
// add the real quote bits back into our bitmask as well, so we can
|
||||||
|
// quickly traverse the strings we've spent all this trouble gathering
|
||||||
|
structurals |= quote_bits;
|
||||||
|
|
||||||
|
// Now, establish "pseudo-structural characters". These are non-whitespace
|
||||||
|
// characters that are (a) outside quotes and (b) have a predecessor that's
|
||||||
|
// either whitespace or a structural character. This means that subsequent
|
||||||
|
// passes will get a chance to encounter the first character of every string
|
||||||
|
// of non-whitespace and, if we're parsing an atom like true/false/null or a
|
||||||
|
// number we can stop at the first whitespace or structural character
|
||||||
|
// following it.
|
||||||
|
|
||||||
|
// a qualified predecessor is something that can happen 1 position before an
|
||||||
|
// pseudo-structural character
|
||||||
|
u64 pseudo_pred = structurals | whitespace;
|
||||||
|
dumpbits(pseudo_pred, "pseudo_pred");
|
||||||
|
u64 shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
|
||||||
|
dumpbits(shifted_pseudo_pred, "shifted_pseudo_pred");
|
||||||
|
prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
|
||||||
|
u64 pseudo_structurals =
|
||||||
|
shifted_pseudo_pred & (~whitespace) & (~quote_mask);
|
||||||
|
dumpbits(pseudo_structurals, "pseudo_structurals");
|
||||||
|
dumpbits(structurals, "final structurals without pseudos");
|
||||||
|
structurals |= pseudo_structurals;
|
||||||
|
dumpbits(structurals, "final structurals and pseudo structurals");
|
||||||
|
|
||||||
|
// now, we've used our close quotes all we need to. So let's switch them off
|
||||||
|
// they will be off in the quote mask and on in quote bits.
|
||||||
|
structurals &= ~(quote_bits & ~quote_mask);
|
||||||
|
dumpbits(
|
||||||
|
structurals,
|
||||||
|
"final structurals and pseudo structurals after close quote removal");
|
||||||
|
*(u64 *)(pj.structurals + idx / 8) = structurals;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,12 @@
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
/* Microsoft C/C++-compatible compiler */
|
||||||
|
#include <intrin.h>
|
||||||
|
#else
|
||||||
|
#include <immintrin.h>
|
||||||
#include <x86intrin.h>
|
#include <x86intrin.h>
|
||||||
#include <assert.h>
|
#endif
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
#include "common_defs.h"
|
#include "common_defs.h"
|
||||||
#include "simdjson_internal.h"
|
#include "simdjson_internal.h"
|
||||||
|
@ -8,8 +15,9 @@
|
||||||
#define NO_PDEP_WIDTH 8
|
#define NO_PDEP_WIDTH 8
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define SET_BIT(i) base_ptr[base+i] = (u32)idx + __builtin_ctzll(s);\
|
#define SET_BIT(i) \
|
||||||
s = s & (s - 1);
|
base_ptr[base + i] = (u32)idx + __builtin_ctzll(s); \
|
||||||
|
s = s & (s - 1);
|
||||||
|
|
||||||
#define SET_BIT1 SET_BIT(0)
|
#define SET_BIT1 SET_BIT(0)
|
||||||
#define SET_BIT2 SET_BIT1 SET_BIT(1)
|
#define SET_BIT2 SET_BIT1 SET_BIT(1)
|
||||||
|
@ -28,76 +36,84 @@ s = s & (s - 1);
|
||||||
#define SET_BIT15 SET_BIT14 SET_BIT(14)
|
#define SET_BIT15 SET_BIT14 SET_BIT(14)
|
||||||
#define SET_BIT16 SET_BIT15 SET_BIT(15)
|
#define SET_BIT16 SET_BIT15 SET_BIT(15)
|
||||||
|
|
||||||
#define CALL(macro, ...) macro(__VA_ARGS__)
|
#define CALL(macro, ...) macro(__VA_ARGS__)
|
||||||
|
|
||||||
#define SET_BITLOOPN(n) SET_BIT##n
|
#define SET_BITLOOPN(n) SET_BIT##n
|
||||||
|
|
||||||
|
|
||||||
// just transform the bitmask to a big list of 32-bit integers for now
|
// just transform the bitmask to a big list of 32-bit integers for now
|
||||||
// that's all; the type of character the offset points to will
|
// that's all; the type of character the offset points to will
|
||||||
// tell us exactly what we need to know. Naive but straightforward implementation
|
// tell us exactly what we need to know. Naive but straightforward
|
||||||
bool flatten_indexes(size_t len, ParsedJson & pj) {
|
// implementation
|
||||||
u32 * base_ptr = pj.structural_indexes;
|
bool flatten_indexes(size_t len, ParsedJson &pj) {
|
||||||
u32 base = 0;
|
u32 *base_ptr = pj.structural_indexes;
|
||||||
|
u32 base = 0;
|
||||||
#ifdef BUILDHISTOGRAM
|
#ifdef BUILDHISTOGRAM
|
||||||
uint32_t counters [65];
|
uint32_t counters[65];
|
||||||
uint32_t total = 0;
|
uint32_t total = 0;
|
||||||
for(int k = 0; k < 66; k++) counters[k] = 0;
|
for (int k = 0; k < 66; k++)
|
||||||
for (size_t idx = 0; idx < len; idx+=64) {
|
counters[k] = 0;
|
||||||
u64 s = *(u64 *)(pj.structurals + idx/8);
|
for (size_t idx = 0; idx < len; idx += 64) {
|
||||||
u32 cnt = __builtin_popcountll(s);
|
u64 s = *(u64 *)(pj.structurals + idx / 8);
|
||||||
total ++;
|
u32 cnt = __builtin_popcountll(s);
|
||||||
counters[cnt]++;
|
total++;
|
||||||
}
|
counters[cnt]++;
|
||||||
printf("\n histogram:\n");
|
}
|
||||||
for(int k = 0; k < 66; k++) {
|
printf("\n histogram:\n");
|
||||||
if(counters[k]>0)printf("%10d %10.u %10.3f \n", k, counters[k], counters[k] * 1.0 / total);
|
for (int k = 0; k < 66; k++) {
|
||||||
}
|
if (counters[k] > 0)
|
||||||
printf("\n\n");
|
printf("%10d %10.u %10.3f \n", k, counters[k], counters[k] * 1.0 / total);
|
||||||
|
}
|
||||||
|
printf("\n\n");
|
||||||
#endif
|
#endif
|
||||||
for (size_t idx = 0; idx < len; idx+=64) {
|
for (size_t idx = 0; idx < len; idx += 64) {
|
||||||
u64 s = *(u64 *)(pj.structurals + idx/8);
|
u64 s = *(u64 *)(pj.structurals + idx / 8);
|
||||||
#ifdef SUPPRESS_CHEESY_FLATTEN
|
#ifdef SUPPRESS_CHEESY_FLATTEN
|
||||||
while (s) {
|
while (s) {
|
||||||
base_ptr[base++] = (u32)idx + __builtin_ctzll(s); s &= s - 1ULL;
|
base_ptr[base++] = (u32)idx + __builtin_ctzll(s);
|
||||||
}
|
s &= s - 1ULL;
|
||||||
#elif defined(NO_PDEP_PLEASE)
|
|
||||||
u32 cnt = __builtin_popcountll(s);
|
|
||||||
u32 next_base = base + cnt;
|
|
||||||
while (s) {
|
|
||||||
CALL(SET_BITLOOPN,NO_PDEP_WIDTH)
|
|
||||||
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
|
|
||||||
base_ptr[base+i] = (u32)idx + __builtin_ctzll(s);
|
|
||||||
s = s & (s - 1);
|
|
||||||
}*/
|
|
||||||
base += NO_PDEP_WIDTH;
|
|
||||||
}
|
|
||||||
base = next_base;
|
|
||||||
#else
|
|
||||||
u32 cnt = __builtin_popcountll(s);
|
|
||||||
u32 next_base = base + cnt;
|
|
||||||
while (s) {
|
|
||||||
// spoil the suspense by reducing dependency chains; actually a win even with cost of pdep
|
|
||||||
u64 s3 = _pdep_u64(~0x7ULL, s); // s3 will have bottom 3 1-bits unset
|
|
||||||
u64 s5 = _pdep_u64(~0x1fULL, s); // s5 will have bottom 5 1-bits unset
|
|
||||||
|
|
||||||
base_ptr[base+0] = (u32)idx + __builtin_ctzll(s); u64 s1 = s & (s - 1ULL);
|
|
||||||
base_ptr[base+1] = (u32)idx + __builtin_ctzll(s1); u64 s2 = s1 & (s1 - 1ULL);
|
|
||||||
base_ptr[base+2] = (u32)idx + __builtin_ctzll(s2); //u64 s3 = s2 & (s2 - 1ULL);
|
|
||||||
base_ptr[base+3] = (u32)idx + __builtin_ctzll(s3); u64 s4 = s3 & (s3 - 1ULL);
|
|
||||||
|
|
||||||
base_ptr[base+4] = (u32)idx + __builtin_ctzll(s4); //u64 s5 = s4 & (s4 - 1ULL);
|
|
||||||
base_ptr[base+5] = (u32)idx + __builtin_ctzll(s5); u64 s6 = s5 & (s5 - 1ULL);
|
|
||||||
s = s6;
|
|
||||||
base += 6;
|
|
||||||
}
|
|
||||||
base = next_base;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
pj.n_structural_indexes = base;
|
#elif defined(NO_PDEP_PLEASE)
|
||||||
base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array
|
u32 cnt = __builtin_popcountll(s);
|
||||||
return true;
|
u32 next_base = base + cnt;
|
||||||
|
while (s) {
|
||||||
|
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
|
||||||
|
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
|
||||||
|
base_ptr[base+i] = (u32)idx + __builtin_ctzll(s);
|
||||||
|
s = s & (s - 1);
|
||||||
|
}*/
|
||||||
|
base += NO_PDEP_WIDTH;
|
||||||
|
}
|
||||||
|
base = next_base;
|
||||||
|
#else
|
||||||
|
u32 cnt = __builtin_popcountll(s);
|
||||||
|
u32 next_base = base + cnt;
|
||||||
|
while (s) {
|
||||||
|
// spoil the suspense by reducing dependency chains; actually a win even
|
||||||
|
// with cost of pdep
|
||||||
|
u64 s3 = _pdep_u64(~0x7ULL, s); // s3 will have bottom 3 1-bits unset
|
||||||
|
u64 s5 = _pdep_u64(~0x1fULL, s); // s5 will have bottom 5 1-bits unset
|
||||||
|
|
||||||
|
base_ptr[base + 0] = (u32)idx + __builtin_ctzll(s);
|
||||||
|
u64 s1 = s & (s - 1ULL);
|
||||||
|
base_ptr[base + 1] = (u32)idx + __builtin_ctzll(s1);
|
||||||
|
u64 s2 = s1 & (s1 - 1ULL);
|
||||||
|
base_ptr[base + 2] =
|
||||||
|
(u32)idx + __builtin_ctzll(s2); // u64 s3 = s2 & (s2 - 1ULL);
|
||||||
|
base_ptr[base + 3] = (u32)idx + __builtin_ctzll(s3);
|
||||||
|
u64 s4 = s3 & (s3 - 1ULL);
|
||||||
|
|
||||||
|
base_ptr[base + 4] =
|
||||||
|
(u32)idx + __builtin_ctzll(s4); // u64 s5 = s4 & (s4 - 1ULL);
|
||||||
|
base_ptr[base + 5] = (u32)idx + __builtin_ctzll(s5);
|
||||||
|
u64 s6 = s5 & (s5 - 1ULL);
|
||||||
|
s = s6;
|
||||||
|
base += 6;
|
||||||
|
}
|
||||||
|
base = next_base;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
pj.n_structural_indexes = base;
|
||||||
|
base_ptr[pj.n_structural_indexes] =
|
||||||
|
0; // make it safe to dereference one beyond this array
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,12 @@
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
/* Microsoft C/C++-compatible compiler */
|
||||||
|
#include <intrin.h>
|
||||||
|
#else
|
||||||
|
#include <immintrin.h>
|
||||||
#include <x86intrin.h>
|
#include <x86intrin.h>
|
||||||
#include <assert.h>
|
#endif
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
#include "common_defs.h"
|
#include "common_defs.h"
|
||||||
|
@ -8,27 +15,30 @@
|
||||||
// the ape machine consists of two parts:
|
// the ape machine consists of two parts:
|
||||||
//
|
//
|
||||||
// 1) The "state machine", which is a multiple channel per-level state machine
|
// 1) The "state machine", which is a multiple channel per-level state machine
|
||||||
// It is a conventional DFA except in that it 'changes track' on {}[] characters
|
// It is a conventional DFA except in that it 'changes track' on {}[]
|
||||||
|
// characters
|
||||||
//
|
//
|
||||||
// 2) The "tape machine": this records offsets of various structures as they go by
|
// 2) The "tape machine": this records offsets of various structures as they go
|
||||||
// These structures are either u32 offsets of other tapes or u32 offsets into our input
|
// by
|
||||||
// or structures.
|
// These structures are either u32 offsets of other tapes or u32 offsets into
|
||||||
|
// our input or structures.
|
||||||
//
|
//
|
||||||
// The state machine doesn't record output.
|
// The state machine doesn't record output.
|
||||||
// The tape machine doesn't validate.
|
// The tape machine doesn't validate.
|
||||||
//
|
//
|
||||||
// The output of the tape machine is meaningful only if the state machine is in non-error states.
|
// The output of the tape machine is meaningful only if the state machine is in
|
||||||
|
// non-error states.
|
||||||
|
|
||||||
// depth adjustment is strictly based on whether we are {[ or }]
|
// depth adjustment is strictly based on whether we are {[ or }]
|
||||||
|
|
||||||
// depth adjustment is a pre-increment which, in effect, means that a {[ contained in an object
|
// depth adjustment is a pre-increment which, in effect, means that a {[
|
||||||
// is in the level one deeper, while the corresponding }] is at the level
|
// contained in an object is in the level one deeper, while the corresponding }]
|
||||||
|
// is at the level
|
||||||
|
|
||||||
// TAPE MACHINE DEFINITIONS
|
// TAPE MACHINE DEFINITIONS
|
||||||
|
|
||||||
const u32 DEPTH_PLUS_ONE = 0x01000000;
|
const u32 DEPTH_PLUS_ONE = 0x01000000;
|
||||||
const u32 DEPTH_ZERO = 0x00000000;
|
const u32 DEPTH_ZERO = 0x00000000;
|
||||||
const u32 DEPTH_MINUS_ONE = 0xff000000;
|
const u32 DEPTH_MINUS_ONE = 0xff000000;
|
||||||
const u32 WRITE_ZERO = 0x0;
|
const u32 WRITE_ZERO = 0x0;
|
||||||
const u32 WRITE_FOUR = 0x1;
|
const u32 WRITE_FOUR = 0x1;
|
||||||
|
@ -43,37 +53,44 @@ inline size_t get_write_size(u32 control) { return control & 0xff; }
|
||||||
|
|
||||||
const u32 char_control[256] = {
|
const u32 char_control[256] = {
|
||||||
// nothing interesting from 0x00-0x1f
|
// nothing interesting from 0x00-0x1f
|
||||||
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
|
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
|
||||||
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
|
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
|
||||||
|
CDF, CDF,
|
||||||
|
|
||||||
// " is 0x22, - is 0x2d
|
// " is 0x22, - is 0x2d
|
||||||
CDF,CDF,C04,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,C04,CDF,CDF,
|
CDF, CDF, C04, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, C04, CDF,
|
||||||
|
CDF,
|
||||||
|
|
||||||
// numbers are 0x30-0x39
|
// numbers are 0x30-0x39
|
||||||
C04,C04,C04,C04, C04,C04,C04,C04, C04,C04,CDF,CDF, CDF,CDF,CDF,CDF,
|
C04, C04, C04, C04, C04, C04, C04, C04, C04, C04, CDF, CDF, CDF, CDF, CDF,
|
||||||
|
CDF,
|
||||||
|
|
||||||
// nothing interesting from 0x40-0x4f
|
// nothing interesting from 0x40-0x4f
|
||||||
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
|
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
|
||||||
|
CDF,
|
||||||
|
|
||||||
// 0x5b/5d are []
|
// 0x5b/5d are []
|
||||||
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CP4, CDF,CM4,CDF,CDF,
|
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CP4, CDF, CM4, CDF,
|
||||||
|
CDF,
|
||||||
|
|
||||||
// f is 0x66 n is 0x6e
|
// f is 0x66 n is 0x6e
|
||||||
CDF,CDF,CDF,CDF, CDF,CDF,C04,CDF, CDF,CDF,CDF,CDF, CDF,CDF,C04,CDF,
|
CDF, CDF, CDF, CDF, CDF, CDF, C04, CDF, CDF, CDF, CDF, CDF, CDF, CDF, C04,
|
||||||
|
CDF,
|
||||||
|
|
||||||
// 0x7b/7d are {}, 74 is t
|
// 0x7b/7d are {}, 74 is t
|
||||||
CDF,CDF,CDF,CDF, C04,CDF,CDF,CDF, CDF,CDF,CDF,CP4, CDF,CM4,CDF,CDF,
|
CDF, CDF, CDF, CDF, C04, CDF, CDF, CDF, CDF, CDF, CDF, CP4, CDF, CM4, CDF,
|
||||||
|
CDF,
|
||||||
|
|
||||||
// nothing interesting from 0x80-0xff
|
// nothing interesting from 0x80-0xff
|
||||||
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
|
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
|
||||||
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
|
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
|
||||||
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
|
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
|
||||||
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
|
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
|
||||||
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
|
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
|
||||||
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
|
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
|
||||||
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
|
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
|
||||||
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF
|
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
|
||||||
};
|
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF};
|
||||||
|
|
||||||
// all of this stuff needs to get moved somewhere reasonable
|
// all of this stuff needs to get moved somewhere reasonable
|
||||||
// like our ParsedJson structure
|
// like our ParsedJson structure
|
||||||
|
@ -88,7 +105,14 @@ u8 * current_number_buf_loc;
|
||||||
|
|
||||||
// STATE MACHINE DECLARATIONS
|
// STATE MACHINE DECLARATIONS
|
||||||
const u32 MAX_STATES = 16;
|
const u32 MAX_STATES = 16;
|
||||||
u32 trans[MAX_STATES][256];
|
|
||||||
|
/**
|
||||||
|
* It is annoying to have to call init_state_machine each time.
|
||||||
|
* Better to precompute the (small) result into a header file.
|
||||||
|
*/
|
||||||
|
// u32 trans[MAX_STATES][256];
|
||||||
|
#include "transitions.h"
|
||||||
|
|
||||||
u32 states[MAX_DEPTH];
|
u32 states[MAX_DEPTH];
|
||||||
const int START_STATE = 1;
|
const int START_STATE = 1;
|
||||||
|
|
||||||
|
@ -98,7 +122,8 @@ u32 valid_end_states[MAX_STATES] = {
|
||||||
1, // state 2: we've seen an { - if we left this level it's ok
|
1, // state 2: we've seen an { - if we left this level it's ok
|
||||||
0, // state 3 is abolished, we shouldn't be in it
|
0, // state 3 is abolished, we shouldn't be in it
|
||||||
|
|
||||||
0, // state 4 means we saw a string inside an object. We can't end like this!
|
0, // state 4 means we saw a string inside an object. We can't end like
|
||||||
|
// this!
|
||||||
0, // similarly state 5 means we saw a string followed by a colon.
|
0, // similarly state 5 means we saw a string followed by a colon.
|
||||||
0, // state 6 is abolished
|
0, // state 6 is abolished
|
||||||
1, // it's ok to finish on 7
|
1, // it's ok to finish on 7
|
||||||
|
@ -109,8 +134,10 @@ u32 valid_end_states[MAX_STATES] = {
|
||||||
1, // state 11 is ok to finish on, we just saw a unary inside a array
|
1, // state 11 is ok to finish on, we just saw a unary inside a array
|
||||||
|
|
||||||
0, // state 12 we've just seen a comma inside an array - can't finish
|
0, // state 12 we've just seen a comma inside an array - can't finish
|
||||||
0, // state 13 is our weird start state. I think we shouldn't end on it as we need to see something
|
0, // state 13 is our weird start state. I think we shouldn't end on it as
|
||||||
1, // state 14 is ok. Its an error to see something *more* here but not to be in this state
|
// we need to see something
|
||||||
|
1, // state 14 is ok. Its an error to see something *more* here but not to
|
||||||
|
// be in this state
|
||||||
0, // we don't use state 15
|
0, // we don't use state 15
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -129,182 +156,183 @@ const int START_DEPTH_START_STATE = 13;
|
||||||
const int ANYTHING_IS_ERROR_STATE = 14;
|
const int ANYTHING_IS_ERROR_STATE = 14;
|
||||||
|
|
||||||
void init_state_machine() {
|
void init_state_machine() {
|
||||||
// states 10 and 6 eliminated
|
// states 10 and 6 eliminated
|
||||||
|
|
||||||
trans[ 1]['{'] = 2;
|
trans[1][(int)'{'] = 2;
|
||||||
trans[ 2]['"'] = 4;
|
trans[2][(int)'"'] = 4;
|
||||||
trans[ 4][':'] = 5;
|
trans[4][(int)':'] = 5;
|
||||||
// 5->7 on all values ftn0123456789-"
|
// 5->7 on all values ftn0123456789-"
|
||||||
trans[ 7][','] = 8;
|
trans[7][(int)','] = 8;
|
||||||
trans[ 8]['"'] = 4;
|
trans[8][(int)'"'] = 4;
|
||||||
|
|
||||||
trans[ 1]['['] = 9;
|
trans[1][(int)'['] = 9;
|
||||||
// 9->11 on all values ftn0123456789-"
|
// 9->11 on all values ftn0123456789-"
|
||||||
trans[11][','] = 12;
|
trans[11][(int)','] = 12;
|
||||||
// 12->11 on all values ftn0123456789-"
|
// 12->11 on all values ftn0123456789-"
|
||||||
|
|
||||||
const char * UNARIES = "}]ftn0123456789-\"";
|
const char *UNARIES = "}]ftn0123456789-\"";
|
||||||
for (u32 i = 0; i < strlen(UNARIES); i++) {
|
for (u32 i = 0; i < strlen(UNARIES); i++) {
|
||||||
trans[ 5][(u32)UNARIES[i]] = 7;
|
trans[5][(u32)UNARIES[i]] = 7;
|
||||||
trans[ 9][(u32)UNARIES[i]] = 11;
|
trans[9][(u32)UNARIES[i]] = 11;
|
||||||
trans[12][(u32)UNARIES[i]] = 11;
|
trans[12][(u32)UNARIES[i]] = 11;
|
||||||
#ifdef PERMIT_RANDOM_UNARIES_AT_TOP_LEVEL
|
#ifdef PERMIT_RANDOM_UNARIES_AT_TOP_LEVEL
|
||||||
// NOTE: if we permit JSON documents that
|
// NOTE: if we permit JSON documents that
|
||||||
// contain a single number or string, then we
|
// contain a single number or string, then we
|
||||||
// allow all the unaries at the top level
|
// allow all the unaries at the top level
|
||||||
trans[13][(u32)UNARIES[i]] = 14;
|
trans[13][(u32)UNARIES[i]] = 14;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef PERMIT_RANDOM_UNARIES_AT_TOP_LEVEL
|
|
||||||
// NOTE: if we don't permit JSON documents that
|
|
||||||
// contain a single number or string, we must
|
|
||||||
// make sure we accept the top-level closing braces
|
|
||||||
// that are delivered to the start depth only
|
|
||||||
trans[13]['}'] = 14;
|
|
||||||
trans[13][']'] = 14;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// back transitions when new things are open
|
|
||||||
trans[2]['{'] = 2;
|
|
||||||
trans[7]['{'] = 2;
|
|
||||||
trans[9]['{'] = 2;
|
|
||||||
trans[11]['{'] = 2;
|
|
||||||
trans[2]['['] = 9;
|
|
||||||
trans[7]['['] = 9;
|
|
||||||
trans[9]['['] = 9;
|
|
||||||
trans[11]['['] = 9;
|
|
||||||
|
|
||||||
|
#ifndef PERMIT_RANDOM_UNARIES_AT_TOP_LEVEL
|
||||||
|
// NOTE: if we don't permit JSON documents that
|
||||||
|
// contain a single number or string, we must
|
||||||
|
// make sure we accept the top-level closing braces
|
||||||
|
// that are delivered to the start depth only
|
||||||
|
trans[13][(int)'}'] = 14;
|
||||||
|
trans[13][(int)']'] = 14;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// back transitions when new things are open
|
||||||
|
trans[2][(int)'{'] = 2;
|
||||||
|
trans[7][(int)'{'] = 2;
|
||||||
|
trans[9][(int)'{'] = 2;
|
||||||
|
trans[11][(int)'{'] = 2;
|
||||||
|
trans[2][(int)'['] = 9;
|
||||||
|
trans[7][(int)'['] = 9;
|
||||||
|
trans[9][(int)'['] = 9;
|
||||||
|
trans[11][(int)'['] = 9;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ape_machine(const u8 * buf, UNUSED size_t len, ParsedJson & pj) {
|
bool ape_machine(const u8 *buf, UNUSED size_t len, ParsedJson &pj) {
|
||||||
|
|
||||||
// NOTE - our depth is used by both the tape machine and the state machine
|
// NOTE - our depth is used by both the tape machine and the state machine
|
||||||
// Further, in production we will set it to a largish value in a generous buffer as a rogue input
|
// Further, in production we will set it to a largish value in a generous
|
||||||
// could consist of many {[ characters or many }] characters. We aren't busily checking errors
|
// buffer as a rogue input could consist of many {[ characters or many }]
|
||||||
// (and in fact, an aggressive sequence of [ characters is actually valid input!) so something that
|
// characters. We aren't busily checking errors (and in fact, an aggressive
|
||||||
// blows out maximum depth will need to be periodically checked for, as will something that tries
|
// sequence of [ characters is actually valid input!) so something that blows
|
||||||
// to set depth very low. If we set our starting depth, say, to 256, we can tolerate 256 bogus close brace
|
// out maximum depth will need to be periodically checked for, as will
|
||||||
// characters without aggressively going wrong and writing to bad memory
|
// something that tries to set depth very low. If we set our starting depth,
|
||||||
// Note that any specious depth can have a specious tape associated with and all these specious depths
|
// say, to 256, we can tolerate 256 bogus close brace characters without
|
||||||
// can share a region of the tape - it's harmless. Since tape is one-way, any movement in a specious tape
|
// aggressively going wrong and writing to bad memory. Note that any specious
|
||||||
// is an error (so we can detect max_depth violations by making sure that specious tape locations haven't
|
// depth can have a specious tape associated with and all these specious
|
||||||
// moved from their starting values)
|
// depths can share a region of the tape - it's harmless. Since tape is
|
||||||
|
// one-way, any movement in a specious tape is an error (so we can detect
|
||||||
|
// max_depth violations by making sure that specious tape locations haven't
|
||||||
|
// moved from their starting values)
|
||||||
|
|
||||||
u32 depth = START_DEPTH;
|
u32 depth = START_DEPTH;
|
||||||
|
|
||||||
for (u32 i = 0; i < MAX_DEPTH; i++) {
|
for (u32 i = 0; i < MAX_DEPTH; i++) {
|
||||||
pj.tape_locs[i] = i*MAX_TAPE_ENTRIES;
|
pj.tape_locs[i] = i * MAX_TAPE_ENTRIES;
|
||||||
if (i == START_DEPTH) {
|
if (i == START_DEPTH) {
|
||||||
states[i] = START_DEPTH_START_STATE;
|
states[i] = START_DEPTH_START_STATE;
|
||||||
} else if ((i < START_DEPTH) || (i >= REDLINE_DEPTH)) {
|
} else if ((i < START_DEPTH) || (i >= REDLINE_DEPTH)) {
|
||||||
states[i] = ANYTHING_IS_ERROR_STATE;
|
states[i] = ANYTHING_IS_ERROR_STATE;
|
||||||
} else {
|
} else {
|
||||||
states[i] = START_STATE;
|
states[i] = START_STATE;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pj.current_string_buf_loc = pj.string_buf;
|
||||||
|
pj.current_number_buf_loc = pj.number_buf;
|
||||||
|
|
||||||
|
u32 error_sump = 0;
|
||||||
|
u32 old_tape_loc = pj.tape_locs[depth]; // need to initialize for first write
|
||||||
|
|
||||||
|
u32 next_idx = pj.structural_indexes[0];
|
||||||
|
u8 next_c = buf[next_idx];
|
||||||
|
u32 next_control = char_control[next_c];
|
||||||
|
|
||||||
|
for (u32 i = 0; i < pj.n_structural_indexes; i++) {
|
||||||
|
|
||||||
|
// very periodic safety checking. This does NOT guarantee that we
|
||||||
|
// haven't been in our dangerous zones above or below our normal
|
||||||
|
// depths. It ONLY checks to be sure that we don't manage to leave
|
||||||
|
// these zones and write completely off our tape.
|
||||||
|
if (!(i % DEPTH_SAFETY_MARGIN)) {
|
||||||
|
if (depth < START_DEPTH || depth >= REDLINE_DEPTH) {
|
||||||
|
error_sump |= 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pj.current_string_buf_loc = pj.string_buf;
|
u32 idx = next_idx;
|
||||||
pj.current_number_buf_loc = pj.number_buf;
|
u8 c = next_c;
|
||||||
|
u32 control = next_control;
|
||||||
|
|
||||||
u32 error_sump = 0;
|
next_idx = pj.structural_indexes[i + 1];
|
||||||
u32 old_tape_loc = pj.tape_locs[depth]; // need to initialize for first write
|
next_c = buf[next_idx];
|
||||||
|
next_control = char_control[next_c];
|
||||||
|
|
||||||
u32 next_idx = pj.structural_indexes[0];
|
// TAPE MACHINE
|
||||||
u8 next_c = buf[next_idx];
|
s8 depth_adjust = get_depth_adjust(control);
|
||||||
u32 next_control = char_control[next_c];
|
u8 write_size = get_write_size(control);
|
||||||
|
u32 write_val = (depth_adjust != 0) ? old_tape_loc : idx;
|
||||||
for (u32 i = 0; i < pj.n_structural_indexes; i++) {
|
depth += depth_adjust;
|
||||||
|
|
||||||
// very periodic safety checking. This does NOT guarantee that we
|
|
||||||
// haven't been in our dangerous zones above or below our normal
|
|
||||||
// depths. It ONLY checks to be sure that we don't manage to leave
|
|
||||||
// these zones and write completely off our tape.
|
|
||||||
if (!(i%DEPTH_SAFETY_MARGIN)) {
|
|
||||||
if (depth < START_DEPTH || depth >= REDLINE_DEPTH) {
|
|
||||||
error_sump |= 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 idx = next_idx;
|
|
||||||
u8 c = next_c;
|
|
||||||
u32 control = next_control;
|
|
||||||
|
|
||||||
next_idx = pj.structural_indexes[i+1];
|
|
||||||
next_c = buf[next_idx];
|
|
||||||
next_control = char_control[next_c];
|
|
||||||
|
|
||||||
// TAPE MACHINE
|
|
||||||
s8 depth_adjust = get_depth_adjust(control);
|
|
||||||
u8 write_size = get_write_size(control);
|
|
||||||
u32 write_val = (depth_adjust != 0) ? old_tape_loc : idx;
|
|
||||||
depth += depth_adjust;
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
cout << "i: " << i << " idx: " << idx << " c " << c << "\n";
|
cout << "i: " << i << " idx: " << idx << " c " << c << "\n";
|
||||||
cout << "TAPE MACHINE: depth change " << (s32)depth_adjust
|
cout << "TAPE MACHINE: depth change " << (s32)depth_adjust << " write_size "
|
||||||
<< " write_size " << (u32)write_size << " current_depth: " << depth << "\n";
|
<< (u32)write_size << " current_depth: " << depth << "\n";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// STATE MACHINE - hoisted here to fill in during the tape machine's latencies
|
// STATE MACHINE - hoisted here to fill in during the tape machine's
|
||||||
|
// latencies
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
cout << "STATE MACHINE: state[depth] pre " << states[depth] << " ";
|
cout << "STATE MACHINE: state[depth] pre " << states[depth] << " ";
|
||||||
#endif
|
#endif
|
||||||
states[depth] = trans[states[depth]][c];
|
states[depth] = trans[states[depth]][c];
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
cout << "post " << states[depth] << "\n";
|
cout << "post " << states[depth] << "\n";
|
||||||
#endif
|
#endif
|
||||||
// TAPE MACHINE, again
|
// TAPE MACHINE, again
|
||||||
pj.tape[pj.tape_locs[depth]] = write_val | (((u64)c) << 56);
|
pj.tape[pj.tape_locs[depth]] = write_val | (((u64)c) << 56);
|
||||||
old_tape_loc = pj.tape_locs[depth] += write_size;
|
old_tape_loc = pj.tape_locs[depth] += write_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (depth != START_DEPTH) {
|
if (depth != START_DEPTH) {
|
||||||
// We haven't returned to our start depth, so our braces can't possibly match
|
// We haven't returned to our start depth, so our braces can't possibly
|
||||||
// Note this doesn't exclude the possibility that we have improperly matched { } or [] pairs
|
// match. Note this doesn't exclude the possibility that we have improperly
|
||||||
return false;
|
// matched { } or [] pairs
|
||||||
}
|
return false;
|
||||||
|
}
|
||||||
for (u32 i = 0; i < MAX_DEPTH; i++) {
|
|
||||||
if (!valid_end_states[states[i]]) {
|
for (u32 i = 0; i < MAX_DEPTH; i++) {
|
||||||
#ifdef DEBUG
|
if (!valid_end_states[states[i]]) {
|
||||||
printf("Invalid ending state: states[%d] == %d\n", states[i]);
|
#ifdef DEBUG
|
||||||
#endif
|
printf("Invalid ending state: states[%d] == %d\n", states[i]);
|
||||||
return false;
|
#endif
|
||||||
}
|
return false;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#define DUMP_TAPES
|
#define DUMP_TAPES
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
for (u32 i = 0; i < MAX_DEPTH; i++) {
|
for (u32 i = 0; i < MAX_DEPTH; i++) {
|
||||||
u32 start_loc = i*MAX_TAPE_ENTRIES;
|
u32 start_loc = i * MAX_TAPE_ENTRIES;
|
||||||
cout << " tape section i " << i;
|
cout << " tape section i " << i;
|
||||||
if (i == START_DEPTH) {
|
if (i == START_DEPTH) {
|
||||||
cout << " (START) ";
|
cout << " (START) ";
|
||||||
} else if ((i < START_DEPTH) || (i >= REDLINE_DEPTH)) {
|
} else if ((i < START_DEPTH) || (i >= REDLINE_DEPTH)) {
|
||||||
cout << " (REDLINE) ";
|
cout << " (REDLINE) ";
|
||||||
} else {
|
} else {
|
||||||
cout << " (NORMAL) ";
|
cout << " (NORMAL) ";
|
||||||
}
|
}
|
||||||
|
|
||||||
cout << " from: " << start_loc
|
cout << " from: " << start_loc << " to: " << tape_locs[i] << " "
|
||||||
<< " to: " << tape_locs[i] << " "
|
<< " size: " << (tape_locs[i] - start_loc) << "\n";
|
||||||
<< " size: " << (tape_locs[i]-start_loc) << "\n";
|
cout << " state: " << states[i] << "\n";
|
||||||
cout << " state: " << states[i] << "\n";
|
|
||||||
#ifdef DUMP_TAPES
|
#ifdef DUMP_TAPES
|
||||||
for (u32 j = start_loc; j < tape_locs[i]; j++) {
|
for (u32 j = start_loc; j < tape_locs[i]; j++) {
|
||||||
if (tape[j]) {
|
if (tape[j]) {
|
||||||
cout << "j: " << j << " tape[j] char " << (char)(tape[j]>>56)
|
cout << "j: " << j << " tape[j] char " << (char)(tape[j] >> 56)
|
||||||
<< " tape[j][0..55]: " << (tape[j]&0xffffffffffffffULL ) << "\n";
|
<< " tape[j][0..55]: " << (tape[j] & 0xffffffffffffffULL) << "\n";
|
||||||
}
|
}
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (error_sump) {
|
}
|
||||||
return false;
|
#endif
|
||||||
}
|
if (error_sump) {
|
||||||
return true;
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -6,13 +6,7 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#include "common_defs.h"
|
#include "jsonparser.h"
|
||||||
#include "jsonioutil.h"
|
|
||||||
#include "simdjson_internal.h"
|
|
||||||
#include "stage1_find_marks.h"
|
|
||||||
#include "stage2_flatten.h"
|
|
||||||
#include "stage3_ape_machine.h"
|
|
||||||
#include "stage4_shovel_machine.h"
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Does the filename end with the given extension?
|
* Does the filename end with the given extension?
|
||||||
|
@ -29,7 +23,7 @@ bool startsWith(const char *pre, const char *str) {
|
||||||
|
|
||||||
bool validate(const char *dirname) {
|
bool validate(const char *dirname) {
|
||||||
bool everythingfine = true;
|
bool everythingfine = true;
|
||||||
init_state_machine(); // to be safe
|
// init_state_machine(); // no longer necessary
|
||||||
const char *extension = ".json";
|
const char *extension = ".json";
|
||||||
size_t dirlen = strlen(dirname);
|
size_t dirlen = strlen(dirname);
|
||||||
struct dirent **entry_list;
|
struct dirent **entry_list;
|
||||||
|
@ -58,26 +52,13 @@ bool validate(const char *dirname) {
|
||||||
}
|
}
|
||||||
std::pair<u8 *, size_t> p = get_corpus(fullpath);
|
std::pair<u8 *, size_t> p = get_corpus(fullpath);
|
||||||
// terrible hack but just to get it working
|
// terrible hack but just to get it working
|
||||||
ParsedJson *pj_ptr = new ParsedJson;
|
ParsedJson *pj_ptr = allocate_ParsedJson(p.second);
|
||||||
ParsedJson &pj(*pj_ptr);
|
if(pj_ptr == NULL) {
|
||||||
if (posix_memalign((void **)&pj.structurals, 8,
|
std::cerr<< "can't allocate memory"<<std::endl;
|
||||||
ROUNDUP_N(p.second, 64) / 8)) {
|
|
||||||
std::cerr << "Could not allocate memory" << std::endl;
|
|
||||||
return false;
|
return false;
|
||||||
};
|
|
||||||
pj.n_structural_indexes = 0;
|
|
||||||
u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7;
|
|
||||||
pj.structural_indexes = new u32[max_structures];
|
|
||||||
bool isok = find_structural_bits(p.first, p.second, pj);
|
|
||||||
if (isok) {
|
|
||||||
isok = flatten_indexes(p.second, pj);
|
|
||||||
}
|
|
||||||
if (isok) {
|
|
||||||
isok = ape_machine(p.first, p.second, pj);
|
|
||||||
}
|
|
||||||
if (isok) {
|
|
||||||
isok = shovel_machine(p.first, p.second, pj);
|
|
||||||
}
|
}
|
||||||
|
ParsedJson &pj(*pj_ptr);
|
||||||
|
bool isok = json_parse(p.first, p.second, pj);
|
||||||
if (startsWith("pass", name)) {
|
if (startsWith("pass", name)) {
|
||||||
if (!isok) {
|
if (!isok) {
|
||||||
printf("warning: file %s should pass but it fails.\n", name);
|
printf("warning: file %s should pass but it fails.\n", name);
|
||||||
|
@ -92,10 +73,9 @@ bool validate(const char *dirname) {
|
||||||
printf("File %s %s.\n", name,
|
printf("File %s %s.\n", name,
|
||||||
isok ? " is valid JSON " : " is not valid JSON");
|
isok ? " is valid JSON " : " is not valid JSON");
|
||||||
}
|
}
|
||||||
free(pj.structurals);
|
|
||||||
free(p.first);
|
free(p.first);
|
||||||
delete[] pj.structural_indexes;
|
|
||||||
free(fullpath);
|
free(fullpath);
|
||||||
|
deallocate_ParsedJson(pj_ptr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (int i = 0; i < c; ++i)
|
for (int i = 0; i < c; ++i)
|
||||||
|
|
Loading…
Reference in New Issue