Major surgery.

This commit is contained in:
Daniel Lemire 2018-08-20 17:27:25 -04:00
parent 726eb5a030
commit fb65be64bb
26 changed files with 37172 additions and 1273 deletions

View File

@ -6,32 +6,48 @@
.PHONY: clean cleandist
CXXFLAGS = -std=c++11 -O2 -march=native -Wall -Wextra -Wshadow -Iinclude -Iinclude/linux -Idependencies/double-conversion -Ldependencies/double-conversion/release
CXXFLAGS = -std=c++11 -O2 -march=native -Wall -Wextra -Wshadow -Iinclude -Ibenchmark/linux -Idependencies/double-conversion -Idependencies/rapidjson/include -Ldependencies/double-conversion/release
LIBFLAGS = -ldouble-conversion
EXECUTABLES=parse jsoncheck
HEADERS=include/common_defs.h include/jsonioutil.h include/linux/linux-perf-events.h include/simdjson_internal.h include/stage1_find_marks.h include/stage2_flatten.h include/stage3_ape_machine.h include/stage4_shovel_machine.h
LIBFILES=src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage3_ape_machine.cpp src/stage4_shovel_machine.cpp
EXECUTABLES=parse jsoncheck minifiercompetition parsingcompetition
HEADERS=include/jsonparser.h include/common_defs.h include/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdprune_tables.h include/simdjson_internal.h include/stage1_find_marks.h include/stage2_flatten.h include/stage3_ape_machine.h include/stage4_shovel_machine.h include/jsonminifier.h
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage3_ape_machine.cpp src/stage4_shovel_machine.cpp src/jsonminifier.cpp
EXTRA_EXECUTABLES=parsenocheesy parsenodep8
LIDDOUBLE:=dependencies/double-conversion/release/libdouble-conversion.a
LIBDOUBLE:=dependencies/double-conversion/release/libdouble-conversion.a
RAPIDJSON_INCLUDE:=dependencies/rapidjson/include
LIBS=$(LIDDOUBLE)
LIBS=$(RAPIDJSON_INCLUDE) $(LIBDOUBLE)
all: $(LIBS) $(EXECUTABLES)
test: jsoncheck
./jsoncheck
$(LIDDOUBLE) : dependencies/double-conversion/README.md
$(RAPIDJSON_INCLUDE):
git submodule update --init --recursive
$(LIBDOUBLE) : dependencies/double-conversion/README.md
cd dependencies/double-conversion/ && mkdir -p release && cd release && cmake .. && make
bench: benchmarks/bench.cpp $(RAPIDJSON_INCLUDE) $(HEADERS)
$(CXX) -std=c++11 -O3 -o $@ benchmarks/bench.cpp -I$(RAPIDJSON_INCLUDE) -Iinclude -march=native -lm -Wall -Wextra -Wno-narrowing
parse: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parse $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
jsoncheck:tests/jsoncheck.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o jsoncheck $(LIBFILES) tests/jsoncheck.cpp -I. $(LIBFLAGS)
minifiercompetition: benchmark/minifiercompetition.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o minifiercompetition $(LIBFILES) benchmark/minifiercompetition.cpp -I. $(LIBFLAGS)
parsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parsingcompetition $(LIBFILES) benchmark/parsingcompetition.cpp -I. $(LIBFLAGS)
parsehisto: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parsehisto benchmark/parse.cpp $(LIBFILES) $(LIBFLAGS) -DBUILDHISTOGRAM

View File

@ -17,7 +17,7 @@ const char *unitname = "cycles";
: \
: /* no read only */ \
"%rax", "%rbx", "%rcx", "%rdx" /* clobbers */ \
); \
); \
(cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
} while (0)
@ -31,7 +31,7 @@ const char *unitname = "cycles";
: "=r"(cyc_high), "=r"(cyc_low) \
: /* no read only registers */ \
: "%rax", "%rbx", "%rcx", "%rdx" /* clobbers */ \
); \
); \
(cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
} while (0)
@ -106,9 +106,9 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
float cycle_per_op = (min_diff) / (double)S; \
float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \
if (verbose) \
printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
if (verbose) \
printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
if (verbose) \
printf("\n"); \
if (!verbose) \
@ -117,7 +117,7 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
} while (0)
// like BEST_TIME, but no check
#define BEST_TIME_NOCHECK(test, pre, repeat, size, verbose) \
#define BEST_TIME_NOCHECK(test, pre, repeat, size, verbose) \
do { \
if (global_rdtsc_overhead == UINT64_MAX) { \
RDTSC_SET_OVERHEAD(rdtsc_overhead_func(1), repeat); \
@ -143,10 +143,10 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
float cycle_per_op = (min_diff) / (double)S; \
float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \
if (verbose) \
printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
if (verbose) \
printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
if (verbose) \
printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
if (verbose) \
printf("\n"); \
if (!verbose) \
printf(" %.3f ", cycle_per_op); \

View File

@ -0,0 +1,88 @@
// https://github.com/WojciechMula/toys/blob/master/000helpers/linux-perf-events.h
#pragma once
#ifdef __linux__
#include <asm/unistd.h> // for __NR_perf_event_open
#include <linux/perf_event.h> // for perf event constants
#include <sys/ioctl.h> // for ioctl
#include <unistd.h> // for syscall
#include <cerrno> // for errno
#include <cstring> // for memset
#include <stdexcept>
#include <vector>
// Groups one or more Linux perf events (opened through the perf_event_open
// syscall) so that they can be reset, enabled, disabled and read together.
// TYPE is stored in perf_event_attr::type for every event that is opened.
template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
  int fd; // descriptor of the most recently opened event
  perf_event_attr attribs;
  int num_events;
  std::vector<uint64_t> temp_result_vec; // raw buffer that read() fills
  std::vector<uint64_t> ids;             // ids reported by PERF_EVENT_IOC_ID

public:
  // Opens one event per entry of config_vec. The first descriptor opened is
  // passed as the group argument of every later perf_event_open call.
  // Throws std::runtime_error if perf_event_open fails.
  LinuxEvents(std::vector<int> config_vec) : fd(0) {
    memset(&attribs, 0, sizeof(attribs));
    attribs.type = TYPE;
    attribs.size = sizeof(attribs);
    attribs.disabled = 1;
    attribs.exclude_kernel = 1;
    attribs.exclude_hv = 1;

    attribs.sample_period = 0;
    attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
    const int pid = 0;  // the current process
    const int cpu = -1; // all CPUs
    const unsigned long flags = 0;

    int group = -1; // no group
    num_events = config_vec.size();
    // One id slot per event has to exist before the loop below writes through
    // &ids[i++]; the vector was previously left empty, which made that an
    // out-of-bounds write.
    ids.resize(config_vec.size());
    u32 i = 0;
    for (auto config : config_vec) {
      attribs.config = config;
      fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags);
      if (fd == -1) {
        report_error("perf_event_open");
      }
      // The return value is not checked: the ids are stored but never read
      // back anywhere in this class.
      ioctl(fd, PERF_EVENT_IOC_ID, &ids[i++]);
      if (group == -1) {
        group = fd;
      }
    }

    // Sized as one leading slot plus two slots per event; end() picks the odd
    // slots out of this buffer.
    temp_result_vec.resize(num_events * 2 + 1);
  }

  // NOTE(review): fd is overwritten on every constructor iteration, so only
  // the last descriptor opened is closed here.
  ~LinuxEvents() { close(fd); }

  // Resets and then enables the counters (group-wide ioctl flags).
  // Throws std::runtime_error if either ioctl fails.
  really_inline void start() {
    if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
      report_error("ioctl(PERF_EVENT_IOC_RESET)");
    }

    if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
      report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
    }
  }

  // Disables the counters and copies their values into results.
  // results is indexed, not resized, so it must already hold at least one
  // element per event.
  // Throws std::runtime_error if the ioctl or the read fails.
  really_inline void end(std::vector<unsigned long long> &results) {
    if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
      report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
    }

    if (read(fd, &temp_result_vec[0], temp_result_vec.size() * 8) == -1) {
      report_error("read");
    }
    // our actual results are in slots 1,3,5, ... of this structure
    // we really should be checking our ids obtained earlier to be safe
    for (u32 i = 1; i < temp_result_vec.size(); i += 2) {
      results[i / 2] = temp_result_vec[i];
    }
  }

private:
  // Throws std::runtime_error carrying context and the strerror(errno) text.
  void report_error(const std::string &context) {
    throw std::runtime_error(context + ": " + std::string(strerror(errno)));
  }
};
#endif

View File

@ -1,27 +1,26 @@
#include "common_defs.h"
#include "double-conversion/double-conversion.h"
#include "linux-perf-events.h"
#include <algorithm>
#include <assert.h>
#include <chrono>
#include <cstring>
#include <dirent.h>
#include <fstream>
#include <inttypes.h>
#include <iomanip>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <iostream>
#include <iomanip>
#include <chrono>
#include <fstream>
#include <sstream>
#include <string>
#include <cstring>
#include <unistd.h>
#include <vector>
#include <set>
#include <map>
#include <algorithm>
#include <x86intrin.h>
#include <assert.h>
#include "double-conversion/double-conversion.h"
#include "common_defs.h"
#include "linux-perf-events.h"
/// Fixme: enable double conv
// #define DOUBLECONV
@ -34,197 +33,230 @@ using namespace double_conversion;
//#define DEBUG
#include "jsonioutil.h"
#include "simdjson_internal.h"
#include "stage1_find_marks.h"
#include "stage2_flatten.h"
#include "stage3_ape_machine.h"
#include "stage4_shovel_machine.h"
#include "jsonioutil.h"
using namespace std;
// https://stackoverflow.com/questions/2616906/how-do-i-output-coloured-text-to-a-linux-terminal
namespace Color {
enum Code {
FG_DEFAULT = 39, FG_BLACK = 30, FG_RED = 31, FG_GREEN = 32,
FG_YELLOW = 33, FG_BLUE = 34, FG_MAGENTA = 35, FG_CYAN = 36,
FG_LIGHT_GRAY = 37, FG_DARK_GRAY = 90, FG_LIGHT_RED = 91,
FG_LIGHT_GREEN = 92, FG_LIGHT_YELLOW = 93, FG_LIGHT_BLUE = 94,
FG_LIGHT_MAGENTA = 95, FG_LIGHT_CYAN = 96, FG_WHITE = 97,
BG_RED = 41, BG_GREEN = 42, BG_BLUE = 44, BG_DEFAULT = 49
};
class Modifier {
Code code;
public:
Modifier(Code pCode) : code(pCode) {}
friend std::ostream&
operator<<(std::ostream& os, const Modifier& mod) {
return os << "\033[" << mod.code << "m";
}
};
enum Code {
FG_DEFAULT = 39,
FG_BLACK = 30,
FG_RED = 31,
FG_GREEN = 32,
FG_YELLOW = 33,
FG_BLUE = 34,
FG_MAGENTA = 35,
FG_CYAN = 36,
FG_LIGHT_GRAY = 37,
FG_DARK_GRAY = 90,
FG_LIGHT_RED = 91,
FG_LIGHT_GREEN = 92,
FG_LIGHT_YELLOW = 93,
FG_LIGHT_BLUE = 94,
FG_LIGHT_MAGENTA = 95,
FG_LIGHT_CYAN = 96,
FG_WHITE = 97,
BG_RED = 41,
BG_GREEN = 42,
BG_BLUE = 44,
BG_DEFAULT = 49
};
class Modifier {
Code code;
public:
Modifier(Code pCode) : code(pCode) {}
friend std::ostream &operator<<(std::ostream &os, const Modifier &mod) {
return os << "\033[" << mod.code << "m";
}
};
} // namespace Color
// Print buf to std::cout with every structural character colored: brackets
// and braces in green, any other structural character in yellow. The bytes
// lying between two consecutive structural indexes are printed uncolored.
void colorfuldisplay(ParsedJson &pj, const u8 *buf) {
  const Color::Modifier bracket_color(Color::FG_GREEN);
  const Color::Modifier other_color(Color::FG_YELLOW);
  const Color::Modifier reset_color(Color::FG_DEFAULT);
  const size_t count = pj.n_structural_indexes;

  // skip initial fluff: advance while this index repeats in the next slot
  size_t cur = 0;
  for (; cur + 1 < count; ++cur) {
    if (pj.structural_indexes[cur] != pj.structural_indexes[cur + 1]) {
      break;
    }
  }

  for (; cur < count; ++cur) {
    const u32 here = pj.structural_indexes[cur];
    // Clearing bit 0x20 folds '{' (7b) onto '[' (5b) and '}' (7d) onto ']' (5d).
    const int folded = buf[here] & 0xdf;
    const bool is_bracket = (folded == 0x5b) || (folded == 0x5d);
    std::cout << (is_bracket ? bracket_color : other_color) << buf[here]
              << reset_color;

    if (cur + 1 >= count) {
      continue; // last structural character: nothing follows it
    }
    const u32 stop = pj.structural_indexes[cur + 1];
    u32 pos = here + 1;
    while (pos < stop) {
      std::cout << buf[pos];
      ++pos;
    }
  }
  std::cout << std::endl;
}
// Write buf to std::cout, highlighting each structural character: green for
// '{', '[', '}' and ']', yellow for every other structural character. Bytes
// between one structural index and the next are echoed without color.
void colorfuldisplay(ParsedJson & pj, const u8 * buf) {
  const Color::Modifier green(Color::FG_GREEN);
  const Color::Modifier yellow(Color::FG_YELLOW);
  const Color::Modifier plain(Color::FG_DEFAULT);
  const size_t total = pj.n_structural_indexes;
  const u32 * marks = pj.structural_indexes;

  // skip initial fluff (leading entries that repeat the same index)
  size_t k = 0;
  while (k + 1 < total && marks[k] == marks[k + 1]) {
    ++k;
  }

  while (k < total) {
    const u32 at = marks[k];
    const int folded = buf[at] & 0xdf; // maps 7b -> 5b and 7d -> 5d
    if (folded == 0x5b || folded == 0x5d) {
      std::cout << green << buf[at] << plain;
    } else {
      std::cout << yellow << buf[at] << plain;
    }

    const bool has_next = (k + 1 < total);
    if (has_next) {
      const u32 limit = marks[k + 1];
      for (u32 p = at + 1; p < limit; ++p) {
        std::cout << buf[p];
      }
    }
    ++k;
  }
  std::cout << std::endl;
}
int main(int argc, char *argv[]) {
if (argc != 2) {
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
exit(1);
}
pair<u8 *, size_t> p = get_corpus(argv[1]);
ParsedJson *pj_ptr = new ParsedJson;
ParsedJson &pj(*pj_ptr);
if (posix_memalign((void **)&pj.structurals, 8,
ROUNDUP_N(p.second, 64) / 8)) {
cerr << "Could not allocate memory" << endl;
exit(1);
};
if (p.second > 0xffffff) {
cerr << "Currently only support JSON files < 16MB\n";
exit(1);
}
init_state_machine();
int main(int argc, char * argv[]) {
if (argc != 2) {
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
exit(1);
}
pair<u8 *, size_t> p = get_corpus(argv[1]);
ParsedJson * pj_ptr = new ParsedJson;
ParsedJson & pj(*pj_ptr);
if (posix_memalign( (void **)&pj.structurals, 8, ROUNDUP_N(p.second, 64)/8)) {
cerr << "Could not allocate memory" << endl;
exit(1);
};
if (p.second > 0xffffff) {
cerr << "Currently only support JSON files < 16MB\n";
exit(1);
}
init_state_machine();
pj.n_structural_indexes = 0;
// we have potentially 1 structure per byte of input
// as well as a dummy structure and a root structure
// we also potentially write up to 7 iterations beyond
// in our 'cheesy flatten', so make some worst-case
// space for that too
u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7;
pj.structural_indexes = new u32[max_structures];
pj.n_structural_indexes = 0;
// we have potentially 1 structure per byte of input
// as well as a dummy structure and a root structure
// we also potentially write up to 7 iterations beyond
// in our 'cheesy flatten', so make some worst-case
// space for that too
u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7;
pj.structural_indexes = new u32[max_structures];
#if defined(DEBUG)
const u32 iterations = 1;
const u32 iterations = 1;
#else
const u32 iterations = 1000;
const u32 iterations = 1000;
#endif
vector<double> res;
res.resize(iterations);
vector<double> res;
res.resize(iterations);
#if !defined(__linux__)
#define SQUASH_COUNTERS
#endif
#ifndef SQUASH_COUNTERS
vector<int> evts;
evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
vector<u64> results;
results.resize(evts.size());
unsigned long cy1 = 0, cy2 = 0, cy3 = 0, cy4 = 0;
unsigned long cl1 = 0, cl2 = 0, cl3 = 0, cl4 = 0;
vector<int> evts;
evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
vector<u64> results;
results.resize(evts.size());
unsigned long cy1 = 0, cy2 = 0, cy3 = 0, cy4 = 0;
unsigned long cl1 = 0, cl2 = 0, cl3 = 0, cl4 = 0;
#endif
bool isok = true;
for (u32 i = 0; i < iterations; i++) {
auto start = std::chrono::steady_clock::now();
bool isok = true;
for (u32 i = 0; i < iterations; i++) {
auto start = std::chrono::steady_clock::now();
#ifndef SQUASH_COUNTERS
unified.start();
unified.start();
#endif
isok = find_structural_bits(p.first, p.second, pj);
isok = find_structural_bits(p.first, p.second, pj);
#ifndef SQUASH_COUNTERS
unified.end(results);
cy1 += results[0]; cl1 += results[1];
if(! isok ) break;
unified.start();
unified.end(results);
cy1 += results[0];
cl1 += results[1];
if (!isok)
break;
unified.start();
#endif
isok = flatten_indexes(p.second, pj);
isok = flatten_indexes(p.second, pj);
#ifndef SQUASH_COUNTERS
unified.end(results);
cy2 += results[0]; cl2 += results[1];
if(! isok ) break;
unified.start();
unified.end(results);
cy2 += results[0];
cl2 += results[1];
if (!isok)
break;
unified.start();
#endif
isok = ape_machine(p.first, p.second, pj);
isok = ape_machine(p.first, p.second, pj);
#ifndef SQUASH_COUNTERS
unified.end(results);
cy3 += results[0]; cl3 += results[1];
if(! isok ) break;
unified.start();
unified.end(results);
cy3 += results[0];
cl3 += results[1];
if (!isok)
break;
unified.start();
#endif
isok = shovel_machine(p.first, p.second, pj);
isok = shovel_machine(p.first, p.second, pj);
#ifndef SQUASH_COUNTERS
unified.end(results);
cy4 += results[0]; cl4 += results[1];
unified.end(results);
cy4 += results[0];
cl4 += results[1];
#endif
if(! isok ) break;
auto end = std::chrono::steady_clock::now();
std::chrono::duration<double> secs = end - start;
res[i] = secs.count();
}
if (!isok)
break;
auto end = std::chrono::steady_clock::now();
std::chrono::duration<double> secs = end - start;
res[i] = secs.count();
}
#ifndef SQUASH_COUNTERS
printf("number of bytes %ld number of structural chars %d ratio %.3f\n", p.second, pj.n_structural_indexes,
(double) pj.n_structural_indexes / p.second);
unsigned long total = cy1 + cy2 + cy3 + cy4;
printf("number of bytes %ld number of structural chars %d ratio %.3f\n",
p.second, pj.n_structural_indexes,
(double)pj.n_structural_indexes / p.second);
unsigned long total = cy1 + cy2 + cy3 + cy4;
printf("stage 1 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
cl1, cy1, 100. * cy1 / total, (double) cl1 / cy1);
printf(" stage 1 runs at %.2f cycles per input byte.\n", (double) cy1 / (iterations * p.second));
printf(
"stage 1 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
cl1, cy1, 100. * cy1 / total, (double)cl1 / cy1);
printf(" stage 1 runs at %.2f cycles per input byte.\n",
(double)cy1 / (iterations * p.second));
printf("stage 2 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
cl2, cy2, 100. * cy2 / total, (double) cl2 / cy2);
printf(" stage 2 runs at %.2f cycles per input byte and ", (double) cy2 / (iterations * p.second));
printf("%.2f cycles per structural character.\n", (double) cy2 / (iterations * pj.n_structural_indexes));
printf(
"stage 2 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
cl2, cy2, 100. * cy2 / total, (double)cl2 / cy2);
printf(" stage 2 runs at %.2f cycles per input byte and ",
(double)cy2 / (iterations * p.second));
printf("%.2f cycles per structural character.\n",
(double)cy2 / (iterations * pj.n_structural_indexes));
printf("stage 3 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
cl3, cy3, 100. * cy3 / total, (double) cl3 / cy3);
printf(" stage 3 runs at %.2f cycles per input byte and ", (double) cy3 / (iterations * p.second));
printf("%.2f cycles per structural character.\n", (double) cy3 / (iterations * pj.n_structural_indexes));
printf(
"stage 3 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
cl3, cy3, 100. * cy3 / total, (double)cl3 / cy3);
printf(" stage 3 runs at %.2f cycles per input byte and ",
(double)cy3 / (iterations * p.second));
printf("%.2f cycles per structural character.\n",
(double)cy3 / (iterations * pj.n_structural_indexes));
printf("stage 4 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
cl4, cy4, 100. * cy4 / total, (double) cl4 / cy4);
printf(" stage 4 runs at %.2f cycles per input byte and ", (double) cy4 / (iterations * p.second));
printf("%.2f cycles per structural character.\n", (double) cy4 / (iterations * pj.n_structural_indexes));
printf(
"stage 4 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
cl4, cy4, 100. * cy4 / total, (double)cl4 / cy4);
printf(" stage 4 runs at %.2f cycles per input byte and ",
(double)cy4 / (iterations * p.second));
printf("%.2f cycles per structural character.\n",
(double)cy4 / (iterations * pj.n_structural_indexes));
printf(" all stages: %.2f cycles per input byte.\n", (double) total / (iterations * p.second));
printf(" all stages: %.2f cycles per input byte.\n",
(double)total / (iterations * p.second));
#endif
// colorfuldisplay(pj, p.first);
double min_result = *min_element(res.begin(), res.end());
cout << "Min: " << min_result << " bytes read: " << p.second << " Gigabytes/second: " << (p.second) / (min_result * 1000000000.0) << "\n";
// colorfuldisplay(pj, p.first);
double min_result = *min_element(res.begin(), res.end());
cout << "Min: " << min_result << " bytes read: " << p.second
<< " Gigabytes/second: " << (p.second) / (min_result * 1000000000.0)
<< "\n";
free(pj.structurals);
free(p.first);
delete[] pj.structural_indexes;
delete pj_ptr;
if(! isok ) {
printf(" Parsing failed. \n ");
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
free(pj.structurals);
free(p.first);
delete[] pj.structural_indexes;
delete pj_ptr;
if (!isok) {
printf(" Parsing failed. \n ");
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}

View File

@ -1,5 +1,7 @@
#pragma once
#include <cassert>
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
@ -9,47 +11,49 @@ typedef signed short s16;
typedef signed int s32;
typedef signed long long s64;
#include <x86intrin.h>
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#else
#include <immintrin.h>
#include <x86intrin.h>
#endif
typedef __m128i m128;
typedef __m256i m256;
// Snippets from Hyperscan
// Align to N-byte boundary
#define ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1))
#define ROUNDDOWN_N(a, n) ((a) & ~((n)-1))
#define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n) - 1)) == 0)
#define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0)
#define really_inline inline __attribute__ ((always_inline, unused))
#define never_inline inline __attribute__ ((noinline, unused))
#define really_inline inline __attribute__((always_inline, unused))
#define never_inline inline __attribute__((noinline, unused))
#define UNUSED __attribute__ ((unused))
#define UNUSED __attribute__((unused))
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#define likely(x) __builtin_expect(!!(x), 1)
#endif
#ifndef unlikely
#define unlikely(x) __builtin_expect(!!(x), 0)
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
static inline
u32 ctz64(u64 x) {
assert(x); // behaviour not defined for x == 0
static inline u32 ctz64(u64 x) {
assert(x); // behaviour not defined for x == 0
#if defined(_WIN64)
unsigned long r;
_BitScanForward64(&r, x);
return r;
unsigned long r;
_BitScanForward64(&r, x);
return r;
#elif defined(_WIN32)
unsigned long r;
if (_BitScanForward(&r, (u32)x)) {
return (u32)r;
}
_BitScanForward(&r, x >> 32);
return (u32)(r + 32);
unsigned long r;
if (_BitScanForward(&r, (u32)x)) {
return (u32)r;
}
_BitScanForward(&r, x >> 32);
return (u32)(r + 32);
#else
return (u32)__builtin_ctzll(x);
return (u32)__builtin_ctzll(x);
#endif
}

View File

@ -7,25 +7,10 @@
#include <sstream>
#include <string>
#include "common_defs.h"
// get a corpus; pad out to cache line so we can always use SIMD
//
// Reads the whole file named by filename into a 64-byte-aligned buffer whose
// size is the file length rounded up to a multiple of 64; the bytes past the
// file contents are filled with spaces (0x20).
// Returns the buffer together with the unpadded length. The buffer is
// obtained from posix_memalign, so it is released with free().
// Throws std::runtime_error if the file cannot be opened or the buffer cannot
// be allocated.
std::pair<u8 *, size_t> get_corpus(std::string filename) {
  std::ifstream is(filename, std::ios::binary);
  if (!is) {
    throw std::runtime_error("could not load corpus");
  }
  std::stringstream buffer;
  buffer << is.rdbuf();
  is.close();

  // str() hands back a copy of the entire contents, so take it exactly once.
  const std::string contents = buffer.str();
  const size_t length = contents.size();
  const size_t padded_length = ROUNDUP_N(length, 64);

  char *aligned_buffer;
  if (posix_memalign((void **)&aligned_buffer, 64, padded_length)) {
    throw std::runtime_error("Could not allocate sufficient memory");
  }
  memset(aligned_buffer, 0x20, padded_length);
  memcpy(aligned_buffer, contents.data(), length);
  return std::make_pair((u8 *)aligned_buffer, length);
}
std::pair<u8 *, size_t> get_corpus(std::string filename);
#endif

8
include/jsonminifier.h Normal file
View File

@ -0,0 +1,8 @@
#pragma once
#include <cstddef>
#include <cstdint>
// Minify a JSON document: take input from buf, remove useless whitespace, and
// write the result to out. buf and out can be the same pointer.
// NOTE(review): len is presumably the number of input bytes in buf, and the
// size_t return value presumably the number of bytes written to out; confirm
// both against the implementation.
size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out);

23
include/jsonparser.h Normal file
View File

@ -0,0 +1,23 @@
#pragma once
#include "common_defs.h"
#include "jsonioutil.h"
#include "simdjson_internal.h"
#include "stage1_find_marks.h"
#include "stage2_flatten.h"
#include "stage3_ape_machine.h"
#include "stage4_shovel_machine.h"
// Allocate a ParsedJson structure that can support documents of up to len
// bytes.
// Returns NULL if memory cannot be allocated.
// The structure is meant to be reused from document to document, as needed.
// Use deallocate_ParsedJson to release the memory.
ParsedJson *allocate_ParsedJson(size_t len);
// Deallocate a ParsedJson struct obtained from allocate_ParsedJson.
void deallocate_ParsedJson(ParsedJson *pj_ptr);
// Parse the document found in buf. The ParsedJson passed as pj must be
// preallocated by the caller (see allocate_ParsedJson).
// NOTE(review): len is presumably the length of the document in bytes;
// confirm against the implementation.
// Returns false in case of a failure.
bool json_parse(const u8 *buf, size_t len, ParsedJson &pj);

View File

@ -1,93 +0,0 @@
// https://github.com/WojciechMula/toys/blob/master/000helpers/linux-perf-events.h
#pragma once
#ifdef __linux__
#include <unistd.h> // for syscall
#include <sys/ioctl.h> // for ioctl
#include <asm/unistd.h> // for __NR_perf_event_open
#include <linux/perf_event.h> // for perf event constants
#include <cerrno> // for errno
#include <cstring> // for memset
#include <stdexcept>
#include <vector>
// Wraps a set of Linux perf events opened with the perf_event_open syscall.
// The events are opened as one group so that a single start()/end() pair
// resets, enables, disables and reads all of them. TYPE is written to
// perf_event_attr::type.
template <int TYPE = PERF_TYPE_HARDWARE>
class LinuxEvents {
  int fd; // descriptor returned by the last perf_event_open call
  perf_event_attr attribs;
  int num_events;
  std::vector<uint64_t> temp_result_vec; // destination of read() in end()
  std::vector<uint64_t> ids;             // filled through PERF_EVENT_IOC_ID

public:
  // Opens one event per element of config_vec; the first descriptor is used
  // as the group for the remaining ones.
  // Throws std::runtime_error when perf_event_open fails.
  LinuxEvents(std::vector<int> config_vec) : fd(0) {
    memset(&attribs, 0, sizeof(attribs));
    attribs.type = TYPE;
    attribs.size = sizeof(attribs);
    attribs.disabled = 1;
    attribs.exclude_kernel = 1;
    attribs.exclude_hv = 1;

    attribs.sample_period = 0;
    attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
    const int pid = 0;  // the current process
    const int cpu = -1; // all CPUs
    const unsigned long flags = 0;

    int group = -1; // no group
    num_events = config_vec.size();
    // Give ids one element per event up front: the loop stores into
    // &ids[i++], and with the vector left empty that store was out of bounds.
    ids.resize(config_vec.size());
    u32 i = 0;
    for (auto config : config_vec) {
      attribs.config = config;
      fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags);
      if (fd == -1) {
        report_error("perf_event_open");
      }
      // Result intentionally unchecked; the stored ids are not read again in
      // this class.
      ioctl(fd, PERF_EVENT_IOC_ID, &ids[i++]);
      if (group == -1) {
        group = fd;
      }
    }

    // One leading slot followed by two slots per event.
    temp_result_vec.resize(num_events * 2 + 1);
  }

  // NOTE(review): closes only the descriptor kept in fd, i.e. the last one
  // opened by the constructor.
  ~LinuxEvents() {
    close(fd);
  }

  // Resets, then enables, the counters using the group ioctl flag.
  // Throws std::runtime_error if an ioctl fails.
  really_inline void start() {
    if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
      report_error("ioctl(PERF_EVENT_IOC_RESET)");
    }

    if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
      report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
    }
  }

  // Disables the counters and stores their values in results, which must
  // already contain at least one element per event (it is not resized here).
  // Throws std::runtime_error if the ioctl or the read fails.
  really_inline void end(std::vector<unsigned long long> & results) {
    if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
      report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
    }

    if (read(fd, &temp_result_vec[0], temp_result_vec.size() * 8) == -1) {
      report_error("read");
    }
    // our actual results are in slots 1,3,5, ... of this structure
    // we really should be checking our ids obtained earlier to be safe
    for (u32 i = 1; i < temp_result_vec.size(); i += 2) {
      results[i / 2] = temp_result_vec[i];
    }
  }

private:
  // Raises std::runtime_error with context followed by strerror(errno).
  void report_error(const std::string& context) {
    throw std::runtime_error(context + ": " + std::string(strerror(errno)));
  }
};
#endif

View File

@ -1,32 +1,39 @@
#pragma once
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#else
#include <immintrin.h>
#include <x86intrin.h>
#endif
#include <iostream>
const u32 MAX_DEPTH = 256;
const u32 DEPTH_SAFETY_MARGIN = 32; // should be power-of-2 as we check this with a modulo in our
// hot stage 3 loop
const u32 DEPTH_SAFETY_MARGIN = 32; // should be power-of-2 as we check this
// with a modulo in our hot stage 3 loop
const u32 START_DEPTH = DEPTH_SAFETY_MARGIN;
const u32 REDLINE_DEPTH = MAX_DEPTH - DEPTH_SAFETY_MARGIN;
const size_t MAX_TAPE_ENTRIES = 127*1024;
const size_t MAX_TAPE_ENTRIES = 127 * 1024;
const size_t MAX_TAPE = MAX_DEPTH * MAX_TAPE_ENTRIES;
struct ParsedJson {
u8 * structurals;
u32 n_structural_indexes;
u32 * structural_indexes;
size_t bytecapacity; // indicates how many bits are meant to be supported by
// structurals
u8 *structurals;
u32 n_structural_indexes;
u32 *structural_indexes;
// grossly overprovisioned
u64 tape[MAX_TAPE];
u32 tape_locs[MAX_DEPTH];
u8 string_buf[512*1024];
u8 * current_string_buf_loc;
u8 number_buf[512*1024]; // holds either doubles or longs, really
u8 * current_number_buf_loc;
// grossly overprovisioned
u64 tape[MAX_TAPE];
u32 tape_locs[MAX_DEPTH];
u8 string_buf[512 * 1024];
u8 *current_string_buf_loc;
u8 number_buf[512 * 1024]; // holds either doubles or longs, really
u8 *current_number_buf_loc;
};
// all of this stuff needs to get moved somewhere reasonable
// like our ParsedJson structure
/*
@ -38,37 +45,36 @@ extern u8 number_buf[512*1024]; // holds either doubles or longs, really
extern u8 * current_number_buf_loc;
*/
#ifdef DEBUG
inline void dump256(m256 d, string msg) {
for (u32 i = 0; i < 32; i++) {
std::cout << setw(3) << (int)*(((u8 *)(&d)) + i);
if (!((i+1)%8))
std::cout << "|";
else if (!((i+1)%4))
std::cout << ":";
else
std::cout << " ";
}
std::cout << " " << msg << "\n";
for (u32 i = 0; i < 32; i++) {
std::cout << setw(3) << (int)*(((u8 *)(&d)) + i);
if (!((i + 1) % 8))
std::cout << "|";
else if (!((i + 1) % 4))
std::cout << ":";
else
std::cout << " ";
}
std::cout << " " << msg << "\n";
}
// dump bits low to high
void dumpbits(u64 v, string msg) {
for (u32 i = 0; i < 64; i++) {
std::cout << (((v>>(u64)i) & 0x1ULL) ? "1" : "_");
}
std::cout << " " << msg << "\n";
for (u32 i = 0; i < 64; i++) {
std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_");
}
std::cout << " " << msg << "\n";
}
void dumpbits32(u32 v, string msg) {
for (u32 i = 0; i < 32; i++) {
std::cout << (((v>>(u32)i) & 0x1ULL) ? "1" : "_");
}
std::cout << " " << msg << "\n";
for (u32 i = 0; i < 32; i++) {
std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_");
}
std::cout << " " << msg << "\n";
}
#else
#define dump256(a,b) ;
#define dumpbits(a,b) ;
#define dumpbits32(a,b) ;
#define dump256(a, b) ;
#define dumpbits(a, b) ;
#define dumpbits32(a, b) ;
#endif

35179
include/simdprune_tables.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -3,4 +3,4 @@
#include "common_defs.h"
#include "simdjson_internal.h"
bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & pj);
bool find_structural_bits(const u8 *buf, size_t len, ParsedJson &pj);

View File

@ -3,5 +3,4 @@
#include "common_defs.h"
#include "simdjson_internal.h"
bool flatten_indexes(size_t len, ParsedJson & pj);
bool flatten_indexes(size_t len, ParsedJson &pj);

View File

@ -4,4 +4,4 @@
#include "simdjson_internal.h"
void init_state_machine();
bool ape_machine(const u8 * buf, size_t len, ParsedJson & pj);
bool ape_machine(const u8 *buf, size_t len, ParsedJson &pj);

View File

@ -3,5 +3,4 @@
#include "common_defs.h"
#include "simdjson_internal.h"
bool shovel_machine(const u8 * buf, size_t len, ParsedJson & pj);
bool shovel_machine(const u8 *buf, size_t len, ParsedJson &pj);

182
include/transitions.h Normal file
View File

@ -0,0 +1,182 @@
// automatically generated by generatetransitions.cpp
u32 trans[MAX_STATES][256] = {
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0,
0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 7, 0, 0, 0,
0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 11, 0, 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 11, 0, 0, 0, 0, 0, 0,
0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 11, 0, 0, 0,
0, 0, 0, 2, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 11, 0, 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0,
0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 11, 0, 0, 0,
0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};

View File

@ -0,0 +1,10 @@
# Regenerates include/transitions.h by building and running generatetransitions.
# Default target: make sure the generated header exists and is up to date.
all:../../include/transitions.h
# The header is whatever the generator prints on standard output.
../../include/transitions.h: generatetransitions
./generatetransitions > ../../include/transitions.h
# Build the generator; -I../../include puts the project headers on the include path.
generatetransitions: generatetransitions.cpp
$(CXX) -o generatetransitions generatetransitions.cpp -I../../include
# Removes only the generator binary; the generated header is left in place.
clean:
rm -f generatetransitions

View File

@ -0,0 +1,20 @@
#include "../src/stage3_ape_machine.cpp"
// Emits the contents of the stage-3 transition table as C++ source on standard
// output: one brace-enclosed, comma-separated row of 256 entries per state.
int main() {
  init_state_machine();
  std::cout << "// automatically generated by generatetransitions.cpp"
            << std::endl;
  std::cout << " u32 trans[MAX_STATES][256] = {" << std::endl;
  for (int state = 0; state < MAX_STATES; state++) {
    std::cout << "{";
    for (int byte = 0; byte < 256; byte++) {
      // separator goes in front of every entry except the first
      if (byte != 0)
        std::cout << ",";
      std::cout << trans[state][byte];
    }
    // every row but the last is followed by a comma
    const bool more_rows = state + 1 < MAX_STATES;
    std::cout << (more_rows ? "}," : "}") << std::endl;
  }
  std::cout << "};" << std::endl;
}

20
src/jsonioutil.cpp Normal file
View File

@ -0,0 +1,20 @@
#include "jsonioutil.h"
// Reads the whole file `filename` into a freshly allocated, 64-byte-aligned
// buffer and returns (buffer, file length in bytes).
//
// The allocation is ROUNDUP_N(length, 64) bytes and is pre-filled with 0x20
// (ASCII space), so the bytes between the end of the file and the end of the
// allocation are spaces. The buffer comes from posix_memalign, so the caller
// releases it with free().
//
// Throws std::runtime_error if the file cannot be opened or the buffer cannot
// be allocated.
std::pair<u8 *, size_t> get_corpus(std::string filename) {
  std::ifstream is(filename, std::ios::binary);
  if (!is) {
    throw std::runtime_error("could not load corpus");
  }
  std::stringstream buffer;
  buffer << is.rdbuf();
  // str() returns a copy of the stream contents: take it once and reuse it.
  const std::string contents = buffer.str();
  const size_t length = contents.size();
  const size_t padded_length = ROUNDUP_N(length, 64);
  char *aligned_buffer;
  if (posix_memalign((void **)&aligned_buffer, 64, padded_length)) {
    throw std::runtime_error("Could not allocate sufficient memory");
  }
  memset(aligned_buffer, 0x20, padded_length);
  memcpy(aligned_buffer, contents.data(), length);
  return std::make_pair((u8 *)aligned_buffer, length);
}

275
src/jsonminifier.cpp Normal file
View File

@ -0,0 +1,275 @@
#ifndef __AVX2__
#include <cstdint>
// Per-byte classification table for the scalar minifier. Each byte value c
// owns three consecutive flags, jump_table[3 * c + k]:
//   k = 0: 1 if c is a double quote ("), otherwise 0
//   k = 1: 0 if c is a backslash, otherwise 1
//   k = 2: 0 if c is space, tab, line feed or carriage return, otherwise 1
// The table is only ever read, hence const.
static const uint8_t jump_table[256 * 3] = {
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0,
    1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
};

// Scalar (non-AVX2) JSON minifier.
//
// Copies `howmany` bytes from `bytes` to `out`, dropping space, tab, line feed
// and carriage return bytes that are not inside a string literal, and returns
// the number of bytes kept.
//
// Every input byte is stored to out[pos] before pos is (maybe) advanced, so
// `out` must have room for `howmany` bytes. pos never runs ahead of the read
// index, so `out` may be the same buffer as `bytes`.
size_t jsonminify(const unsigned char *bytes, size_t howmany,
                  unsigned char *out) {
  size_t pos = 0;
  uint8_t quote = 0; // 1 while inside a string literal, otherwise 0
  // Low bit is 0 exactly when the previous byte was a backslash that was not
  // itself escaped, i.e. when the current byte is escaped.
  uint8_t nonescape = 1;
  for (size_t i = 0; i < howmany; i++) {
    const unsigned char c = bytes[i];
    const uint8_t *meta = jump_table + 3 * c;
    // An unescaped double quote toggles the in-string state.
    quote = quote ^ (meta[0] & nonescape);
    // Branch-free copy: always store, but only keep the byte (advance pos) if
    // it is not whitespace or if we are inside a string.
    out[pos] = c;
    pos += meta[2] | quote;
    nonescape = (~nonescape) | (meta[1]);
  }
  return pos;
}
#else
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#else
#include <immintrin.h>
#include <x86intrin.h>
#endif // _MSC_VER
#include "simdprune_tables.h"
#include <cstring>
#ifndef __clang__
// Stand-in used when the compiler is not clang (see the enclosing #ifndef):
// performs two unaligned 128-bit loads and combines them into one 256-bit
// vector, with *__addr_lo in the low half and *__addr_hi in the high half.
static inline __m256i _mm256_loadu2_m128i(__m128i const *__addr_hi,
                                          __m128i const *__addr_lo) {
  const __m128i low_half = _mm_loadu_si128(__addr_lo);
  const __m128i high_half = _mm_loadu_si128(__addr_hi);
  return _mm256_insertf128_si256(_mm256_castsi128_si256(low_half), high_half,
                                 1);
}
// Stand-in used when the compiler is not clang (see the enclosing #ifndef):
// splits a 256-bit vector into two unaligned 128-bit stores. The low half is
// written to __addr_lo first and the high half to __addr_hi second, so when
// the two destinations overlap the high half's bytes win.
static inline void _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo,
                                        __m256i __a) {
  _mm_storeu_si128(__addr_lo, _mm256_castsi256_si128(__a));
  _mm_storeu_si128(__addr_hi, _mm256_extractf128_si256(__a, 1));
}
#endif
// Byte-wise equality test of a 64-byte input (two 32-byte halves) against
// `mask`. Bit i of the result is set when byte i of input_lo equals the
// matching byte of mask; bit 32 + i reports the same for input_hi.
static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,
                                            __m256i mask) {
  // The low half goes through uint32_t so the int result is not sign-extended.
  const uint32_t low_bits =
      (uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(input_lo, mask));
  // Any sign extension here is discarded by the shift below.
  const uint64_t high_bits =
      _mm256_movemask_epi8(_mm256_cmpeq_epi8(input_hi, mask));
  return (uint64_t)low_bits | (high_bits << 32);
}
// take input from buf and remove useless whitespace, input and output can be
// the same
//
// AVX2 version. Copies the first `len` bytes of `buf` to `out`, dropping
// space, tab, line feed and carriage return bytes that are not inside a string
// literal, and returns the number of bytes written. Full 64-byte chunks are
// handled by the main loop; whatever remains is handled once from a
// zero-padded 64-byte stack buffer.
// NOTE(review): the main loop's compaction stores always write whole 16-byte
// lanes, so `out` appears to need room for `len` bytes even when fewer are
// returned - confirm against callers.
size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
// Useful constant masks
const uint64_t even_bits = 0x5555555555555555ULL;
const uint64_t odd_bits = ~even_bits;
// remember where the output started so the byte count can be returned
uint8_t *initout(out);
// state carried from one 64-byte chunk to the next
uint64_t prev_iter_ends_odd_backslash =
0ULL; // either 0 or 1, but a 64-bit value
uint64_t prev_iter_inside_quote = 0ULL; // either all zeros or all ones
size_t idx = 0;
if (len >= 64) {
// idx < avxlen guarantees that a full 64-byte chunk starts at idx
size_t avxlen = len - 63;
for (; idx < avxlen; idx += 64) {
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buf + idx + 0));
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buf + idx + 32));
// Step 1: find the bytes that follow an odd-length run of backslashes
// (odd_ends); those bytes are escaped.
uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi,
_mm256_set1_epi8('\\'));
// a backslash whose preceding byte in this chunk is not a backslash
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
// bit 0 is flipped when the previous chunk ended in an odd-length run
uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
uint64_t even_starts = start_edges & even_start_mask;
uint64_t odd_starts = start_edges & ~even_start_mask;
// adding a run's start bit to the run carries through it; the carry lands
// on the first bit after the run
uint64_t even_carries = bs_bits + even_starts;
uint64_t odd_carries;
// the carry out of bit 63 says whether a run continues into the next chunk
bool iter_ends_odd_backslash = __builtin_uaddll_overflow(
bs_bits, odd_starts, (unsigned long long *)&odd_carries);
odd_carries |= prev_iter_ends_odd_backslash;
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
uint64_t even_carry_ends = even_carries & ~bs_bits;
uint64_t odd_carry_ends = odd_carries & ~bs_bits;
// a run has odd length when its start and end positions differ in parity
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
// Step 2: locate the insides of strings. Escaped quotes are ignored.
uint64_t quote_bits = cmp_mask_against_input_mini(input_lo, input_hi,
_mm256_set1_epi8('"'));
quote_bits = quote_bits & ~odd_ends;
// carry-less multiplication by all-ones yields a running XOR of quote_bits:
// the bits from an opening quote up to (not including) its closing quote
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
quote_mask ^= prev_iter_inside_quote;
// arithmetic shift broadcasts bit 63: all ones if the chunk ends in a string
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);
// Step 3: classify whitespace with two nibble-indexed lookups. A byte's class
// is the AND of the entry picked by its low nibble and the entry picked by
// its high nibble; bits 8 and 16 (mask 0x18) mark space, tab, LF and CR.
const __m256i low_nibble_mask = _mm256_setr_epi8(
// 0 9 a b c d
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
const __m256i high_nibble_mask = _mm256_setr_epi8(
// 0 2 3 5 7
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0,
1, 0, 0, 0, 3, 2, 1, 0, 0);
__m256i whitespace_shufti_mask = _mm256_set1_epi8(0x18);
__m256i v_lo = _mm256_and_si256(
_mm256_shuffle_epi8(low_nibble_mask, input_lo),
_mm256_shuffle_epi8(high_nibble_mask,
_mm256_and_si256(_mm256_srli_epi32(input_lo, 4),
_mm256_set1_epi8(0x7f))));
__m256i v_hi = _mm256_and_si256(
_mm256_shuffle_epi8(low_nibble_mask, input_hi),
_mm256_shuffle_epi8(high_nibble_mask,
_mm256_and_si256(_mm256_srli_epi32(input_hi, 4),
_mm256_set1_epi8(0x7f))));
// the comparison with zero marks the bytes that are NOT whitespace ...
__m256i tmp_ws_lo = _mm256_cmpeq_epi8(
_mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0));
__m256i tmp_ws_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
// ... so the combined mask is inverted to get one bit per whitespace byte
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
// whitespace inside a string must be kept
whitespace &= ~quote_mask;
// Step 4: compact each 16-byte lane. maskN is the removal mask of lane N;
// pop1..pop4 are the running counts of kept bytes after 16/32/48/64 bytes.
int mask1 = whitespace & 0xFFFF;
int mask2 = (whitespace >> 16) & 0xFFFF;
int mask3 = (whitespace >> 32) & 0xFFFF;
int mask4 = (whitespace >> 48) & 0xFFFF;
int pop1 = _popcnt64((~whitespace) & 0xFFFF);
int pop2 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFF));
int pop3 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
int pop4 = _popcnt64((~whitespace));
// mask128_epi8 (presumably from simdprune_tables.h - confirm) is indexed by
// the low 15 bits of a lane's removal mask and supplies the shuffle control
// that packs the kept bytes to the front of the lane
__m256i vmask1 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF));
__m256i vmask2 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF));
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
// each lane is stored right after the kept bytes of the lanes before it, so
// a later store overwrites the unused tail of the previous one
_mm256_storeu2_m128i((__m128i *)(out + pop1), (__m128i *)out, result1);
_mm256_storeu2_m128i((__m128i *)(out + pop3), (__m128i *)(out + pop2),
result2);
out += pop4;
}
}
// we finish off the job... copying and pasting the code is not ideal here,
// but it gets the job done.
if (idx < len) {
// the remaining len - idx bytes (fewer than 64) are processed from a
// zero-padded copy so the 64-byte loads stay inside owned memory
uint8_t buffer[64];
memset(buffer, 0, 64);
memcpy(buffer, buf + idx, len - idx);
__m256i input_lo = _mm256_loadu_si256((const __m256i *)(buffer));
__m256i input_hi = _mm256_loadu_si256((const __m256i *)(buffer + 32));
// same backslash / quote analysis as in the main loop
uint64_t bs_bits =
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\'));
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
uint64_t even_starts = start_edges & even_start_mask;
uint64_t odd_starts = start_edges & ~even_start_mask;
uint64_t even_carries = bs_bits + even_starts;
uint64_t odd_carries;
bool iter_ends_odd_backslash = __builtin_uaddll_overflow(
bs_bits, odd_starts, (unsigned long long *)&odd_carries);
odd_carries |= prev_iter_ends_odd_backslash;
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
uint64_t even_carry_ends = even_carries & ~bs_bits;
uint64_t odd_carry_ends = odd_carries & ~bs_bits;
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
uint64_t quote_bits =
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('"'));
quote_bits = quote_bits & ~odd_ends;
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);
// Whitespace is detected differently from the main loop here: a direct
// comparison with 0x20 plus a 16-entry lookup for the control characters.
__m256i mask_20 = _mm256_set1_epi8(0x20); // c==32
__m256i mask_70 =
_mm256_set1_epi8(0x70); // adding 0x70 does not check low 4-bits
// but moves any value >= 16 above 128
// entries 9, 10 and 13 (tab, LF, CR) of each 16-byte half are 0xFF
__m256i lut_cntrl = _mm256_setr_epi8(
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00,
0x00, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00);
__m256i tmp_ws_lo = _mm256_or_si256(
_mm256_cmpeq_epi8(mask_20, input_lo),
_mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_lo)));
__m256i tmp_ws_hi = _mm256_or_si256(
_mm256_cmpeq_epi8(mask_20, input_hi),
_mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_hi)));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
// here the vectors already mark whitespace, so no inversion is needed
uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32));
whitespace &= ~quote_mask;
// treat the zero padding past the real input as whitespace so it is dropped
if (len - idx < 64) {
whitespace |= UINT64_C(0xFFFFFFFFFFFFFFFF) << (len - idx);
}
int mask1 = whitespace & 0xFFFF;
int mask2 = (whitespace >> 16) & 0xFFFF;
int mask3 = (whitespace >> 32) & 0xFFFF;
int mask4 = (whitespace >> 48) & 0xFFFF;
int pop1 = _popcnt64((~whitespace) & 0xFFFF);
int pop2 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFF));
int pop3 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
int pop4 = _popcnt64((~whitespace));
__m256i vmask1 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask2 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask1 & 0x7FFF));
__m256i vmask2 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + (mask4 & 0x7FFF),
(const __m128i *)mask128_epi8 + (mask3 & 0x7FFF));
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
// compact inside the local buffer (its contents are already in registers),
// then copy only the pop4 kept bytes to the output
_mm256_storeu2_m128i((__m128i *)(buffer + pop1), (__m128i *)buffer,
result1);
_mm256_storeu2_m128i((__m128i *)(buffer + pop3), (__m128i *)(buffer + pop2),
result2);
memcpy(out, buffer, pop4);
out += pop4;
}
return out - initout;
}
#endif

65
src/jsonparser.cpp Normal file
View File

@ -0,0 +1,65 @@
#include "jsonparser.h"

#include <new>
// allocate a ParsedJson structure that can support document
// up to len bytes.
// returns NULL if memory cannot be allocated
// This structure is meant to be reused from document to document, as needed.
// you can use deallocate_ParsedJson to deallocate the memory.
//
// Requests above 0xffffff bytes are rejected with a message on std::cerr.
// The allocations use the non-throwing forms of new so that an out-of-memory
// condition really is reported through the NULL return described above
// instead of a std::bad_alloc exception.
ParsedJson *allocate_ParsedJson(size_t len) {
  if (len > 0xffffff) {
    std::cerr << "Currently only support JSON files < 16MB, requested length: "
              << len << std::endl;
    return NULL;
  }
  ParsedJson *pj_ptr = new (std::nothrow) ParsedJson;
  if (pj_ptr == NULL) {
    std::cerr << "Could not allocate memory for core struct." << std::endl;
    return NULL;
  }
  ParsedJson &pj(*pj_ptr);
  pj.bytecapacity = len;
  // one bit per input byte, with the byte count rounded up to a multiple of 64
  if (posix_memalign((void **)&pj.structurals, 8, ROUNDUP_N(len, 64) / 8)) {
    std::cerr << "Could not allocate memory for structurals" << std::endl;
    delete pj_ptr;
    return NULL;
  }
  pj.n_structural_indexes = 0;
  u32 max_structures = ROUNDUP_N(len, 64) + 2 + 7;
  pj.structural_indexes = new (std::nothrow) u32[max_structures];
  if (pj.structural_indexes == NULL) {
    std::cerr << "Could not allocate memory for structural_indexes"
              << std::endl;
    // structurals came from posix_memalign, so it is released with free(),
    // not delete[]
    free(pj.structurals);
    delete pj_ptr;
    return NULL;
  }
  return pj_ptr;
}
// Releases a ParsedJson created by allocate_ParsedJson together with the
// buffers it owns. Passing NULL is a no-op.
void deallocate_ParsedJson(ParsedJson *pj_ptr) {
  if (pj_ptr == NULL)
    return;
  // allocated with new[] in allocate_ParsedJson
  delete[] pj_ptr->structural_indexes;
  // allocated with posix_memalign in allocate_ParsedJson, so it has to be
  // released with free(); delete[] on it is undefined behavior
  free(pj_ptr->structurals);
  delete pj_ptr;
}
// Parses the len-byte document in buf into the preallocated pj.
// Fails (with a message on std::cerr) when len exceeds pj.bytecapacity.
// Otherwise the four stages run in order and the first one that reports
// failure stops the pipeline; the result is true only if every stage succeeds.
bool json_parse(const u8 *buf, size_t len, ParsedJson &pj) {
  if (len > pj.bytecapacity) {
    std::cerr << "Your ParsedJson cannot support documents that big: " << len
              << std::endl;
    return false;
  }
  // && short-circuits, so a later stage never runs after an earlier failure
  return find_structural_bits(buf, len, pj) && flatten_indexes(len, pj) &&
         ape_machine(buf, len, pj) && shovel_machine(buf, len, pj);
}

View File

@ -1,196 +1,219 @@
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#else
#include <immintrin.h>
#include <x86intrin.h>
#include <assert.h>
#endif
#include <cassert>
#include "common_defs.h"
#include "simdjson_internal.h"
using namespace std;
// a straightforward comparison of a mask against input. 5 uops; would be
// cheaper in AVX512.
// Bit i of the result is set when byte i of input_lo equals the matching byte
// of mask; bit 32 + i reports the same for input_hi.
really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi,
                                         m256 mask) {
  m256 cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
  // go through u32 so the int result is not sign-extended into the high half
  u64 res_0 = (u32)_mm256_movemask_epi8(cmp_res_0);
  m256 cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
  u64 res_1 = _mm256_movemask_epi8(cmp_res_1);
  return res_0 | (res_1 << 32);
}
/*never_inline*/ bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & pj) {
if (len > 0xffffff) {
cerr << "Currently only support JSON files < 16MB\n";
return false;
}
// Useful constant masks
const u64 even_bits = 0x5555555555555555ULL;
const u64 odd_bits = ~even_bits;
/*never_inline*/ bool find_structural_bits(const u8 *buf, size_t len,
ParsedJson &pj) {
if (len > 0xffffff) {
cerr << "Currently only support JSON files < 16MB\n";
return false;
}
// Useful constant masks
const u64 even_bits = 0x5555555555555555ULL;
const u64 odd_bits = ~even_bits;
// for now, just work in 64-byte chunks
// we have padded the input out to 64 byte multiple with the remainder being zeros
// for now, just work in 64-byte chunks
// we have padded the input out to 64 byte multiple with the remainder being
// zeros
// persistent state across loop
u64 prev_iter_ends_odd_backslash = 0ULL; // either 0 or 1, but a 64-bit value
u64 prev_iter_inside_quote = 0ULL; // either all zeros or all ones
u64 prev_iter_ends_pseudo_pred = 0ULL;
// persistent state across loop
u64 prev_iter_ends_odd_backslash = 0ULL; // either 0 or 1, but a 64-bit value
u64 prev_iter_inside_quote = 0ULL; // either all zeros or all ones
u64 prev_iter_ends_pseudo_pred = 0ULL;
for (size_t idx = 0; idx < len; idx+=64) {
__builtin_prefetch(buf + idx + 128);
for (size_t idx = 0; idx < len; idx += 64) {
__builtin_prefetch(buf + idx + 128);
#ifdef DEBUG
cout << "Idx is " << idx << "\n";
for (u32 j = 0; j < 64; j++) {
char c = *(buf+idx+j);
if (isprint(c)) {
cout << c;
} else {
cout << '_';
}
}
cout << "| ... input\n";
#endif
m256 input_lo = _mm256_load_si256((const m256 *)(buf + idx + 0));
m256 input_hi = _mm256_load_si256((const m256 *)(buf + idx + 32));
////////////////////////////////////////////////////////////////////////////////////////////
// Step 1: detect odd sequences of backslashes
////////////////////////////////////////////////////////////////////////////////////////////
u64 bs_bits = cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('\\'));
dumpbits(bs_bits, "backslash bits");
u64 start_edges = bs_bits & ~(bs_bits << 1);
dumpbits(start_edges, "start_edges");
// flip lowest if we have an odd-length run at the end of the prior iteration
u64 even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
u64 even_starts = start_edges & even_start_mask;
u64 odd_starts = start_edges & ~even_start_mask;
dumpbits(even_starts, "even_starts");
dumpbits(odd_starts, "odd_starts");
u64 even_carries = bs_bits + even_starts;
u64 odd_carries;
// must record the carry-out of our odd-carries out of bit 63; this indicates whether the
// sense of any edge going to the next iteration should be flipped
bool iter_ends_odd_backslash = __builtin_uaddll_overflow(bs_bits, odd_starts, &odd_carries);
odd_carries |= prev_iter_ends_odd_backslash; // push in bit zero as a potential end
// if we had an odd-numbered run at the end of
// the previous iteration
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
dumpbits(even_carries, "even_carries");
dumpbits(odd_carries, "odd_carries");
u64 even_carry_ends = even_carries & ~bs_bits;
u64 odd_carry_ends = odd_carries & ~bs_bits;
dumpbits(even_carry_ends, "even_carry_ends");
dumpbits(odd_carry_ends, "odd_carry_ends");
u64 even_start_odd_end = even_carry_ends & odd_bits;
u64 odd_start_even_end = odd_carry_ends & even_bits;
dumpbits(even_start_odd_end, "esoe");
dumpbits(odd_start_even_end, "osee");
u64 odd_ends = even_start_odd_end | odd_start_even_end;
dumpbits(odd_ends, "odd_ends");
////////////////////////////////////////////////////////////////////////////////////////////
// Step 2: detect insides of quote pairs
////////////////////////////////////////////////////////////////////////////////////////////
u64 quote_bits = cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('"'));
quote_bits = quote_bits & ~odd_ends;
dumpbits(quote_bits, "quote_bits");
u64 quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(_mm_set_epi64x(0ULL, quote_bits),
_mm_set1_epi8(0xFF), 0));
quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (u64)((s64)quote_mask>>63);
dumpbits(quote_mask, "quote_mask");
// How do we build up a user traversable data structure
// first, do a 'shufti' to detect structural JSON characters
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
// these go into the first 3 buckets of the comparison (1/2/4)
// we are also interested in the four whitespace characters
// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
// these go into the next 2 buckets of the comparison (8/16)
const m256 low_nibble_mask = _mm256_setr_epi8(
// 0 9 a b c d
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0,
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0
);
const m256 high_nibble_mask = _mm256_setr_epi8(
// 0 2 3 5 7
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0,
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0
);
m256 structural_shufti_mask = _mm256_set1_epi8(0x7);
m256 whitespace_shufti_mask = _mm256_set1_epi8(0x18);
m256 v_lo = _mm256_and_si256(
_mm256_shuffle_epi8(low_nibble_mask, input_lo),
_mm256_shuffle_epi8(high_nibble_mask,
_mm256_and_si256(_mm256_srli_epi32(input_lo, 4), _mm256_set1_epi8(0x7f))));
m256 v_hi = _mm256_and_si256(
_mm256_shuffle_epi8(low_nibble_mask, input_hi),
_mm256_shuffle_epi8(high_nibble_mask,
_mm256_and_si256(_mm256_srli_epi32(input_hi, 4), _mm256_set1_epi8(0x7f))));
m256 tmp_lo = _mm256_cmpeq_epi8(_mm256_and_si256(v_lo, structural_shufti_mask),
_mm256_set1_epi8(0));
m256 tmp_hi = _mm256_cmpeq_epi8(_mm256_and_si256(v_hi, structural_shufti_mask),
_mm256_set1_epi8(0));
u64 structural_res_0 = (u32)_mm256_movemask_epi8(tmp_lo);
u64 structural_res_1 = _mm256_movemask_epi8(tmp_hi);
u64 structurals = ~(structural_res_0 | (structural_res_1 << 32));
// this additional mask and transfer is non-trivially expensive, unfortunately
m256 tmp_ws_lo = _mm256_cmpeq_epi8(_mm256_and_si256(v_lo, whitespace_shufti_mask),
_mm256_set1_epi8(0));
m256 tmp_ws_hi = _mm256_cmpeq_epi8(_mm256_and_si256(v_hi, whitespace_shufti_mask),
_mm256_set1_epi8(0));
u64 ws_res_0 = (u32)_mm256_movemask_epi8(tmp_ws_lo);
u64 ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
u64 whitespace = ~(ws_res_0 | (ws_res_1 << 32));
dumpbits(structurals, "structurals");
dumpbits(whitespace, "whitespace");
// mask off anything inside quotes
structurals &= ~quote_mask;
// add the real quote bits back into our bitmask as well, so we can
// quickly traverse the strings we've spent all this trouble gathering
structurals |= quote_bits;
// Now, establish "pseudo-structural characters". These are non-whitespace characters
// that are (a) outside quotes and (b) have a predecessor that's either whitespace or a structural
// character. This means that subsequent passes will get a chance to encounter the first character
// of every string of non-whitespace and, if we're parsing an atom like true/false/null or a number
// we can stop at the first whitespace or structural character following it.
// a qualified predecessor is something that can happen 1 position before an
// psuedo-structural character
u64 pseudo_pred = structurals | whitespace;
dumpbits(pseudo_pred, "pseudo_pred");
u64 shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
dumpbits(shifted_pseudo_pred, "shifted_pseudo_pred");
prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
u64 pseudo_structurals = shifted_pseudo_pred & (~whitespace) & (~quote_mask);
dumpbits(pseudo_structurals, "pseudo_structurals");
dumpbits(structurals, "final structurals without pseudos");
structurals |= pseudo_structurals;
dumpbits(structurals, "final structurals and pseudo structurals");
// now, we've used our close quotes all we need to. So let's switch them off
// they will be off in the quote mask and on in quote bits.
structurals &= ~(quote_bits & ~quote_mask);
dumpbits(structurals, "final structurals and pseudo structurals after close quote removal");
*(u64 *)(pj.structurals + idx/8) = structurals;
cout << "Idx is " << idx << "\n";
for (u32 j = 0; j < 64; j++) {
char c = *(buf + idx + j);
if (isprint(c)) {
cout << c;
} else {
cout << '_';
}
}
return true;
cout << "| ... input\n";
#endif
m256 input_lo = _mm256_load_si256((const m256 *)(buf + idx + 0));
m256 input_hi = _mm256_load_si256((const m256 *)(buf + idx + 32));
////////////////////////////////////////////////////////////////////////////////////////////
// Step 1: detect odd sequences of backslashes
////////////////////////////////////////////////////////////////////////////////////////////
u64 bs_bits =
cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('\\'));
dumpbits(bs_bits, "backslash bits");
u64 start_edges = bs_bits & ~(bs_bits << 1);
dumpbits(start_edges, "start_edges");
// flip lowest if we have an odd-length run at the end of the prior
// iteration
u64 even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
u64 even_starts = start_edges & even_start_mask;
u64 odd_starts = start_edges & ~even_start_mask;
dumpbits(even_starts, "even_starts");
dumpbits(odd_starts, "odd_starts");
u64 even_carries = bs_bits + even_starts;
u64 odd_carries;
// must record the carry-out of our odd-carries out of bit 63; this
// indicates whether the sense of any edge going to the next iteration
// should be flipped
bool iter_ends_odd_backslash =
__builtin_uaddll_overflow(bs_bits, odd_starts, &odd_carries);
odd_carries |=
prev_iter_ends_odd_backslash; // push in bit zero as a potential end
// if we had an odd-numbered run at the
// end of the previous iteration
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
dumpbits(even_carries, "even_carries");
dumpbits(odd_carries, "odd_carries");
u64 even_carry_ends = even_carries & ~bs_bits;
u64 odd_carry_ends = odd_carries & ~bs_bits;
dumpbits(even_carry_ends, "even_carry_ends");
dumpbits(odd_carry_ends, "odd_carry_ends");
u64 even_start_odd_end = even_carry_ends & odd_bits;
u64 odd_start_even_end = odd_carry_ends & even_bits;
dumpbits(even_start_odd_end, "esoe");
dumpbits(odd_start_even_end, "osee");
u64 odd_ends = even_start_odd_end | odd_start_even_end;
dumpbits(odd_ends, "odd_ends");
////////////////////////////////////////////////////////////////////////////////////////////
// Step 2: detect insides of quote pairs
////////////////////////////////////////////////////////////////////////////////////////////
u64 quote_bits =
cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('"'));
quote_bits = quote_bits & ~odd_ends;
dumpbits(quote_bits, "quote_bits");
u64 quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (u64)((s64)quote_mask >> 63);
dumpbits(quote_mask, "quote_mask");
// How do we build up a user traversable data structure
// first, do a 'shufti' to detect structural JSON characters
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
// these go into the first 3 buckets of the comparison (1/2/4)
// we are also interested in the four whitespace characters
// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
// these go into the next 2 buckets of the comparison (8/16)
const m256 low_nibble_mask = _mm256_setr_epi8(
// 0 9 a b c d
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 0,
0, 0, 8, 12, 1, 2, 9, 0, 0);
const m256 high_nibble_mask = _mm256_setr_epi8(
// 0 2 3 5 7
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0,
1, 0, 0, 0, 3, 2, 1, 0, 0);
m256 structural_shufti_mask = _mm256_set1_epi8(0x7);
m256 whitespace_shufti_mask = _mm256_set1_epi8(0x18);
m256 v_lo = _mm256_and_si256(
_mm256_shuffle_epi8(low_nibble_mask, input_lo),
_mm256_shuffle_epi8(high_nibble_mask,
_mm256_and_si256(_mm256_srli_epi32(input_lo, 4),
_mm256_set1_epi8(0x7f))));
m256 v_hi = _mm256_and_si256(
_mm256_shuffle_epi8(low_nibble_mask, input_hi),
_mm256_shuffle_epi8(high_nibble_mask,
_mm256_and_si256(_mm256_srli_epi32(input_hi, 4),
_mm256_set1_epi8(0x7f))));
m256 tmp_lo = _mm256_cmpeq_epi8(
_mm256_and_si256(v_lo, structural_shufti_mask), _mm256_set1_epi8(0));
m256 tmp_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0));
u64 structural_res_0 = (u32)_mm256_movemask_epi8(tmp_lo);
u64 structural_res_1 = _mm256_movemask_epi8(tmp_hi);
u64 structurals = ~(structural_res_0 | (structural_res_1 << 32));
// this additional mask and transfer is non-trivially expensive,
// unfortunately
m256 tmp_ws_lo = _mm256_cmpeq_epi8(
_mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0));
m256 tmp_ws_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
u64 ws_res_0 = (u32)_mm256_movemask_epi8(tmp_ws_lo);
u64 ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
u64 whitespace = ~(ws_res_0 | (ws_res_1 << 32));
dumpbits(structurals, "structurals");
dumpbits(whitespace, "whitespace");
// mask off anything inside quotes
structurals &= ~quote_mask;
// add the real quote bits back into our bitmask as well, so we can
// quickly traverse the strings we've spent all this trouble gathering
structurals |= quote_bits;
// Now, establish "pseudo-structural characters". These are non-whitespace
// characters that are (a) outside quotes and (b) have a predecessor that's
// either whitespace or a structural character. This means that subsequent
// passes will get a chance to encounter the first character of every string
// of non-whitespace and, if we're parsing an atom like true/false/null or a
// number we can stop at the first whitespace or structural character
// following it.
// a qualified predecessor is something that can happen 1 position before a
// pseudo-structural character
u64 pseudo_pred = structurals | whitespace;
dumpbits(pseudo_pred, "pseudo_pred");
u64 shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
dumpbits(shifted_pseudo_pred, "shifted_pseudo_pred");
prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
u64 pseudo_structurals =
shifted_pseudo_pred & (~whitespace) & (~quote_mask);
dumpbits(pseudo_structurals, "pseudo_structurals");
dumpbits(structurals, "final structurals without pseudos");
structurals |= pseudo_structurals;
dumpbits(structurals, "final structurals and pseudo structurals");
// now, we've used our close quotes all we need to. So let's switch them off
// they will be off in the quote mask and on in quote bits.
structurals &= ~(quote_bits & ~quote_mask);
dumpbits(
structurals,
"final structurals and pseudo structurals after close quote removal");
*(u64 *)(pj.structurals + idx / 8) = structurals;
}
return true;
}

View File

@ -1,5 +1,12 @@
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#else
#include <immintrin.h>
#include <x86intrin.h>
#include <assert.h>
#endif
#include <cassert>
#include "common_defs.h"
#include "simdjson_internal.h"
@ -8,8 +15,9 @@
#define NO_PDEP_WIDTH 8
#endif
#define SET_BIT(i) base_ptr[base+i] = (u32)idx + __builtin_ctzll(s);\
s = s & (s - 1);
// SET_BIT(i): stores (u32)idx plus the index of the lowest set bit of s into
// base_ptr[base + i], then clears that lowest set bit of s.
// Expands to two statements (not wrapped in do/while) and relies on base_ptr,
// base, idx and s being in scope where the macro is used.
#define SET_BIT(i) \
base_ptr[base + i] = (u32)idx + __builtin_ctzll(s); \
s = s & (s - 1);
#define SET_BIT1 SET_BIT(0)
#define SET_BIT2 SET_BIT1 SET_BIT(1)
@ -28,76 +36,84 @@ s = s & (s - 1);
#define SET_BIT15 SET_BIT14 SET_BIT(14)
#define SET_BIT16 SET_BIT15 SET_BIT(15)
#define CALL(macro, ...) macro(__VA_ARGS__)
#define CALL(macro, ...) macro(__VA_ARGS__)
#define SET_BITLOOPN(n) SET_BIT##n
// just transform the bitmask to a big list of 32-bit integers for now
// that's all; the type of character the offset points to will
// tell us exactly what we need to know. Naive but straightforward implementation
bool flatten_indexes(size_t len, ParsedJson & pj) {
u32 * base_ptr = pj.structural_indexes;
u32 base = 0;
// tell us exactly what we need to know. Naive but straightforward
// implementation
bool flatten_indexes(size_t len, ParsedJson &pj) {
u32 *base_ptr = pj.structural_indexes;
u32 base = 0;
#ifdef BUILDHISTOGRAM
uint32_t counters [65];
uint32_t total = 0;
for(int k = 0; k < 66; k++) counters[k] = 0;
for (size_t idx = 0; idx < len; idx+=64) {
u64 s = *(u64 *)(pj.structurals + idx/8);
u32 cnt = __builtin_popcountll(s);
total ++;
counters[cnt]++;
}
printf("\n histogram:\n");
for(int k = 0; k < 66; k++) {
if(counters[k]>0)printf("%10d %10.u %10.3f \n", k, counters[k], counters[k] * 1.0 / total);
}
printf("\n\n");
// Histogram of how many structural bits are set in each 64-bit word of
// pj.structurals. A 64-bit popcount lies in [0, 64], so exactly 65 buckets are
// needed. The loops are bounded by the bucket count: iterating up to 66 would
// write and read one element past the end of the array.
const int num_buckets = 65;
uint32_t counters[num_buckets];
uint32_t total = 0;
for (int k = 0; k < num_buckets; k++)
  counters[k] = 0;
for (size_t idx = 0; idx < len; idx += 64) {
  u64 s = *(u64 *)(pj.structurals + idx / 8);
  u32 cnt = __builtin_popcountll(s);
  total++;
  counters[cnt]++;
}
printf("\n histogram:\n");
// one row per non-empty bucket: bit count, number of words, fraction of words
for (int k = 0; k < num_buckets; k++) {
  if (counters[k] > 0)
    printf("%10d %10.u %10.3f \n", k, counters[k], counters[k] * 1.0 / total);
}
printf("\n\n");
#endif
for (size_t idx = 0; idx < len; idx+=64) {
u64 s = *(u64 *)(pj.structurals + idx/8);
for (size_t idx = 0; idx < len; idx += 64) {
u64 s = *(u64 *)(pj.structurals + idx / 8);
#ifdef SUPPRESS_CHEESY_FLATTEN
while (s) {
base_ptr[base++] = (u32)idx + __builtin_ctzll(s); s &= s - 1ULL;
}
#elif defined(NO_PDEP_PLEASE)
u32 cnt = __builtin_popcountll(s);
u32 next_base = base + cnt;
while (s) {
CALL(SET_BITLOOPN,NO_PDEP_WIDTH)
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
base_ptr[base+i] = (u32)idx + __builtin_ctzll(s);
s = s & (s - 1);
}*/
base += NO_PDEP_WIDTH;
}
base = next_base;
#else
u32 cnt = __builtin_popcountll(s);
u32 next_base = base + cnt;
while (s) {
// spoil the suspense by reducing dependency chains; actually a win even with cost of pdep
u64 s3 = _pdep_u64(~0x7ULL, s); // s3 will have bottom 3 1-bits unset
u64 s5 = _pdep_u64(~0x1fULL, s); // s5 will have bottom 5 1-bits unset
base_ptr[base+0] = (u32)idx + __builtin_ctzll(s); u64 s1 = s & (s - 1ULL);
base_ptr[base+1] = (u32)idx + __builtin_ctzll(s1); u64 s2 = s1 & (s1 - 1ULL);
base_ptr[base+2] = (u32)idx + __builtin_ctzll(s2); //u64 s3 = s2 & (s2 - 1ULL);
base_ptr[base+3] = (u32)idx + __builtin_ctzll(s3); u64 s4 = s3 & (s3 - 1ULL);
base_ptr[base+4] = (u32)idx + __builtin_ctzll(s4); //u64 s5 = s4 & (s4 - 1ULL);
base_ptr[base+5] = (u32)idx + __builtin_ctzll(s5); u64 s6 = s5 & (s5 - 1ULL);
s = s6;
base += 6;
}
base = next_base;
#endif
while (s) {
base_ptr[base++] = (u32)idx + __builtin_ctzll(s);
s &= s - 1ULL;
}
pj.n_structural_indexes = base;
base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array
return true;
#elif defined(NO_PDEP_PLEASE)
u32 cnt = __builtin_popcountll(s);
u32 next_base = base + cnt;
while (s) {
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
base_ptr[base+i] = (u32)idx + __builtin_ctzll(s);
s = s & (s - 1);
}*/
base += NO_PDEP_WIDTH;
}
base = next_base;
#else
u32 cnt = __builtin_popcountll(s);
u32 next_base = base + cnt;
while (s) {
// spoil the suspense by reducing dependency chains; actually a win even
// with cost of pdep
u64 s3 = _pdep_u64(~0x7ULL, s); // s3 will have bottom 3 1-bits unset
u64 s5 = _pdep_u64(~0x1fULL, s); // s5 will have bottom 5 1-bits unset
base_ptr[base + 0] = (u32)idx + __builtin_ctzll(s);
u64 s1 = s & (s - 1ULL);
base_ptr[base + 1] = (u32)idx + __builtin_ctzll(s1);
u64 s2 = s1 & (s1 - 1ULL);
base_ptr[base + 2] =
(u32)idx + __builtin_ctzll(s2); // u64 s3 = s2 & (s2 - 1ULL);
base_ptr[base + 3] = (u32)idx + __builtin_ctzll(s3);
u64 s4 = s3 & (s3 - 1ULL);
base_ptr[base + 4] =
(u32)idx + __builtin_ctzll(s4); // u64 s5 = s4 & (s4 - 1ULL);
base_ptr[base + 5] = (u32)idx + __builtin_ctzll(s5);
u64 s6 = s5 & (s5 - 1ULL);
s = s6;
base += 6;
}
base = next_base;
#endif
}
pj.n_structural_indexes = base;
base_ptr[pj.n_structural_indexes] =
0; // make it safe to dereference one beyond this array
return true;
}

View File

@ -1,5 +1,12 @@
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#else
#include <immintrin.h>
#include <x86intrin.h>
#include <assert.h>
#endif
#include <cassert>
#include <cstring>
#include "common_defs.h"
@ -8,27 +15,30 @@
// the ape machine consists of two parts:
//
// 1) The "state machine", which is a multiple channel per-level state machine
// It is a conventional DFA except in that it 'changes track' on {}[] characters
// It is a conventional DFA except in that it 'changes track' on {}[]
// characters
//
// 2) The "tape machine": this records offsets of various structures as they go by
// These structures are either u32 offsets of other tapes or u32 offsets into our input
// or structures.
// 2) The "tape machine": this records offsets of various structures as they go
// by
// These structures are either u32 offsets of other tapes or u32 offsets into
// our input or structures.
//
// The state machine doesn't record output.
// The tape machine doesn't validate.
//
// The output of the tape machine is meaningful only if the state machine is in non-error states.
// The output of the tape machine is meaningful only if the state machine is in
// non-error states.
// depth adjustment is strictly based on whether we are {[ or }]
// depth adjustment is a pre-increment which, in effect, means that a {[ contained in an object
// is in the level one deeper, while the corresponding }] is at the level
// depth adjustment is a pre-increment which, in effect, means that a {[
// contained in an object is in the level one deeper, while the corresponding }]
// is at the level
// TAPE MACHINE DEFINITIONS
const u32 DEPTH_PLUS_ONE = 0x01000000;
const u32 DEPTH_ZERO = 0x00000000;
const u32 DEPTH_PLUS_ONE = 0x01000000;
const u32 DEPTH_ZERO = 0x00000000;
const u32 DEPTH_MINUS_ONE = 0xff000000;
const u32 WRITE_ZERO = 0x0;
const u32 WRITE_FOUR = 0x1;
@ -43,37 +53,44 @@ inline size_t get_write_size(u32 control) { return control & 0xff; }
// Control word for every possible input byte, indexed by the byte value.
// Rows are laid out 16 entries per hex "decade" (0x00-0x0f, 0x10-0x1f, ...).
// Most bytes map to CDF; '"', '-', the digits and the letters f, n, t map to
// C04; '[' and '{' map to CP4; ']' and '}' map to CM4.
// NOTE(review): CDF/C04/CP4/CM4 are defined outside this view - presumably
// they combine a depth adjustment with a write size (see get_write_size);
// confirm against their definitions.
const u32 char_control[256] = {
// nothing interesting from 0x00-0x1f (two rows of 16)
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF,
// 0x20-0x2f: " is 0x22, - is 0x2d
CDF,CDF,C04,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,C04,CDF,CDF,
CDF, CDF, C04, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, C04, CDF,
CDF,
// 0x30-0x3f: numbers are 0x30-0x39
C04,C04,C04,C04, C04,C04,C04,C04, C04,C04,CDF,CDF, CDF,CDF,CDF,CDF,
C04, C04, C04, C04, C04, C04, C04, C04, C04, C04, CDF, CDF, CDF, CDF, CDF,
CDF,
// nothing interesting from 0x40-0x4f
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF,
// 0x50-0x5f: 0x5b/5d are []
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CP4, CDF,CM4,CDF,CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CP4, CDF, CM4, CDF,
CDF,
// 0x60-0x6f: f is 0x66, n is 0x6e
CDF,CDF,CDF,CDF, CDF,CDF,C04,CDF, CDF,CDF,CDF,CDF, CDF,CDF,C04,CDF,
CDF, CDF, CDF, CDF, CDF, CDF, C04, CDF, CDF, CDF, CDF, CDF, CDF, CDF, C04,
CDF,
// 0x70-0x7f: 0x7b/7d are {}, 0x74 is t
CDF,CDF,CDF,CDF, C04,CDF,CDF,CDF, CDF,CDF,CDF,CP4, CDF,CM4,CDF,CDF,
CDF, CDF, CDF, CDF, C04, CDF, CDF, CDF, CDF, CDF, CDF, CP4, CDF, CM4, CDF,
CDF,
// nothing interesting from 0x80-0xff
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF
};
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF};
// all of this stuff needs to get moved somewhere reasonable
// like our ParsedJson structure
@ -88,7 +105,14 @@ u8 * current_number_buf_loc;
// STATE MACHINE DECLARATIONS
const u32 MAX_STATES = 16;
u32 trans[MAX_STATES][256];
/**
* It is annoying to have to call init_state_machine each time.
* Better to precompute the (small) result into a header file.
*/
// u32 trans[MAX_STATES][256];
#include "transitions.h"
u32 states[MAX_DEPTH];
const int START_STATE = 1;
@ -98,7 +122,8 @@ u32 valid_end_states[MAX_STATES] = {
1, // state 2: we've seen an { - if we left this level it's ok
0, // state 3 is abolished, we shouldn't be in it
0, // state 4 means we saw a string inside an object. We can't end like this!
0, // state 4 means we saw a string inside an object. We can't end like
// this!
0, // similarly state 5 means we saw a string followed by a colon.
0, // state 6 is abolished
1, // it's ok to finish on 7
@ -109,8 +134,10 @@ u32 valid_end_states[MAX_STATES] = {
1, // state 11 is ok to finish on, we just saw a unary inside a array
0, // state 12 we've just seen a comma inside an array - can't finish
0, // state 13 is our weird start state. I think we shouldn't end on it as we need to see something
1, // state 14 is ok. Its an error to see something *more* here but not to be in this state
0, // state 13 is our weird start state. I think we shouldn't end on it as
// we need to see something
1, // state 14 is ok. Its an error to see something *more* here but not to
// be in this state
0, // we don't use state 15
};
@ -129,182 +156,183 @@ const int START_DEPTH_START_STATE = 13;
const int ANYTHING_IS_ERROR_STATE = 14;
// Writes the explicit transitions of the state machine into
// trans[state][input byte]. Only the entries assigned below are touched; every
// other entry keeps whatever value trans already holds.
// Which top-level transitions out of state 13 are installed depends on
// PERMIT_RANDOM_UNARIES_AT_TOP_LEVEL.
// NOTE(review): the trans declaration above is commented out in favour of
// "transitions.h", and a caller marks this call "no longer necessary" -
// confirm whether this function is still built and whether trans is writable.
void init_state_machine() {
// states 10 and 6 eliminated
// states 10 and 6 eliminated
trans[ 1]['{'] = 2;
trans[ 2]['"'] = 4;
trans[ 4][':'] = 5;
// 5->7 on all values ftn0123456789-"
trans[ 7][','] = 8;
trans[ 8]['"'] = 4;
trans[1][(int)'{'] = 2;
trans[2][(int)'"'] = 4;
trans[4][(int)':'] = 5;
// 5->7 on all values ftn0123456789-"
trans[7][(int)','] = 8;
trans[8][(int)'"'] = 4;
trans[ 1]['['] = 9;
// 9->11 on all values ftn0123456789-"
trans[11][','] = 12;
// 12->11 on all values ftn0123456789-"
trans[1][(int)'['] = 9;
// 9->11 on all values ftn0123456789-"
trans[11][(int)','] = 12;
// 12->11 on all values ftn0123456789-"
const char * UNARIES = "}]ftn0123456789-\"";
for (u32 i = 0; i < strlen(UNARIES); i++) {
trans[ 5][(u32)UNARIES[i]] = 7;
trans[ 9][(u32)UNARIES[i]] = 11;
trans[12][(u32)UNARIES[i]] = 11;
const char *UNARIES = "}]ftn0123456789-\"";
for (u32 i = 0; i < strlen(UNARIES); i++) {
trans[5][(u32)UNARIES[i]] = 7;
trans[9][(u32)UNARIES[i]] = 11;
trans[12][(u32)UNARIES[i]] = 11;
#ifdef PERMIT_RANDOM_UNARIES_AT_TOP_LEVEL
// NOTE: if we permit JSON documents that
// contain a single number or string, then we
// allow all the unaries at the top level
trans[13][(u32)UNARIES[i]] = 14;
// NOTE: if we permit JSON documents that
// contain a single number or string, then we
// allow all the unaries at the top level
trans[13][(u32)UNARIES[i]] = 14;
#endif
}
}
#ifndef PERMIT_RANDOM_UNARIES_AT_TOP_LEVEL
// NOTE: if we don't permit JSON documents that
// that contain a single number or string, we must
// make sure we accept the top-level closing braces
// that are delivered to the start depth only
trans[13]['}'] = 14;
trans[13][']'] = 14;
// NOTE: if we don't permit JSON documents that
// that contain a single number or string, we must
// make sure we accept the top-level closing braces
// that are delivered to the start depth only
trans[13][(int)'}'] = 14;
trans[13][(int)']'] = 14;
#endif
// back transitions when new things are open
trans[2]['{'] = 2;
trans[7]['{'] = 2;
trans[9]['{'] = 2;
trans[11]['{'] = 2;
trans[2]['['] = 9;
trans[7]['['] = 9;
trans[9]['['] = 9;
trans[11]['['] = 9;
// back transitions when new things are open
trans[2][(int)'{'] = 2;
trans[7][(int)'{'] = 2;
trans[9][(int)'{'] = 2;
trans[11][(int)'{'] = 2;
trans[2][(int)'['] = 9;
trans[7][(int)'['] = 9;
trans[9][(int)'['] = 9;
trans[11][(int)'['] = 9;
}
bool ape_machine(const u8 * buf, UNUSED size_t len, ParsedJson & pj) {
bool ape_machine(const u8 *buf, UNUSED size_t len, ParsedJson &pj) {
// NOTE - our depth is used by both the tape machine and the state machine
// Further, in production we will set it to a largish value in a generous buffer as a rogue input
// could consist of many {[ characters or many }] characters. We aren't busily checking errors
// (and in fact, a aggressive sequence of [ characters is actually valid input!) so something that
// blows out maximum depth will need to be periodically checked for, as will something that tries
// to set depth very low. If we set our starting depth, say, to 256, we can tolerate 256 bogus close brace
// characters without aggressively going wrong and writing to bad memory
// Note that any specious depth can have a specious tape associated with and all these specious depths
// can share a region of the tape - it's harmless. Since tape is one-way, any movement in a specious tape
// is an error (so we can detect max_depth violations by making sure that specious tape locations haven't
// moved from their starting values)
// NOTE - our depth is used by both the tape machine and the state machine
// Further, in production we will set it to a largish value in a generous
// buffer as a rogue input could consist of many {[ characters or many }]
// characters. We aren't busily checking errors (and in fact, a aggressive
// sequence of [ characters is actually valid input!) so something that blows
// out maximum depth will need to be periodically checked for, as will
// something that tries to set depth very low. If we set our starting depth,
// say, to 256, we can tolerate 256 bogus close brace characters without
// aggressively going wrong and writing to bad memory Note that any specious
// depth can have a specious tape associated with and all these specious
// depths can share a region of the tape - it's harmless. Since tape is
// one-way, any movement in a specious tape is an error (so we can detect
// max_depth violations by making sure that specious tape locations haven't
// moved from their starting values)
u32 depth = START_DEPTH;
u32 depth = START_DEPTH;
for (u32 i = 0; i < MAX_DEPTH; i++) {
pj.tape_locs[i] = i*MAX_TAPE_ENTRIES;
if (i == START_DEPTH) {
states[i] = START_DEPTH_START_STATE;
} else if ((i < START_DEPTH) || (i >= REDLINE_DEPTH)) {
states[i] = ANYTHING_IS_ERROR_STATE;
} else {
states[i] = START_STATE;
}
for (u32 i = 0; i < MAX_DEPTH; i++) {
pj.tape_locs[i] = i * MAX_TAPE_ENTRIES;
if (i == START_DEPTH) {
states[i] = START_DEPTH_START_STATE;
} else if ((i < START_DEPTH) || (i >= REDLINE_DEPTH)) {
states[i] = ANYTHING_IS_ERROR_STATE;
} else {
states[i] = START_STATE;
}
}
pj.current_string_buf_loc = pj.string_buf;
pj.current_number_buf_loc = pj.number_buf;
u32 error_sump = 0;
u32 old_tape_loc = pj.tape_locs[depth]; // need to initialize for first write
u32 next_idx = pj.structural_indexes[0];
u8 next_c = buf[next_idx];
u32 next_control = char_control[next_c];
for (u32 i = 0; i < pj.n_structural_indexes; i++) {
// very periodic safety checking. This does NOT guarantee that we
// haven't been in our dangerous zones above or below our normal
// depths. It ONLY checks to be sure that we don't manage to leave
// these zones and write completely off our tape.
if (!(i % DEPTH_SAFETY_MARGIN)) {
if (depth < START_DEPTH || depth >= REDLINE_DEPTH) {
error_sump |= 1;
break;
}
}
pj.current_string_buf_loc = pj.string_buf;
pj.current_number_buf_loc = pj.number_buf;
u32 idx = next_idx;
u8 c = next_c;
u32 control = next_control;
u32 error_sump = 0;
u32 old_tape_loc = pj.tape_locs[depth]; // need to initialize for first write
next_idx = pj.structural_indexes[i + 1];
next_c = buf[next_idx];
next_control = char_control[next_c];
u32 next_idx = pj.structural_indexes[0];
u8 next_c = buf[next_idx];
u32 next_control = char_control[next_c];
for (u32 i = 0; i < pj.n_structural_indexes; i++) {
// very periodic safety checking. This does NOT guarantee that we
// haven't been in our dangerous zones above or below our normal
// depths. It ONLY checks to be sure that we don't manage to leave
// these zones and write completely off our tape.
if (!(i%DEPTH_SAFETY_MARGIN)) {
if (depth < START_DEPTH || depth >= REDLINE_DEPTH) {
error_sump |= 1;
break;
}
}
u32 idx = next_idx;
u8 c = next_c;
u32 control = next_control;
next_idx = pj.structural_indexes[i+1];
next_c = buf[next_idx];
next_control = char_control[next_c];
// TAPE MACHINE
s8 depth_adjust = get_depth_adjust(control);
u8 write_size = get_write_size(control);
u32 write_val = (depth_adjust != 0) ? old_tape_loc : idx;
depth += depth_adjust;
// TAPE MACHINE
s8 depth_adjust = get_depth_adjust(control);
u8 write_size = get_write_size(control);
u32 write_val = (depth_adjust != 0) ? old_tape_loc : idx;
depth += depth_adjust;
#ifdef DEBUG
cout << "i: " << i << " idx: " << idx << " c " << c << "\n";
cout << "TAPE MACHINE: depth change " << (s32)depth_adjust
<< " write_size " << (u32)write_size << " current_depth: " << depth << "\n";
cout << "i: " << i << " idx: " << idx << " c " << c << "\n";
cout << "TAPE MACHINE: depth change " << (s32)depth_adjust << " write_size "
<< (u32)write_size << " current_depth: " << depth << "\n";
#endif
// STATE MACHINE - hoisted here to fill in during the tape machine's latencies
// STATE MACHINE - hoisted here to fill in during the tape machine's
// latencies
#ifdef DEBUG
cout << "STATE MACHINE: state[depth] pre " << states[depth] << " ";
cout << "STATE MACHINE: state[depth] pre " << states[depth] << " ";
#endif
states[depth] = trans[states[depth]][c];
states[depth] = trans[states[depth]][c];
#ifdef DEBUG
cout << "post " << states[depth] << "\n";
cout << "post " << states[depth] << "\n";
#endif
// TAPE MACHINE, again
pj.tape[pj.tape_locs[depth]] = write_val | (((u64)c) << 56);
old_tape_loc = pj.tape_locs[depth] += write_size;
}
if (depth != START_DEPTH) {
// We haven't returned to our start depth, so our braces can't possibly match
// Note this doesn't exclude the possibility that we have improperly matched { } or [] pairs
return false;
}
for (u32 i = 0; i < MAX_DEPTH; i++) {
if (!valid_end_states[states[i]]) {
#ifdef DEBUG
printf("Invalid ending state: states[%d] == %d\n", states[i]);
#endif
return false;
}
// TAPE MACHINE, again
pj.tape[pj.tape_locs[depth]] = write_val | (((u64)c) << 56);
old_tape_loc = pj.tape_locs[depth] += write_size;
}
if (depth != START_DEPTH) {
// We haven't returned to our start depth, so our braces can't possibly
// match Note this doesn't exclude the possibility that we have improperly
// matched { } or [] pairs
return false;
}
// Reject the document if any depth level finished in a state that
// valid_end_states marks as not acceptable to end in.
for (u32 i = 0; i < MAX_DEPTH; i++) {
  if (!valid_end_states[states[i]]) {
#ifdef DEBUG
    // The format has two conversions, so pass both the depth index and the
    // state found there; the values are unsigned, hence %u with explicit casts.
    printf("Invalid ending state: states[%u] == %u\n", (unsigned)i,
           (unsigned)states[i]);
#endif
    return false;
  }
}
#define DUMP_TAPES
#ifdef DEBUG
for (u32 i = 0; i < MAX_DEPTH; i++) {
u32 start_loc = i*MAX_TAPE_ENTRIES;
cout << " tape section i " << i;
if (i == START_DEPTH) {
cout << " (START) ";
} else if ((i < START_DEPTH) || (i >= REDLINE_DEPTH)) {
cout << " (REDLINE) ";
} else {
cout << " (NORMAL) ";
}
for (u32 i = 0; i < MAX_DEPTH; i++) {
u32 start_loc = i * MAX_TAPE_ENTRIES;
cout << " tape section i " << i;
if (i == START_DEPTH) {
cout << " (START) ";
} else if ((i < START_DEPTH) || (i >= REDLINE_DEPTH)) {
cout << " (REDLINE) ";
} else {
cout << " (NORMAL) ";
}
cout << " from: " << start_loc
<< " to: " << tape_locs[i] << " "
<< " size: " << (tape_locs[i]-start_loc) << "\n";
cout << " state: " << states[i] << "\n";
cout << " from: " << start_loc << " to: " << tape_locs[i] << " "
<< " size: " << (tape_locs[i] - start_loc) << "\n";
cout << " state: " << states[i] << "\n";
#ifdef DUMP_TAPES
for (u32 j = start_loc; j < tape_locs[i]; j++) {
if (tape[j]) {
cout << "j: " << j << " tape[j] char " << (char)(tape[j]>>56)
<< " tape[j][0..55]: " << (tape[j]&0xffffffffffffffULL ) << "\n";
}
}
#endif
for (u32 j = start_loc; j < tape_locs[i]; j++) {
if (tape[j]) {
cout << "j: " << j << " tape[j] char " << (char)(tape[j] >> 56)
<< " tape[j][0..55]: " << (tape[j] & 0xffffffffffffffULL) << "\n";
}
}
#endif
if (error_sump) {
return false;
}
return true;
}
#endif
if (error_sump) {
return false;
}
return true;
}

File diff suppressed because it is too large Load Diff

View File

@ -6,13 +6,7 @@
#include <stdio.h>
#include <stdlib.h>
#include "common_defs.h"
#include "jsonioutil.h"
#include "simdjson_internal.h"
#include "stage1_find_marks.h"
#include "stage2_flatten.h"
#include "stage3_ape_machine.h"
#include "stage4_shovel_machine.h"
#include "jsonparser.h"
/**
* Does the file filename end with the given extension?
@ -29,7 +23,7 @@ bool startsWith(const char *pre, const char *str) {
bool validate(const char *dirname) {
bool everythingfine = true;
init_state_machine(); // to be safe
// init_state_machine(); // no longer necessary
const char *extension = ".json";
size_t dirlen = strlen(dirname);
struct dirent **entry_list;
@ -58,26 +52,13 @@ bool validate(const char *dirname) {
}
std::pair<u8 *, size_t> p = get_corpus(fullpath);
// terrible hack but just to get it working
ParsedJson *pj_ptr = new ParsedJson;
ParsedJson &pj(*pj_ptr);
if (posix_memalign((void **)&pj.structurals, 8,
ROUNDUP_N(p.second, 64) / 8)) {
std::cerr << "Could not allocate memory" << std::endl;
ParsedJson *pj_ptr = allocate_ParsedJson(p.second);
if(pj_ptr == NULL) {
std::cerr<< "can't allocate memory"<<std::endl;
return false;
};
pj.n_structural_indexes = 0;
u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7;
pj.structural_indexes = new u32[max_structures];
bool isok = find_structural_bits(p.first, p.second, pj);
if (isok) {
isok = flatten_indexes(p.second, pj);
}
if (isok) {
isok = ape_machine(p.first, p.second, pj);
}
if (isok) {
isok = shovel_machine(p.first, p.second, pj);
}
ParsedJson &pj(*pj_ptr);
bool isok = json_parse(p.first, p.second, pj);
if (startsWith("pass", name)) {
if (!isok) {
printf("warning: file %s should pass but it fails.\n", name);
@ -92,10 +73,9 @@ bool validate(const char *dirname) {
printf("File %s %s.\n", name,
isok ? " is valid JSON " : " is not valid JSON");
}
free(pj.structurals);
free(p.first);
delete[] pj.structural_indexes;
free(fullpath);
deallocate_ParsedJson(pj_ptr);
}
}
for (int i = 0; i < c; ++i)