Lots and lots of cleaning.
This commit is contained in:
parent
5fae7b2100
commit
a43b0772e1
|
@ -21,7 +21,7 @@ template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
|
||||||
std::vector<uint64_t> ids;
|
std::vector<uint64_t> ids;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
LinuxEvents(std::vector<int> config_vec) : fd(0) {
|
explicit LinuxEvents(std::vector<int> config_vec) : fd(0) {
|
||||||
memset(&attribs, 0, sizeof(attribs));
|
memset(&attribs, 0, sizeof(attribs));
|
||||||
attribs.type = TYPE;
|
attribs.type = TYPE;
|
||||||
attribs.size = sizeof(attribs);
|
attribs.size = sizeof(attribs);
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
#include <unistd.h>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
#include "benchmark.h"
|
#include "benchmark.h"
|
||||||
|
@ -13,6 +14,7 @@
|
||||||
#include "rapidjson/writer.h"
|
#include "rapidjson/writer.h"
|
||||||
#include "sajson.h"
|
#include "sajson.h"
|
||||||
|
|
||||||
|
|
||||||
using namespace rapidjson;
|
using namespace rapidjson;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
@ -43,17 +45,29 @@ std::string rapidstringme(char *json) {
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
if (argc < 2) {
|
int c;
|
||||||
cerr << "Usage: " << argv[0] << " <jsonfile>\n";
|
bool verbose = false;
|
||||||
cerr << "Or " << argv[0] << " -v <jsonfile>\n";
|
while ((c = getopt (argc, argv, "v")) != -1)
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
case 'v':
|
||||||
|
verbose = true;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
abort ();
|
||||||
|
}
|
||||||
|
if (optind >= argc) {
|
||||||
|
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
bool verbose = false;
|
const char * filename = argv[optind];
|
||||||
if (argc > 2) {
|
pair<u8 *, size_t> p;
|
||||||
if (strcmp(argv[1], "-v"))
|
try {
|
||||||
verbose = true;
|
p = get_corpus(filename);
|
||||||
|
} catch (const std::exception& e) { // caught by reference to base
|
||||||
|
std::cout << "Could not load the file " << filename << std::endl;
|
||||||
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
pair<u8 *, size_t> p = get_corpus(argv[argc - 1]);
|
|
||||||
if (verbose) {
|
if (verbose) {
|
||||||
std::cout << "Input has ";
|
std::cout << "Input has ";
|
||||||
if (p.second > 1024 * 1024)
|
if (p.second > 1024 * 1024)
|
||||||
|
|
|
@ -31,79 +31,14 @@
|
||||||
#include "jsonparser/stage34_unified.h"
|
#include "jsonparser/stage34_unified.h"
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
// https://stackoverflow.com/questions/2616906/how-do-i-output-coloured-text-to-a-linux-terminal
|
|
||||||
namespace Color {
|
|
||||||
enum Code {
|
|
||||||
FG_DEFAULT = 39,
|
|
||||||
FG_BLACK = 30,
|
|
||||||
FG_RED = 31,
|
|
||||||
FG_GREEN = 32,
|
|
||||||
FG_YELLOW = 33,
|
|
||||||
FG_BLUE = 34,
|
|
||||||
FG_MAGENTA = 35,
|
|
||||||
FG_CYAN = 36,
|
|
||||||
FG_LIGHT_GRAY = 37,
|
|
||||||
FG_DARK_GRAY = 90,
|
|
||||||
FG_LIGHT_RED = 91,
|
|
||||||
FG_LIGHT_GREEN = 92,
|
|
||||||
FG_LIGHT_YELLOW = 93,
|
|
||||||
FG_LIGHT_BLUE = 94,
|
|
||||||
FG_LIGHT_MAGENTA = 95,
|
|
||||||
FG_LIGHT_CYAN = 96,
|
|
||||||
FG_WHITE = 97,
|
|
||||||
BG_RED = 41,
|
|
||||||
BG_GREEN = 42,
|
|
||||||
BG_BLUE = 44,
|
|
||||||
BG_DEFAULT = 49
|
|
||||||
};
|
|
||||||
class Modifier {
|
|
||||||
Code code;
|
|
||||||
|
|
||||||
public:
|
|
||||||
Modifier(Code pCode) : code(pCode) {}
|
|
||||||
friend std::ostream &operator<<(std::ostream &os, const Modifier &mod) {
|
|
||||||
return os << "\033[" << mod.code << "m";
|
|
||||||
}
|
|
||||||
};
|
|
||||||
} // namespace Color
|
|
||||||
|
|
||||||
void colorfuldisplay(ParsedJson &pj, const u8 *buf) {
|
|
||||||
Color::Modifier greenfg(Color::FG_GREEN);
|
|
||||||
Color::Modifier yellowfg(Color::FG_YELLOW);
|
|
||||||
Color::Modifier deffg(Color::FG_DEFAULT);
|
|
||||||
size_t i = 0;
|
|
||||||
// skip initial fluff
|
|
||||||
while ((i + 1 < pj.n_structural_indexes) &&
|
|
||||||
(pj.structural_indexes[i] == pj.structural_indexes[i + 1])) {
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
for (; i < pj.n_structural_indexes; i++) {
|
|
||||||
u32 idx = pj.structural_indexes[i];
|
|
||||||
u8 c = buf[idx];
|
|
||||||
if (((c & 0xdf) == 0x5b)) { // meaning 7b or 5b, { or [
|
|
||||||
std::cout << greenfg << buf[idx] << deffg;
|
|
||||||
} else if (((c & 0xdf) == 0x5d)) { // meaning 7d or 5d, } or ]
|
|
||||||
std::cout << greenfg << buf[idx] << deffg;
|
|
||||||
} else {
|
|
||||||
std::cout << yellowfg << buf[idx] << deffg;
|
|
||||||
}
|
|
||||||
if (i + 1 < pj.n_structural_indexes) {
|
|
||||||
u32 nextidx = pj.structural_indexes[i + 1];
|
|
||||||
for (u32 pos = idx + 1; pos < nextidx; pos++) {
|
|
||||||
std::cout << buf[pos];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
bool verbose = false;
|
bool verbose = false;
|
||||||
bool dump = false;
|
bool dump = false;
|
||||||
|
bool forceoneiteration = false;
|
||||||
|
|
||||||
int c;
|
int c;
|
||||||
|
|
||||||
while ((c = getopt (argc, argv, "vd")) != -1)
|
while ((c = getopt (argc, argv, "1vd")) != -1)
|
||||||
switch (c)
|
switch (c)
|
||||||
{
|
{
|
||||||
case 'v':
|
case 'v':
|
||||||
|
@ -112,6 +47,9 @@ int main(int argc, char *argv[]) {
|
||||||
case 'd':
|
case 'd':
|
||||||
dump = true;
|
dump = true;
|
||||||
break;
|
break;
|
||||||
|
case '1':
|
||||||
|
forceoneiteration = true;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
abort ();
|
abort ();
|
||||||
}
|
}
|
||||||
|
@ -124,7 +62,13 @@ int main(int argc, char *argv[]) {
|
||||||
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
|
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
|
||||||
}
|
}
|
||||||
if(verbose) cout << "[verbose] loading " << filename << endl;
|
if(verbose) cout << "[verbose] loading " << filename << endl;
|
||||||
pair<u8 *, size_t> p = get_corpus(filename);
|
pair<u8 *, size_t> p;
|
||||||
|
try {
|
||||||
|
p = get_corpus(filename);
|
||||||
|
} catch (const std::exception& e) { // caught by reference to base
|
||||||
|
std::cout << "Could not load the file " << filename << std::endl;
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
if(verbose) cout << "[verbose] loaded " << filename << " ("<< p.second << " bytes)" << endl;
|
if(verbose) cout << "[verbose] loaded " << filename << " ("<< p.second << " bytes)" << endl;
|
||||||
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
|
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
|
||||||
ParsedJson &pj(*pj_ptr);
|
ParsedJson &pj(*pj_ptr);
|
||||||
|
@ -133,7 +77,7 @@ int main(int argc, char *argv[]) {
|
||||||
#if defined(DEBUG)
|
#if defined(DEBUG)
|
||||||
const u32 iterations = 1;
|
const u32 iterations = 1;
|
||||||
#else
|
#else
|
||||||
const u32 iterations = p.second < 1 * 1000 * 1000? 1000 : 10;
|
const u32 iterations = forceoneiteration ? 1 : ( p.second < 1 * 1000 * 1000? 1000 : 10);
|
||||||
#endif
|
#endif
|
||||||
vector<double> res;
|
vector<double> res;
|
||||||
res.resize(iterations);
|
res.resize(iterations);
|
||||||
|
@ -174,7 +118,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
unified.start();
|
unified.start();
|
||||||
#endif
|
#endif
|
||||||
isok = flatten_indexes(p.second, pj);
|
isok = isok && flatten_indexes(p.second, pj);
|
||||||
#ifndef SQUASH_COUNTERS
|
#ifndef SQUASH_COUNTERS
|
||||||
unified.end(results);
|
unified.end(results);
|
||||||
cy2 += results[0];
|
cy2 += results[0];
|
||||||
|
@ -187,7 +131,7 @@ int main(int argc, char *argv[]) {
|
||||||
unified.start();
|
unified.start();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
isok = unified_machine(p.first, p.second, pj);
|
isok = isok && unified_machine(p.first, p.second, pj);
|
||||||
#ifndef SQUASH_COUNTERS
|
#ifndef SQUASH_COUNTERS
|
||||||
unified.end(results);
|
unified.end(results);
|
||||||
cy3 += results[0];
|
cy3 += results[0];
|
||||||
|
|
|
@ -31,7 +31,6 @@ void on_json_error( void *, const fastjson::ErrorContext& ec) {
|
||||||
bool fastjson_parse(const char *input) {
|
bool fastjson_parse(const char *input) {
|
||||||
fastjson::Token token;
|
fastjson::Token token;
|
||||||
fastjson::dom::Chunk chunk;
|
fastjson::dom::Chunk chunk;
|
||||||
std::string error_message;
|
|
||||||
return fastjson::dom::parse_string(input, &token, &chunk, 0, &on_json_error, NULL);
|
return fastjson::dom::parse_string(input, &token, &chunk, 0, &on_json_error, NULL);
|
||||||
}
|
}
|
||||||
// end of fastjson stuff
|
// end of fastjson stuff
|
||||||
|
@ -62,7 +61,14 @@ int main(int argc, char *argv[]) {
|
||||||
if(optind + 1 < argc) {
|
if(optind + 1 < argc) {
|
||||||
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
|
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
|
||||||
}
|
}
|
||||||
pair<u8 *, size_t> p = get_corpus(filename);
|
pair<u8 *, size_t> p;
|
||||||
|
try {
|
||||||
|
p = get_corpus(filename);
|
||||||
|
} catch (const std::exception& e) { // caught by reference to base
|
||||||
|
std::cout << "Could not load the file " << filename << std::endl;
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
if (verbose) {
|
if (verbose) {
|
||||||
std::cout << "Input has ";
|
std::cout << "Input has ";
|
||||||
if (p.second > 1024 * 1024)
|
if (p.second > 1024 * 1024)
|
||||||
|
|
|
@ -20,6 +20,13 @@ char * allocate_aligned_buffer(size_t length);
|
||||||
// first element of the pair is a string (null terminated)
|
// first element of the pair is a string (null terminated)
|
||||||
// whereas the second element is the length.
|
// whereas the second element is the length.
|
||||||
// caller is responsible to free (free std::pair<u8 *, size_t>.first)
|
// caller is responsible to free (free std::pair<u8 *, size_t>.first)
|
||||||
|
//
|
||||||
|
// throws an exception if the file cannot be opened, use try/catch
|
||||||
|
// try {
|
||||||
|
// p = get_corpus(filename);
|
||||||
|
// } catch (const std::exception& e) {
|
||||||
|
// std::cout << "Could not load the file " << filename << std::endl;
|
||||||
|
// }
|
||||||
std::pair<u8 *, size_t> get_corpus(std::string filename);
|
std::pair<u8 *, size_t> get_corpus(std::string filename);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -128,7 +128,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
|
||||||
const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1);
|
const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1);
|
||||||
const __m128i mul_1_10000 =
|
const __m128i mul_1_10000 =
|
||||||
_mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1);
|
_mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1);
|
||||||
const __m128i input = _mm_sub_epi8(_mm_loadu_si128((__m128i *)chars), ascii0);
|
const __m128i input = _mm_sub_epi8(_mm_loadu_si128((const __m128i *)chars), ascii0);
|
||||||
const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10);
|
const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10);
|
||||||
const __m128i t2 = _mm_madd_epi16(t1, mul_1_100);
|
const __m128i t2 = _mm_madd_epi16(t1, mul_1_100);
|
||||||
const __m128i t3 = _mm_packus_epi32(t2, t2);
|
const __m128i t3 = _mm_packus_epi32(t2, t2);
|
||||||
|
@ -149,7 +149,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
|
||||||
//
|
//
|
||||||
static never_inline bool
|
static never_inline bool
|
||||||
parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
|
parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
|
||||||
ParsedJson &pj, const u32 depth, const u32 offset,
|
ParsedJson &pj, UNUSED const u32 depth, const u32 offset,
|
||||||
UNUSED bool found_zero, bool found_minus) {
|
UNUSED bool found_zero, bool found_minus) {
|
||||||
const char *p = (const char *)(buf + offset);
|
const char *p = (const char *)(buf + offset);
|
||||||
|
|
||||||
|
@ -193,7 +193,6 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
|
||||||
}
|
}
|
||||||
exponent = firstafterperiod - p;
|
exponent = firstafterperiod - p;
|
||||||
}
|
}
|
||||||
int64_t expnumber = 0; // exponential part
|
|
||||||
if (('e' == *p) || ('E' == *p)) {
|
if (('e' == *p) || ('E' == *p)) {
|
||||||
++p;
|
++p;
|
||||||
bool negexp = false;
|
bool negexp = false;
|
||||||
|
@ -210,7 +209,7 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
unsigned char digit = *p - '0';
|
unsigned char digit = *p - '0';
|
||||||
expnumber = digit;
|
int64_t expnumber = digit; // exponential part
|
||||||
p++;
|
p++;
|
||||||
if (is_integer(*p)) {
|
if (is_integer(*p)) {
|
||||||
digit = *p - '0';
|
digit = *p - '0';
|
||||||
|
@ -270,7 +269,7 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
|
||||||
//
|
//
|
||||||
static never_inline bool parse_large_integer(const u8 *const buf,
|
static never_inline bool parse_large_integer(const u8 *const buf,
|
||||||
UNUSED size_t len, ParsedJson &pj,
|
UNUSED size_t len, ParsedJson &pj,
|
||||||
const u32 depth, const u32 offset,
|
UNUSED const u32 depth, const u32 offset,
|
||||||
UNUSED bool found_zero,
|
UNUSED bool found_zero,
|
||||||
bool found_minus) {
|
bool found_minus) {
|
||||||
const char *p = (const char *)(buf + offset);
|
const char *p = (const char *)(buf + offset);
|
||||||
|
@ -340,10 +339,12 @@ static never_inline bool parse_large_integer(const u8 *const buf,
|
||||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// parse the number at buf + offset
|
// parse the number at buf + offset
|
||||||
// define JSON_TEST_NUMBERS for unit testing
|
// define JSON_TEST_NUMBERS for unit testing
|
||||||
static really_inline bool parse_number(const u8 *const buf, UNUSED size_t len,
|
static really_inline bool parse_number(const u8 *const buf, UNUSED size_t len,
|
||||||
ParsedJson &pj, const u32 depth,
|
ParsedJson &pj, UNUSED const u32 depth,
|
||||||
const u32 offset, UNUSED bool found_zero,
|
const u32 offset, UNUSED bool found_zero,
|
||||||
bool found_minus) {
|
bool found_minus) {
|
||||||
const char *p = (const char *)(buf + offset);
|
const char *p = (const char *)(buf + offset);
|
||||||
|
|
|
@ -105,14 +105,14 @@ public:
|
||||||
|
|
||||||
|
|
||||||
void write_tape_s64(s64 i) {
|
void write_tape_s64(s64 i) {
|
||||||
*((s64 *)current_number_buf_loc) = i;
|
*((s64 *)current_number_buf_loc) = i;// safe because array will be 8-byte aligned, could use memcpy
|
||||||
current_number_buf_loc += 8;
|
current_number_buf_loc += sizeof(s64);
|
||||||
write_tape(current_number_buf_loc - number_buf, 'l');
|
write_tape(current_number_buf_loc - number_buf, 'l');
|
||||||
}
|
}
|
||||||
|
|
||||||
void write_tape_double(double d) {
|
void write_tape_double(double d) {
|
||||||
*((double *)current_number_buf_loc) = d;
|
*((double *)current_number_buf_loc) = d;// safe because array will be 8-byte aligned, could use memcpy
|
||||||
current_number_buf_loc += 8;
|
current_number_buf_loc += sizeof(double);
|
||||||
write_tape(current_number_buf_loc - number_buf, 'd');
|
write_tape(current_number_buf_loc - number_buf, 'd');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -137,7 +137,7 @@ public:
|
||||||
u32 scope_header; // the start of our current scope that contains our current location
|
u32 scope_header; // the start of our current scope that contains our current location
|
||||||
u32 location; // our current location on a tape
|
u32 location; // our current location on a tape
|
||||||
|
|
||||||
ParsedJsonHandle(ParsedJson & pj_) : pj(pj_), depth(0), scope_header(0), location(0) {}
|
explicit ParsedJsonHandle(ParsedJson & pj_) : pj(pj_), depth(0), scope_header(0), location(0) {}
|
||||||
// OK with default copy constructor as the way to clone the POD structure
|
// OK with default copy constructor as the way to clone the POD structure
|
||||||
|
|
||||||
// some placeholder navigation. Will convert over to a more native C++-ish way of doing
|
// some placeholder navigation. Will convert over to a more native C++-ish way of doing
|
||||||
|
@ -167,7 +167,7 @@ public:
|
||||||
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
inline void dump256(m256 d, std::string msg) {
|
inline void dump256(m256 d, const std::string msg) {
|
||||||
for (u32 i = 0; i < 32; i++) {
|
for (u32 i = 0; i < 32; i++) {
|
||||||
std::cout << std::setw(3) << (int)*(((u8 *)(&d)) + i);
|
std::cout << std::setw(3) << (int)*(((u8 *)(&d)) + i);
|
||||||
if (!((i + 1) % 8))
|
if (!((i + 1) % 8))
|
||||||
|
@ -181,14 +181,14 @@ inline void dump256(m256 d, std::string msg) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// dump bits low to high
|
// dump bits low to high
|
||||||
inline void dumpbits(u64 v, std::string msg) {
|
inline void dumpbits(u64 v, const std::string msg) {
|
||||||
for (u32 i = 0; i < 64; i++) {
|
for (u32 i = 0; i < 64; i++) {
|
||||||
std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_");
|
std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_");
|
||||||
}
|
}
|
||||||
std::cout << " " << msg << "\n";
|
std::cout << " " << msg << "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void dumpbits32(u32 v, std::string msg) {
|
inline void dumpbits32(u32 v, const std::string msg) {
|
||||||
for (u32 i = 0; i < 32; i++) {
|
for (u32 i = 0; i < 32; i++) {
|
||||||
std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_");
|
std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_");
|
||||||
}
|
}
|
||||||
|
@ -201,14 +201,14 @@ inline void dumpbits32(u32 v, std::string msg) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// dump bits low to high
|
// dump bits low to high
|
||||||
inline void dumpbits_always(u64 v, std::string msg) {
|
inline void dumpbits_always(u64 v, const std::string msg) {
|
||||||
for (u32 i = 0; i < 64; i++) {
|
for (u32 i = 0; i < 64; i++) {
|
||||||
std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_");
|
std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_");
|
||||||
}
|
}
|
||||||
std::cout << " " << msg << "\n";
|
std::cout << " " << msg << "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void dumpbits32_always(u32 v, std::string msg) {
|
inline void dumpbits32_always(u32 v, const std::string msg) {
|
||||||
for (u32 i = 0; i < 32; i++) {
|
for (u32 i = 0; i < 32; i++) {
|
||||||
std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_");
|
std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_");
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,7 +58,7 @@ really_inline bool handle_unicode_codepoint(const u8 **src_ptr, u8 **dst_ptr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
|
really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
|
||||||
ParsedJson &pj, u32 depth, u32 offset) {
|
ParsedJson &pj, UNUSED const u32 depth, u32 offset) {
|
||||||
using namespace std;
|
using namespace std;
|
||||||
const u8 *src = &buf[offset + 1]; // we know that buf at offset is a "
|
const u8 *src = &buf[offset + 1]; // we know that buf at offset is a "
|
||||||
u8 *dst = pj.current_string_buf_loc;
|
u8 *dst = pj.current_string_buf_loc;
|
||||||
|
|
|
@ -137,7 +137,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
||||||
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
||||||
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
||||||
quote_mask ^= prev_iter_inside_quote;
|
quote_mask ^= prev_iter_inside_quote;
|
||||||
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);
|
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// might be undefined behavior
|
||||||
const __m256i low_nibble_mask = _mm256_setr_epi8(
|
const __m256i low_nibble_mask = _mm256_setr_epi8(
|
||||||
// 0 9 a b c d
|
// 0 9 a b c d
|
||||||
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
|
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
|
||||||
|
@ -220,7 +220,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
||||||
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
||||||
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
||||||
quote_mask ^= prev_iter_inside_quote;
|
quote_mask ^= prev_iter_inside_quote;
|
||||||
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);
|
// prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// we don't need this anymore
|
||||||
|
|
||||||
__m256i mask_20 = _mm256_set1_epi8(0x20); // c==32
|
__m256i mask_20 = _mm256_set1_epi8(0x20); // c==32
|
||||||
__m256i mask_70 =
|
__m256i mask_70 =
|
||||||
|
|
|
@ -10,9 +10,9 @@
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
#include "jsonparser/common_defs.h"
|
#include "jsonparser/common_defs.h"
|
||||||
#include "jsonparser/simdjson_internal.h"
|
|
||||||
#include "jsonparser/jsoncharutils.h"
|
#include "jsonparser/jsoncharutils.h"
|
||||||
#include "jsonparser/numberparsing.h"
|
#include "jsonparser/numberparsing.h"
|
||||||
|
#include "jsonparser/simdjson_internal.h"
|
||||||
#include "jsonparser/stringparsing.h"
|
#include "jsonparser/stringparsing.h"
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
@ -20,390 +20,430 @@
|
||||||
#define PATH_SEP '/'
|
#define PATH_SEP '/'
|
||||||
|
|
||||||
#if defined(DEBUG) && !defined(DEBUG_PRINTF)
|
#if defined(DEBUG) && !defined(DEBUG_PRINTF)
|
||||||
#include <string.h>
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#define DEBUG_PRINTF(format, ...) printf("%s:%s:%d:" format, \
|
#include <string.h>
|
||||||
strrchr(__FILE__, PATH_SEP) + 1, \
|
#define DEBUG_PRINTF(format, ...) \
|
||||||
__func__, __LINE__, ## __VA_ARGS__)
|
printf("%s:%s:%d:" format, strrchr(__FILE__, PATH_SEP) + 1, __func__, \
|
||||||
|
__LINE__, ##__VA_ARGS__)
|
||||||
#elif !defined(DEBUG_PRINTF)
|
#elif !defined(DEBUG_PRINTF)
|
||||||
#define DEBUG_PRINTF(format, ...) do { } while(0)
|
#define DEBUG_PRINTF(format, ...) \
|
||||||
|
do { \
|
||||||
|
} while (0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
WARN_UNUSED
|
WARN_UNUSED
|
||||||
really_inline bool is_valid_true_atom(const u8 * loc) {
|
really_inline bool is_valid_true_atom(const u8 *loc) {
|
||||||
u64 tv = *(const u64 *)"true ";
|
u64 tv = *(const u64 *)"true ";
|
||||||
u64 mask4 = 0x00000000ffffffff;
|
u64 mask4 = 0x00000000ffffffff;
|
||||||
u32 error = 0;
|
u32 error = 0;
|
||||||
u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
||||||
std::memcpy(&locval, loc, sizeof(u64));
|
std::memcpy(&locval, loc, sizeof(u64));
|
||||||
error = (locval & mask4) ^ tv;
|
error = (locval & mask4) ^ tv;
|
||||||
error |= is_not_structural_or_whitespace(loc[4]);
|
error |= is_not_structural_or_whitespace(loc[4]);
|
||||||
return error == 0;
|
return error == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
WARN_UNUSED
|
WARN_UNUSED
|
||||||
really_inline bool is_valid_false_atom(const u8 * loc) {
|
really_inline bool is_valid_false_atom(const u8 *loc) {
|
||||||
u64 fv = *(const u64 *)"false ";
|
u64 fv = *(const u64 *)"false ";
|
||||||
u64 mask5 = 0x000000ffffffffff;
|
u64 mask5 = 0x000000ffffffffff;
|
||||||
u32 error = 0;
|
u32 error = 0;
|
||||||
u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
||||||
std::memcpy(&locval, loc, sizeof(u64));
|
std::memcpy(&locval, loc, sizeof(u64));
|
||||||
error = (locval & mask5) ^ fv;
|
error = (locval & mask5) ^ fv;
|
||||||
error |= is_not_structural_or_whitespace(loc[5]);
|
error |= is_not_structural_or_whitespace(loc[5]);
|
||||||
return error == 0;
|
return error == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
WARN_UNUSED
|
WARN_UNUSED
|
||||||
really_inline bool is_valid_null_atom(const u8 * loc) {
|
really_inline bool is_valid_null_atom(const u8 *loc) {
|
||||||
u64 nv = *(const u64 *)"null ";
|
u64 nv = *(const u64 *)"null ";
|
||||||
u64 mask4 = 0x00000000ffffffff;
|
u64 mask4 = 0x00000000ffffffff;
|
||||||
u32 error = 0;
|
u32 error = 0;
|
||||||
u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
||||||
std::memcpy(&locval, loc, sizeof(u64));
|
std::memcpy(&locval, loc, sizeof(u64));
|
||||||
error = (locval & mask4) ^ nv;
|
error = (locval & mask4) ^ nv;
|
||||||
error |= is_not_structural_or_whitespace(loc[4]);
|
error |= is_not_structural_or_whitespace(loc[4]);
|
||||||
return error == 0;
|
return error == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Implemented using Labels as Values which works in GCC and CLANG (and maybe also in Intel's compiler),
|
// Implemented using Labels as Values which works in GCC and CLANG (and maybe
|
||||||
// but won't work in MSVC. This would need to be reimplemented differently
|
// also in Intel's compiler), but won't work in MSVC. This would need to be
|
||||||
// if one wants to be standard compliant.
|
// reimplemented differently if one wants to be standard compliant.
|
||||||
WARN_UNUSED
|
WARN_UNUSED
|
||||||
bool unified_machine(const u8 *buf, size_t len, ParsedJson &pj) {
|
bool unified_machine(const u8 *buf, size_t len, ParsedJson &pj) {
|
||||||
u32 i = 0; // index of the structural character (0,1,2,3...)
|
u32 i = 0; // index of the structural character (0,1,2,3...)
|
||||||
u32 idx; // location of the structural character in the input (buf)
|
u32 idx; // location of the structural character in the input (buf)
|
||||||
u8 c; // used to track the (structural) character we are looking at, updated by UPDATE_CHAR macro
|
u8 c; // used to track the (structural) character we are looking at, updated
|
||||||
u32 depth = 0;//START_DEPTH; // an arbitrary starting depth
|
// by UPDATE_CHAR macro
|
||||||
//void * ret_address[MAX_DEPTH]; // used to store "labels as value" (non-standard compiler extension)
|
u32 depth = 0; // could have an arbitrary starting depth
|
||||||
|
pj.init();
|
||||||
// a call site is the start of either an object or an array ('[' or '{')
|
|
||||||
// this is the location of the previous call site
|
|
||||||
// (in the tape, at the given depth);
|
|
||||||
// we only need one.
|
|
||||||
|
|
||||||
// We should also track the tape address of our containing
|
|
||||||
// scope for two reasons. First, we will need to put an
|
|
||||||
// up pointer there at each call site so we can navigate
|
|
||||||
// upwards. Second, when we encounter the end of the scope
|
|
||||||
// we can put the current offset into a record for the
|
|
||||||
// scope so we know where it is
|
|
||||||
|
|
||||||
//u32 containing_scope_offset[MAX_DEPTH];
|
|
||||||
|
|
||||||
pj.init();
|
|
||||||
|
|
||||||
// add a sentinel to the end to avoid premature exit
|
|
||||||
// need to be able to find the \0 at the 'padded length' end of the buffer
|
|
||||||
// FIXME: TERRIFYING!
|
|
||||||
//size_t j;
|
|
||||||
//for (j = len; buf[j] != 0; j++)
|
|
||||||
// ;
|
|
||||||
//pj.structural_indexes[pj.n_structural_indexes++] = j;
|
|
||||||
|
|
||||||
// this macro reads the next structural character, updating idx, i and c.
|
// this macro reads the next structural character, updating idx, i and c.
|
||||||
#define UPDATE_CHAR() { idx = pj.structural_indexes[i++]; c = buf[idx]; DEBUG_PRINTF("Got %c at %d (%d offset)\n", c, idx, i-1);}
|
#define UPDATE_CHAR() \
|
||||||
|
{ \
|
||||||
|
idx = pj.structural_indexes[i++]; \
|
||||||
|
c = buf[idx]; \
|
||||||
|
DEBUG_PRINTF("Got %c at %d (%d offset) (depth %d)\n", c, idx, i - 1, \
|
||||||
|
depth); \
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////// START STATE /////////////////////////////
|
||||||
|
DEBUG_PRINTF("at start\n");
|
||||||
|
pj.ret_address[depth] = &&start_continue;
|
||||||
|
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||||
////////////////////////////// START STATE /////////////////////////////
|
pj.write_tape(0, 'r'); // r for root, 0 is going to get overwritten
|
||||||
printf("at start\n");
|
depth++; // everything starts at depth = 1, depth = 0 is just for the root
|
||||||
DEBUG_PRINTF("at start\n");
|
if (depth > pj.depthcapacity) {
|
||||||
pj.ret_address[depth] = &&start_continue;
|
goto fail;
|
||||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
}
|
||||||
pj.write_tape(0, 'r'); // r for root, 0 is going to get overwritten
|
UPDATE_CHAR();
|
||||||
depth++;// everything starts at depth = 1, depth = 0 is just for the root
|
switch (c) {
|
||||||
if(depth > pj.depthcapacity) {
|
case '{':
|
||||||
goto fail;
|
goto object_begin;
|
||||||
}
|
case '[':
|
||||||
printf("got char %c \n",c);
|
goto array_begin;
|
||||||
UPDATE_CHAR();
|
|
||||||
switch (c) {
|
|
||||||
case '{': goto object_begin;
|
|
||||||
case '[': goto array_begin;
|
|
||||||
#define SIMDJSON_ALLOWANYTHINGINROOT
|
#define SIMDJSON_ALLOWANYTHINGINROOT
|
||||||
// A JSON text is a serialized value. Note that certain previous
|
// A JSON text is a serialized value. Note that certain previous
|
||||||
// specifications of JSON constrained a JSON text to be an object or an
|
// specifications of JSON constrained a JSON text to be an object or an
|
||||||
// array. Implementations that generate only objects or arrays where a
|
// array. Implementations that generate only objects or arrays where a
|
||||||
// JSON text is called for will be interoperable in the sense that all
|
// JSON text is called for will be interoperable in the sense that all
|
||||||
// implementations will accept these as conforming JSON texts.
|
// implementations will accept these as conforming JSON texts.
|
||||||
// https://tools.ietf.org/html/rfc8259
|
// https://tools.ietf.org/html/rfc8259
|
||||||
#ifdef SIMDJSON_ALLOWANYTHINGINROOT
|
#ifdef SIMDJSON_ALLOWANYTHINGINROOT
|
||||||
case '"': {
|
case '"': {
|
||||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
|
||||||
goto start_continue;
|
|
||||||
}
|
|
||||||
case 't':
|
|
||||||
if (!is_valid_true_atom(buf + idx)) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
pj.write_tape(0, c);
|
|
||||||
goto start_continue;
|
|
||||||
case 'f':
|
|
||||||
if (!is_valid_false_atom(buf + idx)) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
pj.write_tape(0, c);
|
|
||||||
goto start_continue;
|
|
||||||
case 'n':
|
|
||||||
if (!is_valid_null_atom(buf + idx)) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
pj.write_tape(0, c);
|
|
||||||
goto start_continue;
|
|
||||||
case '0': {
|
|
||||||
if (!parse_number(buf, len, pj, depth, idx, true, false)) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
goto start_continue;
|
|
||||||
}
|
|
||||||
case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
|
|
||||||
if (!parse_number(buf, len, pj, depth, idx, false, false)) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
goto start_continue;
|
|
||||||
}
|
|
||||||
case '-': {
|
|
||||||
if (!parse_number(buf, len, pj, depth, idx, false, true)) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
goto start_continue;
|
|
||||||
}
|
|
||||||
#endif // ALLOWANYTHINGINROOT
|
|
||||||
default: goto fail;
|
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 't':
|
||||||
|
if (!is_valid_true_atom(buf + idx)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
pj.write_tape(0, c);
|
||||||
|
break;
|
||||||
|
case 'f':
|
||||||
|
if (!is_valid_false_atom(buf + idx)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
pj.write_tape(0, c);
|
||||||
|
break;
|
||||||
|
case 'n':
|
||||||
|
if (!is_valid_null_atom(buf + idx)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
pj.write_tape(0, c);
|
||||||
|
break;
|
||||||
|
case '0': {
|
||||||
|
if (!parse_number(buf, len, pj, depth, idx, true, false)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case '1':
|
||||||
|
case '2':
|
||||||
|
case '3':
|
||||||
|
case '4':
|
||||||
|
case '5':
|
||||||
|
case '6':
|
||||||
|
case '7':
|
||||||
|
case '8':
|
||||||
|
case '9': {
|
||||||
|
if (!parse_number(buf, len, pj, depth, idx, false, false)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case '-': {
|
||||||
|
if (!parse_number(buf, len, pj, depth, idx, false, true)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#endif // ALLOWANYTHINGINROOT
|
||||||
|
default:
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
#ifdef SIMDJSON_ALLOWANYTHINGINROOT
|
||||||
|
depth--; // for fall-through cases (e.g., documents containing just a string)
|
||||||
|
#endif // ALLOWANYTHINGINROOT
|
||||||
|
|
||||||
start_continue:
|
start_continue:
|
||||||
DEBUG_PRINTF("in start_object_close\n");
|
DEBUG_PRINTF("in start_object_close\n");
|
||||||
UPDATE_CHAR();
|
UPDATE_CHAR();
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 0: goto succeed;
|
case 0:
|
||||||
default: goto fail;
|
goto succeed;
|
||||||
}
|
default:
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////// OBJECT STATES /////////////////////////////
|
////////////////////////////// OBJECT STATES /////////////////////////////
|
||||||
|
|
||||||
object_begin:
|
object_begin:
|
||||||
printf("in object_begin %c \n",c);
|
DEBUG_PRINTF("in object_begin\n");
|
||||||
DEBUG_PRINTF("in object_begin\n");
|
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
pj.write_tape(0, c);
|
||||||
pj.write_tape(0, c);
|
|
||||||
depth ++;
|
UPDATE_CHAR();
|
||||||
if(depth > pj.depthcapacity) {
|
switch (c) {
|
||||||
goto fail;
|
case '"': {
|
||||||
}
|
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||||
UPDATE_CHAR();
|
goto fail;
|
||||||
switch (c) {
|
|
||||||
case '"': {
|
|
||||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
goto object_key_state;
|
|
||||||
}
|
|
||||||
case '}': goto scope_end;
|
|
||||||
default: goto fail;
|
|
||||||
}
|
}
|
||||||
|
goto object_key_state;
|
||||||
|
}
|
||||||
|
case '}':
|
||||||
|
goto scope_end; // could also go to object_continue
|
||||||
|
default:
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
object_key_state:
|
object_key_state:
|
||||||
printf("in object_key_state %c \n",c);
|
DEBUG_PRINTF("in object_key_state\n");
|
||||||
|
UPDATE_CHAR();
|
||||||
|
if (c != ':') {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
UPDATE_CHAR();
|
||||||
|
switch (c) {
|
||||||
|
case '"': {
|
||||||
|
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 't':
|
||||||
|
if (!is_valid_true_atom(buf + idx)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
pj.write_tape(0, c);
|
||||||
|
break;
|
||||||
|
case 'f':
|
||||||
|
if (!is_valid_false_atom(buf + idx)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
pj.write_tape(0, c);
|
||||||
|
break;
|
||||||
|
case 'n':
|
||||||
|
if (!is_valid_null_atom(buf + idx)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
pj.write_tape(0, c);
|
||||||
|
break;
|
||||||
|
case '0': {
|
||||||
|
if (!parse_number(buf, len, pj, depth, idx, true, false)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case '1':
|
||||||
|
case '2':
|
||||||
|
case '3':
|
||||||
|
case '4':
|
||||||
|
case '5':
|
||||||
|
case '6':
|
||||||
|
case '7':
|
||||||
|
case '8':
|
||||||
|
case '9': {
|
||||||
|
if (!parse_number(buf, len, pj, depth, idx, false, false)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case '-': {
|
||||||
|
if (!parse_number(buf, len, pj, depth, idx, false, true)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case '{': {
|
||||||
|
// we have not yet encountered } so we need to come back for it
|
||||||
|
pj.ret_address[depth] = &&object_continue;
|
||||||
|
// we found an object inside an object, so we need to increment the depth
|
||||||
|
depth++;
|
||||||
|
if (depth > pj.depthcapacity) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("in object_key_state\n");
|
goto object_begin;
|
||||||
UPDATE_CHAR();
|
}
|
||||||
if (c != ':') {
|
case '[': {
|
||||||
goto fail;
|
// we have not yet encountered } so we need to come back for it
|
||||||
}
|
pj.ret_address[depth] = &&object_continue;
|
||||||
UPDATE_CHAR();
|
// we found an array inside an object, so we need to increment the depth
|
||||||
switch (c) {
|
depth++;
|
||||||
case '"': {
|
if (depth > pj.depthcapacity) {
|
||||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
goto fail;
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case 't': if (!is_valid_true_atom(buf + idx)) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
pj.write_tape(0, c);
|
|
||||||
break;
|
|
||||||
case 'f': if (!is_valid_false_atom(buf + idx)) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
pj.write_tape(0, c);
|
|
||||||
break;
|
|
||||||
case 'n': if (!is_valid_null_atom(buf + idx)) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
pj.write_tape(0, c);
|
|
||||||
break;
|
|
||||||
case '0': {
|
|
||||||
if (!parse_number(buf, len, pj, depth, idx, true, false)) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
|
|
||||||
if (!parse_number(buf, len, pj, depth, idx, false, false)) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case '-': {
|
|
||||||
if (!parse_number(buf, len, pj, depth, idx, false, true)) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case '{': {
|
|
||||||
pj.ret_address[depth] = &&object_continue;
|
|
||||||
goto object_begin;
|
|
||||||
}
|
|
||||||
case '[': {
|
|
||||||
pj.ret_address[depth] = &&object_continue;
|
|
||||||
goto array_begin;
|
|
||||||
}
|
|
||||||
default: goto fail;
|
|
||||||
}
|
}
|
||||||
|
goto array_begin;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
object_continue:
|
object_continue:
|
||||||
printf("in object_continue %c \n",c);
|
DEBUG_PRINTF("in object_continue\n");
|
||||||
|
UPDATE_CHAR();
|
||||||
DEBUG_PRINTF("in object_continue\n");
|
switch (c) {
|
||||||
|
case ',':
|
||||||
UPDATE_CHAR();
|
UPDATE_CHAR();
|
||||||
switch (c) {
|
if (c != '"') {
|
||||||
case ',':
|
goto fail;
|
||||||
UPDATE_CHAR();
|
} else {
|
||||||
if (c != '"') {
|
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||||
goto fail;
|
goto fail;
|
||||||
} else {
|
}
|
||||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
goto object_key_state;
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
goto object_key_state;
|
|
||||||
}
|
|
||||||
case '}': goto scope_end;
|
|
||||||
default: goto fail;
|
|
||||||
}
|
}
|
||||||
|
case '}':
|
||||||
|
goto scope_end;
|
||||||
|
default:
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////// COMMON STATE /////////////////////////////
|
////////////////////////////// COMMON STATE /////////////////////////////
|
||||||
|
|
||||||
scope_end:
|
scope_end:
|
||||||
// write our tape location to the header scope
|
// write our tape location to the header scope
|
||||||
depth--;
|
depth--;
|
||||||
pj.write_tape(pj.containing_scope_offset[depth], c);
|
pj.write_tape(pj.containing_scope_offset[depth], c);
|
||||||
pj.annotate_previousloc(pj.containing_scope_offset[depth], pj.get_current_loc());
|
pj.annotate_previousloc(pj.containing_scope_offset[depth],
|
||||||
// goto saved_state
|
pj.get_current_loc());
|
||||||
goto *pj.ret_address[depth];
|
// goto saved_state
|
||||||
|
goto *pj.ret_address[depth];
|
||||||
|
|
||||||
////////////////////////////// ARRAY STATES /////////////////////////////
|
|
||||||
|
|
||||||
|
////////////////////////////// ARRAY STATES /////////////////////////////
|
||||||
array_begin:
|
array_begin:
|
||||||
printf("in array_begin %c \n",c);
|
DEBUG_PRINTF("in array_begin\n");
|
||||||
|
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||||
DEBUG_PRINTF("in array_begin\n");
|
pj.write_tape(0, c);
|
||||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
UPDATE_CHAR();
|
||||||
pj.write_tape(0, c);
|
if (c == ']') {
|
||||||
depth ++;
|
goto scope_end; // could also go to array_continue
|
||||||
if(depth > pj.depthcapacity) {
|
}
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
UPDATE_CHAR();
|
|
||||||
if (c == ']') {
|
|
||||||
goto scope_end;
|
|
||||||
}
|
|
||||||
|
|
||||||
main_array_switch:
|
main_array_switch:
|
||||||
// we call update char on all paths in, so we can peek at c on the
|
// we call update char on all paths in, so we can peek at c on the
|
||||||
// on paths that can accept a close square brace (post-, and at start)
|
// on paths that can accept a close square brace (post-, and at start)
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case '"': {
|
case '"': {
|
||||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
|
||||||
goto array_continue;
|
|
||||||
}
|
|
||||||
case 't': if (!is_valid_true_atom(buf + idx)) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
pj.write_tape(0, c);
|
|
||||||
break;
|
|
||||||
case 'f': if (!is_valid_false_atom(buf + idx)) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
pj.write_tape(0, c);
|
|
||||||
break;
|
|
||||||
case 'n': if (!is_valid_null_atom(buf + idx)) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
pj.write_tape(0, c);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case '0': {
|
|
||||||
if (!parse_number(buf, len, pj, depth, idx, true, false)) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
|
|
||||||
if (!parse_number(buf, len, pj, depth, idx, false, false)) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case '-': {
|
|
||||||
if (!parse_number(buf, len, pj, depth, idx, false, true)) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case '{': {
|
|
||||||
pj.ret_address[depth] = &&array_continue;
|
|
||||||
goto object_begin;
|
|
||||||
}
|
|
||||||
case '[': {
|
|
||||||
pj.ret_address[depth] = &&array_continue;
|
|
||||||
goto array_begin;
|
|
||||||
}
|
|
||||||
default: goto fail;
|
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 't':
|
||||||
|
if (!is_valid_true_atom(buf + idx)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
pj.write_tape(0, c);
|
||||||
|
break;
|
||||||
|
case 'f':
|
||||||
|
if (!is_valid_false_atom(buf + idx)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
pj.write_tape(0, c);
|
||||||
|
break;
|
||||||
|
case 'n':
|
||||||
|
if (!is_valid_null_atom(buf + idx)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
pj.write_tape(0, c);
|
||||||
|
break; // goto array_continue;
|
||||||
|
|
||||||
|
case '0': {
|
||||||
|
if (!parse_number(buf, len, pj, depth, idx, true, false)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
break; // goto array_continue;
|
||||||
|
}
|
||||||
|
case '1':
|
||||||
|
case '2':
|
||||||
|
case '3':
|
||||||
|
case '4':
|
||||||
|
case '5':
|
||||||
|
case '6':
|
||||||
|
case '7':
|
||||||
|
case '8':
|
||||||
|
case '9': {
|
||||||
|
if (!parse_number(buf, len, pj, depth, idx, false, false)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
break; // goto array_continue;
|
||||||
|
}
|
||||||
|
case '-': {
|
||||||
|
if (!parse_number(buf, len, pj, depth, idx, false, true)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
break; // goto array_continue;
|
||||||
|
}
|
||||||
|
case '{': {
|
||||||
|
// we have not yet encountered ] so we need to come back for it
|
||||||
|
pj.ret_address[depth] = &&array_continue;
|
||||||
|
|
||||||
|
// we found an object inside an array, so we need to increment the depth
|
||||||
|
depth++;
|
||||||
|
if (depth > pj.depthcapacity) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
goto object_begin;
|
||||||
|
}
|
||||||
|
case '[': {
|
||||||
|
// we have not yet encountered ] so we need to come back for it
|
||||||
|
pj.ret_address[depth] = &&array_continue;
|
||||||
|
|
||||||
|
// we found an array inside an array, so we need to increment the depth
|
||||||
|
depth++;
|
||||||
|
if (depth > pj.depthcapacity) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
goto array_begin;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
array_continue:
|
array_continue:
|
||||||
printf("in array_begin %c \n",c);
|
DEBUG_PRINTF("in array_continue\n");
|
||||||
|
UPDATE_CHAR();
|
||||||
DEBUG_PRINTF("in array_continue\n");
|
switch (c) {
|
||||||
|
case ',':
|
||||||
UPDATE_CHAR();
|
UPDATE_CHAR();
|
||||||
switch (c) {
|
goto main_array_switch;
|
||||||
case ',': UPDATE_CHAR(); goto main_array_switch;
|
case ']':
|
||||||
case ']': goto scope_end;
|
goto scope_end;
|
||||||
default: goto fail;
|
default:
|
||||||
}
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////// FINAL STATES /////////////////////////////
|
////////////////////////////// FINAL STATES /////////////////////////////
|
||||||
|
|
||||||
succeed:
|
succeed:
|
||||||
DEBUG_PRINTF("in succeed\n");
|
DEBUG_PRINTF("in succeed, depth = %d \n", depth);
|
||||||
// we annotate the root node
|
// we annotate the root node
|
||||||
depth--;
|
// depth--;
|
||||||
// next line allows us to go back to the start
|
// next line allows us to go back to the start
|
||||||
pj.write_tape(pj.containing_scope_offset[depth], 'r');// r is root
|
pj.write_tape(pj.containing_scope_offset[depth], 'r'); // r is root
|
||||||
// next line tells the root node how to go to the end
|
// next line tells the root node how to go to the end
|
||||||
pj.annotate_previousloc(pj.containing_scope_offset[depth], pj.get_current_loc());
|
pj.annotate_previousloc(pj.containing_scope_offset[depth],
|
||||||
|
pj.get_current_loc());
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
pj.dump_tapes();
|
pj.dump_tapes();
|
||||||
#endif
|
#endif
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
DEBUG_PRINTF("in fail\n");
|
DEBUG_PRINTF("in fail\n");
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
pj.dump_tapes();
|
pj.dump_tapes();
|
||||||
#endif
|
#endif
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
#include "jsonparser/jsonparser.h"
|
#include "jsonparser/jsonparser.h"
|
||||||
|
|
||||||
|
@ -30,7 +31,6 @@ void on_json_error( void *, const fastjson::ErrorContext& ec) {
|
||||||
bool fastjson_parse(const char *input) {
|
bool fastjson_parse(const char *input) {
|
||||||
fastjson::Token token;
|
fastjson::Token token;
|
||||||
fastjson::dom::Chunk chunk;
|
fastjson::dom::Chunk chunk;
|
||||||
std::string error_message;
|
|
||||||
return fastjson::dom::parse_string(input, &token, &chunk, 0, &on_json_error, NULL);
|
return fastjson::dom::parse_string(input, &token, &chunk, 0, &on_json_error, NULL);
|
||||||
}
|
}
|
||||||
// end of fastjson stuff
|
// end of fastjson stuff
|
||||||
|
@ -41,17 +41,30 @@ using namespace rapidjson;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
if (argc < 2) {
|
bool verbose = false;
|
||||||
|
int c;
|
||||||
|
while ((c = getopt (argc, argv, "v")) != -1)
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
case 'v':
|
||||||
|
verbose = true;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
abort ();
|
||||||
|
}
|
||||||
|
if (optind >= argc) {
|
||||||
cerr << "Usage: " << argv[0] << " <jsonfile>\n";
|
cerr << "Usage: " << argv[0] << " <jsonfile>\n";
|
||||||
cerr << "Or " << argv[0] << " -v <jsonfile>\n";
|
cerr << "Or " << argv[0] << " -v <jsonfile>\n";
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
bool verbose = false;
|
const char * filename = argv[optind];
|
||||||
if (argc > 2) {
|
std::pair<u8 *, size_t> p;
|
||||||
if (strcmp(argv[1], "-v"))
|
try {
|
||||||
verbose = true;
|
p = get_corpus(filename);
|
||||||
|
} catch (const std::exception& e) { // caught by reference to base
|
||||||
|
std::cout << "Could not load the file " << filename << std::endl;
|
||||||
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
pair<u8 *, size_t> p = get_corpus(argv[argc - 1]);
|
|
||||||
if (verbose) {
|
if (verbose) {
|
||||||
std::cout << "Input has ";
|
std::cout << "Input has ";
|
||||||
if (p.second > 1024 * 1024)
|
if (p.second > 1024 * 1024)
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
#include "jsonparser/jsonparser.h"
|
#include "jsonparser/jsonparser.h"
|
||||||
|
|
||||||
|
@ -41,6 +42,8 @@ bool validate(const char *dirname) {
|
||||||
printf("nothing in dir %s \n", dirname);
|
printf("nothing in dir %s \n", dirname);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
bool * isfileasexpected = new bool[c];
|
||||||
|
for(int i = 0; i < c; i++) isfileasexpected[i] = true;
|
||||||
size_t howmany = 0;
|
size_t howmany = 0;
|
||||||
bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
|
bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
|
||||||
for (int i = 0; i < c; i++) {
|
for (int i = 0; i < c; i++) {
|
||||||
|
@ -56,7 +59,13 @@ bool validate(const char *dirname) {
|
||||||
} else {
|
} else {
|
||||||
strcpy(fullpath + dirlen, name);
|
strcpy(fullpath + dirlen, name);
|
||||||
}
|
}
|
||||||
std::pair<u8 *, size_t> p = get_corpus(fullpath);
|
std::pair<u8 *, size_t> p;
|
||||||
|
try {
|
||||||
|
p = get_corpus(fullpath);
|
||||||
|
} catch (const std::exception& e) {
|
||||||
|
std::cout << "Could not load the file " << fullpath << std::endl;
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
|
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
|
||||||
if(pj_ptr == NULL) {
|
if(pj_ptr == NULL) {
|
||||||
std::cerr<< "can't allocate memory"<<std::endl;
|
std::cerr<< "can't allocate memory"<<std::endl;
|
||||||
|
@ -70,11 +79,13 @@ bool validate(const char *dirname) {
|
||||||
howmany--;
|
howmany--;
|
||||||
} else if (startsWith("pass", name)) {
|
} else if (startsWith("pass", name)) {
|
||||||
if (!isok) {
|
if (!isok) {
|
||||||
|
isfileasexpected[i] = false;
|
||||||
printf("warning: file %s should pass but it fails.\n", name);
|
printf("warning: file %s should pass but it fails.\n", name);
|
||||||
everythingfine = false;
|
everythingfine = false;
|
||||||
}
|
}
|
||||||
} else if (startsWith("fail", name)) {
|
} else if (startsWith("fail", name)) {
|
||||||
if (isok) {
|
if (isok) {
|
||||||
|
isfileasexpected[i] = false;
|
||||||
printf("warning: file %s should fail but it passes.\n", name);
|
printf("warning: file %s should fail but it passes.\n", name);
|
||||||
everythingfine = false;
|
everythingfine = false;
|
||||||
}
|
}
|
||||||
|
@ -87,11 +98,20 @@ bool validate(const char *dirname) {
|
||||||
deallocate_ParsedJson(pj_ptr);
|
deallocate_ParsedJson(pj_ptr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
printf("%zu files checked.\n", howmany);
|
||||||
|
if(everythingfine) {
|
||||||
|
printf("All ok!\n");
|
||||||
|
} else {
|
||||||
|
printf("There were problems! Consider reviewing the following files:\n");
|
||||||
|
for(int i = 0; i < c; i++) {
|
||||||
|
if(!isfileasexpected[i]) printf("%s \n", entry_list[i]->d_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
for (int i = 0; i < c; ++i)
|
for (int i = 0; i < c; ++i)
|
||||||
free(entry_list[i]);
|
free(entry_list[i]);
|
||||||
free(entry_list);
|
free(entry_list);
|
||||||
printf("%zu files checked.\n", howmany);
|
delete[] isfileasexpected;
|
||||||
if(everythingfine) printf("All ok!\n");
|
|
||||||
return everythingfine;
|
return everythingfine;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -28,7 +28,7 @@ bool startsWith(const char *pre, const char *str) {
|
||||||
size_t lenpre = strlen(pre), lenstr = strlen(str);
|
size_t lenpre = strlen(pre), lenstr = strlen(str);
|
||||||
return lenstr < lenpre ? false : strncmp(pre, str, lenpre) == 0;
|
return lenstr < lenpre ? false : strncmp(pre, str, lenpre) == 0;
|
||||||
}
|
}
|
||||||
bool is_in_bad_list(char *buf) {
|
bool is_in_bad_list(const char *buf) {
|
||||||
for (size_t i = 0; i < sizeof(really_bad) / sizeof(really_bad[0]); i++)
|
for (size_t i = 0; i < sizeof(really_bad) / sizeof(really_bad[0]); i++)
|
||||||
if (startsWith(really_bad[i], buf))
|
if (startsWith(really_bad[i], buf))
|
||||||
return true;
|
return true;
|
||||||
|
@ -38,9 +38,9 @@ bool is_in_bad_list(char *buf) {
|
||||||
inline void foundInvalidNumber(const u8 *buf) {
|
inline void foundInvalidNumber(const u8 *buf) {
|
||||||
invalid_count++;
|
invalid_count++;
|
||||||
char *endptr;
|
char *endptr;
|
||||||
double expected = strtod((char *)buf, &endptr);
|
double expected = strtod((const char *)buf, &endptr);
|
||||||
if (endptr != (char *)buf) {
|
if (endptr != (const char *)buf) {
|
||||||
if (!is_in_bad_list((char *)buf)) {
|
if (!is_in_bad_list((const char *)buf)) {
|
||||||
printf(
|
printf(
|
||||||
"Warning: foundInvalidNumber %.32s whereas strtod parses it to %f, ",
|
"Warning: foundInvalidNumber %.32s whereas strtod parses it to %f, ",
|
||||||
buf, expected);
|
buf, expected);
|
||||||
|
@ -53,8 +53,8 @@ inline void foundInvalidNumber(const u8 *buf) {
|
||||||
inline void foundInteger(int64_t result, const u8 *buf) {
|
inline void foundInteger(int64_t result, const u8 *buf) {
|
||||||
int_count++;
|
int_count++;
|
||||||
char *endptr;
|
char *endptr;
|
||||||
long long expected = strtoll((char *)buf, &endptr, 10);
|
long long expected = strtoll((const char *)buf, &endptr, 10);
|
||||||
if ((endptr == (char *)buf) || (expected != result)) {
|
if ((endptr == (const char *)buf) || (expected != result)) {
|
||||||
printf("Error: parsed %" PRId64 " out of %.32s, ", result, buf);
|
printf("Error: parsed %" PRId64 " out of %.32s, ", result, buf);
|
||||||
printf(" while parsing %s \n", fullpath);
|
printf(" while parsing %s \n", fullpath);
|
||||||
parse_error |= PARSE_ERROR;
|
parse_error |= PARSE_ERROR;
|
||||||
|
@ -64,8 +64,8 @@ inline void foundInteger(int64_t result, const u8 *buf) {
|
||||||
inline void foundFloat(double result, const u8 *buf) {
|
inline void foundFloat(double result, const u8 *buf) {
|
||||||
char *endptr;
|
char *endptr;
|
||||||
float_count++;
|
float_count++;
|
||||||
double expected = strtod((char *)buf, &endptr);
|
double expected = strtod((const char *)buf, &endptr);
|
||||||
if (endptr == (char *)buf) {
|
if (endptr == (const char *)buf) {
|
||||||
printf("parsed %f from %.32s whereas strtod refuses to parse a float, ",
|
printf("parsed %f from %.32s whereas strtod refuses to parse a float, ",
|
||||||
result, buf);
|
result, buf);
|
||||||
printf(" while parsing %s \n", fullpath);
|
printf(" while parsing %s \n", fullpath);
|
||||||
|
@ -123,7 +123,13 @@ bool validate(const char *dirname) {
|
||||||
} else {
|
} else {
|
||||||
strcpy(fullpath + dirlen, name);
|
strcpy(fullpath + dirlen, name);
|
||||||
}
|
}
|
||||||
std::pair<u8 *, size_t> p = get_corpus(fullpath);
|
std::pair<u8 *, size_t> p;
|
||||||
|
try {
|
||||||
|
p = get_corpus(fullpath);
|
||||||
|
} catch (const std::exception& e) {
|
||||||
|
std::cout << "Could not load the file " << fullpath << std::endl;
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
// terrible hack but just to get it working
|
// terrible hack but just to get it working
|
||||||
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
|
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
|
||||||
if (pj_ptr == NULL) {
|
if (pj_ptr == NULL) {
|
||||||
|
|
|
@ -241,7 +241,7 @@ inline void foundString(const u8 *buf, const u8 *parsed_begin,
|
||||||
// we have a zero-length string
|
// we have a zero-length string
|
||||||
if (parsed_begin != parsed_end) {
|
if (parsed_begin != parsed_end) {
|
||||||
printf("WARNING: We have a zero-length but gap is %zu \n",
|
printf("WARNING: We have a zero-length but gap is %zu \n",
|
||||||
parsed_end - parsed_begin);
|
(size_t)(parsed_end - parsed_begin));
|
||||||
probable_bug = true;
|
probable_bug = true;
|
||||||
}
|
}
|
||||||
empty_string++;
|
empty_string++;
|
||||||
|
@ -252,12 +252,12 @@ inline void foundString(const u8 *buf, const u8 *parsed_begin,
|
||||||
printf("WARNING: lengths on parsed strings disagree %zu %zu \n", thislen,
|
printf("WARNING: lengths on parsed strings disagree %zu %zu \n", thislen,
|
||||||
len);
|
len);
|
||||||
printf("\nour parsed string : '%*s'\n\n", (int)thislen,
|
printf("\nour parsed string : '%*s'\n\n", (int)thislen,
|
||||||
(char *)parsed_begin);
|
(const char *)parsed_begin);
|
||||||
print_hex((char *)parsed_begin, thislen);
|
print_hex((const char *)parsed_begin, thislen);
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
||||||
printf("reference parsing :'%*s'\n\n", (int)len, bigbuffer);
|
printf("reference parsing :'%*s'\n\n", (int)len, bigbuffer);
|
||||||
print_hex((char *)bigbuffer, len);
|
print_hex((const char *)bigbuffer, len);
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
||||||
probable_bug = true;
|
probable_bug = true;
|
||||||
|
@ -267,15 +267,15 @@ inline void foundString(const u8 *buf, const u8 *parsed_begin,
|
||||||
printf("Lengths %zu %zu \n", thislen, len);
|
printf("Lengths %zu %zu \n", thislen, len);
|
||||||
|
|
||||||
printf("\nour parsed string : '%*s'\n", (int)thislen,
|
printf("\nour parsed string : '%*s'\n", (int)thislen,
|
||||||
(char *)parsed_begin);
|
(const char *)parsed_begin);
|
||||||
print_hex((char *)parsed_begin, thislen);
|
print_hex((const char *)parsed_begin, thislen);
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
||||||
printf("reference parsing :'%*s'\n", (int)len, bigbuffer);
|
printf("reference parsing :'%*s'\n", (int)len, bigbuffer);
|
||||||
print_hex((char *)bigbuffer, len);
|
print_hex((const char *)bigbuffer, len);
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
||||||
print_cmp_hex((char *)parsed_begin, bigbuffer, thislen);
|
print_cmp_hex((const char *)parsed_begin, bigbuffer, thislen);
|
||||||
|
|
||||||
probable_bug = true;
|
probable_bug = true;
|
||||||
}
|
}
|
||||||
|
@ -325,8 +325,13 @@ bool validate(const char *dirname) {
|
||||||
} else {
|
} else {
|
||||||
strcpy(fullpath + dirlen, name);
|
strcpy(fullpath + dirlen, name);
|
||||||
}
|
}
|
||||||
std::pair<u8 *, size_t> p = get_corpus(fullpath);
|
std::pair<u8 *, size_t> p;
|
||||||
// terrible hack but just to get it working
|
try {
|
||||||
|
p = get_corpus(fullpath);
|
||||||
|
} catch (const std::exception& e) {
|
||||||
|
std::cout << "Could not load the file " << fullpath << std::endl;
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
|
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
|
||||||
if (pj_ptr == NULL) {
|
if (pj_ptr == NULL) {
|
||||||
std::cerr << "can't allocate memory" << std::endl;
|
std::cerr << "can't allocate memory" << std::endl;
|
||||||
|
|
Loading…
Reference in New Issue