Lots and lots of cleaning.
This commit is contained in:
parent
5fae7b2100
commit
a43b0772e1
|
@ -21,7 +21,7 @@ template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
|
|||
std::vector<uint64_t> ids;
|
||||
|
||||
public:
|
||||
LinuxEvents(std::vector<int> config_vec) : fd(0) {
|
||||
explicit LinuxEvents(std::vector<int> config_vec) : fd(0) {
|
||||
memset(&attribs, 0, sizeof(attribs));
|
||||
attribs.type = TYPE;
|
||||
attribs.size = sizeof(attribs);
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#include <unistd.h>
|
||||
#include <iostream>
|
||||
|
||||
#include "benchmark.h"
|
||||
|
@ -13,6 +14,7 @@
|
|||
#include "rapidjson/writer.h"
|
||||
#include "sajson.h"
|
||||
|
||||
|
||||
using namespace rapidjson;
|
||||
using namespace std;
|
||||
|
||||
|
@ -43,17 +45,29 @@ std::string rapidstringme(char *json) {
|
|||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc < 2) {
|
||||
cerr << "Usage: " << argv[0] << " <jsonfile>\n";
|
||||
cerr << "Or " << argv[0] << " -v <jsonfile>\n";
|
||||
int c;
|
||||
bool verbose = false;
|
||||
while ((c = getopt (argc, argv, "v")) != -1)
|
||||
switch (c)
|
||||
{
|
||||
case 'v':
|
||||
verbose = true;
|
||||
break;
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
if (optind >= argc) {
|
||||
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
|
||||
exit(1);
|
||||
}
|
||||
bool verbose = false;
|
||||
if (argc > 2) {
|
||||
if (strcmp(argv[1], "-v"))
|
||||
verbose = true;
|
||||
const char * filename = argv[optind];
|
||||
pair<u8 *, size_t> p;
|
||||
try {
|
||||
p = get_corpus(filename);
|
||||
} catch (const std::exception& e) { // caught by reference to base
|
||||
std::cout << "Could not load the file " << filename << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
pair<u8 *, size_t> p = get_corpus(argv[argc - 1]);
|
||||
if (verbose) {
|
||||
std::cout << "Input has ";
|
||||
if (p.second > 1024 * 1024)
|
||||
|
|
|
@ -31,79 +31,14 @@
|
|||
#include "jsonparser/stage34_unified.h"
|
||||
using namespace std;
|
||||
|
||||
// https://stackoverflow.com/questions/2616906/how-do-i-output-coloured-text-to-a-linux-terminal
|
||||
namespace Color {
// Numeric parameters placed between "\033[" and "m" by Modifier below.
// FG_* entries name foreground colours, BG_* entries background colours.
enum Code {
  FG_DEFAULT = 39,
  FG_BLACK = 30,
  FG_RED = 31,
  FG_GREEN = 32,
  FG_YELLOW = 33,
  FG_BLUE = 34,
  FG_MAGENTA = 35,
  FG_CYAN = 36,
  FG_LIGHT_GRAY = 37,
  FG_DARK_GRAY = 90,
  FG_LIGHT_RED = 91,
  FG_LIGHT_GREEN = 92,
  FG_LIGHT_YELLOW = 93,
  FG_LIGHT_BLUE = 94,
  FG_LIGHT_MAGENTA = 95,
  FG_LIGHT_CYAN = 96,
  FG_WHITE = 97,
  BG_RED = 41,
  BG_GREEN = 42,
  BG_BLUE = 44,
  BG_DEFAULT = 49
};

// Streamable wrapper around one Code: inserting a Modifier into an ostream
// writes the escape sequence "\033[<code>m".
class Modifier {
  Code code;

public:
  // explicit so that a bare Code never converts into a Modifier implicitly
  explicit Modifier(Code pCode) : code(pCode) {}

  // Writes the escape sequence for mod's code; the unscoped enum is streamed
  // as its integer value.
  friend std::ostream &operator<<(std::ostream &os, const Modifier &mod) {
    return os << "\033[" << mod.code << "m";
  }
};
} // namespace Color
|
||||
|
||||
// Writes buf to stdout with every structural character coloured: '{', '[',
// '}' and ']' in green, any other structural character in yellow. The bytes
// lying between two consecutive structural positions are printed unchanged,
// and a final std::endl terminates the output.
void colorfuldisplay(ParsedJson &pj, const u8 *buf) {
  const Color::Modifier green(Color::FG_GREEN);
  const Color::Modifier yellow(Color::FG_YELLOW);
  const Color::Modifier reset(Color::FG_DEFAULT);
  const size_t count = pj.n_structural_indexes;

  // skip the leading entries that repeat the same position
  size_t cur = 0;
  while ((cur + 1 < count) &&
         (pj.structural_indexes[cur] == pj.structural_indexes[cur + 1])) {
    ++cur;
  }

  for (; cur < count; ++cur) {
    const u32 here = pj.structural_indexes[cur];
    // clearing bit 0x20 folds '{' onto '[' (0x5b) and '}' onto ']' (0x5d)
    const int folded = buf[here] & 0xdf;
    const bool is_bracket = (folded == 0x5b) || (folded == 0x5d);
    std::cout << (is_bracket ? green : yellow) << buf[here] << reset;

    if (cur + 1 >= count) {
      continue; // last structural character: nothing follows it
    }
    // copy the bytes up to (not including) the next structural character
    const u32 next = pj.structural_indexes[cur + 1];
    for (u32 pos = here + 1; pos < next; pos++) {
      std::cout << buf[pos];
    }
  }
  std::cout << std::endl;
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
bool verbose = false;
|
||||
bool dump = false;
|
||||
bool forceoneiteration = false;
|
||||
|
||||
int c;
|
||||
|
||||
while ((c = getopt (argc, argv, "vd")) != -1)
|
||||
while ((c = getopt (argc, argv, "1vd")) != -1)
|
||||
switch (c)
|
||||
{
|
||||
case 'v':
|
||||
|
@ -112,6 +47,9 @@ int main(int argc, char *argv[]) {
|
|||
case 'd':
|
||||
dump = true;
|
||||
break;
|
||||
case '1':
|
||||
forceoneiteration = true;
|
||||
break;
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
|
@ -124,7 +62,13 @@ int main(int argc, char *argv[]) {
|
|||
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
|
||||
}
|
||||
if(verbose) cout << "[verbose] loading " << filename << endl;
|
||||
pair<u8 *, size_t> p = get_corpus(filename);
|
||||
pair<u8 *, size_t> p;
|
||||
try {
|
||||
p = get_corpus(filename);
|
||||
} catch (const std::exception& e) { // caught by reference to base
|
||||
std::cout << "Could not load the file " << filename << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
if(verbose) cout << "[verbose] loaded " << filename << " ("<< p.second << " bytes)" << endl;
|
||||
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
|
||||
ParsedJson &pj(*pj_ptr);
|
||||
|
@ -133,7 +77,7 @@ int main(int argc, char *argv[]) {
|
|||
#if defined(DEBUG)
|
||||
const u32 iterations = 1;
|
||||
#else
|
||||
const u32 iterations = p.second < 1 * 1000 * 1000? 1000 : 10;
|
||||
const u32 iterations = forceoneiteration ? 1 : ( p.second < 1 * 1000 * 1000? 1000 : 10);
|
||||
#endif
|
||||
vector<double> res;
|
||||
res.resize(iterations);
|
||||
|
@ -174,7 +118,7 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
unified.start();
|
||||
#endif
|
||||
isok = flatten_indexes(p.second, pj);
|
||||
isok = isok && flatten_indexes(p.second, pj);
|
||||
#ifndef SQUASH_COUNTERS
|
||||
unified.end(results);
|
||||
cy2 += results[0];
|
||||
|
@ -187,7 +131,7 @@ int main(int argc, char *argv[]) {
|
|||
unified.start();
|
||||
#endif
|
||||
|
||||
isok = unified_machine(p.first, p.second, pj);
|
||||
isok = isok && unified_machine(p.first, p.second, pj);
|
||||
#ifndef SQUASH_COUNTERS
|
||||
unified.end(results);
|
||||
cy3 += results[0];
|
||||
|
|
|
@ -31,7 +31,6 @@ void on_json_error( void *, const fastjson::ErrorContext& ec) {
|
|||
bool fastjson_parse(const char *input) {
|
||||
fastjson::Token token;
|
||||
fastjson::dom::Chunk chunk;
|
||||
std::string error_message;
|
||||
return fastjson::dom::parse_string(input, &token, &chunk, 0, &on_json_error, NULL);
|
||||
}
|
||||
// end of fastjson stuff
|
||||
|
@ -62,7 +61,14 @@ int main(int argc, char *argv[]) {
|
|||
if(optind + 1 < argc) {
|
||||
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
|
||||
}
|
||||
pair<u8 *, size_t> p = get_corpus(filename);
|
||||
pair<u8 *, size_t> p;
|
||||
try {
|
||||
p = get_corpus(filename);
|
||||
} catch (const std::exception& e) { // caught by reference to base
|
||||
std::cout << "Could not load the file " << filename << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (verbose) {
|
||||
std::cout << "Input has ";
|
||||
if (p.second > 1024 * 1024)
|
||||
|
|
|
@ -20,6 +20,13 @@ char * allocate_aligned_buffer(size_t length);
|
|||
// first element of the pair is a string (null terminated)
|
||||
// whereas the second element is the length.
|
||||
// the caller is responsible for freeing the buffer (free the .first member of the returned std::pair<u8 *, size_t>)
|
||||
//
|
||||
// throws an exception if the file cannot be opened, use try/catch
|
||||
// try {
|
||||
// p = get_corpus(filename);
|
||||
// } catch (const std::exception& e) {
|
||||
// std::cout << "Could not load the file " << filename << std::endl;
|
||||
// }
|
||||
std::pair<u8 *, size_t> get_corpus(std::string filename);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -128,7 +128,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
|
|||
const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1);
|
||||
const __m128i mul_1_10000 =
|
||||
_mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1);
|
||||
const __m128i input = _mm_sub_epi8(_mm_loadu_si128((__m128i *)chars), ascii0);
|
||||
const __m128i input = _mm_sub_epi8(_mm_loadu_si128((const __m128i *)chars), ascii0);
|
||||
const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10);
|
||||
const __m128i t2 = _mm_madd_epi16(t1, mul_1_100);
|
||||
const __m128i t3 = _mm_packus_epi32(t2, t2);
|
||||
|
@ -149,7 +149,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
|
|||
//
|
||||
static never_inline bool
|
||||
parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
|
||||
ParsedJson &pj, const u32 depth, const u32 offset,
|
||||
ParsedJson &pj, UNUSED const u32 depth, const u32 offset,
|
||||
UNUSED bool found_zero, bool found_minus) {
|
||||
const char *p = (const char *)(buf + offset);
|
||||
|
||||
|
@ -193,7 +193,6 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
|
|||
}
|
||||
exponent = firstafterperiod - p;
|
||||
}
|
||||
int64_t expnumber = 0; // exponential part
|
||||
if (('e' == *p) || ('E' == *p)) {
|
||||
++p;
|
||||
bool negexp = false;
|
||||
|
@ -210,7 +209,7 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
|
|||
return false;
|
||||
}
|
||||
unsigned char digit = *p - '0';
|
||||
expnumber = digit;
|
||||
int64_t expnumber = digit; // exponential part
|
||||
p++;
|
||||
if (is_integer(*p)) {
|
||||
digit = *p - '0';
|
||||
|
@ -270,7 +269,7 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
|
|||
//
|
||||
static never_inline bool parse_large_integer(const u8 *const buf,
|
||||
UNUSED size_t len, ParsedJson &pj,
|
||||
const u32 depth, const u32 offset,
|
||||
UNUSED const u32 depth, const u32 offset,
|
||||
UNUSED bool found_zero,
|
||||
bool found_minus) {
|
||||
const char *p = (const char *)(buf + offset);
|
||||
|
@ -340,10 +339,12 @@ static never_inline bool parse_large_integer(const u8 *const buf,
|
|||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// parse the number at buf + offset
|
||||
// define JSON_TEST_NUMBERS for unit testing
|
||||
static really_inline bool parse_number(const u8 *const buf, UNUSED size_t len,
|
||||
ParsedJson &pj, const u32 depth,
|
||||
ParsedJson &pj, UNUSED const u32 depth,
|
||||
const u32 offset, UNUSED bool found_zero,
|
||||
bool found_minus) {
|
||||
const char *p = (const char *)(buf + offset);
|
||||
|
|
|
@ -105,14 +105,14 @@ public:
|
|||
|
||||
|
||||
// Stores the signed 64-bit value i at the number-buffer cursor, advances the
// cursor past it, and then writes a tape entry tagged 'l' whose payload is the
// cursor's new offset from the start of number_buf.
void write_tape_s64(s64 i) {
  // safe because array will be 8-byte aligned, could use memcpy
  *((s64 *)current_number_buf_loc) = i;
  current_number_buf_loc += sizeof(s64); // exactly one value is consumed
  write_tape(current_number_buf_loc - number_buf, 'l');
}
|
||||
|
||||
// Stores the double d at the number-buffer cursor, advances the cursor past
// it, and then writes a tape entry tagged 'd' whose payload is the cursor's
// new offset from the start of number_buf.
void write_tape_double(double d) {
  // safe because array will be 8-byte aligned, could use memcpy
  *((double *)current_number_buf_loc) = d;
  current_number_buf_loc += sizeof(double); // exactly one value is consumed
  write_tape(current_number_buf_loc - number_buf, 'd');
}
|
||||
|
||||
|
@ -137,7 +137,7 @@ public:
|
|||
u32 scope_header; // the start of our current scope that contains our current location
|
||||
u32 location; // our current location on a tape
|
||||
|
||||
ParsedJsonHandle(ParsedJson & pj_) : pj(pj_), depth(0), scope_header(0), location(0) {}
|
||||
explicit ParsedJsonHandle(ParsedJson & pj_) : pj(pj_), depth(0), scope_header(0), location(0) {}
|
||||
// OK with default copy constructor as the way to clone the POD structure
|
||||
|
||||
// some placeholder navigation. Will convert over to a more native C++-ish way of doing
|
||||
|
@ -167,7 +167,7 @@ public:
|
|||
|
||||
|
||||
#ifdef DEBUG
|
||||
inline void dump256(m256 d, std::string msg) {
|
||||
inline void dump256(m256 d, const std::string msg) {
|
||||
for (u32 i = 0; i < 32; i++) {
|
||||
std::cout << std::setw(3) << (int)*(((u8 *)(&d)) + i);
|
||||
if (!((i + 1) % 8))
|
||||
|
@ -181,14 +181,14 @@ inline void dump256(m256 d, std::string msg) {
|
|||
}
|
||||
|
||||
// dump bits low to high
|
||||
inline void dumpbits(u64 v, std::string msg) {
|
||||
inline void dumpbits(u64 v, const std::string msg) {
|
||||
for (u32 i = 0; i < 64; i++) {
|
||||
std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_");
|
||||
}
|
||||
std::cout << " " << msg << "\n";
|
||||
}
|
||||
|
||||
inline void dumpbits32(u32 v, std::string msg) {
|
||||
inline void dumpbits32(u32 v, const std::string msg) {
|
||||
for (u32 i = 0; i < 32; i++) {
|
||||
std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_");
|
||||
}
|
||||
|
@ -201,14 +201,14 @@ inline void dumpbits32(u32 v, std::string msg) {
|
|||
#endif
|
||||
|
||||
// dump bits low to high
|
||||
inline void dumpbits_always(u64 v, std::string msg) {
|
||||
inline void dumpbits_always(u64 v, const std::string msg) {
|
||||
for (u32 i = 0; i < 64; i++) {
|
||||
std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_");
|
||||
}
|
||||
std::cout << " " << msg << "\n";
|
||||
}
|
||||
|
||||
inline void dumpbits32_always(u32 v, std::string msg) {
|
||||
inline void dumpbits32_always(u32 v, const std::string msg) {
|
||||
for (u32 i = 0; i < 32; i++) {
|
||||
std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_");
|
||||
}
|
||||
|
|
|
@ -58,7 +58,7 @@ really_inline bool handle_unicode_codepoint(const u8 **src_ptr, u8 **dst_ptr) {
|
|||
}
|
||||
|
||||
really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
|
||||
ParsedJson &pj, u32 depth, u32 offset) {
|
||||
ParsedJson &pj, UNUSED const u32 depth, u32 offset) {
|
||||
using namespace std;
|
||||
const u8 *src = &buf[offset + 1]; // we know that buf at offset is a "
|
||||
u8 *dst = pj.current_string_buf_loc;
|
||||
|
|
|
@ -137,7 +137,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
|||
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
||||
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
||||
quote_mask ^= prev_iter_inside_quote;
|
||||
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);
|
||||
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// might be undefined behavior
|
||||
const __m256i low_nibble_mask = _mm256_setr_epi8(
|
||||
// 0 9 a b c d
|
||||
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
|
||||
|
@ -220,7 +220,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
|||
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
||||
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
||||
quote_mask ^= prev_iter_inside_quote;
|
||||
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);
|
||||
// prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// we don't need this anymore
|
||||
|
||||
__m256i mask_20 = _mm256_set1_epi8(0x20); // c==32
|
||||
__m256i mask_70 =
|
||||
|
|
|
@ -10,9 +10,9 @@
|
|||
#include <cstring>
|
||||
|
||||
#include "jsonparser/common_defs.h"
|
||||
#include "jsonparser/simdjson_internal.h"
|
||||
#include "jsonparser/jsoncharutils.h"
|
||||
#include "jsonparser/numberparsing.h"
|
||||
#include "jsonparser/simdjson_internal.h"
|
||||
#include "jsonparser/stringparsing.h"
|
||||
|
||||
#include <iostream>
|
||||
|
@ -20,390 +20,430 @@
|
|||
#define PATH_SEP '/'
|
||||
|
||||
#if defined(DEBUG) && !defined(DEBUG_PRINTF)
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#define DEBUG_PRINTF(format, ...) printf("%s:%s:%d:" format, \
|
||||
strrchr(__FILE__, PATH_SEP) + 1, \
|
||||
__func__, __LINE__, ## __VA_ARGS__)
|
||||
#include <string.h>
|
||||
#define DEBUG_PRINTF(format, ...) \
|
||||
printf("%s:%s:%d:" format, strrchr(__FILE__, PATH_SEP) + 1, __func__, \
|
||||
__LINE__, ##__VA_ARGS__)
|
||||
#elif !defined(DEBUG_PRINTF)
|
||||
#define DEBUG_PRINTF(format, ...) do { } while(0)
|
||||
#define DEBUG_PRINTF(format, ...) \
|
||||
do { \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Returns true when the four bytes at loc are "true" and
// is_not_structural_or_whitespace(loc[4]) yields zero.
WARN_UNUSED
really_inline bool is_valid_true_atom(const u8 *loc) {
  // Both words are assembled with memcpy: casting the string literal (or loc)
  // to a wider integer pointer would be an unaligned, type-punned load.
  u32 expected;
  std::memcpy(&expected, "true", sizeof(expected));
  u32 found;
  std::memcpy(&found, loc, sizeof(found));
  u32 error = found ^ expected; // non-zero as soon as one byte differs
  error |= is_not_structural_or_whitespace(loc[4]);
  return error == 0;
}
|
||||
|
||||
// Returns true when the five bytes at loc are "false" and
// is_not_structural_or_whitespace(loc[5]) yields zero.
WARN_UNUSED
really_inline bool is_valid_false_atom(const u8 *loc) {
  // "false" is five bytes long, so the comparison has to be done on 64 bits:
  // a 32-bit error word would drop the fifth byte and accept e.g. "falsX".
  // Both words start zeroed and receive exactly five bytes via memcpy, which
  // also avoids an unaligned, type-punned load from the literal or from loc.
  u64 expected = 0;
  std::memcpy(&expected, "false", 5);
  u64 found = 0;
  std::memcpy(&found, loc, 5);
  u64 error = found ^ expected; // non-zero as soon as one byte differs
  error |= is_not_structural_or_whitespace(loc[5]);
  return error == 0;
}
|
||||
|
||||
// Returns true when the four bytes at loc are "null" and
// is_not_structural_or_whitespace(loc[4]) yields zero.
WARN_UNUSED
really_inline bool is_valid_null_atom(const u8 *loc) {
  // Both words are assembled with memcpy: casting the string literal (or loc)
  // to a wider integer pointer would be an unaligned, type-punned load.
  u32 expected;
  std::memcpy(&expected, "null", sizeof(expected));
  u32 found;
  std::memcpy(&found, loc, sizeof(found));
  u32 error = found ^ expected; // non-zero as soon as one byte differs
  error |= is_not_structural_or_whitespace(loc[4]);
  return error == 0;
}
|
||||
|
||||
// Implemented using Labels as Values which works in GCC and CLANG (and maybe also in Intel's compiler),
|
||||
// but won't work in MSVC. This would need to be reimplemented differently
|
||||
// if one wants to be standard compliant.
|
||||
// Implemented using Labels as Values which works in GCC and CLANG (and maybe
|
||||
// also in Intel's compiler), but won't work in MSVC. This would need to be
|
||||
// reimplemented differently if one wants to be standard compliant.
|
||||
WARN_UNUSED
|
||||
bool unified_machine(const u8 *buf, size_t len, ParsedJson &pj) {
|
||||
u32 i = 0; // index of the structural character (0,1,2,3...)
|
||||
u32 idx; // location of the structural character in the input (buf)
|
||||
u8 c; // used to track the (structural) character we are looking at, updated by UPDATE_CHAR macro
|
||||
u32 depth = 0;//START_DEPTH; // an arbitrary starting depth
|
||||
//void * ret_address[MAX_DEPTH]; // used to store "labels as value" (non-standard compiler extension)
|
||||
|
||||
// a call site is the start of either an object or an array ('[' or '{')
|
||||
// this is the location of the previous call site
|
||||
// (in the tape, at the given depth);
|
||||
// we only need one.
|
||||
|
||||
// We should also track the tape address of our containing
|
||||
// scope for two reasons. First, we will need to put an
|
||||
// up pointer there at each call site so we can navigate
|
||||
// upwards. Second, when we encounter the end of the scope
|
||||
// we can put the current offset into a record for the
|
||||
// scope so we know where it is
|
||||
|
||||
//u32 containing_scope_offset[MAX_DEPTH];
|
||||
|
||||
pj.init();
|
||||
|
||||
// add a sentinel to the end to avoid premature exit
|
||||
// need to be able to find the \0 at the 'padded length' end of the buffer
|
||||
// FIXME: TERRIFYING!
|
||||
//size_t j;
|
||||
//for (j = len; buf[j] != 0; j++)
|
||||
// ;
|
||||
//pj.structural_indexes[pj.n_structural_indexes++] = j;
|
||||
|
||||
u32 i = 0; // index of the structural character (0,1,2,3...)
|
||||
u32 idx; // location of the structural character in the input (buf)
|
||||
u8 c; // used to track the (structural) character we are looking at, updated
|
||||
// by UPDATE_CHAR macro
|
||||
u32 depth = 0; // could have an arbitrary starting depth
|
||||
pj.init();
|
||||
// this macro reads the next structural character, updating idx, i and c.
|
||||
#define UPDATE_CHAR() { idx = pj.structural_indexes[i++]; c = buf[idx]; DEBUG_PRINTF("Got %c at %d (%d offset)\n", c, idx, i-1);}
|
||||
#define UPDATE_CHAR() \
|
||||
{ \
|
||||
idx = pj.structural_indexes[i++]; \
|
||||
c = buf[idx]; \
|
||||
DEBUG_PRINTF("Got %c at %d (%d offset) (depth %d)\n", c, idx, i - 1, \
|
||||
depth); \
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////// START STATE /////////////////////////////
|
||||
printf("at start\n");
|
||||
DEBUG_PRINTF("at start\n");
|
||||
pj.ret_address[depth] = &&start_continue;
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, 'r'); // r for root, 0 is going to get overwritten
|
||||
depth++;// everything starts at depth = 1, depth = 0 is just for the root
|
||||
if(depth > pj.depthcapacity) {
|
||||
goto fail;
|
||||
}
|
||||
printf("got char %c \n",c);
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case '{': goto object_begin;
|
||||
case '[': goto array_begin;
|
||||
////////////////////////////// START STATE /////////////////////////////
|
||||
DEBUG_PRINTF("at start\n");
|
||||
pj.ret_address[depth] = &&start_continue;
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, 'r'); // r for root, 0 is going to get overwritten
|
||||
depth++; // everything starts at depth = 1, depth = 0 is just for the root
|
||||
if (depth > pj.depthcapacity) {
|
||||
goto fail;
|
||||
}
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case '{':
|
||||
goto object_begin;
|
||||
case '[':
|
||||
goto array_begin;
|
||||
#define SIMDJSON_ALLOWANYTHINGINROOT
|
||||
// A JSON text is a serialized value. Note that certain previous
|
||||
// specifications of JSON constrained a JSON text to be an object or an
|
||||
// array. Implementations that generate only objects or arrays where a
|
||||
// JSON text is called for will be interoperable in the sense that all
|
||||
// implementations will accept these as conforming JSON texts.
|
||||
// https://tools.ietf.org/html/rfc8259
|
||||
// A JSON text is a serialized value. Note that certain previous
|
||||
// specifications of JSON constrained a JSON text to be an object or an
|
||||
// array. Implementations that generate only objects or arrays where a
|
||||
// JSON text is called for will be interoperable in the sense that all
|
||||
// implementations will accept these as conforming JSON texts.
|
||||
// https://tools.ietf.org/html/rfc8259
|
||||
#ifdef SIMDJSON_ALLOWANYTHINGINROOT
|
||||
case '"': {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
goto start_continue;
|
||||
}
|
||||
case 't':
|
||||
if (!is_valid_true_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
goto start_continue;
|
||||
case 'f':
|
||||
if (!is_valid_false_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
goto start_continue;
|
||||
case 'n':
|
||||
if (!is_valid_null_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
goto start_continue;
|
||||
case '0': {
|
||||
if (!parse_number(buf, len, pj, depth, idx, true, false)) {
|
||||
goto fail;
|
||||
}
|
||||
goto start_continue;
|
||||
}
|
||||
case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
|
||||
if (!parse_number(buf, len, pj, depth, idx, false, false)) {
|
||||
goto fail;
|
||||
}
|
||||
goto start_continue;
|
||||
}
|
||||
case '-': {
|
||||
if (!parse_number(buf, len, pj, depth, idx, false, true)) {
|
||||
goto fail;
|
||||
}
|
||||
goto start_continue;
|
||||
}
|
||||
#endif // ALLOWANYTHINGINROOT
|
||||
default: goto fail;
|
||||
case '"': {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 't':
|
||||
if (!is_valid_true_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case 'f':
|
||||
if (!is_valid_false_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case 'n':
|
||||
if (!is_valid_null_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case '0': {
|
||||
if (!parse_number(buf, len, pj, depth, idx, true, false)) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9': {
|
||||
if (!parse_number(buf, len, pj, depth, idx, false, false)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '-': {
|
||||
if (!parse_number(buf, len, pj, depth, idx, false, true)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
#endif // ALLOWANYTHINGINROOT
|
||||
default:
|
||||
goto fail;
|
||||
}
|
||||
#ifdef SIMDJSON_ALLOWANYTHINGINROOT
|
||||
depth--; // for fall-through cases (e.g., documents containing just a string)
|
||||
#endif // ALLOWANYTHINGINROOT
|
||||
|
||||
start_continue:
|
||||
DEBUG_PRINTF("in start_object_close\n");
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case 0: goto succeed;
|
||||
default: goto fail;
|
||||
}
|
||||
DEBUG_PRINTF("in start_object_close\n");
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case 0:
|
||||
goto succeed;
|
||||
default:
|
||||
goto fail;
|
||||
}
|
||||
|
||||
////////////////////////////// OBJECT STATES /////////////////////////////
|
||||
////////////////////////////// OBJECT STATES /////////////////////////////
|
||||
|
||||
object_begin:
|
||||
printf("in object_begin %c \n",c);
|
||||
DEBUG_PRINTF("in object_begin\n");
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, c);
|
||||
depth ++;
|
||||
if(depth > pj.depthcapacity) {
|
||||
goto fail;
|
||||
}
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case '"': {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
goto object_key_state;
|
||||
}
|
||||
case '}': goto scope_end;
|
||||
default: goto fail;
|
||||
DEBUG_PRINTF("in object_begin\n");
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, c);
|
||||
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case '"': {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
goto object_key_state;
|
||||
}
|
||||
case '}':
|
||||
goto scope_end; // could also go to object_continue
|
||||
default:
|
||||
goto fail;
|
||||
}
|
||||
|
||||
object_key_state:
|
||||
printf("in object_key_state %c \n",c);
|
||||
DEBUG_PRINTF("in object_key_state\n");
|
||||
UPDATE_CHAR();
|
||||
if (c != ':') {
|
||||
goto fail;
|
||||
}
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case '"': {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 't':
|
||||
if (!is_valid_true_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case 'f':
|
||||
if (!is_valid_false_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case 'n':
|
||||
if (!is_valid_null_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case '0': {
|
||||
if (!parse_number(buf, len, pj, depth, idx, true, false)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9': {
|
||||
if (!parse_number(buf, len, pj, depth, idx, false, false)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '-': {
|
||||
if (!parse_number(buf, len, pj, depth, idx, false, true)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '{': {
|
||||
// we have not yet encountered } so we need to come back for it
|
||||
pj.ret_address[depth] = &&object_continue;
|
||||
// we found an object inside an object, so we need to increment the depth
|
||||
depth++;
|
||||
if (depth > pj.depthcapacity) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("in object_key_state\n");
|
||||
UPDATE_CHAR();
|
||||
if (c != ':') {
|
||||
goto fail;
|
||||
}
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case '"': {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 't': if (!is_valid_true_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case 'f': if (!is_valid_false_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case 'n': if (!is_valid_null_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case '0': {
|
||||
if (!parse_number(buf, len, pj, depth, idx, true, false)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
|
||||
if (!parse_number(buf, len, pj, depth, idx, false, false)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '-': {
|
||||
if (!parse_number(buf, len, pj, depth, idx, false, true)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '{': {
|
||||
pj.ret_address[depth] = &&object_continue;
|
||||
goto object_begin;
|
||||
}
|
||||
case '[': {
|
||||
pj.ret_address[depth] = &&object_continue;
|
||||
goto array_begin;
|
||||
}
|
||||
default: goto fail;
|
||||
goto object_begin;
|
||||
}
|
||||
case '[': {
|
||||
// we have not yet encountered } so we need to come back for it
|
||||
pj.ret_address[depth] = &&object_continue;
|
||||
// we found an array inside an object, so we need to increment the depth
|
||||
depth++;
|
||||
if (depth > pj.depthcapacity) {
|
||||
goto fail;
|
||||
}
|
||||
goto array_begin;
|
||||
}
|
||||
default:
|
||||
goto fail;
|
||||
}
|
||||
|
||||
object_continue:
|
||||
printf("in object_continue %c \n",c);
|
||||
|
||||
DEBUG_PRINTF("in object_continue\n");
|
||||
DEBUG_PRINTF("in object_continue\n");
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case ',':
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case ',':
|
||||
UPDATE_CHAR();
|
||||
if (c != '"') {
|
||||
goto fail;
|
||||
} else {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
goto object_key_state;
|
||||
}
|
||||
case '}': goto scope_end;
|
||||
default: goto fail;
|
||||
if (c != '"') {
|
||||
goto fail;
|
||||
} else {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
goto object_key_state;
|
||||
}
|
||||
case '}':
|
||||
goto scope_end;
|
||||
default:
|
||||
goto fail;
|
||||
}
|
||||
|
||||
////////////////////////////// COMMON STATE /////////////////////////////
|
||||
////////////////////////////// COMMON STATE /////////////////////////////
|
||||
|
||||
scope_end:
|
||||
// write our tape location to the header scope
|
||||
depth--;
|
||||
pj.write_tape(pj.containing_scope_offset[depth], c);
|
||||
pj.annotate_previousloc(pj.containing_scope_offset[depth], pj.get_current_loc());
|
||||
// goto saved_state
|
||||
goto *pj.ret_address[depth];
|
||||
|
||||
|
||||
////////////////////////////// ARRAY STATES /////////////////////////////
|
||||
// write our tape location to the header scope
|
||||
depth--;
|
||||
pj.write_tape(pj.containing_scope_offset[depth], c);
|
||||
pj.annotate_previousloc(pj.containing_scope_offset[depth],
|
||||
pj.get_current_loc());
|
||||
// goto saved_state
|
||||
goto *pj.ret_address[depth];
|
||||
|
||||
////////////////////////////// ARRAY STATES /////////////////////////////
|
||||
array_begin:
|
||||
printf("in array_begin %c \n",c);
|
||||
|
||||
DEBUG_PRINTF("in array_begin\n");
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, c);
|
||||
depth ++;
|
||||
if(depth > pj.depthcapacity) {
|
||||
goto fail;
|
||||
}
|
||||
UPDATE_CHAR();
|
||||
if (c == ']') {
|
||||
goto scope_end;
|
||||
}
|
||||
DEBUG_PRINTF("in array_begin\n");
|
||||
pj.containing_scope_offset[depth] = pj.get_current_loc();
|
||||
pj.write_tape(0, c);
|
||||
UPDATE_CHAR();
|
||||
if (c == ']') {
|
||||
goto scope_end; // could also go to array_continue
|
||||
}
|
||||
|
||||
main_array_switch:
|
||||
// we call update char on all paths in, so we can peek at c on the
|
||||
// on paths that can accept a close square brace (post-, and at start)
|
||||
switch (c) {
|
||||
case '"': {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
goto array_continue;
|
||||
}
|
||||
case 't': if (!is_valid_true_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case 'f': if (!is_valid_false_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case 'n': if (!is_valid_null_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
|
||||
case '0': {
|
||||
if (!parse_number(buf, len, pj, depth, idx, true, false)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
|
||||
if (!parse_number(buf, len, pj, depth, idx, false, false)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '-': {
|
||||
if (!parse_number(buf, len, pj, depth, idx, false, true)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '{': {
|
||||
pj.ret_address[depth] = &&array_continue;
|
||||
goto object_begin;
|
||||
}
|
||||
case '[': {
|
||||
pj.ret_address[depth] = &&array_continue;
|
||||
goto array_begin;
|
||||
}
|
||||
default: goto fail;
|
||||
// we call update char on all paths in, so we can peek at c on the
|
||||
// on paths that can accept a close square brace (post-, and at start)
|
||||
switch (c) {
|
||||
case '"': {
|
||||
if (!parse_string(buf, len, pj, depth, idx)) {
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 't':
|
||||
if (!is_valid_true_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case 'f':
|
||||
if (!is_valid_false_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break;
|
||||
case 'n':
|
||||
if (!is_valid_null_atom(buf + idx)) {
|
||||
goto fail;
|
||||
}
|
||||
pj.write_tape(0, c);
|
||||
break; // goto array_continue;
|
||||
|
||||
case '0': {
|
||||
if (!parse_number(buf, len, pj, depth, idx, true, false)) {
|
||||
goto fail;
|
||||
}
|
||||
break; // goto array_continue;
|
||||
}
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9': {
|
||||
if (!parse_number(buf, len, pj, depth, idx, false, false)) {
|
||||
goto fail;
|
||||
}
|
||||
break; // goto array_continue;
|
||||
}
|
||||
case '-': {
|
||||
if (!parse_number(buf, len, pj, depth, idx, false, true)) {
|
||||
goto fail;
|
||||
}
|
||||
break; // goto array_continue;
|
||||
}
|
||||
case '{': {
|
||||
// we have not yet encountered ] so we need to come back for it
|
||||
pj.ret_address[depth] = &&array_continue;
|
||||
|
||||
// we found an object inside an array, so we need to increment the depth
|
||||
depth++;
|
||||
if (depth > pj.depthcapacity) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
goto object_begin;
|
||||
}
|
||||
case '[': {
|
||||
// we have not yet encountered ] so we need to come back for it
|
||||
pj.ret_address[depth] = &&array_continue;
|
||||
|
||||
// we found an array inside an array, so we need to increment the depth
|
||||
depth++;
|
||||
if (depth > pj.depthcapacity) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
goto array_begin;
|
||||
}
|
||||
default:
|
||||
goto fail;
|
||||
}
|
||||
|
||||
array_continue:
|
||||
printf("in array_begin %c \n",c);
|
||||
|
||||
DEBUG_PRINTF("in array_continue\n");
|
||||
DEBUG_PRINTF("in array_continue\n");
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case ',':
|
||||
UPDATE_CHAR();
|
||||
switch (c) {
|
||||
case ',': UPDATE_CHAR(); goto main_array_switch;
|
||||
case ']': goto scope_end;
|
||||
default: goto fail;
|
||||
}
|
||||
goto main_array_switch;
|
||||
case ']':
|
||||
goto scope_end;
|
||||
default:
|
||||
goto fail;
|
||||
}
|
||||
|
||||
////////////////////////////// FINAL STATES /////////////////////////////
|
||||
////////////////////////////// FINAL STATES /////////////////////////////
|
||||
|
||||
succeed:
|
||||
DEBUG_PRINTF("in succeed\n");
|
||||
// we annotate the root node
|
||||
depth--;
|
||||
// next line allows us to go back to the start
|
||||
pj.write_tape(pj.containing_scope_offset[depth], 'r');// r is root
|
||||
// next line tells the root node how to go to the end
|
||||
pj.annotate_previousloc(pj.containing_scope_offset[depth], pj.get_current_loc());
|
||||
DEBUG_PRINTF("in succeed, depth = %d \n", depth);
|
||||
// we annotate the root node
|
||||
// depth--;
|
||||
// next line allows us to go back to the start
|
||||
pj.write_tape(pj.containing_scope_offset[depth], 'r'); // r is root
|
||||
// next line tells the root node how to go to the end
|
||||
pj.annotate_previousloc(pj.containing_scope_offset[depth],
|
||||
pj.get_current_loc());
|
||||
|
||||
#ifdef DEBUG
|
||||
pj.dump_tapes();
|
||||
pj.dump_tapes();
|
||||
#endif
|
||||
return true;
|
||||
return true;
|
||||
|
||||
fail:
|
||||
DEBUG_PRINTF("in fail\n");
|
||||
DEBUG_PRINTF("in fail\n");
|
||||
#ifdef DEBUG
|
||||
pj.dump_tapes();
|
||||
pj.dump_tapes();
|
||||
#endif
|
||||
return false;
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#include <unistd.h>
|
||||
|
||||
#include "jsonparser/jsonparser.h"
|
||||
|
||||
|
@ -30,7 +31,6 @@ void on_json_error( void *, const fastjson::ErrorContext& ec) {
|
|||
bool fastjson_parse(const char *input) {
|
||||
fastjson::Token token;
|
||||
fastjson::dom::Chunk chunk;
|
||||
std::string error_message;
|
||||
return fastjson::dom::parse_string(input, &token, &chunk, 0, &on_json_error, NULL);
|
||||
}
|
||||
// end of fastjson stuff
|
||||
|
@ -41,17 +41,30 @@ using namespace rapidjson;
|
|||
using namespace std;
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc < 2) {
|
||||
bool verbose = false;
|
||||
int c;
|
||||
while ((c = getopt (argc, argv, "v")) != -1)
|
||||
switch (c)
|
||||
{
|
||||
case 'v':
|
||||
verbose = true;
|
||||
break;
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
if (optind >= argc) {
|
||||
cerr << "Usage: " << argv[0] << " <jsonfile>\n";
|
||||
cerr << "Or " << argv[0] << " -v <jsonfile>\n";
|
||||
exit(1);
|
||||
}
|
||||
bool verbose = false;
|
||||
if (argc > 2) {
|
||||
if (strcmp(argv[1], "-v"))
|
||||
verbose = true;
|
||||
const char * filename = argv[optind];
|
||||
std::pair<u8 *, size_t> p;
|
||||
try {
|
||||
p = get_corpus(filename);
|
||||
} catch (const std::exception& e) { // caught by reference to base
|
||||
std::cout << "Could not load the file " << filename << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
pair<u8 *, size_t> p = get_corpus(argv[argc - 1]);
|
||||
if (verbose) {
|
||||
std::cout << "Input has ";
|
||||
if (p.second > 1024 * 1024)
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "jsonparser/jsonparser.h"
|
||||
|
||||
|
@ -41,6 +42,8 @@ bool validate(const char *dirname) {
|
|||
printf("nothing in dir %s \n", dirname);
|
||||
return false;
|
||||
}
|
||||
bool * isfileasexpected = new bool[c];
|
||||
for(int i = 0; i < c; i++) isfileasexpected[i] = true;
|
||||
size_t howmany = 0;
|
||||
bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
|
||||
for (int i = 0; i < c; i++) {
|
||||
|
@ -56,7 +59,13 @@ bool validate(const char *dirname) {
|
|||
} else {
|
||||
strcpy(fullpath + dirlen, name);
|
||||
}
|
||||
std::pair<u8 *, size_t> p = get_corpus(fullpath);
|
||||
std::pair<u8 *, size_t> p;
|
||||
try {
|
||||
p = get_corpus(fullpath);
|
||||
} catch (const std::exception& e) {
|
||||
std::cout << "Could not load the file " << fullpath << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
|
||||
if(pj_ptr == NULL) {
|
||||
std::cerr<< "can't allocate memory"<<std::endl;
|
||||
|
@ -70,11 +79,13 @@ bool validate(const char *dirname) {
|
|||
howmany--;
|
||||
} else if (startsWith("pass", name)) {
|
||||
if (!isok) {
|
||||
isfileasexpected[i] = false;
|
||||
printf("warning: file %s should pass but it fails.\n", name);
|
||||
everythingfine = false;
|
||||
}
|
||||
} else if (startsWith("fail", name)) {
|
||||
if (isok) {
|
||||
isfileasexpected[i] = false;
|
||||
printf("warning: file %s should fail but it passes.\n", name);
|
||||
everythingfine = false;
|
||||
}
|
||||
|
@ -87,11 +98,20 @@ bool validate(const char *dirname) {
|
|||
deallocate_ParsedJson(pj_ptr);
|
||||
}
|
||||
}
|
||||
printf("%zu files checked.\n", howmany);
|
||||
if(everythingfine) {
|
||||
printf("All ok!\n");
|
||||
} else {
|
||||
printf("There were problems! Consider reviewing the following files:\n");
|
||||
for(int i = 0; i < c; i++) {
|
||||
if(!isfileasexpected[i]) printf("%s \n", entry_list[i]->d_name);
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < c; ++i)
|
||||
free(entry_list[i]);
|
||||
free(entry_list);
|
||||
printf("%zu files checked.\n", howmany);
|
||||
if(everythingfine) printf("All ok!\n");
|
||||
delete[] isfileasexpected;
|
||||
|
||||
return everythingfine;
|
||||
}
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ bool startsWith(const char *pre, const char *str) {
|
|||
size_t lenpre = strlen(pre), lenstr = strlen(str);
|
||||
return lenstr < lenpre ? false : strncmp(pre, str, lenpre) == 0;
|
||||
}
|
||||
bool is_in_bad_list(char *buf) {
|
||||
bool is_in_bad_list(const char *buf) {
|
||||
for (size_t i = 0; i < sizeof(really_bad) / sizeof(really_bad[0]); i++)
|
||||
if (startsWith(really_bad[i], buf))
|
||||
return true;
|
||||
|
@ -38,9 +38,9 @@ bool is_in_bad_list(char *buf) {
|
|||
inline void foundInvalidNumber(const u8 *buf) {
|
||||
invalid_count++;
|
||||
char *endptr;
|
||||
double expected = strtod((char *)buf, &endptr);
|
||||
if (endptr != (char *)buf) {
|
||||
if (!is_in_bad_list((char *)buf)) {
|
||||
double expected = strtod((const char *)buf, &endptr);
|
||||
if (endptr != (const char *)buf) {
|
||||
if (!is_in_bad_list((const char *)buf)) {
|
||||
printf(
|
||||
"Warning: foundInvalidNumber %.32s whereas strtod parses it to %f, ",
|
||||
buf, expected);
|
||||
|
@ -53,8 +53,8 @@ inline void foundInvalidNumber(const u8 *buf) {
|
|||
inline void foundInteger(int64_t result, const u8 *buf) {
|
||||
int_count++;
|
||||
char *endptr;
|
||||
long long expected = strtoll((char *)buf, &endptr, 10);
|
||||
if ((endptr == (char *)buf) || (expected != result)) {
|
||||
long long expected = strtoll((const char *)buf, &endptr, 10);
|
||||
if ((endptr == (const char *)buf) || (expected != result)) {
|
||||
printf("Error: parsed %" PRId64 " out of %.32s, ", result, buf);
|
||||
printf(" while parsing %s \n", fullpath);
|
||||
parse_error |= PARSE_ERROR;
|
||||
|
@ -64,8 +64,8 @@ inline void foundInteger(int64_t result, const u8 *buf) {
|
|||
inline void foundFloat(double result, const u8 *buf) {
|
||||
char *endptr;
|
||||
float_count++;
|
||||
double expected = strtod((char *)buf, &endptr);
|
||||
if (endptr == (char *)buf) {
|
||||
double expected = strtod((const char *)buf, &endptr);
|
||||
if (endptr == (const char *)buf) {
|
||||
printf("parsed %f from %.32s whereas strtod refuses to parse a float, ",
|
||||
result, buf);
|
||||
printf(" while parsing %s \n", fullpath);
|
||||
|
@ -123,7 +123,13 @@ bool validate(const char *dirname) {
|
|||
} else {
|
||||
strcpy(fullpath + dirlen, name);
|
||||
}
|
||||
std::pair<u8 *, size_t> p = get_corpus(fullpath);
|
||||
std::pair<u8 *, size_t> p;
|
||||
try {
|
||||
p = get_corpus(fullpath);
|
||||
} catch (const std::exception& e) {
|
||||
std::cout << "Could not load the file " << fullpath << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
// terrible hack but just to get it working
|
||||
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
|
||||
if (pj_ptr == NULL) {
|
||||
|
|
|
@ -241,7 +241,7 @@ inline void foundString(const u8 *buf, const u8 *parsed_begin,
|
|||
// we have a zero-length string
|
||||
if (parsed_begin != parsed_end) {
|
||||
printf("WARNING: We have a zero-length but gap is %zu \n",
|
||||
parsed_end - parsed_begin);
|
||||
(size_t)(parsed_end - parsed_begin));
|
||||
probable_bug = true;
|
||||
}
|
||||
empty_string++;
|
||||
|
@ -252,12 +252,12 @@ inline void foundString(const u8 *buf, const u8 *parsed_begin,
|
|||
printf("WARNING: lengths on parsed strings disagree %zu %zu \n", thislen,
|
||||
len);
|
||||
printf("\nour parsed string : '%*s'\n\n", (int)thislen,
|
||||
(char *)parsed_begin);
|
||||
print_hex((char *)parsed_begin, thislen);
|
||||
(const char *)parsed_begin);
|
||||
print_hex((const char *)parsed_begin, thislen);
|
||||
printf("\n");
|
||||
|
||||
printf("reference parsing :'%*s'\n\n", (int)len, bigbuffer);
|
||||
print_hex((char *)bigbuffer, len);
|
||||
print_hex((const char *)bigbuffer, len);
|
||||
printf("\n");
|
||||
|
||||
probable_bug = true;
|
||||
|
@ -267,15 +267,15 @@ inline void foundString(const u8 *buf, const u8 *parsed_begin,
|
|||
printf("Lengths %zu %zu \n", thislen, len);
|
||||
|
||||
printf("\nour parsed string : '%*s'\n", (int)thislen,
|
||||
(char *)parsed_begin);
|
||||
print_hex((char *)parsed_begin, thislen);
|
||||
(const char *)parsed_begin);
|
||||
print_hex((const char *)parsed_begin, thislen);
|
||||
printf("\n");
|
||||
|
||||
printf("reference parsing :'%*s'\n", (int)len, bigbuffer);
|
||||
print_hex((char *)bigbuffer, len);
|
||||
print_hex((const char *)bigbuffer, len);
|
||||
printf("\n");
|
||||
|
||||
print_cmp_hex((char *)parsed_begin, bigbuffer, thislen);
|
||||
print_cmp_hex((const char *)parsed_begin, bigbuffer, thislen);
|
||||
|
||||
probable_bug = true;
|
||||
}
|
||||
|
@ -325,8 +325,13 @@ bool validate(const char *dirname) {
|
|||
} else {
|
||||
strcpy(fullpath + dirlen, name);
|
||||
}
|
||||
std::pair<u8 *, size_t> p = get_corpus(fullpath);
|
||||
// terrible hack but just to get it working
|
||||
std::pair<u8 *, size_t> p;
|
||||
try {
|
||||
p = get_corpus(fullpath);
|
||||
} catch (const std::exception& e) {
|
||||
std::cout << "Could not load the file " << fullpath << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
|
||||
if (pj_ptr == NULL) {
|
||||
std::cerr << "can't allocate memory" << std::endl;
|
||||
|
|
Loading…
Reference in New Issue