Lots and lots of cleaning.

This commit is contained in:
Daniel Lemire 2018-11-27 14:37:59 -05:00
parent 5fae7b2100
commit a43b0772e1
15 changed files with 521 additions and 465 deletions

View File

@ -21,7 +21,7 @@ template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
std::vector<uint64_t> ids; std::vector<uint64_t> ids;
public: public:
LinuxEvents(std::vector<int> config_vec) : fd(0) { explicit LinuxEvents(std::vector<int> config_vec) : fd(0) {
memset(&attribs, 0, sizeof(attribs)); memset(&attribs, 0, sizeof(attribs));
attribs.type = TYPE; attribs.type = TYPE;
attribs.size = sizeof(attribs); attribs.size = sizeof(attribs);

View File

@ -1,3 +1,4 @@
#include <unistd.h>
#include <iostream> #include <iostream>
#include "benchmark.h" #include "benchmark.h"
@ -13,6 +14,7 @@
#include "rapidjson/writer.h" #include "rapidjson/writer.h"
#include "sajson.h" #include "sajson.h"
using namespace rapidjson; using namespace rapidjson;
using namespace std; using namespace std;
@ -43,17 +45,29 @@ std::string rapidstringme(char *json) {
} }
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
if (argc < 2) { int c;
cerr << "Usage: " << argv[0] << " <jsonfile>\n"; bool verbose = false;
cerr << "Or " << argv[0] << " -v <jsonfile>\n"; while ((c = getopt (argc, argv, "v")) != -1)
switch (c)
{
case 'v':
verbose = true;
break;
default:
abort ();
}
if (optind >= argc) {
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
exit(1); exit(1);
} }
bool verbose = false; const char * filename = argv[optind];
if (argc > 2) { pair<u8 *, size_t> p;
if (strcmp(argv[1], "-v")) try {
verbose = true; p = get_corpus(filename);
} catch (const std::exception& e) { // caught by reference to base
std::cout << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE;
} }
pair<u8 *, size_t> p = get_corpus(argv[argc - 1]);
if (verbose) { if (verbose) {
std::cout << "Input has "; std::cout << "Input has ";
if (p.second > 1024 * 1024) if (p.second > 1024 * 1024)

View File

@ -31,79 +31,14 @@
#include "jsonparser/stage34_unified.h" #include "jsonparser/stage34_unified.h"
using namespace std; using namespace std;
// https://stackoverflow.com/questions/2616906/how-do-i-output-coloured-text-to-a-linux-terminal
namespace Color {
enum Code {
FG_DEFAULT = 39,
FG_BLACK = 30,
FG_RED = 31,
FG_GREEN = 32,
FG_YELLOW = 33,
FG_BLUE = 34,
FG_MAGENTA = 35,
FG_CYAN = 36,
FG_LIGHT_GRAY = 37,
FG_DARK_GRAY = 90,
FG_LIGHT_RED = 91,
FG_LIGHT_GREEN = 92,
FG_LIGHT_YELLOW = 93,
FG_LIGHT_BLUE = 94,
FG_LIGHT_MAGENTA = 95,
FG_LIGHT_CYAN = 96,
FG_WHITE = 97,
BG_RED = 41,
BG_GREEN = 42,
BG_BLUE = 44,
BG_DEFAULT = 49
};
class Modifier {
Code code;
public:
Modifier(Code pCode) : code(pCode) {}
friend std::ostream &operator<<(std::ostream &os, const Modifier &mod) {
return os << "\033[" << mod.code << "m";
}
};
} // namespace Color
void colorfuldisplay(ParsedJson &pj, const u8 *buf) {
Color::Modifier greenfg(Color::FG_GREEN);
Color::Modifier yellowfg(Color::FG_YELLOW);
Color::Modifier deffg(Color::FG_DEFAULT);
size_t i = 0;
// skip initial fluff
while ((i + 1 < pj.n_structural_indexes) &&
(pj.structural_indexes[i] == pj.structural_indexes[i + 1])) {
i++;
}
for (; i < pj.n_structural_indexes; i++) {
u32 idx = pj.structural_indexes[i];
u8 c = buf[idx];
if (((c & 0xdf) == 0x5b)) { // meaning 7b or 5b, { or [
std::cout << greenfg << buf[idx] << deffg;
} else if (((c & 0xdf) == 0x5d)) { // meaning 7d or 5d, } or ]
std::cout << greenfg << buf[idx] << deffg;
} else {
std::cout << yellowfg << buf[idx] << deffg;
}
if (i + 1 < pj.n_structural_indexes) {
u32 nextidx = pj.structural_indexes[i + 1];
for (u32 pos = idx + 1; pos < nextidx; pos++) {
std::cout << buf[pos];
}
}
}
std::cout << std::endl;
}
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
bool verbose = false; bool verbose = false;
bool dump = false; bool dump = false;
bool forceoneiteration = false;
int c; int c;
while ((c = getopt (argc, argv, "vd")) != -1) while ((c = getopt (argc, argv, "1vd")) != -1)
switch (c) switch (c)
{ {
case 'v': case 'v':
@ -112,6 +47,9 @@ int main(int argc, char *argv[]) {
case 'd': case 'd':
dump = true; dump = true;
break; break;
case '1':
forceoneiteration = true;
break;
default: default:
abort (); abort ();
} }
@ -124,7 +62,13 @@ int main(int argc, char *argv[]) {
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl; cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
} }
if(verbose) cout << "[verbose] loading " << filename << endl; if(verbose) cout << "[verbose] loading " << filename << endl;
pair<u8 *, size_t> p = get_corpus(filename); pair<u8 *, size_t> p;
try {
p = get_corpus(filename);
} catch (const std::exception& e) { // caught by reference to base
std::cout << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE;
}
if(verbose) cout << "[verbose] loaded " << filename << " ("<< p.second << " bytes)" << endl; if(verbose) cout << "[verbose] loaded " << filename << " ("<< p.second << " bytes)" << endl;
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024); ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
ParsedJson &pj(*pj_ptr); ParsedJson &pj(*pj_ptr);
@ -133,7 +77,7 @@ int main(int argc, char *argv[]) {
#if defined(DEBUG) #if defined(DEBUG)
const u32 iterations = 1; const u32 iterations = 1;
#else #else
const u32 iterations = p.second < 1 * 1000 * 1000? 1000 : 10; const u32 iterations = forceoneiteration ? 1 : ( p.second < 1 * 1000 * 1000? 1000 : 10);
#endif #endif
vector<double> res; vector<double> res;
res.resize(iterations); res.resize(iterations);
@ -174,7 +118,7 @@ int main(int argc, char *argv[]) {
} }
unified.start(); unified.start();
#endif #endif
isok = flatten_indexes(p.second, pj); isok = isok && flatten_indexes(p.second, pj);
#ifndef SQUASH_COUNTERS #ifndef SQUASH_COUNTERS
unified.end(results); unified.end(results);
cy2 += results[0]; cy2 += results[0];
@ -187,7 +131,7 @@ int main(int argc, char *argv[]) {
unified.start(); unified.start();
#endif #endif
isok = unified_machine(p.first, p.second, pj); isok = isok && unified_machine(p.first, p.second, pj);
#ifndef SQUASH_COUNTERS #ifndef SQUASH_COUNTERS
unified.end(results); unified.end(results);
cy3 += results[0]; cy3 += results[0];

View File

@ -31,7 +31,6 @@ void on_json_error( void *, const fastjson::ErrorContext& ec) {
bool fastjson_parse(const char *input) { bool fastjson_parse(const char *input) {
fastjson::Token token; fastjson::Token token;
fastjson::dom::Chunk chunk; fastjson::dom::Chunk chunk;
std::string error_message;
return fastjson::dom::parse_string(input, &token, &chunk, 0, &on_json_error, NULL); return fastjson::dom::parse_string(input, &token, &chunk, 0, &on_json_error, NULL);
} }
// end of fastjson stuff // end of fastjson stuff
@ -62,7 +61,14 @@ int main(int argc, char *argv[]) {
if(optind + 1 < argc) { if(optind + 1 < argc) {
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl; cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
} }
pair<u8 *, size_t> p = get_corpus(filename); pair<u8 *, size_t> p;
try {
p = get_corpus(filename);
} catch (const std::exception& e) { // caught by reference to base
std::cout << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE;
}
if (verbose) { if (verbose) {
std::cout << "Input has "; std::cout << "Input has ";
if (p.second > 1024 * 1024) if (p.second > 1024 * 1024)

View File

@ -20,6 +20,13 @@ char * allocate_aligned_buffer(size_t length);
// first element of the pair is a string (null terminated) // first element of the pair is a string (null terminated)
// whereas the second element is the length. // whereas the second element is the length.
// caller is responsible to free (free std::pair<u8 *, size_t>.first) // caller is responsible to free (free std::pair<u8 *, size_t>.first)
//
// throws an exception if the file cannot be opened, use try/catch
// try {
// p = get_corpus(filename);
// } catch (const std::exception& e) {
// std::cout << "Could not load the file " << filename << std::endl;
// }
std::pair<u8 *, size_t> get_corpus(std::string filename); std::pair<u8 *, size_t> get_corpus(std::string filename);
#endif #endif

View File

@ -128,7 +128,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1);
const __m128i mul_1_10000 = const __m128i mul_1_10000 =
_mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1);
const __m128i input = _mm_sub_epi8(_mm_loadu_si128((__m128i *)chars), ascii0); const __m128i input = _mm_sub_epi8(_mm_loadu_si128((const __m128i *)chars), ascii0);
const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10);
const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); const __m128i t2 = _mm_madd_epi16(t1, mul_1_100);
const __m128i t3 = _mm_packus_epi32(t2, t2); const __m128i t3 = _mm_packus_epi32(t2, t2);
@ -149,7 +149,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
// //
static never_inline bool static never_inline bool
parse_highprecision_float(const u8 *const buf, UNUSED size_t len, parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
ParsedJson &pj, const u32 depth, const u32 offset, ParsedJson &pj, UNUSED const u32 depth, const u32 offset,
UNUSED bool found_zero, bool found_minus) { UNUSED bool found_zero, bool found_minus) {
const char *p = (const char *)(buf + offset); const char *p = (const char *)(buf + offset);
@ -193,7 +193,6 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
} }
exponent = firstafterperiod - p; exponent = firstafterperiod - p;
} }
int64_t expnumber = 0; // exponential part
if (('e' == *p) || ('E' == *p)) { if (('e' == *p) || ('E' == *p)) {
++p; ++p;
bool negexp = false; bool negexp = false;
@ -210,7 +209,7 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
return false; return false;
} }
unsigned char digit = *p - '0'; unsigned char digit = *p - '0';
expnumber = digit; int64_t expnumber = digit; // exponential part
p++; p++;
if (is_integer(*p)) { if (is_integer(*p)) {
digit = *p - '0'; digit = *p - '0';
@ -270,7 +269,7 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
// //
static never_inline bool parse_large_integer(const u8 *const buf, static never_inline bool parse_large_integer(const u8 *const buf,
UNUSED size_t len, ParsedJson &pj, UNUSED size_t len, ParsedJson &pj,
const u32 depth, const u32 offset, UNUSED const u32 depth, const u32 offset,
UNUSED bool found_zero, UNUSED bool found_zero,
bool found_minus) { bool found_minus) {
const char *p = (const char *)(buf + offset); const char *p = (const char *)(buf + offset);
@ -340,10 +339,12 @@ static never_inline bool parse_large_integer(const u8 *const buf,
#define unlikely(x) __builtin_expect(!!(x), 0) #define unlikely(x) __builtin_expect(!!(x), 0)
#endif #endif
// parse the number at buf + offset // parse the number at buf + offset
// define JSON_TEST_NUMBERS for unit testing // define JSON_TEST_NUMBERS for unit testing
static really_inline bool parse_number(const u8 *const buf, UNUSED size_t len, static really_inline bool parse_number(const u8 *const buf, UNUSED size_t len,
ParsedJson &pj, const u32 depth, ParsedJson &pj, UNUSED const u32 depth,
const u32 offset, UNUSED bool found_zero, const u32 offset, UNUSED bool found_zero,
bool found_minus) { bool found_minus) {
const char *p = (const char *)(buf + offset); const char *p = (const char *)(buf + offset);

View File

@ -105,14 +105,14 @@ public:
void write_tape_s64(s64 i) { void write_tape_s64(s64 i) {
*((s64 *)current_number_buf_loc) = i; *((s64 *)current_number_buf_loc) = i;// safe because array will be 8-byte aligned, could use memcpy
current_number_buf_loc += 8; current_number_buf_loc += sizeof(s64);
write_tape(current_number_buf_loc - number_buf, 'l'); write_tape(current_number_buf_loc - number_buf, 'l');
} }
void write_tape_double(double d) { void write_tape_double(double d) {
*((double *)current_number_buf_loc) = d; *((double *)current_number_buf_loc) = d;// safe because array will be 8-byte aligned, could use memcpy
current_number_buf_loc += 8; current_number_buf_loc += sizeof(double);
write_tape(current_number_buf_loc - number_buf, 'd'); write_tape(current_number_buf_loc - number_buf, 'd');
} }
@ -137,7 +137,7 @@ public:
u32 scope_header; // the start of our current scope that contains our current location u32 scope_header; // the start of our current scope that contains our current location
u32 location; // our current location on a tape u32 location; // our current location on a tape
ParsedJsonHandle(ParsedJson & pj_) : pj(pj_), depth(0), scope_header(0), location(0) {} explicit ParsedJsonHandle(ParsedJson & pj_) : pj(pj_), depth(0), scope_header(0), location(0) {}
// OK with default copy constructor as the way to clone the POD structure // OK with default copy constructor as the way to clone the POD structure
// some placeholder navigation. Will convert over to a more native C++-ish way of doing // some placeholder navigation. Will convert over to a more native C++-ish way of doing
@ -167,7 +167,7 @@ public:
#ifdef DEBUG #ifdef DEBUG
inline void dump256(m256 d, std::string msg) { inline void dump256(m256 d, const std::string msg) {
for (u32 i = 0; i < 32; i++) { for (u32 i = 0; i < 32; i++) {
std::cout << std::setw(3) << (int)*(((u8 *)(&d)) + i); std::cout << std::setw(3) << (int)*(((u8 *)(&d)) + i);
if (!((i + 1) % 8)) if (!((i + 1) % 8))
@ -181,14 +181,14 @@ inline void dump256(m256 d, std::string msg) {
} }
// dump bits low to high // dump bits low to high
inline void dumpbits(u64 v, std::string msg) { inline void dumpbits(u64 v, const std::string msg) {
for (u32 i = 0; i < 64; i++) { for (u32 i = 0; i < 64; i++) {
std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_"); std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_");
} }
std::cout << " " << msg << "\n"; std::cout << " " << msg << "\n";
} }
inline void dumpbits32(u32 v, std::string msg) { inline void dumpbits32(u32 v, const std::string msg) {
for (u32 i = 0; i < 32; i++) { for (u32 i = 0; i < 32; i++) {
std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_"); std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_");
} }
@ -201,14 +201,14 @@ inline void dumpbits32(u32 v, std::string msg) {
#endif #endif
// dump bits low to high // dump bits low to high
inline void dumpbits_always(u64 v, std::string msg) { inline void dumpbits_always(u64 v, const std::string msg) {
for (u32 i = 0; i < 64; i++) { for (u32 i = 0; i < 64; i++) {
std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_"); std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_");
} }
std::cout << " " << msg << "\n"; std::cout << " " << msg << "\n";
} }
inline void dumpbits32_always(u32 v, std::string msg) { inline void dumpbits32_always(u32 v, const std::string msg) {
for (u32 i = 0; i < 32; i++) { for (u32 i = 0; i < 32; i++) {
std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_"); std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_");
} }

View File

@ -58,7 +58,7 @@ really_inline bool handle_unicode_codepoint(const u8 **src_ptr, u8 **dst_ptr) {
} }
really_inline bool parse_string(const u8 *buf, UNUSED size_t len, really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
ParsedJson &pj, u32 depth, u32 offset) { ParsedJson &pj, UNUSED const u32 depth, u32 offset) {
using namespace std; using namespace std;
const u8 *src = &buf[offset + 1]; // we know that buf at offset is a " const u8 *src = &buf[offset + 1]; // we know that buf at offset is a "
u8 *dst = pj.current_string_buf_loc; u8 *dst = pj.current_string_buf_loc;

View File

@ -137,7 +137,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128( uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0)); _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
quote_mask ^= prev_iter_inside_quote; quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// might be undefined behavior
const __m256i low_nibble_mask = _mm256_setr_epi8( const __m256i low_nibble_mask = _mm256_setr_epi8(
// 0 9 a b c d // 0 9 a b c d
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
@ -220,7 +220,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128( uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0)); _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
quote_mask ^= prev_iter_inside_quote; quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// we don't need this anymore
__m256i mask_20 = _mm256_set1_epi8(0x20); // c==32 __m256i mask_20 = _mm256_set1_epi8(0x20); // c==32
__m256i mask_70 = __m256i mask_70 =

View File

@ -10,9 +10,9 @@
#include <cstring> #include <cstring>
#include "jsonparser/common_defs.h" #include "jsonparser/common_defs.h"
#include "jsonparser/simdjson_internal.h"
#include "jsonparser/jsoncharutils.h" #include "jsonparser/jsoncharutils.h"
#include "jsonparser/numberparsing.h" #include "jsonparser/numberparsing.h"
#include "jsonparser/simdjson_internal.h"
#include "jsonparser/stringparsing.h" #include "jsonparser/stringparsing.h"
#include <iostream> #include <iostream>
@ -20,390 +20,430 @@
#define PATH_SEP '/' #define PATH_SEP '/'
#if defined(DEBUG) && !defined(DEBUG_PRINTF) #if defined(DEBUG) && !defined(DEBUG_PRINTF)
#include <string.h>
#include <stdio.h> #include <stdio.h>
#define DEBUG_PRINTF(format, ...) printf("%s:%s:%d:" format, \ #include <string.h>
strrchr(__FILE__, PATH_SEP) + 1, \ #define DEBUG_PRINTF(format, ...) \
__func__, __LINE__, ## __VA_ARGS__) printf("%s:%s:%d:" format, strrchr(__FILE__, PATH_SEP) + 1, __func__, \
__LINE__, ##__VA_ARGS__)
#elif !defined(DEBUG_PRINTF) #elif !defined(DEBUG_PRINTF)
#define DEBUG_PRINTF(format, ...) do { } while(0) #define DEBUG_PRINTF(format, ...) \
do { \
} while (0)
#endif #endif
using namespace std; using namespace std;
WARN_UNUSED WARN_UNUSED
really_inline bool is_valid_true_atom(const u8 * loc) { really_inline bool is_valid_true_atom(const u8 *loc) {
u64 tv = *(const u64 *)"true "; u64 tv = *(const u64 *)"true ";
u64 mask4 = 0x00000000ffffffff; u64 mask4 = 0x00000000ffffffff;
u32 error = 0; u32 error = 0;
u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++) u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
std::memcpy(&locval, loc, sizeof(u64)); std::memcpy(&locval, loc, sizeof(u64));
error = (locval & mask4) ^ tv; error = (locval & mask4) ^ tv;
error |= is_not_structural_or_whitespace(loc[4]); error |= is_not_structural_or_whitespace(loc[4]);
return error == 0; return error == 0;
} }
WARN_UNUSED WARN_UNUSED
really_inline bool is_valid_false_atom(const u8 * loc) { really_inline bool is_valid_false_atom(const u8 *loc) {
u64 fv = *(const u64 *)"false "; u64 fv = *(const u64 *)"false ";
u64 mask5 = 0x000000ffffffffff; u64 mask5 = 0x000000ffffffffff;
u32 error = 0; u32 error = 0;
u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++) u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
std::memcpy(&locval, loc, sizeof(u64)); std::memcpy(&locval, loc, sizeof(u64));
error = (locval & mask5) ^ fv; error = (locval & mask5) ^ fv;
error |= is_not_structural_or_whitespace(loc[5]); error |= is_not_structural_or_whitespace(loc[5]);
return error == 0; return error == 0;
} }
WARN_UNUSED WARN_UNUSED
really_inline bool is_valid_null_atom(const u8 * loc) { really_inline bool is_valid_null_atom(const u8 *loc) {
u64 nv = *(const u64 *)"null "; u64 nv = *(const u64 *)"null ";
u64 mask4 = 0x00000000ffffffff; u64 mask4 = 0x00000000ffffffff;
u32 error = 0; u32 error = 0;
u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++) u64 locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
std::memcpy(&locval, loc, sizeof(u64)); std::memcpy(&locval, loc, sizeof(u64));
error = (locval & mask4) ^ nv; error = (locval & mask4) ^ nv;
error |= is_not_structural_or_whitespace(loc[4]); error |= is_not_structural_or_whitespace(loc[4]);
return error == 0; return error == 0;
} }
// Implemented using Labels as Values which works in GCC and CLANG (and maybe also in Intel's compiler), // Implemented using Labels as Values which works in GCC and CLANG (and maybe
// but won't work in MSVC. This would need to be reimplemented differently // also in Intel's compiler), but won't work in MSVC. This would need to be
// if one wants to be standard compliant. // reimplemented differently if one wants to be standard compliant.
WARN_UNUSED WARN_UNUSED
bool unified_machine(const u8 *buf, size_t len, ParsedJson &pj) { bool unified_machine(const u8 *buf, size_t len, ParsedJson &pj) {
u32 i = 0; // index of the structural character (0,1,2,3...) u32 i = 0; // index of the structural character (0,1,2,3...)
u32 idx; // location of the structural character in the input (buf) u32 idx; // location of the structural character in the input (buf)
u8 c; // used to track the (structural) character we are looking at, updated by UPDATE_CHAR macro u8 c; // used to track the (structural) character we are looking at, updated
u32 depth = 0;//START_DEPTH; // an arbitrary starting depth // by UPDATE_CHAR macro
//void * ret_address[MAX_DEPTH]; // used to store "labels as value" (non-standard compiler extension) u32 depth = 0; // could have an arbitrary starting depth
pj.init();
// a call site is the start of either an object or an array ('[' or '{')
// this is the location of the previous call site
// (in the tape, at the given depth);
// we only need one.
// We should also track the tape address of our containing
// scope for two reasons. First, we will need to put an
// up pointer there at each call site so we can navigate
// upwards. Second, when we encounter the end of the scope
// we can put the current offset into a record for the
// scope so we know where it is
//u32 containing_scope_offset[MAX_DEPTH];
pj.init();
// add a sentinel to the end to avoid premature exit
// need to be able to find the \0 at the 'padded length' end of the buffer
// FIXME: TERRIFYING!
//size_t j;
//for (j = len; buf[j] != 0; j++)
// ;
//pj.structural_indexes[pj.n_structural_indexes++] = j;
// this macro reads the next structural character, updating idx, i and c. // this macro reads the next structural character, updating idx, i and c.
#define UPDATE_CHAR() { idx = pj.structural_indexes[i++]; c = buf[idx]; DEBUG_PRINTF("Got %c at %d (%d offset)\n", c, idx, i-1);} #define UPDATE_CHAR() \
{ \
idx = pj.structural_indexes[i++]; \
c = buf[idx]; \
DEBUG_PRINTF("Got %c at %d (%d offset) (depth %d)\n", c, idx, i - 1, \
depth); \
}
////////////////////////////// START STATE /////////////////////////////
DEBUG_PRINTF("at start\n");
pj.ret_address[depth] = &&start_continue;
pj.containing_scope_offset[depth] = pj.get_current_loc();
////////////////////////////// START STATE ///////////////////////////// pj.write_tape(0, 'r'); // r for root, 0 is going to get overwritten
printf("at start\n"); depth++; // everything starts at depth = 1, depth = 0 is just for the root
DEBUG_PRINTF("at start\n"); if (depth > pj.depthcapacity) {
pj.ret_address[depth] = &&start_continue; goto fail;
pj.containing_scope_offset[depth] = pj.get_current_loc(); }
pj.write_tape(0, 'r'); // r for root, 0 is going to get overwritten UPDATE_CHAR();
depth++;// everything starts at depth = 1, depth = 0 is just for the root switch (c) {
if(depth > pj.depthcapacity) { case '{':
goto fail; goto object_begin;
} case '[':
printf("got char %c \n",c); goto array_begin;
UPDATE_CHAR();
switch (c) {
case '{': goto object_begin;
case '[': goto array_begin;
#define SIMDJSON_ALLOWANYTHINGINROOT #define SIMDJSON_ALLOWANYTHINGINROOT
// A JSON text is a serialized value. Note that certain previous // A JSON text is a serialized value. Note that certain previous
// specifications of JSON constrained a JSON text to be an object or an // specifications of JSON constrained a JSON text to be an object or an
// array. Implementations that generate only objects or arrays where a // array. Implementations that generate only objects or arrays where a
// JSON text is called for will be interoperable in the sense that all // JSON text is called for will be interoperable in the sense that all
// implementations will accept these as conforming JSON texts. // implementations will accept these as conforming JSON texts.
// https://tools.ietf.org/html/rfc8259 // https://tools.ietf.org/html/rfc8259
#ifdef SIMDJSON_ALLOWANYTHINGINROOT #ifdef SIMDJSON_ALLOWANYTHINGINROOT
case '"': { case '"': {
if (!parse_string(buf, len, pj, depth, idx)) { if (!parse_string(buf, len, pj, depth, idx)) {
goto fail; goto fail;
}
goto start_continue;
}
case 't':
if (!is_valid_true_atom(buf + idx)) {
goto fail;
}
pj.write_tape(0, c);
goto start_continue;
case 'f':
if (!is_valid_false_atom(buf + idx)) {
goto fail;
}
pj.write_tape(0, c);
goto start_continue;
case 'n':
if (!is_valid_null_atom(buf + idx)) {
goto fail;
}
pj.write_tape(0, c);
goto start_continue;
case '0': {
if (!parse_number(buf, len, pj, depth, idx, true, false)) {
goto fail;
}
goto start_continue;
}
case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
if (!parse_number(buf, len, pj, depth, idx, false, false)) {
goto fail;
}
goto start_continue;
}
case '-': {
if (!parse_number(buf, len, pj, depth, idx, false, true)) {
goto fail;
}
goto start_continue;
}
#endif // ALLOWANYTHINGINROOT
default: goto fail;
} }
break;
}
case 't':
if (!is_valid_true_atom(buf + idx)) {
goto fail;
}
pj.write_tape(0, c);
break;
case 'f':
if (!is_valid_false_atom(buf + idx)) {
goto fail;
}
pj.write_tape(0, c);
break;
case 'n':
if (!is_valid_null_atom(buf + idx)) {
goto fail;
}
pj.write_tape(0, c);
break;
case '0': {
if (!parse_number(buf, len, pj, depth, idx, true, false)) {
goto fail;
}
break;
}
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': {
if (!parse_number(buf, len, pj, depth, idx, false, false)) {
goto fail;
}
break;
}
case '-': {
if (!parse_number(buf, len, pj, depth, idx, false, true)) {
goto fail;
}
break;
}
#endif // ALLOWANYTHINGINROOT
default:
goto fail;
}
#ifdef SIMDJSON_ALLOWANYTHINGINROOT
depth--; // for fall-through cases (e.g., documents containing just a string)
#endif // ALLOWANYTHINGINROOT
start_continue: start_continue:
DEBUG_PRINTF("in start_object_close\n"); DEBUG_PRINTF("in start_object_close\n");
UPDATE_CHAR(); UPDATE_CHAR();
switch (c) { switch (c) {
case 0: goto succeed; case 0:
default: goto fail; goto succeed;
} default:
goto fail;
}
////////////////////////////// OBJECT STATES ///////////////////////////// ////////////////////////////// OBJECT STATES /////////////////////////////
object_begin: object_begin:
printf("in object_begin %c \n",c); DEBUG_PRINTF("in object_begin\n");
DEBUG_PRINTF("in object_begin\n"); pj.containing_scope_offset[depth] = pj.get_current_loc();
pj.containing_scope_offset[depth] = pj.get_current_loc(); pj.write_tape(0, c);
pj.write_tape(0, c);
depth ++; UPDATE_CHAR();
if(depth > pj.depthcapacity) { switch (c) {
goto fail; case '"': {
} if (!parse_string(buf, len, pj, depth, idx)) {
UPDATE_CHAR(); goto fail;
switch (c) {
case '"': {
if (!parse_string(buf, len, pj, depth, idx)) {
goto fail;
}
goto object_key_state;
}
case '}': goto scope_end;
default: goto fail;
} }
goto object_key_state;
}
case '}':
goto scope_end; // could also go to object_continue
default:
goto fail;
}
object_key_state: object_key_state:
printf("in object_key_state %c \n",c); DEBUG_PRINTF("in object_key_state\n");
UPDATE_CHAR();
if (c != ':') {
goto fail;
}
UPDATE_CHAR();
switch (c) {
case '"': {
if (!parse_string(buf, len, pj, depth, idx)) {
goto fail;
}
break;
}
case 't':
if (!is_valid_true_atom(buf + idx)) {
goto fail;
}
pj.write_tape(0, c);
break;
case 'f':
if (!is_valid_false_atom(buf + idx)) {
goto fail;
}
pj.write_tape(0, c);
break;
case 'n':
if (!is_valid_null_atom(buf + idx)) {
goto fail;
}
pj.write_tape(0, c);
break;
case '0': {
if (!parse_number(buf, len, pj, depth, idx, true, false)) {
goto fail;
}
break;
}
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': {
if (!parse_number(buf, len, pj, depth, idx, false, false)) {
goto fail;
}
break;
}
case '-': {
if (!parse_number(buf, len, pj, depth, idx, false, true)) {
goto fail;
}
break;
}
case '{': {
// we have not yet encountered } so we need to come back for it
pj.ret_address[depth] = &&object_continue;
// we found an object inside an object, so we need to increment the depth
depth++;
if (depth > pj.depthcapacity) {
goto fail;
}
DEBUG_PRINTF("in object_key_state\n"); goto object_begin;
UPDATE_CHAR(); }
if (c != ':') { case '[': {
goto fail; // we have not yet encountered } so we need to come back for it
} pj.ret_address[depth] = &&object_continue;
UPDATE_CHAR(); // we found an array inside an object, so we need to increment the depth
switch (c) { depth++;
case '"': { if (depth > pj.depthcapacity) {
if (!parse_string(buf, len, pj, depth, idx)) { goto fail;
goto fail;
}
break;
}
case 't': if (!is_valid_true_atom(buf + idx)) {
goto fail;
}
pj.write_tape(0, c);
break;
case 'f': if (!is_valid_false_atom(buf + idx)) {
goto fail;
}
pj.write_tape(0, c);
break;
case 'n': if (!is_valid_null_atom(buf + idx)) {
goto fail;
}
pj.write_tape(0, c);
break;
case '0': {
if (!parse_number(buf, len, pj, depth, idx, true, false)) {
goto fail;
}
break;
}
case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
if (!parse_number(buf, len, pj, depth, idx, false, false)) {
goto fail;
}
break;
}
case '-': {
if (!parse_number(buf, len, pj, depth, idx, false, true)) {
goto fail;
}
break;
}
case '{': {
pj.ret_address[depth] = &&object_continue;
goto object_begin;
}
case '[': {
pj.ret_address[depth] = &&object_continue;
goto array_begin;
}
default: goto fail;
} }
goto array_begin;
}
default:
goto fail;
}
object_continue: object_continue:
printf("in object_continue %c \n",c); DEBUG_PRINTF("in object_continue\n");
UPDATE_CHAR();
DEBUG_PRINTF("in object_continue\n"); switch (c) {
case ',':
UPDATE_CHAR(); UPDATE_CHAR();
switch (c) { if (c != '"') {
case ',': goto fail;
UPDATE_CHAR(); } else {
if (c != '"') { if (!parse_string(buf, len, pj, depth, idx)) {
goto fail; goto fail;
} else { }
if (!parse_string(buf, len, pj, depth, idx)) { goto object_key_state;
goto fail;
}
goto object_key_state;
}
case '}': goto scope_end;
default: goto fail;
} }
case '}':
goto scope_end;
default:
goto fail;
}
////////////////////////////// COMMON STATE ///////////////////////////// ////////////////////////////// COMMON STATE /////////////////////////////
scope_end: scope_end:
// write our tape location to the header scope // write our tape location to the header scope
depth--; depth--;
pj.write_tape(pj.containing_scope_offset[depth], c); pj.write_tape(pj.containing_scope_offset[depth], c);
pj.annotate_previousloc(pj.containing_scope_offset[depth], pj.get_current_loc()); pj.annotate_previousloc(pj.containing_scope_offset[depth],
// goto saved_state pj.get_current_loc());
goto *pj.ret_address[depth]; // goto saved_state
goto *pj.ret_address[depth];
////////////////////////////// ARRAY STATES /////////////////////////////
////////////////////////////// ARRAY STATES /////////////////////////////
array_begin: array_begin:
printf("in array_begin %c \n",c); DEBUG_PRINTF("in array_begin\n");
pj.containing_scope_offset[depth] = pj.get_current_loc();
DEBUG_PRINTF("in array_begin\n"); pj.write_tape(0, c);
pj.containing_scope_offset[depth] = pj.get_current_loc(); UPDATE_CHAR();
pj.write_tape(0, c); if (c == ']') {
depth ++; goto scope_end; // could also go to array_continue
if(depth > pj.depthcapacity) { }
goto fail;
}
UPDATE_CHAR();
if (c == ']') {
goto scope_end;
}
main_array_switch: main_array_switch:
// we call update char on all paths in, so we can peek at c on the // we call update char on all paths in, so we can peek at c on the
// on paths that can accept a close square brace (post-, and at start) // on paths that can accept a close square brace (post-, and at start)
switch (c) { switch (c) {
case '"': { case '"': {
if (!parse_string(buf, len, pj, depth, idx)) { if (!parse_string(buf, len, pj, depth, idx)) {
goto fail; goto fail;
}
goto array_continue;
}
case 't': if (!is_valid_true_atom(buf + idx)) {
goto fail;
}
pj.write_tape(0, c);
break;
case 'f': if (!is_valid_false_atom(buf + idx)) {
goto fail;
}
pj.write_tape(0, c);
break;
case 'n': if (!is_valid_null_atom(buf + idx)) {
goto fail;
}
pj.write_tape(0, c);
break;
case '0': {
if (!parse_number(buf, len, pj, depth, idx, true, false)) {
goto fail;
}
break;
}
case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
if (!parse_number(buf, len, pj, depth, idx, false, false)) {
goto fail;
}
break;
}
case '-': {
if (!parse_number(buf, len, pj, depth, idx, false, true)) {
goto fail;
}
break;
}
case '{': {
pj.ret_address[depth] = &&array_continue;
goto object_begin;
}
case '[': {
pj.ret_address[depth] = &&array_continue;
goto array_begin;
}
default: goto fail;
} }
break;
}
case 't':
if (!is_valid_true_atom(buf + idx)) {
goto fail;
}
pj.write_tape(0, c);
break;
case 'f':
if (!is_valid_false_atom(buf + idx)) {
goto fail;
}
pj.write_tape(0, c);
break;
case 'n':
if (!is_valid_null_atom(buf + idx)) {
goto fail;
}
pj.write_tape(0, c);
break; // goto array_continue;
case '0': {
if (!parse_number(buf, len, pj, depth, idx, true, false)) {
goto fail;
}
break; // goto array_continue;
}
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': {
if (!parse_number(buf, len, pj, depth, idx, false, false)) {
goto fail;
}
break; // goto array_continue;
}
case '-': {
if (!parse_number(buf, len, pj, depth, idx, false, true)) {
goto fail;
}
break; // goto array_continue;
}
case '{': {
// we have not yet encountered ] so we need to come back for it
pj.ret_address[depth] = &&array_continue;
// we found an object inside an array, so we need to increment the depth
depth++;
if (depth > pj.depthcapacity) {
goto fail;
}
goto object_begin;
}
case '[': {
// we have not yet encountered ] so we need to come back for it
pj.ret_address[depth] = &&array_continue;
// we found an array inside an array, so we need to increment the depth
depth++;
if (depth > pj.depthcapacity) {
goto fail;
}
goto array_begin;
}
default:
goto fail;
}
array_continue: array_continue:
printf("in array_begin %c \n",c); DEBUG_PRINTF("in array_continue\n");
UPDATE_CHAR();
DEBUG_PRINTF("in array_continue\n"); switch (c) {
case ',':
UPDATE_CHAR(); UPDATE_CHAR();
switch (c) { goto main_array_switch;
case ',': UPDATE_CHAR(); goto main_array_switch; case ']':
case ']': goto scope_end; goto scope_end;
default: goto fail; default:
} goto fail;
}
////////////////////////////// FINAL STATES ///////////////////////////// ////////////////////////////// FINAL STATES /////////////////////////////
succeed: succeed:
DEBUG_PRINTF("in succeed\n"); DEBUG_PRINTF("in succeed, depth = %d \n", depth);
// we annotate the root node // we annotate the root node
depth--; // depth--;
// next line allows us to go back to the start // next line allows us to go back to the start
pj.write_tape(pj.containing_scope_offset[depth], 'r');// r is root pj.write_tape(pj.containing_scope_offset[depth], 'r'); // r is root
// next line tells the root node how to go to the end // next line tells the root node how to go to the end
pj.annotate_previousloc(pj.containing_scope_offset[depth], pj.get_current_loc()); pj.annotate_previousloc(pj.containing_scope_offset[depth],
pj.get_current_loc());
#ifdef DEBUG #ifdef DEBUG
pj.dump_tapes(); pj.dump_tapes();
#endif #endif
return true; return true;
fail: fail:
DEBUG_PRINTF("in fail\n"); DEBUG_PRINTF("in fail\n");
#ifdef DEBUG #ifdef DEBUG
pj.dump_tapes(); pj.dump_tapes();
#endif #endif
return false; return false;
} }

View File

@ -1,3 +1,4 @@
#include <unistd.h>
#include "jsonparser/jsonparser.h" #include "jsonparser/jsonparser.h"
@ -30,7 +31,6 @@ void on_json_error( void *, const fastjson::ErrorContext& ec) {
bool fastjson_parse(const char *input) { bool fastjson_parse(const char *input) {
fastjson::Token token; fastjson::Token token;
fastjson::dom::Chunk chunk; fastjson::dom::Chunk chunk;
std::string error_message;
return fastjson::dom::parse_string(input, &token, &chunk, 0, &on_json_error, NULL); return fastjson::dom::parse_string(input, &token, &chunk, 0, &on_json_error, NULL);
} }
// end of fastjson stuff // end of fastjson stuff
@ -41,17 +41,30 @@ using namespace rapidjson;
using namespace std; using namespace std;
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
if (argc < 2) { bool verbose = false;
int c;
while ((c = getopt (argc, argv, "v")) != -1)
switch (c)
{
case 'v':
verbose = true;
break;
default:
abort ();
}
if (optind >= argc) {
cerr << "Usage: " << argv[0] << " <jsonfile>\n"; cerr << "Usage: " << argv[0] << " <jsonfile>\n";
cerr << "Or " << argv[0] << " -v <jsonfile>\n"; cerr << "Or " << argv[0] << " -v <jsonfile>\n";
exit(1); exit(1);
} }
bool verbose = false; const char * filename = argv[optind];
if (argc > 2) { std::pair<u8 *, size_t> p;
if (strcmp(argv[1], "-v")) try {
verbose = true; p = get_corpus(filename);
} catch (const std::exception& e) { // caught by reference to base
std::cout << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE;
} }
pair<u8 *, size_t> p = get_corpus(argv[argc - 1]);
if (verbose) { if (verbose) {
std::cout << "Input has "; std::cout << "Input has ";
if (p.second > 1024 * 1024) if (p.second > 1024 * 1024)

View File

@ -5,6 +5,7 @@
#include <stdbool.h> #include <stdbool.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <unistd.h>
#include "jsonparser/jsonparser.h" #include "jsonparser/jsonparser.h"
@ -41,6 +42,8 @@ bool validate(const char *dirname) {
printf("nothing in dir %s \n", dirname); printf("nothing in dir %s \n", dirname);
return false; return false;
} }
bool * isfileasexpected = new bool[c];
for(int i = 0; i < c; i++) isfileasexpected[i] = true;
size_t howmany = 0; size_t howmany = 0;
bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/'); bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
for (int i = 0; i < c; i++) { for (int i = 0; i < c; i++) {
@ -56,7 +59,13 @@ bool validate(const char *dirname) {
} else { } else {
strcpy(fullpath + dirlen, name); strcpy(fullpath + dirlen, name);
} }
std::pair<u8 *, size_t> p = get_corpus(fullpath); std::pair<u8 *, size_t> p;
try {
p = get_corpus(fullpath);
} catch (const std::exception& e) {
std::cout << "Could not load the file " << fullpath << std::endl;
return EXIT_FAILURE;
}
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024); ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
if(pj_ptr == NULL) { if(pj_ptr == NULL) {
std::cerr<< "can't allocate memory"<<std::endl; std::cerr<< "can't allocate memory"<<std::endl;
@ -70,11 +79,13 @@ bool validate(const char *dirname) {
howmany--; howmany--;
} else if (startsWith("pass", name)) { } else if (startsWith("pass", name)) {
if (!isok) { if (!isok) {
isfileasexpected[i] = false;
printf("warning: file %s should pass but it fails.\n", name); printf("warning: file %s should pass but it fails.\n", name);
everythingfine = false; everythingfine = false;
} }
} else if (startsWith("fail", name)) { } else if (startsWith("fail", name)) {
if (isok) { if (isok) {
isfileasexpected[i] = false;
printf("warning: file %s should fail but it passes.\n", name); printf("warning: file %s should fail but it passes.\n", name);
everythingfine = false; everythingfine = false;
} }
@ -87,11 +98,20 @@ bool validate(const char *dirname) {
deallocate_ParsedJson(pj_ptr); deallocate_ParsedJson(pj_ptr);
} }
} }
printf("%zu files checked.\n", howmany);
if(everythingfine) {
printf("All ok!\n");
} else {
printf("There were problems! Consider reviewing the following files:\n");
for(int i = 0; i < c; i++) {
if(!isfileasexpected[i]) printf("%s \n", entry_list[i]->d_name);
}
}
for (int i = 0; i < c; ++i) for (int i = 0; i < c; ++i)
free(entry_list[i]); free(entry_list[i]);
free(entry_list); free(entry_list);
printf("%zu files checked.\n", howmany); delete[] isfileasexpected;
if(everythingfine) printf("All ok!\n");
return everythingfine; return everythingfine;
} }

View File

@ -28,7 +28,7 @@ bool startsWith(const char *pre, const char *str) {
size_t lenpre = strlen(pre), lenstr = strlen(str); size_t lenpre = strlen(pre), lenstr = strlen(str);
return lenstr < lenpre ? false : strncmp(pre, str, lenpre) == 0; return lenstr < lenpre ? false : strncmp(pre, str, lenpre) == 0;
} }
bool is_in_bad_list(char *buf) { bool is_in_bad_list(const char *buf) {
for (size_t i = 0; i < sizeof(really_bad) / sizeof(really_bad[0]); i++) for (size_t i = 0; i < sizeof(really_bad) / sizeof(really_bad[0]); i++)
if (startsWith(really_bad[i], buf)) if (startsWith(really_bad[i], buf))
return true; return true;
@ -38,9 +38,9 @@ bool is_in_bad_list(char *buf) {
inline void foundInvalidNumber(const u8 *buf) { inline void foundInvalidNumber(const u8 *buf) {
invalid_count++; invalid_count++;
char *endptr; char *endptr;
double expected = strtod((char *)buf, &endptr); double expected = strtod((const char *)buf, &endptr);
if (endptr != (char *)buf) { if (endptr != (const char *)buf) {
if (!is_in_bad_list((char *)buf)) { if (!is_in_bad_list((const char *)buf)) {
printf( printf(
"Warning: foundInvalidNumber %.32s whereas strtod parses it to %f, ", "Warning: foundInvalidNumber %.32s whereas strtod parses it to %f, ",
buf, expected); buf, expected);
@ -53,8 +53,8 @@ inline void foundInvalidNumber(const u8 *buf) {
inline void foundInteger(int64_t result, const u8 *buf) { inline void foundInteger(int64_t result, const u8 *buf) {
int_count++; int_count++;
char *endptr; char *endptr;
long long expected = strtoll((char *)buf, &endptr, 10); long long expected = strtoll((const char *)buf, &endptr, 10);
if ((endptr == (char *)buf) || (expected != result)) { if ((endptr == (const char *)buf) || (expected != result)) {
printf("Error: parsed %" PRId64 " out of %.32s, ", result, buf); printf("Error: parsed %" PRId64 " out of %.32s, ", result, buf);
printf(" while parsing %s \n", fullpath); printf(" while parsing %s \n", fullpath);
parse_error |= PARSE_ERROR; parse_error |= PARSE_ERROR;
@ -64,8 +64,8 @@ inline void foundInteger(int64_t result, const u8 *buf) {
inline void foundFloat(double result, const u8 *buf) { inline void foundFloat(double result, const u8 *buf) {
char *endptr; char *endptr;
float_count++; float_count++;
double expected = strtod((char *)buf, &endptr); double expected = strtod((const char *)buf, &endptr);
if (endptr == (char *)buf) { if (endptr == (const char *)buf) {
printf("parsed %f from %.32s whereas strtod refuses to parse a float, ", printf("parsed %f from %.32s whereas strtod refuses to parse a float, ",
result, buf); result, buf);
printf(" while parsing %s \n", fullpath); printf(" while parsing %s \n", fullpath);
@ -123,7 +123,13 @@ bool validate(const char *dirname) {
} else { } else {
strcpy(fullpath + dirlen, name); strcpy(fullpath + dirlen, name);
} }
std::pair<u8 *, size_t> p = get_corpus(fullpath); std::pair<u8 *, size_t> p;
try {
p = get_corpus(fullpath);
} catch (const std::exception& e) {
std::cout << "Could not load the file " << fullpath << std::endl;
return EXIT_FAILURE;
}
// terrible hack but just to get it working // terrible hack but just to get it working
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024); ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
if (pj_ptr == NULL) { if (pj_ptr == NULL) {

View File

@ -241,7 +241,7 @@ inline void foundString(const u8 *buf, const u8 *parsed_begin,
// we have a zero-length string // we have a zero-length string
if (parsed_begin != parsed_end) { if (parsed_begin != parsed_end) {
printf("WARNING: We have a zero-length but gap is %zu \n", printf("WARNING: We have a zero-length but gap is %zu \n",
parsed_end - parsed_begin); (size_t)(parsed_end - parsed_begin));
probable_bug = true; probable_bug = true;
} }
empty_string++; empty_string++;
@ -252,12 +252,12 @@ inline void foundString(const u8 *buf, const u8 *parsed_begin,
printf("WARNING: lengths on parsed strings disagree %zu %zu \n", thislen, printf("WARNING: lengths on parsed strings disagree %zu %zu \n", thislen,
len); len);
printf("\nour parsed string : '%*s'\n\n", (int)thislen, printf("\nour parsed string : '%*s'\n\n", (int)thislen,
(char *)parsed_begin); (const char *)parsed_begin);
print_hex((char *)parsed_begin, thislen); print_hex((const char *)parsed_begin, thislen);
printf("\n"); printf("\n");
printf("reference parsing :'%*s'\n\n", (int)len, bigbuffer); printf("reference parsing :'%*s'\n\n", (int)len, bigbuffer);
print_hex((char *)bigbuffer, len); print_hex((const char *)bigbuffer, len);
printf("\n"); printf("\n");
probable_bug = true; probable_bug = true;
@ -267,15 +267,15 @@ inline void foundString(const u8 *buf, const u8 *parsed_begin,
printf("Lengths %zu %zu \n", thislen, len); printf("Lengths %zu %zu \n", thislen, len);
printf("\nour parsed string : '%*s'\n", (int)thislen, printf("\nour parsed string : '%*s'\n", (int)thislen,
(char *)parsed_begin); (const char *)parsed_begin);
print_hex((char *)parsed_begin, thislen); print_hex((const char *)parsed_begin, thislen);
printf("\n"); printf("\n");
printf("reference parsing :'%*s'\n", (int)len, bigbuffer); printf("reference parsing :'%*s'\n", (int)len, bigbuffer);
print_hex((char *)bigbuffer, len); print_hex((const char *)bigbuffer, len);
printf("\n"); printf("\n");
print_cmp_hex((char *)parsed_begin, bigbuffer, thislen); print_cmp_hex((const char *)parsed_begin, bigbuffer, thislen);
probable_bug = true; probable_bug = true;
} }
@ -325,8 +325,13 @@ bool validate(const char *dirname) {
} else { } else {
strcpy(fullpath + dirlen, name); strcpy(fullpath + dirlen, name);
} }
std::pair<u8 *, size_t> p = get_corpus(fullpath); std::pair<u8 *, size_t> p;
// terrible hack but just to get it working try {
p = get_corpus(fullpath);
} catch (const std::exception& e) {
std::cout << "Could not load the file " << fullpath << std::endl;
return EXIT_FAILURE;
}
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024); ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
if (pj_ptr == NULL) { if (pj_ptr == NULL) {
std::cerr << "can't allocate memory" << std::endl; std::cerr << "can't allocate memory" << std::endl;