simdjson/tests/numberparsingcheck.cpp

180 lines
5.5 KiB
C++
Raw Normal View History

#include <assert.h>
#include <cstring>
#include <dirent.h>
#include <inttypes.h>
2018-10-24 08:19:33 +08:00
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#ifndef JSON_TEST_NUMBERS
#define JSON_TEST_NUMBERS
2018-10-04 21:48:00 +08:00
#endif
2018-11-30 22:37:57 +08:00
#include "simdjson/common_defs.h"
2018-09-28 08:26:27 +08:00
// Accumulates PARSE_* flags: the found* callbacks below OR their flag into it,
// and validate() resets it to 0 before scanning a directory.
int parse_error;
// Path of the file validate() is currently processing. validate() allocates
// and frees it; the found* callbacks print it in their diagnostics.
char *fullpath;
2018-10-24 08:19:33 +08:00
// Flags that get OR-ed into parse_error. Each must be a distinct non-zero bit:
// with default enumerator values PARSE_WARNING would be 0, and
// `parse_error |= PARSE_WARNING` would silently record nothing.
// PARSE_ERROR keeps the value 1 it always had.
enum { PARSE_ERROR = 1, PARSE_WARNING = 2 };
2018-10-04 21:48:00 +08:00
// Per-file tallies. validate() zeroes all three before parsing each file and
// prints them afterwards; each is incremented by the matching callback below.
size_t float_count;   // incremented by foundFloat
size_t int_count;     // incremented by foundInteger
size_t invalid_count; // incremented by foundInvalidNumber
2018-10-24 08:19:33 +08:00
// Inputs that start with one of these prefixes should not be parsed as
// numbers. foundInvalidNumber consults this list (through is_in_bad_list) and
// skips its "strtod disagrees" warning for inputs that match.
const char *really_bad[] = {"013}", "0x14", "0e]", "0e+]", "0e+-1]"};
// Returns true when the C string `str` begins with the C string `pre`.
// An empty `pre` matches every string.
bool startsWith(const char *pre, const char *str) {
  const size_t prefix_len = strlen(pre);
  if (strlen(str) < prefix_len) {
    // `str` is too short to contain the whole prefix.
    return false;
  }
  // Both strings hold at least prefix_len characters here, so a raw byte
  // comparison is equivalent to a bounded string comparison.
  return memcmp(pre, str, prefix_len) == 0;
}
2018-11-28 03:37:59 +08:00
bool is_in_bad_list(const char *buf) {
2018-10-24 08:19:33 +08:00
for (size_t i = 0; i < sizeof(really_bad) / sizeof(really_bad[0]); i++)
if (startsWith(really_bad[i], buf))
return true;
return false;
}
inline void foundInvalidNumber(const u8 *buf) {
2018-10-04 21:48:00 +08:00
invalid_count++;
2018-10-24 08:19:33 +08:00
char *endptr;
2018-11-28 03:37:59 +08:00
double expected = strtod((const char *)buf, &endptr);
if (endptr != (const char *)buf) {
if (!is_in_bad_list((const char *)buf)) {
2018-10-24 08:19:33 +08:00
printf(
"Warning: foundInvalidNumber %.32s whereas strtod parses it to %f, ",
buf, expected);
printf(" while parsing %s \n", fullpath);
parse_error |= PARSE_WARNING;
}
2018-09-28 08:26:27 +08:00
}
}
2018-10-24 08:19:33 +08:00
inline void foundInteger(int64_t result, const u8 *buf) {
2018-10-04 21:48:00 +08:00
int_count++;
2018-10-24 08:19:33 +08:00
char *endptr;
2018-11-28 03:37:59 +08:00
long long expected = strtoll((const char *)buf, &endptr, 10);
if ((endptr == (const char *)buf) || (expected != result)) {
2018-09-28 08:26:27 +08:00
printf("Error: parsed %" PRId64 " out of %.32s, ", result, buf);
printf(" while parsing %s \n", fullpath);
parse_error |= PARSE_ERROR;
}
}
2018-10-24 08:19:33 +08:00
inline void foundFloat(double result, const u8 *buf) {
char *endptr;
2018-10-04 21:48:00 +08:00
float_count++;
2018-11-28 03:37:59 +08:00
double expected = strtod((const char *)buf, &endptr);
if (endptr == (const char *)buf) {
2018-10-24 08:19:33 +08:00
printf("parsed %f from %.32s whereas strtod refuses to parse a float, ",
result, buf);
2018-09-28 08:26:27 +08:00
printf(" while parsing %s \n", fullpath);
parse_error |= PARSE_ERROR;
}
// we want to get some reasonable relative accuracy
2018-10-24 08:19:33 +08:00
if (fabs(expected - result) / fmin(fabs(expected), fabs(result)) >
0.000000000000001) {
printf("parsed %.32f from \n", result);
printf(" %.32s whereas strtod gives\n", buf);
2018-10-24 08:19:33 +08:00
printf(" %.32f,", expected);
2018-09-28 08:26:27 +08:00
printf(" while parsing %s \n", fullpath);
parse_error |= PARSE_ERROR;
}
}
2018-11-30 22:37:57 +08:00
#include "simdjson/jsonparser.h"
#include "src/stage34_unified.cpp"
/**
 * Tells whether filename ends with the given extension.
 *
 * The comparison takes everything from the last '.' in filename (inclusive)
 * and requires it to equal extension exactly, so extension must include the
 * leading dot (e.g. ".json"). A filename without any '.' never matches.
 */
static bool hasExtension(const char *filename, const char *extension) {
  const char *last_dot = strrchr(filename, '.');
  if (!last_dot) {
    return false;
  }
  return strcmp(last_dot, extension) == 0;
}
bool validate(const char *dirname) {
2018-09-28 08:26:27 +08:00
parse_error = 0;
2018-10-24 08:19:33 +08:00
size_t total_count = 0;
// init_state_machine(); // no longer necessary
const char *extension = ".json";
size_t dirlen = strlen(dirname);
struct dirent **entry_list;
int c = scandir(dirname, &entry_list, 0, alphasort);
if (c < 0) {
printf("error accessing %s \n", dirname);
return false;
}
if (c == 0) {
printf("nothing in dir %s \n", dirname);
return false;
}
bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
for (int i = 0; i < c; i++) {
const char *name = entry_list[i]->d_name;
if (hasExtension(name, extension)) {
size_t filelen = strlen(name);
2018-09-28 08:26:27 +08:00
fullpath = (char *)malloc(dirlen + filelen + 1 + 1);
strcpy(fullpath, dirname);
if (needsep) {
fullpath[dirlen] = '/';
strcpy(fullpath + dirlen + 1, name);
} else {
strcpy(fullpath + dirlen, name);
}
2018-11-28 03:37:59 +08:00
std::pair<u8 *, size_t> p;
try {
p = get_corpus(fullpath);
} catch (const std::exception& e) {
std::cout << "Could not load the file " << fullpath << std::endl;
return EXIT_FAILURE;
}
// terrible hack but just to get it working
2018-11-27 23:10:39 +08:00
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
2018-10-24 08:19:33 +08:00
if (pj_ptr == NULL) {
std::cerr << "can't allocate memory" << std::endl;
return false;
}
2018-10-04 21:48:00 +08:00
float_count = 0;
int_count = 0;
invalid_count = 0;
2018-10-24 08:19:33 +08:00
total_count += float_count + int_count + invalid_count;
ParsedJson &pj(*pj_ptr);
2018-10-24 08:19:33 +08:00
bool isok = json_parse(p.first, p.second, pj);
if (int_count + float_count + invalid_count > 0) {
printf("File %40s %s --- integers: %10zu floats: %10zu invalid: %10zu "
"total numbers: %10zu \n",
name, isok ? " is valid " : " is not valid ", int_count,
float_count, invalid_count,
int_count + float_count + invalid_count);
2018-10-04 21:48:00 +08:00
}
free(p.first);
free(fullpath);
deallocate_ParsedJson(pj_ptr);
}
}
2018-10-24 08:19:33 +08:00
if ((parse_error & PARSE_ERROR) != 0) {
2018-09-28 08:26:27 +08:00
printf("NUMBER PARSING FAILS?\n");
2018-10-24 08:19:33 +08:00
} else {
printf("All ok.\n");
2018-09-28 08:26:27 +08:00
}
for (int i = 0; i < c; ++i)
free(entry_list[i]);
free(entry_list);
2018-09-28 08:26:27 +08:00
return ((parse_error & PARSE_ERROR) == 0);
}
// Entry point. With exactly one argument, validates that directory. With any
// other argument count, prints the usage text and falls back to validating
// "jsonchecker/" and then, only if that succeeded, "jsonexamples/".
// Exits with EXIT_SUCCESS when validation passed, EXIT_FAILURE otherwise.
int main(int argc, char *argv[]) {
  bool all_good;
  if (argc == 2) {
    all_good = validate(argv[1]);
  } else {
    std::cerr << "Usage: " << argv[0] << " <directorywithjsonfiles>"
              << std::endl;
    std::cout << "We are going to assume you mean to use the 'jsonchecker' and "
                 "'jsonexamples' directories."
              << std::endl;
    all_good = validate("jsonchecker/");
    if (all_good) {
      all_good = validate("jsonexamples/");
    }
  }
  return all_good ? EXIT_SUCCESS : EXIT_FAILURE;
}