2018-09-28 05:38:15 +08:00
|
|
|
#include <cstring>
|
2020-10-11 00:47:49 +08:00
|
|
|
#include <cinttypes>
|
|
|
|
#include <cmath>
|
|
|
|
#include <cstdio>
|
|
|
|
#include <cstdlib>
|
|
|
|
|
2018-09-28 05:38:15 +08:00
|
|
|
#ifndef JSON_TEST_NUMBERS
|
|
|
|
#define JSON_TEST_NUMBERS
|
2018-10-04 21:48:00 +08:00
|
|
|
#endif
|
2018-09-28 05:38:15 +08:00
|
|
|
|
2020-06-30 09:10:54 +08:00
|
|
|
#if (!(_MSC_VER) && !(__MINGW32__) && !(__MINGW64__))
|
2020-05-02 20:16:50 +08:00
|
|
|
#include <dirent.h>
|
|
|
|
#else
|
|
|
|
#include <dirent_portable.h>
|
|
|
|
#endif
|
2020-09-10 07:31:00 +08:00
|
|
|
|
|
|
|
void found_invalid_number(const uint8_t *buf);
|
|
|
|
void found_float(double result, const uint8_t *buf);
|
|
|
|
void found_integer(int64_t result, const uint8_t *buf);
|
|
|
|
void found_unsigned_integer(uint64_t result, const uint8_t *buf);
|
|
|
|
|
2020-03-03 06:23:19 +08:00
|
|
|
#include "simdjson.h"
|
2018-09-28 05:38:15 +08:00
|
|
|
|
2020-10-11 00:47:49 +08:00
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Some systems have bad floating-point parsing. We want to exclude them.
|
|
|
|
*/
|
2020-11-04 04:48:09 +08:00
|
|
|
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined (__linux__) || defined (__APPLE__) || defined(__FreeBSD__)
|
2020-10-11 00:47:49 +08:00
|
|
|
// Ok. So under Visual Studio, linux, apple and freebsd systems, we have a good chance of having a decent
|
2020-11-04 04:48:09 +08:00
|
|
|
// enough strtod. It is not certain, but it is maybe a good enough heuristic. We exclude systems like msys2
|
2020-10-11 00:47:49 +08:00
|
|
|
// or cygwin.
|
|
|
|
//
|
|
|
|
// Finally, we want to exclude legacy 32-bit systems.
|
|
|
|
#ifndef SIMDJSON_IS_32BITS
|
|
|
|
// So we only run some of the floating-point tests under 64-bit linux, apple, regular visual studio, freebsd.
|
|
|
|
#define TEST_FLOATS
|
|
|
|
// Apple and freebsd need a special header, typically.
|
|
|
|
#if defined __APPLE__ || defined(__FreeBSD__)
|
2020-11-04 04:48:09 +08:00
|
|
|
# include <xlocale.h>
|
2020-10-11 00:47:49 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2019-07-16 10:17:49 +08:00
|
|
|
|
|
|
|
|
2018-09-28 08:26:27 +08:00
|
|
|
// Bitmask of PARSE_* flags accumulated by the found_* callbacks;
// validate() clears it before scanning a directory.
int parse_error;
// Path of the JSON file currently being processed; only used in diagnostics.
char *fullpath;
// Flags that get OR-ed into parse_error. They must be distinct non-zero bits:
// with the implicit enumerator values (0 and 1), `parse_error |= PARSE_WARNING`
// was a no-op and warnings were never recorded.
enum { PARSE_WARNING = 1, PARSE_ERROR = 2 };

// Per-file tallies incremented by the found_* callbacks and reset by
// validate() before each file is parsed.
size_t float_count;
size_t int_count;
size_t invalid_count;
|
|
|
|
|
2018-10-24 08:19:33 +08:00
|
|
|
// Inputs that begin with any of these prefixes should not be parsed as
// numbers.
const char *really_bad[] = {
    "013}",
    "0x14",
    "0e]",
    "0e+]",
    "0e+-1]",
};
|
|
|
|
|
2019-07-31 05:18:10 +08:00
|
|
|
// Returns true when `str` begins with every character of `pre`
// (an empty `pre` always matches).
bool starts_with(const char *pre, const char *str) {
  for (; *pre != '\0'; ++pre, ++str) {
    // A shorter `str` ends the comparison here: its terminator differs from
    // the remaining prefix character.
    if (*pre != *str) {
      return false;
    }
  }
  return true;
}
|
2018-12-11 06:39:19 +08:00
|
|
|
|
2018-11-28 03:37:59 +08:00
|
|
|
bool is_in_bad_list(const char *buf) {
|
2019-06-27 07:48:51 +08:00
|
|
|
if (buf[0] != '0')
|
|
|
|
return false;
|
2018-10-24 08:19:33 +08:00
|
|
|
for (size_t i = 0; i < sizeof(really_bad) / sizeof(really_bad[0]); i++)
|
2019-07-31 05:18:10 +08:00
|
|
|
if (starts_with(really_bad[i], buf))
|
2018-10-24 08:19:33 +08:00
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-10-11 00:47:49 +08:00
|
|
|
#ifdef TEST_FLOATS
// Records that the text at `buf` was rejected as a number and cross-checks
// that decision against strtod evaluated in the "C" locale. If strtod
// consumes at least one character and the text is not on the really_bad list,
// a warning is printed and PARSE_WARNING is OR-ed into parse_error.
void found_invalid_number(const uint8_t *buf) {
  ++invalid_count;
  const char *text = reinterpret_cast<const char *>(buf);
  char *parse_end;
#ifdef _WIN32
  static _locale_t c_locale = _create_locale(LC_ALL, "C");
  double expected = _strtod_l(text, &parse_end, c_locale);
#else
  static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
  double expected = strtod_l(text, &parse_end, c_locale);
#endif
  // strtod consumed nothing: it agrees that this is not a number.
  if (parse_end == text) {
    return;
  }
  // Known inputs that strtod accepts but that should not be numbers.
  if (is_in_bad_list(text)) {
    return;
  }
  printf("Warning: found_invalid_number %.32s whereas strtod parses it to "
         "%f, ",
         buf, expected);
  printf(" while parsing %s \n", fullpath);
  parse_error |= PARSE_WARNING;
}
#else
// We do not recognize the system, so we do not verify our results.
void found_invalid_number(const uint8_t *) {}
#endif
|
2018-09-28 05:38:15 +08:00
|
|
|
|
2019-07-31 05:18:10 +08:00
|
|
|
void found_integer(int64_t result, const uint8_t *buf) {
|
2018-10-04 21:48:00 +08:00
|
|
|
int_count++;
|
2018-10-24 08:19:33 +08:00
|
|
|
char *endptr;
|
2018-11-28 03:37:59 +08:00
|
|
|
long long expected = strtoll((const char *)buf, &endptr, 10);
|
|
|
|
if ((endptr == (const char *)buf) || (expected != result)) {
|
2020-06-30 09:10:54 +08:00
|
|
|
#if (!(__MINGW32__) && !(__MINGW64__))
|
2018-12-31 10:00:19 +08:00
|
|
|
fprintf(stderr, "Error: parsed %" PRId64 " out of %.32s, ", result, buf);
|
2020-10-11 00:47:49 +08:00
|
|
|
#else // mingw is busted since we include #include <inttypes.h> and it will still not provide PRId64
|
2020-06-30 09:10:54 +08:00
|
|
|
fprintf(stderr, "Error: parsed %lld out of %.32s, ", (long long)result, buf);
|
|
|
|
#endif
|
2018-12-31 10:00:19 +08:00
|
|
|
fprintf(stderr, " while parsing %s \n", fullpath);
|
2018-09-28 08:26:27 +08:00
|
|
|
parse_error |= PARSE_ERROR;
|
|
|
|
}
|
2018-09-28 05:38:15 +08:00
|
|
|
}
|
|
|
|
|
2019-09-03 00:32:44 +08:00
|
|
|
void found_unsigned_integer(uint64_t result, const uint8_t *buf) {
|
2019-09-02 22:50:24 +08:00
|
|
|
int_count++;
|
|
|
|
char *endptr;
|
|
|
|
unsigned long long expected = strtoull((const char *)buf, &endptr, 10);
|
|
|
|
if ((endptr == (const char *)buf) || (expected != result)) {
|
2020-06-30 09:10:54 +08:00
|
|
|
#if (!(__MINGW32__) && !(__MINGW64__))
|
2019-09-02 22:50:24 +08:00
|
|
|
fprintf(stderr, "Error: parsed %" PRIu64 " out of %.32s, ", result, buf);
|
2020-06-30 09:10:54 +08:00
|
|
|
#else // mingw is busted since we include #include <inttypes.h>
|
|
|
|
fprintf(stderr, "Error: parsed %llu out of %.32s, ", (unsigned long long)result, buf);
|
|
|
|
#endif
|
2019-09-02 22:50:24 +08:00
|
|
|
fprintf(stderr, " while parsing %s \n", fullpath);
|
|
|
|
parse_error |= PARSE_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-11 00:47:49 +08:00
|
|
|
#ifdef TEST_FLOATS
// Counts one floating-point number and compares `result` with what strtod,
// evaluated in the "C" locale, reads from the text at `buf`. Three kinds of
// disagreement are reported on stderr and flagged with PARSE_ERROR:
//   1. strtod consumes no characters at all,
//   2. the two values fall into different fpclassify categories,
//   3. the two values are not equal.
void found_float(double result, const uint8_t *buf) {
  ++float_count;
  const char *text = reinterpret_cast<const char *>(buf);
  char *parse_end;
#ifdef _WIN32
  static _locale_t c_locale = _create_locale(LC_ALL, "C");
  double expected = _strtod_l(text, &parse_end, c_locale);
#else
  static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
  double expected = strtod_l(text, &parse_end, c_locale);
#endif
  if (parse_end == text) {
    fprintf(stderr,
            "parsed %f from %.32s whereas strtod refuses to parse a float, ",
            result, buf);
    fprintf(stderr, " while parsing %s \n", fullpath);
    parse_error |= PARSE_ERROR;
    // No early return: the checks below still run on strtod's result.
  }
  const bool same_category =
      (std::fpclassify(expected) == std::fpclassify(result));
  if (!same_category) {
    fprintf(stderr,
            "floats not in the same category expected: %f observed: %f \n",
            expected, result);
    fprintf(stderr, "%.32s\n", buf);
    parse_error |= PARSE_ERROR;
    return;
  }
  if (expected == result) {
    return;
  }
  fprintf(stderr, "parsed %.128e from \n", result);
  fprintf(stderr, " %.32s whereas strtod gives\n", buf);
  fprintf(stderr, " %.128e,", expected);
  fprintf(stderr, " while parsing %s \n", fullpath);
  parse_error |= PARSE_ERROR;
}
#else
// We do not recognize the system, so we do not verify our results.
void found_float(double, const uint8_t *) {}
#endif
|
2018-09-28 05:38:15 +08:00
|
|
|
|
2020-03-03 06:23:19 +08:00
|
|
|
#include "simdjson.h"
|
2020-03-03 07:19:20 +08:00
|
|
|
#include "simdjson.cpp"
|
2018-09-28 05:38:15 +08:00
|
|
|
|
|
|
|
/**
 * Returns true when the part of `filename` starting at its last '.' is
 * exactly `extension` (so `extension` is expected to include the dot).
 * A filename without any '.' never matches.
 */
static bool has_extension(const char *filename, const char *extension) {
  const char *last_dot = strrchr(filename, '.');
  if (last_dot == nullptr) {
    return false;
  }
  return strcmp(last_dot, extension) == 0;
}
|
|
|
|
|
|
|
|
bool validate(const char *dirname) {
|
2018-09-28 08:26:27 +08:00
|
|
|
parse_error = 0;
|
2018-10-24 08:19:33 +08:00
|
|
|
size_t total_count = 0;
|
2018-09-28 05:38:15 +08:00
|
|
|
const char *extension = ".json";
|
2020-09-23 15:07:14 +08:00
|
|
|
size_t dirlen = std::strlen(dirname);
|
2018-09-28 05:38:15 +08:00
|
|
|
struct dirent **entry_list;
|
|
|
|
int c = scandir(dirname, &entry_list, 0, alphasort);
|
|
|
|
if (c < 0) {
|
|
|
|
printf("error accessing %s \n", dirname);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
if (c == 0) {
|
|
|
|
printf("nothing in dir %s \n", dirname);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
|
|
|
|
for (int i = 0; i < c; i++) {
|
|
|
|
const char *name = entry_list[i]->d_name;
|
2019-07-31 05:18:10 +08:00
|
|
|
if (has_extension(name, extension)) {
|
2020-09-23 15:07:14 +08:00
|
|
|
size_t filelen = std::strlen(name);
|
2018-09-28 08:26:27 +08:00
|
|
|
fullpath = (char *)malloc(dirlen + filelen + 1 + 1);
|
2018-09-28 05:38:15 +08:00
|
|
|
strcpy(fullpath, dirname);
|
|
|
|
if (needsep) {
|
|
|
|
fullpath[dirlen] = '/';
|
|
|
|
strcpy(fullpath + dirlen + 1, name);
|
|
|
|
} else {
|
|
|
|
strcpy(fullpath + dirlen, name);
|
|
|
|
}
|
2020-06-22 06:26:44 +08:00
|
|
|
simdjson::padded_string p;
|
|
|
|
auto error = simdjson::padded_string::load(fullpath).get(p);
|
2020-03-07 10:14:34 +08:00
|
|
|
if (error) {
|
|
|
|
std::cerr << "Could not load the file " << fullpath << std::endl;
|
2018-11-28 03:37:59 +08:00
|
|
|
return EXIT_FAILURE;
|
|
|
|
}
|
2018-09-28 05:38:15 +08:00
|
|
|
// terrible hack but just to get it working
|
2018-10-04 21:48:00 +08:00
|
|
|
float_count = 0;
|
|
|
|
int_count = 0;
|
|
|
|
invalid_count = 0;
|
2018-10-24 08:19:33 +08:00
|
|
|
total_count += float_count + int_count + invalid_count;
|
2020-03-29 02:43:41 +08:00
|
|
|
simdjson::dom::parser parser;
|
2020-04-15 08:26:26 +08:00
|
|
|
auto err = parser.parse(p).error();
|
2020-03-27 08:08:54 +08:00
|
|
|
bool isok = (err == simdjson::error_code::SUCCESS);
|
2018-10-24 08:19:33 +08:00
|
|
|
if (int_count + float_count + invalid_count > 0) {
|
|
|
|
printf("File %40s %s --- integers: %10zu floats: %10zu invalid: %10zu "
|
|
|
|
"total numbers: %10zu \n",
|
|
|
|
name, isok ? " is valid " : " is not valid ", int_count,
|
|
|
|
float_count, invalid_count,
|
|
|
|
int_count + float_count + invalid_count);
|
2018-10-04 21:48:00 +08:00
|
|
|
}
|
2018-09-28 05:38:15 +08:00
|
|
|
free(fullpath);
|
|
|
|
}
|
|
|
|
}
|
2018-10-24 08:19:33 +08:00
|
|
|
if ((parse_error & PARSE_ERROR) != 0) {
|
2018-09-28 08:26:27 +08:00
|
|
|
printf("NUMBER PARSING FAILS?\n");
|
2018-10-24 08:19:33 +08:00
|
|
|
} else {
|
|
|
|
printf("All ok.\n");
|
2018-09-28 08:26:27 +08:00
|
|
|
}
|
2018-09-28 05:38:15 +08:00
|
|
|
for (int i = 0; i < c; ++i)
|
|
|
|
free(entry_list[i]);
|
|
|
|
free(entry_list);
|
2018-09-28 08:26:27 +08:00
|
|
|
return ((parse_error & PARSE_ERROR) == 0);
|
2018-09-28 05:38:15 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
int main(int argc, char *argv[]) {
|
|
|
|
if (argc != 2) {
|
|
|
|
std::cerr << "Usage: " << argv[0] << " <directorywithjsonfiles>"
|
|
|
|
<< std::endl;
|
2019-06-27 07:48:51 +08:00
|
|
|
#if defined(SIMDJSON_TEST_DATA_DIR) && defined(SIMDJSON_BENCHMARK_DATA_DIR)
|
|
|
|
std::cout << "We are going to assume you mean to use the '"
|
|
|
|
<< SIMDJSON_TEST_DATA_DIR << "' and '"
|
|
|
|
<< SIMDJSON_BENCHMARK_DATA_DIR << "'directories." << std::endl;
|
|
|
|
return validate(SIMDJSON_TEST_DATA_DIR) &&
|
|
|
|
validate(SIMDJSON_BENCHMARK_DATA_DIR)
|
|
|
|
? EXIT_SUCCESS
|
|
|
|
: EXIT_FAILURE;
|
2018-12-29 02:04:38 +08:00
|
|
|
#else
|
2018-10-24 08:19:33 +08:00
|
|
|
std::cout << "We are going to assume you mean to use the 'jsonchecker' and "
|
|
|
|
"'jsonexamples' directories."
|
|
|
|
<< std::endl;
|
|
|
|
return validate("jsonchecker/") && validate("jsonexamples/") ? EXIT_SUCCESS
|
|
|
|
: EXIT_FAILURE;
|
2018-12-29 02:04:38 +08:00
|
|
|
#endif
|
2018-09-28 05:38:15 +08:00
|
|
|
}
|
|
|
|
return validate(argv[1]) ? EXIT_SUCCESS : EXIT_FAILURE;
|
|
|
|
}
|