This avoids locale-dependent number parsing at the standard library level (#1157)
* This avoids locale-dependent number parsing at the standard library level. * Adding missing cast. * Inserting the missing "endif" * Trial and error. * Another attempt. * Another tweak. * Another fix. * Restricting it even more. * Tweaking our symbol checks. * Somewhat smarter tests. * Nice comments. * Minor simplification. * Adding cerr.
This commit is contained in:
parent
bfbac12f76
commit
72c83d9430
|
@ -73,7 +73,7 @@ constexpr size_t DEFAULT_MAX_DEPTH = 1024;
|
||||||
#define SIMDJSON_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER ))
|
#define SIMDJSON_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER ))
|
||||||
// Get rid of Intellisense-only warnings (Code Analysis)
|
// Get rid of Intellisense-only warnings (Code Analysis)
|
||||||
// Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910).
|
// Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910).
|
||||||
#if defined(_MSC_VER) && (_MSC_VER>=1910)
|
#ifdef __has_include
|
||||||
#if __has_include(<CppCoreCheck\Warnings.h>)
|
#if __has_include(<CppCoreCheck\Warnings.h>)
|
||||||
#include <CppCoreCheck\Warnings.h>
|
#include <CppCoreCheck\Warnings.h>
|
||||||
#define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS)
|
#define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS)
|
||||||
|
@ -196,4 +196,53 @@ namespace std {
|
||||||
#endif // SIMDJSON_HAS_STRING_VIEW
|
#endif // SIMDJSON_HAS_STRING_VIEW
|
||||||
#undef SIMDJSON_HAS_STRING_VIEW // We are not going to need this macro anymore.
|
#undef SIMDJSON_HAS_STRING_VIEW // We are not going to need this macro anymore.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* We may fall back on the system's number parsing, and we want
|
||||||
|
* to be able to call a locale-insensitive number parser. It unfortunately
|
||||||
|
* means that we need to load up locale headers.
|
||||||
|
* The locale.h header is generally available:
|
||||||
|
*/
|
||||||
|
#include <locale.h>
|
||||||
|
/**
|
||||||
|
* Determining whether we should import xlocale.h or not is
|
||||||
|
* a bit of a nightmare. Visual Studio and recent recent GLIBC (GCC) do not need it.
|
||||||
|
* However, FreeBSD and Apple platforms will need it.
|
||||||
|
* And we would want to cover as many platforms as possible.
|
||||||
|
*/
|
||||||
|
#ifdef __has_include
|
||||||
|
// This is the easy case: we have __has_include and can check whether
|
||||||
|
// xlocale is available. If so, we load it up.
|
||||||
|
#if __has_include(<xlocale.h>)
|
||||||
|
#include <xlocale.h>
|
||||||
|
#endif // __has_include
|
||||||
|
#else // We do not have __has_include
|
||||||
|
// Here we do not have __has_include
|
||||||
|
// We first check for __GLIBC__
|
||||||
|
#ifdef __GLIBC__ // If we have __GLIBC__ then we should have features.h which should help.
|
||||||
|
// Note that having __GLIBC__ does not imply that we are compiling against glibc. But
|
||||||
|
// we hope that any platform that defines __GLIBC__ will mimick glibc.
|
||||||
|
#include <features.h>
|
||||||
|
// Check whether we have an old GLIBC.
|
||||||
|
#if !((__GLIBC__ > 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ > 25)))
|
||||||
|
#include <xlocale.h> // Old glibc needs xlocale, otherwise xlocale is unavailable.
|
||||||
|
#endif // !((__GLIBC__ > 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ > 25)))
|
||||||
|
#else // __GLIBC__
|
||||||
|
// Ok. So we do not have __GLIBC__
|
||||||
|
// We assume that everything that is not GLIBC and not on old freebsd or windows
|
||||||
|
// needs xlocale.
|
||||||
|
// It is likely that recent FreeBSD and Apple platforms load xlocale.h next:
|
||||||
|
#if !(defined(_WIN32) || (__FreeBSD_version < 1000010))
|
||||||
|
#include <xlocale.h> // Will always happen under apple.
|
||||||
|
#endif //
|
||||||
|
#endif // __GLIBC__
|
||||||
|
#endif // __has_include
|
||||||
|
/**
|
||||||
|
* End of the crazy locale headers.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
#endif // SIMDJSON_COMMON_DEFS_H
|
#endif // SIMDJSON_COMMON_DEFS_H
|
||||||
|
|
|
@ -58,7 +58,24 @@ if(NOT SIMDJSON_SANITIZE)
|
||||||
else()
|
else()
|
||||||
add_test(
|
add_test(
|
||||||
NAME "avoid_abort"
|
NAME "avoid_abort"
|
||||||
COMMAND sh -c "${NM} $<TARGET_FILE_NAME:simdjson> | ${GREP} abort || exit 0 && exit 1"
|
# Under FreeBSD, the __cxa_guard_abort symbol may appear but it is fine.
|
||||||
|
# So we want to look for <space><possibly _>abort as a test.
|
||||||
|
COMMAND sh -c "${NM} $<TARGET_FILE_NAME:simdjson> | ${GREP} ' _*abort' || exit 0 && exit 1"
|
||||||
|
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||||
|
)
|
||||||
|
add_test(
|
||||||
|
NAME "avoid_cout"
|
||||||
|
COMMAND sh -c "${NM} $<TARGET_FILE_NAME:simdjson> | ${GREP} ' _*cout' || exit 0 && exit 1"
|
||||||
|
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||||
|
)
|
||||||
|
add_test(
|
||||||
|
NAME "avoid_cerr"
|
||||||
|
COMMAND sh -c "${NM} $<TARGET_FILE_NAME:simdjson> | ${GREP} ' _*cerr' || exit 0 && exit 1"
|
||||||
|
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||||
|
)
|
||||||
|
add_test(
|
||||||
|
NAME "avoid_printf"
|
||||||
|
COMMAND sh -c "${NM} $<TARGET_FILE_NAME:simdjson> | ${GREP} ' _*printf' || exit 0 && exit 1"
|
||||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||||
)
|
)
|
||||||
add_test(
|
add_test(
|
||||||
|
|
|
@ -226,7 +226,16 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
|
||||||
|
|
||||||
static bool parse_float_strtod(const uint8_t *ptr, double *outDouble) {
|
static bool parse_float_strtod(const uint8_t *ptr, double *outDouble) {
|
||||||
char *endptr;
|
char *endptr;
|
||||||
*outDouble = strtod((const char *)ptr, &endptr);
|
// We want to call strtod with the C (default) locale to avoid
|
||||||
|
// potential issues in case someone has a different locale.
|
||||||
|
// Unfortunately, Visual Studio has a different syntax.
|
||||||
|
#ifdef _WIN32
|
||||||
|
static _locale_t c_locale = _create_locale(LC_ALL, "C");
|
||||||
|
*outDouble = _strtod_l((const char *)ptr, &endptr, c_locale);
|
||||||
|
#else
|
||||||
|
static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
|
||||||
|
*outDouble = strtod_l((const char *)ptr, &endptr, c_locale);
|
||||||
|
#endif
|
||||||
// Some libraries will set errno = ERANGE when the value is subnormal,
|
// Some libraries will set errno = ERANGE when the value is subnormal,
|
||||||
// yet we may want to be able to parse subnormal values.
|
// yet we may want to be able to parse subnormal values.
|
||||||
// However, we do not want to tolerate NAN or infinite values.
|
// However, we do not want to tolerate NAN or infinite values.
|
||||||
|
|
|
@ -70,7 +70,13 @@ bool is_in_bad_list(const char *buf) {
|
||||||
void found_invalid_number(const uint8_t *buf) {
|
void found_invalid_number(const uint8_t *buf) {
|
||||||
invalid_count++;
|
invalid_count++;
|
||||||
char *endptr;
|
char *endptr;
|
||||||
double expected = strtod((const char *)buf, &endptr);
|
#ifdef _WIN32
|
||||||
|
static _locale_t c_locale = _create_locale(LC_ALL, "C");
|
||||||
|
double expected = _strtod_l((const char *)buf, &endptr, c_locale);
|
||||||
|
#else
|
||||||
|
static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
|
||||||
|
double expected = strtod_l((const char *)buf, &endptr, c_locale);
|
||||||
|
#endif
|
||||||
if (endptr != (const char *)buf) {
|
if (endptr != (const char *)buf) {
|
||||||
if (!is_in_bad_list((const char *)buf)) {
|
if (!is_in_bad_list((const char *)buf)) {
|
||||||
printf("Warning: found_invalid_number %.32s whereas strtod parses it to "
|
printf("Warning: found_invalid_number %.32s whereas strtod parses it to "
|
||||||
|
@ -115,7 +121,13 @@ void found_unsigned_integer(uint64_t result, const uint8_t *buf) {
|
||||||
void found_float(double result, const uint8_t *buf) {
|
void found_float(double result, const uint8_t *buf) {
|
||||||
char *endptr;
|
char *endptr;
|
||||||
float_count++;
|
float_count++;
|
||||||
double expected = strtod((const char *)buf, &endptr);
|
#ifdef _WIN32
|
||||||
|
static _locale_t c_locale = _create_locale(LC_ALL, "C");
|
||||||
|
double expected = _strtod_l((const char *)buf, &endptr, c_locale);
|
||||||
|
#else
|
||||||
|
static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
|
||||||
|
double expected = strtod_l((const char *)buf, &endptr, c_locale);
|
||||||
|
#endif
|
||||||
if (endptr == (const char *)buf) {
|
if (endptr == (const char *)buf) {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"parsed %f from %.32s whereas strtod refuses to parse a float, ",
|
"parsed %f from %.32s whereas strtod refuses to parse a float, ",
|
||||||
|
|
Loading…
Reference in New Issue