This avoids locale-dependent number parsing at the standard library level (#1157)

* This avoids locale-dependent number parsing at the standard library level.

* Adding missing cast.

* Inserting the missing "endif"

* Trial and error.

* Another attempt.

* Another tweak.

* Another fix.

* Restricting it even more.

* Tweaking our symbol checks.

* Somewhat smarter tests.

* Nice comments.

* Minor simplification.

* Adding cerr.
This commit is contained in:
Daniel Lemire 2020-09-15 11:36:18 -04:00 committed by GitHub
parent bfbac12f76
commit 72c83d9430
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 92 additions and 5 deletions

View File

@ -73,7 +73,7 @@ constexpr size_t DEFAULT_MAX_DEPTH = 1024;
#define SIMDJSON_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER )) #define SIMDJSON_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER ))
// Get rid of Intellisense-only warnings (Code Analysis) // Get rid of Intellisense-only warnings (Code Analysis)
// Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910). // Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910).
#if defined(_MSC_VER) && (_MSC_VER>=1910) #ifdef __has_include
#if __has_include(<CppCoreCheck\Warnings.h>) #if __has_include(<CppCoreCheck\Warnings.h>)
#include <CppCoreCheck\Warnings.h> #include <CppCoreCheck\Warnings.h>
#define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS) #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS)
@ -196,4 +196,53 @@ namespace std {
#endif // SIMDJSON_HAS_STRING_VIEW #endif // SIMDJSON_HAS_STRING_VIEW
#undef SIMDJSON_HAS_STRING_VIEW // We are not going to need this macro anymore. #undef SIMDJSON_HAS_STRING_VIEW // We are not going to need this macro anymore.
/**
* We may fall back on the system's number parsing, and we want
* to be able to call a locale-insensitive number parser. It unfortunately
* means that we need to load up locale headers.
* The locale.h header is generally available:
*/
#include <locale.h>
/**
* Determining whether we should import xlocale.h or not is
* a bit of a nightmare. Visual Studio and recent GLIBC (GCC) do not need it.
* However, FreeBSD and Apple platforms will need it.
* And we would want to cover as many platforms as possible.
*/
#ifdef __has_include
// This is the easy case: we have __has_include and can check whether
// xlocale.h is available. If so, we load it up.
#if __has_include(<xlocale.h>)
#include <xlocale.h>
#endif // __has_include(<xlocale.h>)
#else // We do not have __has_include
// Here we do not have __has_include, so we have to guess from platform macros.
// We first check for __GLIBC__
#ifdef __GLIBC__ // If we have __GLIBC__ then we should have features.h which should help.
// Note that having __GLIBC__ does not imply that we are compiling against glibc. But
// we hope that any platform that defines __GLIBC__ will mimic glibc.
#include <features.h>
// Check whether we have an old GLIBC.
#if !((__GLIBC__ > 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ > 25)))
#include <xlocale.h> // Old glibc needs xlocale, otherwise xlocale is unavailable.
#endif // !((__GLIBC__ > 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ > 25)))
#else // __GLIBC__
// Ok. So we do not have __GLIBC__
// We assume that everything that is not GLIBC and not on old freebsd or windows
// needs xlocale.
// It is likely that recent FreeBSD and Apple platforms load xlocale.h next.
//
// The FreeBSD version comparison must be guarded by defined(...): inside #if an
// undefined identifier evaluates to 0, so an unguarded
// (__FreeBSD_version < 1000010) is true on every platform that does not define
// the macro (Apple included) and the include below would never be reached.
// A platform that reaches this branch but does not ship xlocale.h has to be
// added to the exclusion list.
#if !(defined(_WIN32) || (defined(__FreeBSD_version) && (__FreeBSD_version < 1000010)))
#include <xlocale.h> // Will always happen under apple.
#endif // !(_WIN32 || old FreeBSD)
#endif // __GLIBC__
#endif // __has_include
/**
* End of the crazy locale headers.
*/
#endif // SIMDJSON_COMMON_DEFS_H #endif // SIMDJSON_COMMON_DEFS_H

View File

@ -58,7 +58,24 @@ if(NOT SIMDJSON_SANITIZE)
else() else()
add_test( add_test(
NAME "avoid_abort" NAME "avoid_abort"
COMMAND sh -c "${NM} $<TARGET_FILE_NAME:simdjson> | ${GREP} abort || exit 0 && exit 1" # Under FreeBSD, the __cxa_guard_abort symbol may appear but it is fine.
# So we want to look for <space><possibly _>abort as a test.
COMMAND sh -c "${NM} $<TARGET_FILE_NAME:simdjson> | ${GREP} ' _*abort' || exit 0 && exit 1"
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
)
add_test(
NAME "avoid_cout"
COMMAND sh -c "${NM} $<TARGET_FILE_NAME:simdjson> | ${GREP} ' _*cout' || exit 0 && exit 1"
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
)
add_test(
NAME "avoid_cerr"
COMMAND sh -c "${NM} $<TARGET_FILE_NAME:simdjson> | ${GREP} ' _*cerr' || exit 0 && exit 1"
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
)
add_test(
NAME "avoid_printf"
COMMAND sh -c "${NM} $<TARGET_FILE_NAME:simdjson> | ${GREP} ' _*printf' || exit 0 && exit 1"
WORKING_DIRECTORY ${PROJECT_BINARY_DIR} WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
) )
add_test( add_test(

View File

@ -226,7 +226,16 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
static bool parse_float_strtod(const uint8_t *ptr, double *outDouble) { static bool parse_float_strtod(const uint8_t *ptr, double *outDouble) {
char *endptr; char *endptr;
*outDouble = strtod((const char *)ptr, &endptr); // We want to call strtod with the C (default) locale to avoid
// potential issues in case someone has a different locale.
// Unfortunately, Visual Studio has a different syntax.
#ifdef _WIN32
static _locale_t c_locale = _create_locale(LC_ALL, "C");
*outDouble = _strtod_l((const char *)ptr, &endptr, c_locale);
#else
static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
*outDouble = strtod_l((const char *)ptr, &endptr, c_locale);
#endif
// Some libraries will set errno = ERANGE when the value is subnormal, // Some libraries will set errno = ERANGE when the value is subnormal,
// yet we may want to be able to parse subnormal values. // yet we may want to be able to parse subnormal values.
// However, we do not want to tolerate NAN or infinite values. // However, we do not want to tolerate NAN or infinite values.

View File

@ -70,7 +70,13 @@ bool is_in_bad_list(const char *buf) {
void found_invalid_number(const uint8_t *buf) { void found_invalid_number(const uint8_t *buf) {
invalid_count++; invalid_count++;
char *endptr; char *endptr;
double expected = strtod((const char *)buf, &endptr); #ifdef _WIN32
static _locale_t c_locale = _create_locale(LC_ALL, "C");
double expected = _strtod_l((const char *)buf, &endptr, c_locale);
#else
static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
double expected = strtod_l((const char *)buf, &endptr, c_locale);
#endif
if (endptr != (const char *)buf) { if (endptr != (const char *)buf) {
if (!is_in_bad_list((const char *)buf)) { if (!is_in_bad_list((const char *)buf)) {
printf("Warning: found_invalid_number %.32s whereas strtod parses it to " printf("Warning: found_invalid_number %.32s whereas strtod parses it to "
@ -115,7 +121,13 @@ void found_unsigned_integer(uint64_t result, const uint8_t *buf) {
void found_float(double result, const uint8_t *buf) { void found_float(double result, const uint8_t *buf) {
char *endptr; char *endptr;
float_count++; float_count++;
double expected = strtod((const char *)buf, &endptr); #ifdef _WIN32
static _locale_t c_locale = _create_locale(LC_ALL, "C");
double expected = _strtod_l((const char *)buf, &endptr, c_locale);
#else
static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
double expected = strtod_l((const char *)buf, &endptr, c_locale);
#endif
if (endptr == (const char *)buf) { if (endptr == (const char *)buf) {
fprintf(stderr, fprintf(stderr,
"parsed %f from %.32s whereas strtod refuses to parse a float, ", "parsed %f from %.32s whereas strtod refuses to parse a float, ",