From 72c83d94304442bdc2ec33f0d8cd60e996486127 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 15 Sep 2020 11:36:18 -0400 Subject: [PATCH] This avoids locale-dependent number parsing at the standard library level (#1157) * This avoids locale-dependent number parsing at the standard library level. * Adding missing cast. * Inserting the missing "endif" * Trial and error. * Another attempt. * Another tweak. * Another fix. * Restricting it even more. * Tweaking our symbol checks. * Somewhat smarter tests. * Nice comments. * Minor simplification. * Adding cerr. --- include/simdjson/common_defs.h | 51 +++++++++++++++++++++++++++++- src/CMakeLists.txt | 19 ++++++++++- src/generic/stage2/numberparsing.h | 11 ++++++- tests/numberparsingcheck.cpp | 16 ++++++++-- 4 files changed, 92 insertions(+), 5 deletions(-) diff --git a/include/simdjson/common_defs.h b/include/simdjson/common_defs.h index 80cb1df0..d2d42a52 100644 --- a/include/simdjson/common_defs.h +++ b/include/simdjson/common_defs.h @@ -73,7 +73,7 @@ constexpr size_t DEFAULT_MAX_DEPTH = 1024; #define SIMDJSON_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER )) // Get rid of Intellisense-only warnings (Code Analysis) // Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910). - #if defined(_MSC_VER) && (_MSC_VER>=1910) + #ifdef __has_include #if __has_include() #include #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS) @@ -196,4 +196,53 @@ namespace std { #endif // SIMDJSON_HAS_STRING_VIEW #undef SIMDJSON_HAS_STRING_VIEW // We are not going to need this macro anymore. + + + + +/** + * We may fall back on the system's number parsing, and we want + * to be able to call a locale-insensitive number parser. It unfortunately + * means that we need to load up locale headers. + * The locale.h header is generally available: + */ +#include +/** + * Determining whether we should import xlocale.h or not is + * a bit of a nightmare. Visual Studio and recent recent GLIBC (GCC) do not need it. + * However, FreeBSD and Apple platforms will need it. + * And we would want to cover as many platforms as possible. + */ +#ifdef __has_include +// This is the easy case: we have __has_include and can check whether +// xlocale is available. If so, we load it up. +#if __has_include() +#include +#endif // __has_include +#else // We do not have __has_include +// Here we do not have __has_include +// We first check for __GLIBC__ +#ifdef __GLIBC__ // If we have __GLIBC__ then we should have features.h which should help. +// Note that having __GLIBC__ does not imply that we are compiling against glibc. But +// we hope that any platform that defines __GLIBC__ will mimick glibc. +#include +// Check whether we have an old GLIBC. +#if !((__GLIBC__ > 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ > 25))) +#include // Old glibc needs xlocale, otherwise xlocale is unavailable. +#endif // !((__GLIBC__ > 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ > 25))) +#else // __GLIBC__ +// Ok. So we do not have __GLIBC__ +// We assume that everything that is not GLIBC and not on old freebsd or windows +// needs xlocale. +// It is likely that recent FreeBSD and Apple platforms load xlocale.h next: +#if !(defined(_WIN32) || (__FreeBSD_version < 1000010)) +#include // Will always happen under apple. +#endif // +#endif // __GLIBC__ +#endif // __has_include +/** + * End of the crazy locale headers. + */ + + #endif // SIMDJSON_COMMON_DEFS_H diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index df64970b..97fc328d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -58,7 +58,24 @@ if(NOT SIMDJSON_SANITIZE) else() add_test( NAME "avoid_abort" - COMMAND sh -c "${NM} $ | ${GREP} abort || exit 0 && exit 1" + # Under FreeBSD, the __cxa_guard_abort symbol may appear but it is fine. + # So we want to look for abort as a test. + COMMAND sh -c "${NM} $ | ${GREP} ' _*abort' || exit 0 && exit 1" + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + ) + add_test( + NAME "avoid_cout" + COMMAND sh -c "${NM} $ | ${GREP} ' _*cout' || exit 0 && exit 1" + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + ) + add_test( + NAME "avoid_cerr" + COMMAND sh -c "${NM} $ | ${GREP} ' _*cerr' || exit 0 && exit 1" + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + ) + add_test( + NAME "avoid_printf" + COMMAND sh -c "${NM} $ | ${GREP} ' _*printf' || exit 0 && exit 1" WORKING_DIRECTORY ${PROJECT_BINARY_DIR} ) add_test( diff --git a/src/generic/stage2/numberparsing.h b/src/generic/stage2/numberparsing.h index d977107c..93f9a945 100644 --- a/src/generic/stage2/numberparsing.h +++ b/src/generic/stage2/numberparsing.h @@ -226,7 +226,16 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg static bool parse_float_strtod(const uint8_t *ptr, double *outDouble) { char *endptr; - *outDouble = strtod((const char *)ptr, &endptr); + // We want to call strtod with the C (default) locale to avoid + // potential issues in case someone has a different locale. + // Unfortunately, Visual Studio has a different syntax. +#ifdef _WIN32 + static _locale_t c_locale = _create_locale(LC_ALL, "C"); + *outDouble = _strtod_l((const char *)ptr, &endptr, c_locale); +#else + static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL); + *outDouble = strtod_l((const char *)ptr, &endptr, c_locale); +#endif // Some libraries will set errno = ERANGE when the value is subnormal, // yet we may want to be able to parse subnormal values. // However, we do not want to tolerate NAN or infinite values. diff --git a/tests/numberparsingcheck.cpp b/tests/numberparsingcheck.cpp index d3d67c87..cd7bbbcc 100644 --- a/tests/numberparsingcheck.cpp +++ b/tests/numberparsingcheck.cpp @@ -70,7 +70,13 @@ bool is_in_bad_list(const char *buf) { void found_invalid_number(const uint8_t *buf) { invalid_count++; char *endptr; - double expected = strtod((const char *)buf, &endptr); +#ifdef _WIN32 + static _locale_t c_locale = _create_locale(LC_ALL, "C"); + double expected = _strtod_l((const char *)buf, &endptr, c_locale); +#else + static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL); + double expected = strtod_l((const char *)buf, &endptr, c_locale); +#endif if (endptr != (const char *)buf) { if (!is_in_bad_list((const char *)buf)) { printf("Warning: found_invalid_number %.32s whereas strtod parses it to " @@ -115,7 +121,13 @@ void found_unsigned_integer(uint64_t result, const uint8_t *buf) { void found_float(double result, const uint8_t *buf) { char *endptr; float_count++; - double expected = strtod((const char *)buf, &endptr); +#ifdef _WIN32 + static _locale_t c_locale = _create_locale(LC_ALL, "C"); + double expected = _strtod_l((const char *)buf, &endptr, c_locale); +#else + static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL); + double expected = strtod_l((const char *)buf, &endptr, c_locale); +#endif if (endptr == (const char *)buf) { fprintf(stderr, "parsed %f from %.32s whereas strtod refuses to parse a float, ",