Faster case-insensitive comparisons. (#837)

* Faster case-insensitive comparisons.
This commit is contained in:
Daniel Lemire 2020-04-30 12:52:28 -07:00 committed by GitHub
parent e7f774f964
commit fc1ddcd2f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 10 additions and 4 deletions

View File

@ -313,6 +313,7 @@ public:
/** /**
* Get the value associated with the given key in a case-insensitive manner. * Get the value associated with the given key in a case-insensitive manner.
* It is only guaranteed to work over ASCII inputs.
* *
* Note: The key will be matched against **unescaped** JSON. * Note: The key will be matched against **unescaped** JSON.
* *

View File

@ -686,10 +686,10 @@ inline simdjson_result<element> object::at_key_case_insensitive(const std::strin
for (iterator field = begin(); field != end_field; ++field) { for (iterator field = begin(); field != end_field; ++field) {
auto field_key = field.key(); auto field_key = field.key();
if (key.length() == field_key.length()) { if (key.length() == field_key.length()) {
bool equal = true; // See "For case-insensitive string comparisons, avoid char-by-char functions":
for (size_t i=0; i<field_key.length(); i++) { // https://lemire.me/blog/2020/04/30/for-case-insensitive-string-comparisons-avoid-char-by-char-functions/
equal = equal && std::tolower(key[i]) == std::tolower(field_key[i]); // Note that it might be worth rolling our own strncasecmp function, with vectorization.
} const bool equal = (simdjson_strncasecmp(key.data(), field_key.data(), key.length()) == 0);
if (equal) { return field.value(); } if (equal) { return field.value(); }
} }
} }

View File

@ -147,8 +147,13 @@ compiling for a known 64-bit platform."
// regular visual studio and clang under visual studio. // regular visual studio and clang under visual studio.
// clang under Windows has _stricmp (like visual studio) but not strcasecmp (as clang normally has) // clang under Windows has _stricmp (like visual studio) but not strcasecmp (as clang normally has)
#define simdjson_strcasecmp _stricmp #define simdjson_strcasecmp _stricmp
#define simdjson_strncasecmp _strnicmp
#else #else
// The strcasecmp, strncasecmp, and strcasestr functions do not work with multibyte strings (e.g. UTF-8).
// So they are only useful for ASCII in our context.
// https://www.gnu.org/software/libunistring/manual/libunistring.html#char-_002a-strings
#define simdjson_strcasecmp strcasecmp #define simdjson_strcasecmp strcasecmp
#define simdjson_strncasecmp strncasecmp
#endif #endif
namespace simdjson { namespace simdjson {