Faster case-insensitive comparisons. (#837)

* Faster case-insensitive comparisons.
This commit is contained in:
Daniel Lemire 2020-04-30 12:52:28 -07:00 committed by GitHub
parent e7f774f964
commit fc1ddcd2f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 10 additions and 4 deletions

View File

@@ -313,6 +313,7 @@ public:
/**
* Get the value associated with the given key in a case-insensitive manner.
* It is only guaranteed to work over ASCII inputs.
*
* Note: The key will be matched against **unescaped** JSON.
*

View File

@@ -686,10 +686,10 @@ inline simdjson_result<element> object::at_key_case_insensitive(const std::strin
for (iterator field = begin(); field != end_field; ++field) {
auto field_key = field.key();
if (key.length() == field_key.length()) {
bool equal = true;
for (size_t i=0; i<field_key.length(); i++) {
equal = equal && std::tolower(key[i]) == std::tolower(field_key[i]);
}
// See the blog post "For case-insensitive string comparisons, avoid char-by-char functions":
// https://lemire.me/blog/2020/04/30/for-case-insensitive-string-comparisons-avoid-char-by-char-functions/
// Note that it might be worth rolling our own strncasecmp function, with vectorization.
const bool equal = (simdjson_strncasecmp(key.data(), field_key.data(), key.length()) == 0);
if (equal) { return field.value(); }
}
}

View File

@@ -147,8 +147,13 @@ compiling for a known 64-bit platform."
// regular visual studio and clang under visual studio.
// Clang under Windows provides _stricmp (like Visual Studio) but not strcasecmp (which Clang normally provides).
#define simdjson_strcasecmp _stricmp
#define simdjson_strncasecmp _strnicmp
#else
// The strcasecmp, strncasecmp, and strcasestr functions do not work with multibyte strings (e.g. UTF-8).
// So they are only useful for ASCII in our context.
// https://www.gnu.org/software/libunistring/manual/libunistring.html#char-_002a-strings
#define simdjson_strcasecmp strcasecmp
#define simdjson_strncasecmp strncasecmp
#endif
namespace simdjson {