Faster case-insensitive comparisons. (#837)
* Faster case-insensitive comparisons.
This commit is contained in:
parent
e7f774f964
commit
fc1ddcd2f8
|
@ -313,6 +313,7 @@ public:
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the value associated with the given key in a case-insensitive manner.
|
* Get the value associated with the given key in a case-insensitive manner.
|
||||||
|
* It is only guaranteed to work over ASCII inputs.
|
||||||
*
|
*
|
||||||
* Note: The key will be matched against **unescaped** JSON.
|
* Note: The key will be matched against **unescaped** JSON.
|
||||||
*
|
*
|
||||||
|
|
|
@ -686,10 +686,10 @@ inline simdjson_result<element> object::at_key_case_insensitive(const std::strin
|
||||||
for (iterator field = begin(); field != end_field; ++field) {
|
for (iterator field = begin(); field != end_field; ++field) {
|
||||||
auto field_key = field.key();
|
auto field_key = field.key();
|
||||||
if (key.length() == field_key.length()) {
|
if (key.length() == field_key.length()) {
|
||||||
bool equal = true;
|
// See "For case-insensitive string comparisons, avoid char-by-char functions":
|
||||||
for (size_t i=0; i<field_key.length(); i++) {
|
// https://lemire.me/blog/2020/04/30/for-case-insensitive-string-comparisons-avoid-char-by-char-functions/
|
||||||
equal = equal && std::tolower(key[i]) == std::tolower(field_key[i]);
|
// Note that it might be worth rolling our own strncasecmp function, with vectorization.
|
||||||
}
|
const bool equal = (simdjson_strncasecmp(key.data(), field_key.data(), key.length()) == 0);
|
||||||
if (equal) { return field.value(); }
|
if (equal) { return field.value(); }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -147,8 +147,13 @@ compiling for a known 64-bit platform."
|
||||||
// regular visual studio and clang under visual studio.
|
// regular visual studio and clang under visual studio.
|
||||||
// clang under Windows has _stricmp (like visual studio) but not strcasecmp (as clang normally has)
|
// clang under Windows has _stricmp (like visual studio) but not strcasecmp (as clang normally has)
|
||||||
#define simdjson_strcasecmp _stricmp
|
#define simdjson_strcasecmp _stricmp
|
||||||
|
#define simdjson_strncasecmp _strnicmp
|
||||||
#else
|
#else
|
||||||
|
// The strcasecmp, strncasecmp, and strcasestr functions do not work with multibyte strings (e.g. UTF-8).
|
||||||
|
// So they are only useful for ASCII in our context.
|
||||||
|
// https://www.gnu.org/software/libunistring/manual/libunistring.html#char-_002a-strings
|
||||||
#define simdjson_strcasecmp strcasecmp
|
#define simdjson_strcasecmp strcasecmp
|
||||||
|
#define simdjson_strncasecmp strncasecmp
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace simdjson {
|
namespace simdjson {
|
||||||
|
|
Loading…
Reference in New Issue