diff --git a/src/common.cpp b/src/common.cpp index fac1556ce..038e6ead6 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -1859,7 +1859,7 @@ string_fuzzy_match_t string_fuzzy_match_string(const wcstring &string, result.match_distance_first = match_against.size() - string.size(); result.match_distance_second = location; // prefer earlier matches } else if (limit_type >= fuzzy_match_substring_case_insensitive && - (location = ifind(match_against, string)) != wcstring::npos) { + (location = ifind(match_against, string, true)) != wcstring::npos) { // A case-insensitive version of the string is in the match against. result.type = fuzzy_match_substring_case_insensitive; assert(match_against.size() >= string.size()); diff --git a/src/common.h b/src/common.h index 17ae1c65e..be21c9fe6 100644 --- a/src/common.h +++ b/src/common.h @@ -356,18 +356,54 @@ bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix, bool string_prefixes_string_case_insensitive(const wcstring &proposed_prefix, const wcstring &value); - /// Case-insensitive string search, templated for use with both std::string and std::wstring. /// Modeled after std::string::find(). +/// \param fuzzy when true (used for fuzzy matching), `-` and `_` are additionally treated as +/// equal to each other, on top of the case-insensitive comparison (#3584). /// \return the offset of the first case-insensitive matching instance of `needle` within /// `haystack`, or `string::npos()` if no results were found. 
template -size_t ifind(const T &haystack, const T &needle) { +size_t ifind(const T &haystack, const T &needle, bool fuzzy = false) { using char_t = typename T::value_type; auto locale = std::locale(); - auto icase_eq = [&locale](char_t c1, char_t c2) { - return std::toupper(c1, locale) == std::toupper(c2, locale); - }; + + std::function icase_eq; + + if (!fuzzy) { + icase_eq = [&locale](char_t c1, char_t c2) { + return std::toupper(c1, locale) == std::toupper(c2, locale); + }; + } else { + icase_eq = [&locale](char_t c1, char_t c2) { + // This `ifind()` call is being used for fuzzy string matching. Further extend case + // insensitivity to treat `-` and `_` as equal (#3584). + + // The two lines below were tested to be 27% faster than + // (c1 == '_' || c1 == '-') && (c2 == '-' || c2 == '_') + // while returning no false positives for all (c1, c2) combinations in the printable + // range (0x20-0x7E). It might return false positives outside that range, but fuzzy + // comparisons are typically called for file names only, which are unlikely to have + // such characters and this entire function is 100% broken on unicode so there's no + // point in worrying about anything outside of the ASCII range. + // ((c1 == Literal('_') || c1 == Literal('-')) && + // ((c1 ^ c2) == (Literal('-') ^ Literal('_')))); + + // The two branches below differ only in the width of their character literals. + // NOTE(review): the gate is a plain `if` on `sizeof(char_t)`, not `if constexpr`, + // so both branches are compiled for every `T`; no SFINAE is involved, and comparing a + // `char` with a `wchar_t` is legal C++ -- confirm whether the gate is needed. 
+ if (sizeof(char_t) == sizeof(char)) { + return std::toupper(c1, locale) == std::toupper(c2, locale) || + ((c1 == '_' || c1 == '-') && + ((c1 ^ c2) == ('-' ^ '_'))); + } else { + return std::toupper(c1, locale) == std::toupper(c2, locale) || + ((c1 == L'_' || c1 == L'-') && + ((c1 ^ c2) == (L'-' ^ L'_'))); + } + }; + } + auto result = std::search(haystack.begin(), haystack.end(), needle.begin(), needle.end(), icase_eq); if (result != haystack.end()) { diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 1fb3d1666..d2e2fd73a 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -1742,6 +1742,15 @@ static void test_ifind() { do_test(ifind(std::string{"balpha"}, std::string{"Plh"}) == std::string::npos); } +static void test_ifind_fuzzy() { + say(L"Testing ifind with fuzzy logic"); + do_test(ifind(std::string{"alpha"}, std::string{"alpha"}, true) == 0); + do_test(ifind(wcstring{L"alphab"}, wcstring{L"alpha"}, true) == 0); + do_test(ifind(std::string{"alpha-b"}, std::string{"alpha_b"}, true) == 0); + do_test(ifind(std::string{"alpha-_"}, std::string{"alpha_-"}, true) == 0); + do_test(ifind(std::string{"alpha-b"}, std::string{"alpha b"}, true) == std::string::npos); +} + static void test_abbreviations() { say(L"Testing abbreviations"); env_push(true); @@ -4937,6 +4946,7 @@ int main(int argc, char **argv) { if (should_test_function("expand")) test_expand(); if (should_test_function("fuzzy_match")) test_fuzzy_match(); if (should_test_function("ifind")) test_ifind(); + if (should_test_function("ifind_fuzzy")) test_ifind_fuzzy(); if (should_test_function("abbreviations")) test_abbreviations(); if (should_test_function("test")) test_test(); if (should_test_function("path")) test_path();