Fix fuzzy matching for multi-character lowercase sequences

Same as 8668ce336c (Fix common::wcscasecmp() for multi-byte lowercase strings, 2023-05-02).
2026-06-05 08:11:15 -03:00 · 2024-11-17 08:11:41 +01:00
parent 81f1ab75d0
commit d234ebf484
2 changed files with 30 additions and 23 deletions
--- a/src/fallback.rs
+++ b/src/fallback.rs
@@ -141,8 +141,12 @@ pub fn fish_mkstemp_cloexec(name_template: CString) -> Result<(File, CString), E
    }
 }

-/// Compare two wide strings in a case-insensitive fashion
 pub fn wcscasecmp(lhs: &wstr, rhs: &wstr) -> cmp::Ordering {
+    wcscasecmp_fuzzy(lhs, rhs, std::convert::identity)
+}
+
+/// Compare two wide strings case-insensitively after mapping each char through `canonicalize`.
+pub fn wcscasecmp_fuzzy(lhs: &wstr, rhs: &wstr, canonicalize: fn(char) -> char) -> cmp::Ordering {
    use std::char::ToLowercase;
    use widestring::utfstr::CharsUtf32;

@@ -151,12 +155,12 @@ pub fn wcscasecmp(lhs: &wstr, rhs: &wstr) -> cmp::Ordering {
    /// `char::to_lowercase()` returns an iterator of chars and we sometimes need to cmp the last
    /// char of one char's `to_lowercase()` with the first char of the other char's
    /// `to_lowercase()`. This makes that possible.
-    struct ToLowerBuffer<'a> {
+    struct ToLowerBuffer<'a, Canonicalize: Fn(char) -> char> {
        current: ToLowercase,
-        chars: CharsUtf32<'a>,
+        chars: std::iter::Map<CharsUtf32<'a>, Canonicalize>,
    }

-    impl<'a> Iterator for ToLowerBuffer<'a> {
+    impl<'a, Canonicalize: Fn(char) -> char> Iterator for ToLowerBuffer<'a, Canonicalize> {
        type Item = char;

        fn next(&mut self) -> Option<Self::Item> {
@@ -169,9 +173,8 @@ fn next(&mut self) -> Option<Self::Item> {
        }
    }

-    impl<'a> ToLowerBuffer<'a> {
-        pub fn from(w: &'a wstr) -> Self {
-            let mut chars = w.chars();
+    impl<'a, Canonicalize: Fn(char) -> char> ToLowerBuffer<'a, Canonicalize> {
+        pub fn new(mut chars: std::iter::Map<CharsUtf32<'a>, Canonicalize>) -> Self {
            Self {
                current: chars.next().map(|c| c.to_lowercase()).unwrap_or_else(|| {
                    let mut empty = 'a'.to_lowercase();
@@ -184,8 +187,8 @@ pub fn from(w: &'a wstr) -> Self {
        }
    }

-    let lhs = ToLowerBuffer::from(lhs);
-    let rhs = ToLowerBuffer::from(rhs);
+    let lhs = ToLowerBuffer::new(lhs.chars().map(canonicalize));
+    let rhs = ToLowerBuffer::new(rhs.chars().map(canonicalize));
    lhs.cmp(rhs)
 }

--- a/src/wcstringutil.rs
+++ b/src/wcstringutil.rs
@@ -2,7 +2,7 @@

 use crate::common::{get_ellipsis_char, get_ellipsis_str};
 use crate::expand::INTERNAL_SEPARATOR;
-use crate::fallback::{fish_wcwidth, wcscasecmp};
+use crate::fallback::{fish_wcwidth, wcscasecmp, wcscasecmp_fuzzy};
 use crate::flog::FLOGF;
 use crate::libc::MB_CUR_MAX;
 use crate::wchar::{decode_byte_from_char, prelude::*};
@@ -95,21 +95,25 @@ pub fn ifind(haystack: &wstr, needle: &wstr, fuzzy: bool /* = false */) -> Optio
        .as_char_slice()
        .windows(needle.len())
        .position(|window| {
-            for (l, r) in window.iter().zip(needle.chars()) {
-                // In fuzzy matching treat treat `-` and `_` as equal (#3584).
-                if fuzzy && ['-', '_'].contains(l) && ['-', '_'].contains(&r) {
-                    continue;
-                }
-                // TODO Decide what to do for different lengths.
-                let l = l.to_lowercase();
-                let r = r.to_lowercase();
-                for (l, r) in l.zip(r) {
-                    if l != r {
-                        return false;
-                    }
+            // In fuzzy matching treat `-` and `_` as equal (#3584).
+            fn fuzzy_canonicalize(c: char) -> char {
+                if c == '_' {
+                    '-'
+                } else {
+                    c
                }
            }
-            true
+
+            wcscasecmp_fuzzy(
+                wstr::from_char_slice(window),
+                needle,
+                if fuzzy {
+                    fuzzy_canonicalize
+                } else {
+                    std::convert::identity
+                },
+            )
+            .is_eq()
        })
 }