mirror of
https://github.com/fish-shell/fish-shell.git
synced 2026-06-05 08:11:15 -03:00
Fix fuzzy matching for multi-character lowercase sequences
Same as 8668ce336c (Fix common::wcscasecmp() for multi-byte lowercase strings,
2023-05-02).
This commit is contained in:
@@ -141,8 +141,12 @@ pub fn fish_mkstemp_cloexec(name_template: CString) -> Result<(File, CString), E
|
||||
}
|
||||
}
|
||||
|
||||
/// Compare two wide strings in a case-insensitive fashion
|
||||
pub fn wcscasecmp(lhs: &wstr, rhs: &wstr) -> cmp::Ordering {
|
||||
wcscasecmp_fuzzy(lhs, rhs, std::convert::identity)
|
||||
}
|
||||
|
||||
/// Compare two wide strings in a case-insensitive fashion, after mapping each char through `canonicalize`.
|
||||
pub fn wcscasecmp_fuzzy(lhs: &wstr, rhs: &wstr, canonicalize: fn(char) -> char) -> cmp::Ordering {
|
||||
use std::char::ToLowercase;
|
||||
use widestring::utfstr::CharsUtf32;
|
||||
|
||||
@@ -151,12 +155,12 @@ pub fn wcscasecmp(lhs: &wstr, rhs: &wstr) -> cmp::Ordering {
|
||||
/// `char::to_lowercase()` returns an iterator of chars and we sometimes need to cmp the last
|
||||
/// char of one char's `to_lowercase()` with the first char of the other char's
|
||||
/// `to_lowercase()`. This makes that possible.
|
||||
struct ToLowerBuffer<'a> {
|
||||
struct ToLowerBuffer<'a, Canonicalize: Fn(char) -> char> {
|
||||
current: ToLowercase,
|
||||
chars: CharsUtf32<'a>,
|
||||
chars: std::iter::Map<CharsUtf32<'a>, Canonicalize>,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for ToLowerBuffer<'a> {
|
||||
impl<'a, Canonicalize: Fn(char) -> char> Iterator for ToLowerBuffer<'a, Canonicalize> {
|
||||
type Item = char;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
@@ -169,9 +173,8 @@ fn next(&mut self) -> Option<Self::Item> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ToLowerBuffer<'a> {
|
||||
pub fn from(w: &'a wstr) -> Self {
|
||||
let mut chars = w.chars();
|
||||
impl<'a, Canonicalize: Fn(char) -> char> ToLowerBuffer<'a, Canonicalize> {
|
||||
pub fn new(mut chars: std::iter::Map<CharsUtf32<'a>, Canonicalize>) -> Self {
|
||||
Self {
|
||||
current: chars.next().map(|c| c.to_lowercase()).unwrap_or_else(|| {
|
||||
let mut empty = 'a'.to_lowercase();
|
||||
@@ -184,8 +187,8 @@ pub fn from(w: &'a wstr) -> Self {
|
||||
}
|
||||
}
|
||||
|
||||
let lhs = ToLowerBuffer::from(lhs);
|
||||
let rhs = ToLowerBuffer::from(rhs);
|
||||
let lhs = ToLowerBuffer::new(lhs.chars().map(canonicalize));
|
||||
let rhs = ToLowerBuffer::new(rhs.chars().map(canonicalize));
|
||||
lhs.cmp(rhs)
|
||||
}
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
use crate::common::{get_ellipsis_char, get_ellipsis_str};
|
||||
use crate::expand::INTERNAL_SEPARATOR;
|
||||
use crate::fallback::{fish_wcwidth, wcscasecmp};
|
||||
use crate::fallback::{fish_wcwidth, wcscasecmp, wcscasecmp_fuzzy};
|
||||
use crate::flog::FLOGF;
|
||||
use crate::libc::MB_CUR_MAX;
|
||||
use crate::wchar::{decode_byte_from_char, prelude::*};
|
||||
@@ -95,21 +95,25 @@ pub fn ifind(haystack: &wstr, needle: &wstr, fuzzy: bool /* = false */) -> Optio
|
||||
.as_char_slice()
|
||||
.windows(needle.len())
|
||||
.position(|window| {
|
||||
for (l, r) in window.iter().zip(needle.chars()) {
|
||||
// In fuzzy matching treat `-` and `_` as equal (#3584).
|
||||
if fuzzy && ['-', '_'].contains(l) && ['-', '_'].contains(&r) {
|
||||
continue;
|
||||
}
|
||||
// TODO Decide what to do for different lengths.
|
||||
let l = l.to_lowercase();
|
||||
let r = r.to_lowercase();
|
||||
for (l, r) in l.zip(r) {
|
||||
if l != r {
|
||||
return false;
|
||||
}
|
||||
// In fuzzy matching treat `-` and `_` as equal (#3584).
|
||||
fn fuzzy_canonicalize(c: char) -> char {
|
||||
if c == '_' {
|
||||
'-'
|
||||
} else {
|
||||
c
|
||||
}
|
||||
}
|
||||
true
|
||||
|
||||
wcscasecmp_fuzzy(
|
||||
wstr::from_char_slice(window),
|
||||
needle,
|
||||
if fuzzy {
|
||||
fuzzy_canonicalize
|
||||
} else {
|
||||
std::convert::identity
|
||||
},
|
||||
)
|
||||
.is_eq()
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user