diff --git a/fish-rust/build.rs b/fish-rust/build.rs index e69e9c674..3e6ef0ccb 100644 --- a/fish-rust/build.rs +++ b/fish-rust/build.rs @@ -87,6 +87,7 @@ fn main() { "fish-rust/src/trace.rs", "fish-rust/src/util.rs", "fish-rust/src/wait_handle.rs", + "fish-rust/src/wildcard.rs", ]; cxx_build::bridges(&source_files) .flag_if_supported("-std=c++11") diff --git a/fish-rust/src/builtins/string/match.rs b/fish-rust/src/builtins/string/match.rs index 64495224f..62c268cbb 100644 --- a/fish-rust/src/builtins/string/match.rs +++ b/fish-rust/src/builtins/string/match.rs @@ -6,8 +6,7 @@ use crate::env::{EnvMode, EnvVar, EnvVarFlags}; use crate::flog::FLOG; use crate::parse_util::parse_util_unescape_wildcards; -use crate::wchar_ffi::WCharToFFI; -use crate::wildcard::ANY_STRING; +use crate::wildcard::{wildcard_match, ANY_STRING}; #[derive(Default)] pub struct Match<'args> { @@ -380,13 +379,11 @@ fn new(pattern: &'args wstr, opts: &'opts Match<'args>) -> Self { fn report_matches(&mut self, arg: &wstr, streams: &mut io_streams_t) { // Note: --all is a no-op for glob matching since the pattern is always matched // against the entire argument. - use crate::ffi::wildcard_match; - let subject = match self.opts.ignore_case { true => arg.to_lowercase(), false => arg.to_owned(), }; - let m = wildcard_match(&subject.to_ffi(), &self.pattern.to_ffi(), false); + let m = wildcard_match(subject, &self.pattern, false); if m ^ self.opts.invert_match { self.total_matched += 1; diff --git a/fish-rust/src/ffi.rs b/fish-rust/src/ffi.rs index 7e3cdf9d2..fdfa22ce7 100644 --- a/fish-rust/src/ffi.rs +++ b/fish-rust/src/ffi.rs @@ -45,7 +45,6 @@ #include "reader.h" #include "screen.h" #include "tokenizer.h" - #include "wildcard.h" #include "wutil.h" // We need to block these types so when exposing C++ to Rust. 
@@ -92,7 +91,6 @@ generate!("log_extra_to_flog_file") - generate!("wildcard_match") generate!("wgettext_ptr") generate!("block_t") diff --git a/fish-rust/src/flog.rs b/fish-rust/src/flog.rs index 4e1f3ddeb..14e209979 100644 --- a/fish-rust/src/flog.rs +++ b/fish-rust/src/flog.rs @@ -1,7 +1,6 @@ -use crate::ffi::wildcard_match; use crate::parse_util::parse_util_unescape_wildcards; use crate::wchar::prelude::*; -use crate::wchar_ffi::WCharToFFI; +use crate::wildcard::wildcard_match; use libc::c_int; use std::io::Write; use std::os::unix::prelude::*; @@ -212,7 +211,7 @@ fn apply_one_wildcard(wc_esc: &wstr, sense: bool) { let wc = parse_util_unescape_wildcards(wc_esc); let mut match_found = false; for cat in categories::all_categories() { - if wildcard_match(&cat.name.to_ffi(), &wc.to_ffi(), false) { + if wildcard_match(cat.name, &wc, false) { cat.enabled.store(sense, Ordering::Relaxed); match_found = true; } diff --git a/fish-rust/src/wildcard.rs b/fish-rust/src/wildcard.rs index 00b773743..1f6608f72 100644 --- a/fish-rust/src/wildcard.rs +++ b/fish-rust/src/wildcard.rs @@ -1,6 +1,14 @@ // Enumeration of all wildcard types. -use crate::common::{char_offset, WILDCARD_RESERVED_BASE}; +use cxx::CxxWString; + +use crate::common::{ + char_offset, unescape_string, UnescapeFlags, UnescapeStringStyle, WILDCARD_RESERVED_BASE, +}; +use crate::future_feature_flags::feature_test; +use crate::future_feature_flags::FeatureFlag; +use crate::wchar::prelude::*; +use crate::wchar_ffi::WCharFromFFI; /// Character representing any character except '/' (slash). pub const ANY_CHAR: char = char_offset(WILDCARD_RESERVED_BASE, 0); @@ -11,3 +19,210 @@ /// This is a special pseudo-char that is not used other than to mark the /// end of the the special characters so we can sanity check the enum range. pub const ANY_SENTINEL: char = char_offset(WILDCARD_RESERVED_BASE, 3); + +/// Expand the wildcard by matching against the filesystem. 
+///
+/// wildcard_expand works by dividing the wildcard into segments at each directory boundary. Each
+/// segment is processed separately. All except the last segment are handled by matching the
+/// wildcard segment against all subdirectories of matching directories, and recursively calling
+/// wildcard_expand for matches. On the last segment, matching is made to any file, and all matches
+/// are inserted to the list.
+///
+/// If wildcard_expand encounters any errors (such as insufficient privileges) during matching, no
+/// error messages will be printed and wildcard_expand will continue the matching process.
+///
+/// \param wc The wildcard string
+/// \param working_directory The working directory
+/// \param flags flags for the search. Can be any combination of for_completions and
+/// executables_only
+/// \param output The list in which to put the output
+/// (The expansion function is still a commented-out stub below; this enum is its result type.)
+enum WildcardResult {
+    /// The wildcard did not match.
+    NoMatch,
+    /// The wildcard did match.
+    Match,
+    /// Expansion was cancelled (e.g. control-C).
+    Cancel,
+    /// Expansion produced too many results.
+    Overflow,
+}
+
+// pub fn wildcard_expand_string(wc: &wstr, working_directory: &wstr, flags: ExpandFlags, cancel_checker: impl CancelChecker, output: *mut completion_receiver_t) -> WildcardResult {
+//     todo!()
+// }
+
+/// Test whether the given wildcard matches the string. Does not perform any I/O.
+///
+/// `name` is the string to test; `pattern` is the wildcard, expressed with the ANY_CHAR /
+/// ANY_STRING / ANY_STRING_RECURSIVE special characters. If `leading_dots_fail_to_match` is set,
+/// names with a leading dot are treated as hidden: ANY_STRING will not match them at position 0,
+/// and "." / ".." only match an identical pattern. NOTE(review): ANY_CHAR rejects a leading dot
+/// even when the flag is unset; this mirrors the C++ code being replaced - confirm it is intended.
+/// Returns true if the whole of `name` is matched by the whole of `pattern`.
+#[must_use]
+pub fn wildcard_match(
+    name: impl AsRef<wstr>,
+    pattern: impl AsRef<wstr>,
+    leading_dots_fail_to_match: bool,
+) -> bool {
+    let name = name.as_ref();
+    let pattern = pattern.as_ref();
+    // Hackish fix for issue #270. Prevent wildcards from matching . or .., but we must still allow
+    // literal matches.
+    if leading_dots_fail_to_match && (name == L!(".") || name == L!("..")) {
+        // The string is '.' or '..' so the only possible match is an exact match.
+        return name == pattern;
+    }
+
+    // Near Linear implementation as proposed here https://research.swtch.com/glob.
+    let mut px = 0;
+    let mut nx = 0;
+    let mut next_px = 0;
+    let mut next_nx = 0;
+
+    while px < pattern.len() || nx < name.len() {
+        if px < pattern.len() {
+            match pattern.char_at(px) {
+                ANY_STRING | ANY_STRING_RECURSIVE => {
+                    // Ignore hidden file
+                    if leading_dots_fail_to_match && nx == 0 && name.char_at(0) == '.' {
+                        return false;
+                    }
+
+                    // Common case of * at the end. In that case we can early out since we know it will
+                    // match.
+                    if px == pattern.len() - 1 {
+                        return true;
+                    }
+
+                    // Try to match at nx.
+                    // If that doesn't work out, restart at nx+1 next.
+                    next_px = px;
+                    next_nx = nx + 1;
+                    px += 1;
+                    continue;
+                }
+                ANY_CHAR => {
+                    if nx < name.len() {
+                        if nx == 0 && name.char_at(nx) == '.' {
+                            return false;
+                        }
+
+                        px += 1;
+                        nx += 1;
+                        continue;
+                    }
+                }
+                c => {
+                    // ordinary char
+                    if nx < name.len() && name.char_at(nx) == c {
+                        px += 1;
+                        nx += 1;
+                        continue;
+                    }
+                }
+            }
+        }
+
+        // Mismatch. Maybe restart.
+        if 0 < next_nx && next_nx <= name.len() {
+            px = next_px;
+            nx = next_nx;
+            continue;
+        }
+        return false;
+    }
+    // Matched all of pattern to all of name. Success.
+    true
+}
+
+/// Check if the string has any unescaped wildcards (e.g. ANY_STRING).
+#[inline]
+#[must_use]
+fn wildcard_has_internal(s: impl AsRef<wstr>) -> bool {
+    s.as_ref()
+        .chars()
+        .any(|c| matches!(c, ANY_STRING | ANY_STRING_RECURSIVE | ANY_CHAR))
+}
+
+/// Check if the specified string contains wildcards (e.g. *).
+#[must_use]
+fn wildcard_has(s: impl AsRef<wstr>) -> bool {
+    let s = s.as_ref();
+    let qmark_is_wild = !feature_test(FeatureFlag::qmark_noglob);
+    // Fast check for * or ?; if none there is no wildcard.
+    // Note some strings contain * but no wildcards, e.g. if they are quoted.
+    if !s.contains('*') && (!qmark_is_wild || !s.contains('?')) {
+        return false;
+    }
+    let unescaped =
+        unescape_string(s, UnescapeStringStyle::Script(UnescapeFlags::SPECIAL)).unwrap_or_default();
+    wildcard_has_internal(unescaped)
+}
+
+// Test wildcard completion.
+// pub fn wildcard_complete(str: &wstr, wc: &wstr, desc_func: impl Fn(&wstr) -> WString, out: *mut completion_receiver_t, expand_flags: ExpandFlags, flags: CompleteFlags) -> bool {
+//     todo!()
+// }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::future_feature_flags::scoped_test;
+
+    #[test]
+    fn test_wildcards() {
+        assert!(!wildcard_has(L!("")));
+        assert!(wildcard_has(L!("*")));
+        assert!(!wildcard_has(L!("\\*")));
+
+        let wc = L!("foo*bar");
+        assert!(wildcard_has(wc) && !wildcard_has_internal(wc));
+        let wc = unescape_string(wc, UnescapeStringStyle::Script(UnescapeFlags::SPECIAL)).unwrap();
+        assert!(!wildcard_has(&wc) && wildcard_has_internal(&wc));
+
+        scoped_test(FeatureFlag::qmark_noglob, false, || {
+            assert!(wildcard_has(L!("?")));
+            assert!(!wildcard_has(L!("\\?")));
+        });
+
+        scoped_test(FeatureFlag::qmark_noglob, true, || {
+            assert!(!wildcard_has(L!("?")));
+            assert!(!wildcard_has(L!("\\?")));
+        });
+    }
+}
+
+#[cxx::bridge]
+mod ffi {
+    extern "C++" {
+        include!("wutil.h");
+    }
+    extern "Rust" {
+        #[cxx_name = "wildcard_match_ffi"]
+        fn wildcard_match_ffi(
+            str: &CxxWString,
+            wc: &CxxWString,
+            leading_dots_fail_to_match: bool,
+        ) -> bool;
+
+        #[cxx_name = "wildcard_has"]
+        fn wildcard_has_ffi(s: &CxxWString) -> bool;
+
+        #[cxx_name = "wildcard_has_internal"]
+        fn wildcard_has_internal_ffi(s: &CxxWString) -> bool;
+    }
+}
+
+fn wildcard_match_ffi(str: &CxxWString, wc: &CxxWString, leading_dots_fail_to_match: bool) -> bool {
+    wildcard_match(str.from_ffi(), wc.from_ffi(), leading_dots_fail_to_match)
+}
+
+fn wildcard_has_ffi(s: &CxxWString) -> bool {
+    wildcard_has(s.from_ffi())
+}
+
+fn wildcard_has_internal_ffi(s: &CxxWString) -> bool {
+
wildcard_has_internal(s.from_ffi()) +} diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 7c4d86c7b..722aeb8af 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -2444,28 +2444,6 @@ static void test_autoload() { autoload_tester_t::run_test(); } -static void test_wildcards() { - say(L"Testing wildcards"); - do_test(!wildcard_has(L"")); - do_test(wildcard_has(L"*")); - do_test(!wildcard_has(L"\\*")); - do_test(!wildcard_has(L"\"*\"")); - - wcstring wc = L"foo*bar"; - do_test(wildcard_has(wc) && !wildcard_has_internal(wc)); - unescape_string_in_place(&wc, UNESCAPE_SPECIAL); - do_test(!wildcard_has(wc) && wildcard_has_internal(wc)); - - auto saved = feature_test(feature_flag_t::qmark_noglob); - feature_set(feature_flag_t::qmark_noglob, false); - do_test(wildcard_has(L"?")); - do_test(!wildcard_has(L"\\?")); - feature_set(feature_flag_t::qmark_noglob, true); - do_test(!wildcard_has(L"?")); - do_test(!wildcard_has(L"\\?")); - feature_set(feature_flag_t::qmark_noglob, saved); -} - static void test_complete() { say(L"Testing complete"); @@ -5595,7 +5573,6 @@ static const test_t s_tests[]{ {TEST_GROUP("word_motion"), test_word_motion}, {TEST_GROUP("is_potential_path"), test_is_potential_path}, {TEST_GROUP("colors"), test_colors}, - {TEST_GROUP("wildcard"), test_wildcards}, {TEST_GROUP("complete"), test_complete}, {TEST_GROUP("autoload"), test_autoload}, {TEST_GROUP("input"), test_input}, diff --git a/src/wildcard.cpp b/src/wildcard.cpp index e88cdb220..95fb2d82b 100644 --- a/src/wildcard.cpp +++ b/src/wildcard.cpp @@ -39,101 +39,6 @@ static size_t wildcard_find(const wchar_t *wc) { return wcstring::npos; } -bool wildcard_has_internal(const wchar_t *s, size_t len) { - for (size_t i = 0; i < len; i++) { - wchar_t c = s[i]; - if (c == ANY_CHAR || c == ANY_STRING || c == ANY_STRING_RECURSIVE) { - return true; - } - } - return false; -} - -// Note we want to handle embedded nulls (issue #1631). 
-bool wildcard_has(const wchar_t *str, size_t len) { - assert(str != nullptr); - const wchar_t *end = str + len; - bool qmark_is_wild = !feature_test(feature_flag_t::qmark_noglob); - // Fast check for * or ?; if none there is no wildcard. - // Note some strings contain * but no wildcards, e.g. if they are quoted. - if (std::find(str, end, L'*') == end && (!qmark_is_wild || std::find(str, end, L'?') == end)) { - return false; - } - wcstring unescaped; - if (auto tmp = unescape_string(wcstring{str, len}, UNESCAPE_SPECIAL)) { - unescaped = *tmp; - } - return wildcard_has_internal(unescaped); -} - -/// Check whether the string str matches the wildcard string wc. -/// -/// \param str String to be matched. -/// \param wc The wildcard. -/// \param leading_dots_fail_to_match Whether files beginning with dots should not be matched -/// against wildcards. -bool wildcard_match(const wcstring &str, const wcstring &wc, bool leading_dots_fail_to_match) { - // Hackish fix for issue #270. Prevent wildcards from matching . or .., but we must still allow - // literal matches. - if (leading_dots_fail_to_match && (str == L"." || str == L"..")) { - // The string is '.' or '..' so the only possible match is an exact match. - return str == wc; - } - - // Near Linear implementation as proposed here https://research.swtch.com/glob. - const wchar_t *const str_start = str.c_str(); - const wchar_t *wc_x = wc.c_str(); - const wchar_t *str_x = str_start; - const wchar_t *restart_wc_x = wc.c_str(); - const wchar_t *restart_str_x = str_start; - - bool restart_is_out_of_str = false; - for (; *wc_x != 0 || *str_x != 0;) { - bool is_first = (str_x == str_start); - if (*wc_x != 0) { - if (*wc_x == ANY_STRING || *wc_x == ANY_STRING_RECURSIVE) { - // Ignore hidden file - if (leading_dots_fail_to_match && is_first && str[0] == L'.') { - return false; - } - - // Common case of * at the end. In that case we can early out since we know it will - // match. 
- if (wc_x[1] == L'\0') { - return true; - } - // Try to match at str_x. - // If that doesn't work out, restart at str_x+1 next. - restart_wc_x = wc_x; - restart_str_x = str_x + 1; - restart_is_out_of_str = (*str_x == 0); - wc_x++; - continue; - } else if (*wc_x == ANY_CHAR && *str_x != 0) { - if (is_first && *str_x == L'.') { - return false; - } - wc_x++; - str_x++; - continue; - } else if (*str_x != 0 && *str_x == *wc_x) { // ordinary character - wc_x++; - str_x++; - continue; - } - } - // Mismatch. Maybe restart. - if (restart_str_x != str.c_str() && !restart_is_out_of_str) { - wc_x = restart_wc_x; - str_x = restart_str_x; - continue; - } - return false; - } - // Matched all of pattern to all of name. Success. - return true; -} - // This does something horrible refactored from an even more horrible function. static wcstring resolve_description(const wcstring &full_completion, wcstring *completion, expand_flags_t expand_flags, diff --git a/src/wildcard.h b/src/wildcard.h index 55a76fb96..d60c37fb6 100644 --- a/src/wildcard.h +++ b/src/wildcard.h @@ -75,6 +75,11 @@ wildcard_result_t wildcard_expand_string(const wcstring &wc, const wcstring &wor const cancel_checker_t &cancel_checker, completion_receiver_t *output); +#if INCLUDE_RUST_HEADERS + +#include "wildcard.rs.h" + +#else /// Test whether the given wildcard matches the string. Does not perform any I/O. /// /// \param str The string to test @@ -83,18 +88,24 @@ wildcard_result_t wildcard_expand_string(const wcstring &wc, const wcstring &wor /// files and are not matched /// /// \return true if the wildcard matched -bool wildcard_match(const wcstring &str, const wcstring &wc, - bool leading_dots_fail_to_match = false); +bool wildcard_match_ffi(const wcstring &str, const wcstring &wc, bool leading_dots_fail_to_match); // Check if the string has any unescaped wildcards (e.g. ANY_STRING). 
-bool wildcard_has_internal(const wchar_t *s, size_t len);
-inline bool wildcard_has_internal(const wcstring &s) {
-    return wildcard_has_internal(s.c_str(), s.size());
-}
+bool wildcard_has_internal(const wcstring &s);
 
 /// Check if the specified string contains wildcards (e.g. *).
-bool wildcard_has(const wchar_t *s, size_t len);
-inline bool wildcard_has(const wcstring &s) { return wildcard_has(s.c_str(), s.size()); }
+bool wildcard_has(const wcstring &s);
+
+#endif  // INCLUDE_RUST_HEADERS
+
+inline bool wildcard_match(const wcstring &str, const wcstring &wc,
+                           bool leading_dots_fail_to_match = false) {
+    return wildcard_match_ffi(str, wc, leading_dots_fail_to_match);
+}
+
+inline bool wildcard_has(const wchar_t *s, size_t len) {
+    return wildcard_has(wcstring(s, len));
+}
 
 /// Test wildcard completion.
 wildcard_result_t wildcard_complete(const wcstring &str, const wchar_t *wc,