From 6737872fb7d4b9e7ad778e447a54fe85cf7e6e06 Mon Sep 17 00:00:00 2001 From: Johannes Altmanninger Date: Wed, 28 May 2025 13:31:24 +0200 Subject: [PATCH] embed-data: naturalize generated_completions-hack to prevent repeated autoloading As reported in https://github.com/fish-shell/fish-shell/issues/11535#issuecomment-2915440295, a command like "complete -C'git '" gets progressively slower every time. A diff of "fish_trace=1" output shows that each completion invocation added more stuff to the global "__fish_git_aliases", resulting in output like: --> for s db ... --> for s db s db ... --> for s db s db s db Reproducer: $ touch ~/.local/share/fish/generated_completions/foo.fish $ cargo install --path . --debug $ ~/.cargo/bin/fish -d autoload -c 'function foo; end; for i in 1 2; complete -C"foo "; end' We redundantly autoload the embedded file, which, by definition doesn't change. This happens when 1. the "embed-data" feature is enabled (default for "cargo install") 2. there is a completion file in generated_completions which triggers a hack to give precedence to "embedded:completions/git.fish" over "generated_completions/git.fish". Since we always load all file-based files first, we clobber the autoload cache ("self.autoloaded_files") with the mtime of the generated completion file, even if we're never gonna return it. This makes the embed-data logic think that the asset has changed (which is impossible! But of course it is possible that "fish_complete_path" changes and causes a need to load "embedded:git.fish"). Fix that by treating embedded files more like normal files. This is closer to historical behavior where $__fish_data_dir/{functions,completions} are normal directories. Seems like this should fix a false negative in "has_attempted_autoload" which feels useful. Add a dead test, I guess. It's not run with feature="embed-data" yet. In future we should test this in CI. --- src/autoload.rs | 170 ++++++++++++++++++++++++------------- tests/checks/autoload.fish | 9 ++ 2 files changed, 118 insertions(+), 61 deletions(-) create mode 100644 tests/checks/autoload.fish diff --git a/src/autoload.rs b/src/autoload.rs index d6a0c6308..82a488508 100644 --- a/src/autoload.rs +++ b/src/autoload.rs @@ -9,7 +9,9 @@ #[cfg(test)] use crate::tests::prelude::*; use crate::wchar::{wstr, WString, L}; +use crate::wchar_ext::WExt; use crate::wutil::{file_id_for_path, FileId, INVALID_FILE_ID}; +use crate::FLOGF; use lru::LruCache; #[cfg(feature = "embed-data")] use rust_embed::RustEmbed; @@ -56,6 +58,12 @@ pub fn has_asset(_cmd: &str) -> bool { false } +#[derive(Clone, Copy, Eq, PartialEq)] +enum AssetDir { + Functions, + Completions, +} + pub enum AutoloadPath { #[cfg(feature = "embed-data")] Embedded(String), @@ -63,7 +71,7 @@ pub enum AutoloadPath { } enum AutoloadResult { - Path(WString), + Path(AutoloadPath), Loaded, Pending, None, @@ -97,9 +105,6 @@ pub fn new(env_var_name: &'static wstr) -> Self { /// mark_autoload_finished() with the same command. Note this does not actually execute any /// code; it is the caller's responsibility to load the file. pub fn resolve_command(&mut self, cmd: &wstr, env: &dyn Environment) -> Option { - use crate::wchar_ext::WExt; - - let mut possible_path = None; match self.resolve_command_impl( cmd, env.get(self.env_var_name) @@ -108,54 +113,19 @@ pub fn resolve_command(&mut self, cmd: &wstr, env: &dyn Environment) -> Option { - crate::FLOGF!(autoload, "Loading from path with var: %ls", path); - // HACK: Ignore generated_completions until we tried the embedded assets - if path - .find("/generated_completions/".chars().collect::>()) - .is_some() - { - possible_path = Some(path); - } else { - return Some(AutoloadPath::Path(path)); - } - } - AutoloadResult::Loaded => return None, - AutoloadResult::Pending => return None, - AutoloadResult::None => (), - }; - - // HACK: In cargo tests, this used to never load functions - // It will hang for reasons unrelated to this. - if cfg!(test) { - return None; - } - - #[cfg(feature = "embed-data")] - { - let narrow = wcs2string(cmd); - let cmdstr = std::str::from_utf8(&narrow).ok()?; - let p = if self.env_var_name == "fish_function_path" { - "functions/".to_owned() + cmdstr + ".fish" - } else if self.env_var_name == "fish_complete_path" { - "completions/".to_owned() + cmdstr + ".fish" - } else { - return None; - }; - if has_asset(&p) { - if let Some(loaded_file) = self.autoloaded_files.get(cmd) { - if *loaded_file == INVALID_FILE_ID { - // The file has been autoloaded and is unchanged. - return None; + match &path { + #[cfg(feature = "embed-data")] + AutoloadPath::Embedded(_) => { + FLOGF!(autoload, "Embedded: %ls", cmd); + } + AutoloadPath::Path(path) => { + FLOGF!(autoload, "Loading from path with var: %ls", path) } } - self.current_autoloading.insert(cmd.to_owned()); - self.autoloaded_files - .insert(cmd.to_owned(), INVALID_FILE_ID); - crate::FLOGF!(autoload, "Embedded: %ls", cmd); - return Some(AutoloadPath::Embedded(p)); + Some(path) } + AutoloadResult::Loaded | AutoloadResult::Pending | AutoloadResult::None => None, } - possible_path.map(AutoloadPath::Path) } /// Helper to actually perform an autoload. @@ -176,7 +146,7 @@ pub fn perform_autoload(path: &AutoloadPath, parser: &Parser) { AutoloadPath::Embedded(name) => { use crate::common::str2wcstring; use std::sync::Arc; - crate::FLOGF!(autoload, "Loading embedded: %ls", name); + FLOGF!(autoload, "Loading embedded: %ls", name); let emfile = Asset::get(name).expect("Embedded file not found"); let src = str2wcstring(&emfile.data); let mut widename = L!("embedded:").to_owned(); @@ -203,7 +173,9 @@ pub fn autoload_in_progress(&self, cmd: &wstr) -> bool { /// Return whether a command could potentially be autoloaded. /// This does not actually mark the command as being autoloaded. pub fn can_autoload(&mut self, cmd: &wstr) -> bool { - self.cache.check(cmd, true /* allow stale */).is_some() + self.cache + .check(self.env_var_name, cmd, true /* allow stale */) + .is_some() } /// Return whether autoloading has been attempted for a command. @@ -253,13 +225,19 @@ fn resolve_command_impl(&mut self, cmd: &wstr, paths: &[WString]) -> AutoloadRes } // Do we have an entry to load? - let Some(file) = self.cache.check(cmd, false) else { + let Some(file) = self.cache.check(self.env_var_name, cmd, false) else { return AutoloadResult::None; }; + let file_id = match &file { + AutoloadableFileInfo::FileInfo(file) => &file.file_id, + #[cfg(feature = "embed-data")] + AutoloadableFileInfo::EmbeddedPath(_) => &INVALID_FILE_ID, + }; + // Is this file the same as what we previously autoloaded? if let Some(loaded_file) = self.autoloaded_files.get(cmd) { - if *loaded_file == file.file_id { + if *loaded_file == *file_id { // The file has been autoloaded and is unchanged. return AutoloadResult::Loaded; } @@ -267,8 +245,13 @@ fn resolve_command_impl(&mut self, cmd: &wstr, paths: &[WString]) -> AutoloadRes // We're going to (tell our caller to) autoload this command. self.current_autoloading.insert(cmd.to_owned()); - self.autoloaded_files.insert(cmd.to_owned(), file.file_id); - AutoloadResult::Path(file.path) + self.autoloaded_files + .insert(cmd.to_owned(), file_id.clone()); + AutoloadResult::Path(match file { + AutoloadableFileInfo::FileInfo(path) => AutoloadPath::Path(path.path), + #[cfg(feature = "embed-data")] + AutoloadableFileInfo::EmbeddedPath(path) => AutoloadPath::Embedded(path), + }) } } @@ -277,19 +260,28 @@ fn resolve_command_impl(&mut self, cmd: &wstr, paths: &[WString]) -> AutoloadRes /// Represents a file that we might want to autoload. #[derive(Clone)] -struct AutoloadableFile { +struct FileInfo { /// The path to the file. path: WString, /// The metadata for the file. file_id: FileId, } +#[derive(Clone)] +enum AutoloadableFileInfo { + /// An on-disk file. + FileInfo(FileInfo), + /// An embedded file. + #[cfg(feature = "embed-data")] + EmbeddedPath(String), +} + // A timestamp is a monotonic point in time. type Timestamp = time::Instant; type MissesLruCache = LruCache; struct KnownFile { - file: AutoloadableFile, + file: AutoloadableFileInfo, last_checked: Timestamp, } @@ -337,10 +329,32 @@ fn dirs(&self) -> &[WString] { /// Check if a command `cmd` can be loaded. /// If `allow_stale` is true, allow stale entries; otherwise discard them. /// This returns an autoloadable file, or none() if there is no such file. - fn check(&mut self, cmd: &wstr, allow_stale: bool) -> Option { + fn check( + &mut self, + env_var_name: &wstr, + cmd: &wstr, + allow_stale: bool, + ) -> Option { + let asset_dir = cfg!(feature = "embed-data").then_some(()).and_then(|()| { + if env_var_name == "fish_function_path" { + Some(AssetDir::Functions) + } else if cfg!(feature = "embed-data") && env_var_name == "fish_complete_path" { + Some(AssetDir::Completions) + } else { + None + } + }); + // Check hits. if let Some(value) = self.known_files.get(cmd) { - if allow_stale || Self::is_fresh(value.last_checked, Self::current_timestamp()) { + #[cfg(feature = "embed-data")] + let embedded = matches!(value.file, AutoloadableFileInfo::EmbeddedPath(_)); + #[cfg(not(feature = "embed-data"))] + let embedded = false; + if allow_stale + || embedded + || Self::is_fresh(value.last_checked, Self::current_timestamp()) + { // Re-use this cached hit. return Some(value.file.clone()); } @@ -359,7 +373,10 @@ fn check(&mut self, cmd: &wstr, allow_stale: bool) -> Option { } // We couldn't satisfy this request from the cache. Hit the disk. - let file = self.locate_file(cmd); + let file = self + .locate_file(cmd, asset_dir, false) + .or_else(|| self.locate_asset(cmd, asset_dir?)) + .or_else(|| self.locate_file(cmd, asset_dir, true)); if let Some(file) = file.as_ref() { let old_value = self.known_files.insert( cmd.to_owned(), @@ -402,7 +419,12 @@ fn is_fresh(then: Timestamp, now: Timestamp) -> bool { /// Attempt to find an autoloadable file by searching our path list for a given command. /// Return the file, or none() if none. - fn locate_file(&self, cmd: &wstr) -> Option { + fn locate_file( + &self, + cmd: &wstr, + asset_dir: Option, + want_generated_completions: bool, + ) -> Option { // If the command is empty or starts with NULL (i.e. is empty as a path) // we'd try to source the *directory*, which exists. // So instead ignore these here. @@ -415,6 +437,12 @@ fn locate_file(&self, cmd: &wstr) -> Option { // Re-use the storage for path. let mut path; for dir in self.dirs() { + if asset_dir == Some(AssetDir::Completions) { + // HACK: Ignore generated_completions until we tried the embedded assets + if dir.ends_with(L!("/generated_completions")) != want_generated_completions { + continue; + } + } // Construct the path as dir/cmd.fish path = dir.to_owned(); path.push('/'); @@ -424,11 +452,31 @@ fn locate_file(&self, cmd: &wstr) -> Option { let file_id = file_id_for_path(&path); if file_id != INVALID_FILE_ID { // Found it. - return Some(AutoloadableFile { path, file_id }); + return Some(AutoloadableFileInfo::FileInfo(FileInfo { path, file_id })); } } None } + + #[cfg(not(feature = "embed-data"))] + fn locate_asset(&self, _cmd: &wstr, _asset_dir: AssetDir) -> Option { + None + } + #[cfg(feature = "embed-data")] + fn locate_asset(&self, cmd: &wstr, asset_dir: AssetDir) -> Option { + // HACK: In cargo tests, this used to never load functions + // It will hang for reasons unrelated to this. + if cfg!(test) { + return None; + } + let narrow = wcs2string(cmd); + let cmdstr = std::str::from_utf8(&narrow).ok()?; + let p = match asset_dir { + AssetDir::Functions => "functions/".to_owned() + cmdstr + ".fish", + AssetDir::Completions => "completions/".to_owned() + cmdstr + ".fish", + }; + has_asset(&p).then_some(AutoloadableFileInfo::EmbeddedPath(p)) + } } #[test] diff --git a/tests/checks/autoload.fish b/tests/checks/autoload.fish new file mode 100644 index 000000000..cf98218e8 --- /dev/null +++ b/tests/checks/autoload.fish @@ -0,0 +1,9 @@ +#RUN: %fish %s + +function foo; end +mkdir $__fish_config_dir/completions +echo >$__fish_config_dir/completions/foo.fish "echo auto-loading foo.fish" +complete -C "foo " >/dev/null +# CHECK: auto-loading foo.fish +complete -C "foo " >/dev/null +# already loaded