embed-data: naturalize generated_completions-hack to prevent repeated autoloading

As reported in https://github.com/fish-shell/fish-shell/issues/11535#issuecomment-2915440295,
a command like "complete -C'git '" gets progressively slower every time.

A diff of "fish_trace=1" output shows that each completion invocation added
more stuff to the global "__fish_git_aliases", resulting in output like:

	--> for s db
	...
	--> for s db s db
	...
	--> for s db s db s db

Reproducer:

	$ touch ~/.local/share/fish/generated_completions/foo.fish
	$ cargo install --path . --debug
	$ ~/.cargo/bin/fish -d autoload -c 'function foo; end; for i in 1 2; complete -C"foo "; end'

We redundantly autoload the embedded file, which, by definition doesn't change.
This happens when
1. the "embed-data" feature is enabled (default for "cargo install")
2. there is a completion file in generated_completions

which triggers a hack to give precedence to "embedded:completions/git.fish"
over "generated_completions/git.fish".

Since we always load all file-based files first, we clobber the autoload cache
("self.autoloaded_files") with the mtime of the generated completion file, even
if we're never gonna return it.  This makes the embed-data logic think that
the asset has changed (which is impossible! But of course it is possible that
"fish_complete_path" changes and causes a need to load "embedded:git.fish").

Fix that by treating embedded files more like normal files.  This is closer
to historical behavior where $__fish_data_dir/{functions,completions}
are normal directories. Seems like this should fix a false negative in
"has_attempted_autoload" which feels useful.

Add a dead test, I guess. It's not run with feature="embed-data" yet. In
future we should test this in CI.
This commit is contained in:
Johannes Altmanninger
2025-05-28 13:31:24 +02:00
parent f88f7e8dd6
commit 6737872fb7
2 changed files with 118 additions and 61 deletions

View File

@@ -9,7 +9,9 @@
#[cfg(test)]
use crate::tests::prelude::*;
use crate::wchar::{wstr, WString, L};
use crate::wchar_ext::WExt;
use crate::wutil::{file_id_for_path, FileId, INVALID_FILE_ID};
use crate::FLOGF;
use lru::LruCache;
#[cfg(feature = "embed-data")]
use rust_embed::RustEmbed;
@@ -56,6 +58,12 @@ pub fn has_asset(_cmd: &str) -> bool {
false
}
#[derive(Clone, Copy, Eq, PartialEq)]
enum AssetDir {
Functions,
Completions,
}
pub enum AutoloadPath {
#[cfg(feature = "embed-data")]
Embedded(String),
@@ -63,7 +71,7 @@ pub enum AutoloadPath {
}
enum AutoloadResult {
Path(WString),
Path(AutoloadPath),
Loaded,
Pending,
None,
@@ -97,9 +105,6 @@ pub fn new(env_var_name: &'static wstr) -> Self {
/// mark_autoload_finished() with the same command. Note this does not actually execute any
/// code; it is the caller's responsibility to load the file.
pub fn resolve_command(&mut self, cmd: &wstr, env: &dyn Environment) -> Option<AutoloadPath> {
use crate::wchar_ext::WExt;
let mut possible_path = None;
match self.resolve_command_impl(
cmd,
env.get(self.env_var_name)
@@ -108,54 +113,19 @@ pub fn resolve_command(&mut self, cmd: &wstr, env: &dyn Environment) -> Option<A
.unwrap_or_default(),
) {
AutoloadResult::Path(path) => {
crate::FLOGF!(autoload, "Loading from path with var: %ls", path);
// HACK: Ignore generated_completions until we tried the embedded assets
if path
.find("/generated_completions/".chars().collect::<Vec<_>>())
.is_some()
{
possible_path = Some(path);
} else {
return Some(AutoloadPath::Path(path));
}
}
AutoloadResult::Loaded => return None,
AutoloadResult::Pending => return None,
AutoloadResult::None => (),
};
// HACK: In cargo tests, this used to never load functions
// It will hang for reasons unrelated to this.
if cfg!(test) {
return None;
}
#[cfg(feature = "embed-data")]
{
let narrow = wcs2string(cmd);
let cmdstr = std::str::from_utf8(&narrow).ok()?;
let p = if self.env_var_name == "fish_function_path" {
"functions/".to_owned() + cmdstr + ".fish"
} else if self.env_var_name == "fish_complete_path" {
"completions/".to_owned() + cmdstr + ".fish"
} else {
return None;
};
if has_asset(&p) {
if let Some(loaded_file) = self.autoloaded_files.get(cmd) {
if *loaded_file == INVALID_FILE_ID {
// The file has been autoloaded and is unchanged.
return None;
match &path {
#[cfg(feature = "embed-data")]
AutoloadPath::Embedded(_) => {
FLOGF!(autoload, "Embedded: %ls", cmd);
}
AutoloadPath::Path(path) => {
FLOGF!(autoload, "Loading from path with var: %ls", path)
}
}
self.current_autoloading.insert(cmd.to_owned());
self.autoloaded_files
.insert(cmd.to_owned(), INVALID_FILE_ID);
crate::FLOGF!(autoload, "Embedded: %ls", cmd);
return Some(AutoloadPath::Embedded(p));
Some(path)
}
AutoloadResult::Loaded | AutoloadResult::Pending | AutoloadResult::None => None,
}
possible_path.map(AutoloadPath::Path)
}
/// Helper to actually perform an autoload.
@@ -176,7 +146,7 @@ pub fn perform_autoload(path: &AutoloadPath, parser: &Parser) {
AutoloadPath::Embedded(name) => {
use crate::common::str2wcstring;
use std::sync::Arc;
crate::FLOGF!(autoload, "Loading embedded: %ls", name);
FLOGF!(autoload, "Loading embedded: %ls", name);
let emfile = Asset::get(name).expect("Embedded file not found");
let src = str2wcstring(&emfile.data);
let mut widename = L!("embedded:").to_owned();
@@ -203,7 +173,9 @@ pub fn autoload_in_progress(&self, cmd: &wstr) -> bool {
/// Return whether a command could potentially be autoloaded.
/// This does not actually mark the command as being autoloaded.
pub fn can_autoload(&mut self, cmd: &wstr) -> bool {
self.cache.check(cmd, true /* allow stale */).is_some()
self.cache
.check(self.env_var_name, cmd, true /* allow stale */)
.is_some()
}
/// Return whether autoloading has been attempted for a command.
@@ -253,13 +225,19 @@ fn resolve_command_impl(&mut self, cmd: &wstr, paths: &[WString]) -> AutoloadRes
}
// Do we have an entry to load?
let Some(file) = self.cache.check(cmd, false) else {
let Some(file) = self.cache.check(self.env_var_name, cmd, false) else {
return AutoloadResult::None;
};
let file_id = match &file {
AutoloadableFileInfo::FileInfo(file) => &file.file_id,
#[cfg(feature = "embed-data")]
AutoloadableFileInfo::EmbeddedPath(_) => &INVALID_FILE_ID,
};
// Is this file the same as what we previously autoloaded?
if let Some(loaded_file) = self.autoloaded_files.get(cmd) {
if *loaded_file == file.file_id {
if *loaded_file == *file_id {
// The file has been autoloaded and is unchanged.
return AutoloadResult::Loaded;
}
@@ -267,8 +245,13 @@ fn resolve_command_impl(&mut self, cmd: &wstr, paths: &[WString]) -> AutoloadRes
// We're going to (tell our caller to) autoload this command.
self.current_autoloading.insert(cmd.to_owned());
self.autoloaded_files.insert(cmd.to_owned(), file.file_id);
AutoloadResult::Path(file.path)
self.autoloaded_files
.insert(cmd.to_owned(), file_id.clone());
AutoloadResult::Path(match file {
AutoloadableFileInfo::FileInfo(path) => AutoloadPath::Path(path.path),
#[cfg(feature = "embed-data")]
AutoloadableFileInfo::EmbeddedPath(path) => AutoloadPath::Embedded(path),
})
}
}
@@ -277,19 +260,28 @@ fn resolve_command_impl(&mut self, cmd: &wstr, paths: &[WString]) -> AutoloadRes
/// Represents a file that we might want to autoload.
#[derive(Clone)]
struct AutoloadableFile {
struct FileInfo {
/// The path to the file.
path: WString,
/// The metadata for the file.
file_id: FileId,
}
#[derive(Clone)]
enum AutoloadableFileInfo {
/// An on-disk file.
FileInfo(FileInfo),
/// An embedded file.
#[cfg(feature = "embed-data")]
EmbeddedPath(String),
}
// A timestamp is a monotonic point in time.
type Timestamp = time::Instant;
type MissesLruCache = LruCache<WString, Timestamp>;
struct KnownFile {
file: AutoloadableFile,
file: AutoloadableFileInfo,
last_checked: Timestamp,
}
@@ -337,10 +329,32 @@ fn dirs(&self) -> &[WString] {
/// Check if a command `cmd` can be loaded.
/// If `allow_stale` is true, allow stale entries; otherwise discard them.
/// This returns an autoloadable file, or none() if there is no such file.
fn check(&mut self, cmd: &wstr, allow_stale: bool) -> Option<AutoloadableFile> {
fn check(
&mut self,
env_var_name: &wstr,
cmd: &wstr,
allow_stale: bool,
) -> Option<AutoloadableFileInfo> {
let asset_dir = cfg!(feature = "embed-data").then_some(()).and_then(|()| {
if env_var_name == "fish_function_path" {
Some(AssetDir::Functions)
} else if cfg!(feature = "embed-data") && env_var_name == "fish_complete_path" {
Some(AssetDir::Completions)
} else {
None
}
});
// Check hits.
if let Some(value) = self.known_files.get(cmd) {
if allow_stale || Self::is_fresh(value.last_checked, Self::current_timestamp()) {
#[cfg(feature = "embed-data")]
let embedded = matches!(value.file, AutoloadableFileInfo::EmbeddedPath(_));
#[cfg(not(feature = "embed-data"))]
let embedded = false;
if allow_stale
|| embedded
|| Self::is_fresh(value.last_checked, Self::current_timestamp())
{
// Re-use this cached hit.
return Some(value.file.clone());
}
@@ -359,7 +373,10 @@ fn check(&mut self, cmd: &wstr, allow_stale: bool) -> Option<AutoloadableFile> {
}
// We couldn't satisfy this request from the cache. Hit the disk.
let file = self.locate_file(cmd);
let file = self
.locate_file(cmd, asset_dir, false)
.or_else(|| self.locate_asset(cmd, asset_dir?))
.or_else(|| self.locate_file(cmd, asset_dir, true));
if let Some(file) = file.as_ref() {
let old_value = self.known_files.insert(
cmd.to_owned(),
@@ -402,7 +419,12 @@ fn is_fresh(then: Timestamp, now: Timestamp) -> bool {
/// Attempt to find an autoloadable file by searching our path list for a given command.
/// Return the file, or none() if none.
fn locate_file(&self, cmd: &wstr) -> Option<AutoloadableFile> {
fn locate_file(
&self,
cmd: &wstr,
asset_dir: Option<AssetDir>,
want_generated_completions: bool,
) -> Option<AutoloadableFileInfo> {
// If the command is empty or starts with NULL (i.e. is empty as a path)
// we'd try to source the *directory*, which exists.
// So instead ignore these here.
@@ -415,6 +437,12 @@ fn locate_file(&self, cmd: &wstr) -> Option<AutoloadableFile> {
// Re-use the storage for path.
let mut path;
for dir in self.dirs() {
if asset_dir == Some(AssetDir::Completions) {
// HACK: Ignore generated_completions until we tried the embedded assets
if dir.ends_with(L!("/generated_completions")) != want_generated_completions {
continue;
}
}
// Construct the path as dir/cmd.fish
path = dir.to_owned();
path.push('/');
@@ -424,11 +452,31 @@ fn locate_file(&self, cmd: &wstr) -> Option<AutoloadableFile> {
let file_id = file_id_for_path(&path);
if file_id != INVALID_FILE_ID {
// Found it.
return Some(AutoloadableFile { path, file_id });
return Some(AutoloadableFileInfo::FileInfo(FileInfo { path, file_id }));
}
}
None
}
#[cfg(not(feature = "embed-data"))]
fn locate_asset(&self, _cmd: &wstr, _asset_dir: AssetDir) -> Option<AutoloadableFileInfo> {
None
}
#[cfg(feature = "embed-data")]
fn locate_asset(&self, cmd: &wstr, asset_dir: AssetDir) -> Option<AutoloadableFileInfo> {
// HACK: In cargo tests, this used to never load functions
// It will hang for reasons unrelated to this.
if cfg!(test) {
return None;
}
let narrow = wcs2string(cmd);
let cmdstr = std::str::from_utf8(&narrow).ok()?;
let p = match asset_dir {
AssetDir::Functions => "functions/".to_owned() + cmdstr + ".fish",
AssetDir::Completions => "completions/".to_owned() + cmdstr + ".fish",
};
has_asset(&p).then_some(AutoloadableFileInfo::EmbeddedPath(p))
}
}
#[test]

View File

@@ -0,0 +1,9 @@
#RUN: %fish %s
function foo; end
mkdir $__fish_config_dir/completions
echo >$__fish_config_dir/completions/foo.fish "echo auto-loading foo.fish"
complete -C "foo " >/dev/null
# CHECK: auto-loading foo.fish
complete -C "foo " >/dev/null
# already loaded