Teach autoloader to read embedded files

This will load the functions and completions from inside of the fish
binary.

That means it's no longer necessary to *install* a self-installable
build for basic functionality.

The functions/completions will be loaded *last*, so they are still
overridable via a file on disk (with the exception of
generated_completions, which we only use as a fallback if no
completions exist).

It still needs to extract files that are to be used by other tools,
including the man pages, fish_config.py, and the man page completion
generator.

The remaining issues:

- It no longer prompts to install, but if you try `fish_config`
  after this, it will fail to open the tool,
  and it would be easy to forget to update those files.
  So: When and how should this remind you that these files need to be extracted?
  Do we want e.g. a builtin that checks the version file (`status installed`)?
  This could then be run by `fish_config` and `help` and tell you to run `fish --install`.
- `builtin --help` will fail - we could read these things from inside,
  but we'd need groff for that.
  Do we want to pre-process these and put them in the builtins themselves?
  Do we want to print these on demand in `__fish_print_help` to groff?
- Which directories still need to be defined? Does $__fish_data_dir need to keep existing?

Technically this *could* be the main distribution method. Maybe we
could let distro packages skip the embedded documentation and external
files,
but keep the functions/completions in the binary.
This commit is contained in:
Fabian Boehm
2025-01-31 20:04:47 +01:00
parent 66d7c00ba3
commit 2a3a23f53d
5 changed files with 235 additions and 93 deletions

View File

@@ -1,5 +1,7 @@
//! The classes responsible for autoloading functions and completions.
#[cfg(feature = "installable")]
use crate::common::wcs2string;
use crate::common::{escape, ScopeGuard};
use crate::env::Environment;
use crate::io::IoChain;
@@ -9,6 +11,8 @@
use crate::wchar::{wstr, WString, L};
use crate::wutil::{file_id_for_path, FileId, INVALID_FILE_ID};
use lru::LruCache;
#[cfg(feature = "installable")]
use rust_embed::RustEmbed;
use std::collections::{HashMap, HashSet};
use std::num::NonZeroUsize;
use std::time;
@@ -37,6 +41,44 @@ pub struct Autoload {
cache: Box<AutoloadFileCache>,
}
/// The contents of the `share/` folder, embedded through the `RustEmbed` derive.
/// Only compiled when the `installable` feature is enabled.
#[cfg(feature = "installable")]
#[derive(RustEmbed)]
#[folder = "share/"]
pub struct Asset;
/// Report whether an embedded asset named `cmd` exists.
///
/// With the `installable` feature enabled this looks the name up in [`Asset`];
/// without it there are no embedded assets, so the answer is always `false`.
pub fn has_asset(cmd: &str) -> bool {
    #[cfg(feature = "installable")]
    {
        Asset::get(cmd).is_some()
    }
    #[cfg(not(feature = "installable"))]
    {
        // Nothing is embedded in this configuration; the name is irrelevant.
        let _ = cmd;
        false
    }
}
/// Where an autoloadable script should be read from.
pub enum AutoloadPath {
/// Name of an asset embedded in the binary, of the form
/// `functions/<cmd>.fish` or `completions/<cmd>.fish`.
#[cfg(feature = "installable")]
Embedded(String),
/// Path of a file that gets sourced.
Path(WString),
}
/// Outcome of looking a command up in the autoload paths.
enum AutoloadResult {
/// A file was found and the caller should load it.
Path(WString),
/// The file has already been autoloaded and is unchanged.
Loaded,
/// The command is currently in the process of being autoloaded.
Pending,
/// There is no entry to load for this command.
None,
}
#[cfg(test)]
impl AutoloadResult {
    /// True only for the `None` variant.
    fn is_none(&self) -> bool {
        matches!(self, Self::None)
    }

    /// True for every variant other than `None` (`Path`, `Loaded` and `Pending`).
    fn is_some(&self) -> bool {
        !matches!(self, Self::None)
    }
}
impl Autoload {
/// Construct an autoloader that loads from the paths given by `env_var_name`.
pub fn new(env_var_name: &'static wstr) -> Self {
@@ -54,26 +96,102 @@ pub fn new(env_var_name: &'static wstr) -> Self {
/// After returning a path, the command is marked in-progress until the caller calls
/// mark_autoload_finished() with the same command. Note this does not actually execute any
/// code; it is the caller's responsibility to load the file.
pub fn resolve_command(&mut self, cmd: &wstr, env: &dyn Environment) -> Option<WString> {
pub fn resolve_command(&mut self, cmd: &wstr, env: &dyn Environment) -> Option<AutoloadPath> {
use crate::wchar_ext::WExt;
// A hit under generated_completions is parked here and only returned at the
// very end, if no embedded asset was returned first.
let mut possible_path = None;
if let Some(var) = env.get(self.env_var_name) {
self.resolve_command_impl(cmd, var.as_list())
match self.resolve_command_impl(cmd, var.as_list()) {
AutoloadResult::Path(path) => {
crate::FLOGF!(autoload, "Loading from path with var: %ls", path);
// HACK: Ignore generated_completions until we tried the embedded assets
if path
.find("/generated_completions/".chars().collect::<Vec<_>>())
.is_some()
{
possible_path = Some(path);
} else {
return Some(AutoloadPath::Path(path));
}
}
// Already loaded or currently loading: there is nothing for the caller to do.
AutoloadResult::Loaded => return None,
AutoloadResult::Pending => return None,
// No file found: continue to the embedded-asset lookup below.
AutoloadResult::None => (),
};
} else {
// The path variable is not set: resolve against an empty list of paths.
self.resolve_command_impl(cmd, &[])
match self.resolve_command_impl(cmd, &[]) {
AutoloadResult::Path(path) => {
crate::FLOGF!(autoload, "Loading from path with var: %ls", path);
return Some(AutoloadPath::Path(path));
}
AutoloadResult::Loaded => return None,
AutoloadResult::Pending => return None,
AutoloadResult::None => (),
};
}
// HACK: In cargo tests, this used to never load functions
// It will hang for reasons unrelated to this.
// NOTE(review): in test builds this early return makes everything below unreachable,
// including the deferred generated_completions fallback.
#[cfg(test)]
return None;
#[cfg(feature = "installable")]
{
// Embedded asset names are `str`s; a command name that is not valid UTF-8
// makes this function return None here.
let narrow = wcs2string(cmd);
let cmdstr = std::str::from_utf8(&narrow).ok()?;
// Pick the embedded subdirectory from the variable this autoloader watches.
// Any other variable returns None.
let p = if self.env_var_name == "fish_function_path" {
"functions/".to_owned() + cmdstr + ".fish"
} else if self.env_var_name == "fish_complete_path" {
"completions/".to_owned() + cmdstr + ".fish"
} else {
return None;
};
if has_asset(&p) {
// INVALID_FILE_ID is the file id recorded below for commands loaded from
// an embedded asset, so finding it here means the asset was already handed out.
if let Some(loaded_file) = self.autoloaded_files.get(cmd) {
if *loaded_file == INVALID_FILE_ID {
// The file has been autoloaded and is unchanged.
return None;
}
}
// Same bookkeeping as for on-disk files: mark the command as in progress
// and remember what was loaded.
self.current_autoloading.insert(cmd.to_owned());
self.autoloaded_files
.insert(cmd.to_owned(), INVALID_FILE_ID);
crate::FLOGF!(autoload, "Embedded: %ls", cmd);
return Some(AutoloadPath::Embedded(p));
}
}
// No embedded asset was returned: fall back to the parked generated_completions path, if any.
possible_path.map(AutoloadPath::Path)
}
/// Helper to actually perform an autoload.
/// This is a static function because it executes fish script, and so must be called without
/// holding any particular locks.
pub fn perform_autoload(path: &wstr, parser: &Parser) {
pub fn perform_autoload(path: &AutoloadPath, parser: &Parser) {
// We do the useful part of what exec_subshell does ourselves
// - we source the file.
// We don't create a buffer or check ifs or create a read_limit
let script_source = L!("source ").to_owned() + &escape(path)[..];
// Capture the current statuses; the guard's closure writes them back
// (presumably when the guard is dropped at the end of this function — confirm against ScopeGuard).
let prev_statuses = parser.get_last_statuses();
let _put_back = ScopeGuard::new((), |()| parser.set_last_statuses(prev_statuses));
parser.eval(&script_source, &IoChain::new());
match path {
// A file path: evaluate `source <escaped path>`.
AutoloadPath::Path(p) => {
let script_source = L!("source ").to_owned() + &escape(p)[..];
parser.eval(&script_source, &IoChain::new());
}
// An embedded asset: evaluate its contents directly.
#[cfg(feature = "installable")]
AutoloadPath::Embedded(name) => {
use crate::common::str2wcstring;
use std::sync::Arc;
crate::FLOGF!(autoload, "Loading embedded: %ls", name);
// Panics if the asset does not exist.
let emfile = Asset::get(name).expect("Embedded file not found");
let src = str2wcstring(&emfile.data);
// The script is evaluated under the name "embedded:<asset name>".
let mut widename = L!("embedded:").to_owned();
widename.push_str(name);
let ret = parser.eval_file_wstr(src, Arc::new(widename), &IoChain::new(), None);
// An evaluation error is printed via eprintf! and not propagated to the caller.
if let Err(msg) = ret {
eprintf!("%ls", msg);
}
}
}
}
/// Mark that a command previously returned from path_to_autoload is finished autoloading.
@@ -126,10 +244,11 @@ fn invalidate_cache(&mut self) {
/// Like resolve_autoload(), but accepts the paths directly.
/// This is exposed for testing.
fn resolve_command_impl(&mut self, cmd: &wstr, paths: &[WString]) -> Option<WString> {
fn resolve_command_impl(&mut self, cmd: &wstr, paths: &[WString]) -> AutoloadResult {
use AutoloadResult;
// Are we currently in the process of autoloading this?
if self.current_autoloading.contains(cmd) {
return None;
return AutoloadResult::Pending;
}
// Check to see if our paths have changed. If so, replace our cache.
@@ -140,20 +259,22 @@ fn resolve_command_impl(&mut self, cmd: &wstr, paths: &[WString]) -> Option<WStr
}
// Do we have an entry to load?
let file = self.cache.check(cmd, false)?;
let Some(file) = self.cache.check(cmd, false) else {
return AutoloadResult::None;
};
// Is this file the same as what we previously autoloaded?
if let Some(loaded_file) = self.autoloaded_files.get(cmd) {
if *loaded_file == file.file_id {
// The file has been autoloaded and is unchanged.
return None;
return AutoloadResult::Loaded;
}
}
// We're going to (tell our caller to) autoload this command.
self.current_autoloading.insert(cmd.to_owned());
self.autoloaded_files.insert(cmd.to_owned(), file.file_id);
Some(file.path)
AutoloadResult::Path(file.path)
}
}
@@ -364,36 +485,60 @@ fn touch_file(path: &wstr) {
autoload.invalidate_cache();
assert!(!autoload.autoload_in_progress(L!("file1")));
assert!(autoload.resolve_command_impl(L!("file1"), paths).is_some());
assert!(autoload.resolve_command_impl(L!("file1"), paths).is_none());
assert!(matches!(
autoload.resolve_command_impl(L!("file1"), paths),
AutoloadResult::Path(_)
));
assert!(matches!(
autoload.resolve_command_impl(L!("file1"), paths),
AutoloadResult::Pending
));
assert!(autoload.autoload_in_progress(L!("file1")));
assert!(autoload.get_autoloaded_commands() == vec![L!("file1")]);
autoload.mark_autoload_finished(L!("file1"));
assert!(!autoload.autoload_in_progress(L!("file1")));
assert!(autoload.get_autoloaded_commands() == vec![L!("file1")]);
assert!(autoload.resolve_command_impl(L!("file1"), paths).is_none());
assert!(matches!(
autoload.resolve_command_impl(L!("file1"), paths),
AutoloadResult::Loaded
));
assert!(autoload
.resolve_command_impl(L!("nothing"), paths)
.is_none());
assert!(autoload.resolve_command_impl(L!("file2"), paths).is_some());
assert!(autoload.resolve_command_impl(L!("file2"), paths).is_none());
assert!(matches!(
autoload.resolve_command_impl(L!("file2"), paths),
AutoloadResult::Pending
));
autoload.mark_autoload_finished(L!("file2"));
assert!(autoload.resolve_command_impl(L!("file2"), paths).is_none());
assert!(matches!(
autoload.resolve_command_impl(L!("file2"), paths),
AutoloadResult::Loaded
));
assert!((autoload.get_autoloaded_commands() == vec![L!("file1"), L!("file2")]));
autoload.clear();
assert!(autoload.resolve_command_impl(L!("file1"), paths).is_some());
autoload.mark_autoload_finished(L!("file1"));
assert!(autoload.resolve_command_impl(L!("file1"), paths).is_none());
assert!(matches!(
autoload.resolve_command_impl(L!("file1"), paths),
AutoloadResult::Loaded
));
assert!(autoload
.resolve_command_impl(L!("nothing"), paths)
.is_none());
assert!(autoload.resolve_command_impl(L!("file2"), paths).is_some());
assert!(autoload.resolve_command_impl(L!("file2"), paths).is_none());
assert!(matches!(
autoload.resolve_command_impl(L!("file2"), paths),
AutoloadResult::Pending
));
autoload.mark_autoload_finished(L!("file2"));
assert!(autoload.resolve_command_impl(L!("file1"), paths).is_none());
assert!(matches!(
autoload.resolve_command_impl(L!("file1"), paths),
AutoloadResult::Loaded
));
touch_file(&sprintf!("%ls/file1.fish", p1));
autoload.invalidate_cache();
assert!(autoload.resolve_command_impl(L!("file1"), paths).is_some());

View File

@@ -67,6 +67,8 @@
wchar::prelude::*,
wutil::waccess,
};
#[cfg(feature = "installable")]
use rust_embed::RustEmbed;
use std::ffi::{CString, OsStr, OsString};
use std::fs::File;
use std::os::unix::prelude::*;
@@ -78,16 +80,17 @@
use std::sync::Arc;
use std::{env, ops::ControlFlow};
/// The contents of the `share/` folder, embedded through the `RustEmbed` derive.
/// Only compiled when the `installable` feature is enabled.
#[cfg(feature = "installable")]
#[derive(RustEmbed)]
#[folder = "share/"]
struct Asset;
#[cfg(feature = "installable")]
// Disable for clippy because otherwise it would require sphinx
#[cfg(not(clippy))]
fn install(confirm: bool, dir: &Path) -> bool {
use rust_embed::RustEmbed;
#[derive(RustEmbed)]
#[folder = "share/"]
struct Asset;
#[derive(RustEmbed)]
#[folder = "target/man/man1"]
#[prefix = "man/man1/"]
@@ -139,6 +142,14 @@ fn install(confirm: bool, dir: &Path) -> bool {
// be a part of the function signature.
fn extract_embed<T: rust_embed::Embed>(dir: &Path) -> bool {
for file in T::iter() {
// These are read as embedded on demand.
// (yes it's a hack the docs don't match this)
if file.starts_with("functions/")
|| file.starts_with("completions/")
|| file == "config.fish"
{
continue;
}
let path = dir.join(file.as_ref());
let Ok(_) = fs::create_dir_all(path.parent().unwrap()) else {
eprintln!(
@@ -298,56 +309,37 @@ fn check_version_file(paths: &ConfigPaths, datapath: &wstr) -> Option<bool> {
/// Parse init files. exec_path is the path of fish executable as determined by argv[0].
fn read_init(parser: &Parser, paths: &ConfigPaths) {
let datapath = str2wcstring(paths.data.as_os_str().as_bytes());
#[cfg(feature = "installable")]
{
// If the version file is non-existent or out of date,
// we try to install automatically, but only if we're interactive.
// If we're not interactive, we still print an error later on pointing to `--install` if they don't exist,
// but don't complain if they're merely out-of-date.
// We do specifically check for a tty because we want to read input to confirm.
let v = check_version_file(paths, &datapath);
#[allow(clippy::incompatible_msrv)]
if v.is_none_or(|x| !x) && is_interactive_session() && isatty(libc::STDIN_FILENO) {
if v.is_none() {
FLOG!(
warning,
"Fish's asset files are missing. Trying to install them."
);
} else {
FLOG!(
warning,
"Fish's asset files are out of date. Trying to install them."
);
}
install(true, &PathBuf::from(wcs2osstring(&datapath)));
// We try to go on if installation failed (or was rejected) here
// If the assets are missing, we will trigger a later error,
// if they are outdated, things will probably (tm) work somewhat.
let emfile = Asset::get("config.fish").expect("Embedded file not found");
let src = str2wcstring(&emfile.data);
parser.libdata_mut().within_fish_init = true;
let fname: Arc<WString> = Arc::new(L!("embedded:config.fish").into());
let ret = parser.eval_file_wstr(src, fname, &IoChain::new(), None);
parser.libdata_mut().within_fish_init = false;
if let Err(msg) = ret {
eprintf!("%ls", msg);
}
}
if !source_config_in_directory(parser, &datapath) {
// If we cannot read share/config.fish, our internal configuration,
// something is wrong.
// That also means that our functions won't be found,
// and so any config we get would almost certainly be broken.
let escaped_pathname = escape(&datapath);
FLOGF!(
error,
"Fish cannot find its asset files in '%ls'.\n\
Refusing to read configuration because of this.",
escaped_pathname,
);
#[cfg(feature = "installable")]
FLOG!(
error,
"If you installed via `cargo install`, please run `fish --install` and restart fish."
);
return;
#[cfg(not(feature = "installable"))]
{
let datapath = str2wcstring(paths.data.as_os_str().as_bytes());
if !source_config_in_directory(parser, &datapath) {
// If we cannot read share/config.fish, our internal configuration,
// something is wrong.
// That also means that our functions won't be found,
// and so any config we get would almost certainly be broken.
let escaped_pathname = escape(&datapath);
FLOGF!(
error,
"Fish cannot find its asset files in '%ls'.\n\
Refusing to read configuration because of this.",
escaped_pathname,
);
return;
}
}
source_config_in_directory(parser, &str2wcstring(paths.sysconf.as_os_str().as_bytes()));
// We need to get the configuration directory before we can source the user configuration file.

View File

@@ -140,6 +140,7 @@ pub fn all_categories() -> Vec<&'static category_t> {
(abbrs, "abbrs", "Abbreviation expansion");
(refcell, "refcell", "Refcell dynamic borrowing");
(autoload, "autoload", "autoloading");
);
}

View File

@@ -112,7 +112,7 @@ fn allow_autoload(&self, name: &wstr) -> bool {
/// loaded. Note this executes fish script code.
pub fn load(name: &wstr, parser: &Parser) -> bool {
parser.assert_can_execute();
let mut path_to_autoload: Option<WString> = None;
let mut path_to_autoload: Option<_> = None;
// Note we can't autoload while holding the funcset lock.
// Lock around a local region.
{
@@ -239,7 +239,17 @@ pub fn exists_no_autoload(cmd: &wstr) -> bool {
let mut funcset = FUNCTION_SET.lock().unwrap();
// Check if we either have the function, or it could be autoloaded.
let tombstoned = funcset.autoload_tombstones.contains(cmd);
funcset.funcs.contains_key(cmd) || (!tombstoned && funcset.autoloader.can_autoload(cmd))
if funcset.funcs.contains_key(cmd) || (!tombstoned && funcset.autoloader.can_autoload(cmd)) {
return true;
}
let narrow = crate::common::wcs2string(cmd);
if let Ok(cmdstr) = std::str::from_utf8(&narrow) {
let cmd = "functions/".to_owned() + cmdstr + ".fish";
crate::autoload::has_asset(&cmd)
} else {
false
}
}
/// Remove the function with the specified name.
@@ -327,6 +337,21 @@ pub fn get_names(get_hidden: bool, vars: &dyn Environment) -> Vec<WString> {
}
names.insert(name.clone());
}
#[cfg(feature = "installable")]
for name in crate::autoload::Asset::iter() {
let Some(bname) = name.strip_prefix("functions/") else {
continue;
};
if !get_hidden && (bname.is_empty() || bname.starts_with('_')) {
continue;
};
let Some(fname) = bname.strip_suffix(".fish") else {
continue;
};
names.insert(fname.into());
}
names.into_iter().collect()
}

View File

@@ -138,31 +138,10 @@ touch $tmpdir/actual_function.fish
begin
set -l fish_function_path $tmpdir
functions
functions | grep '^$\|directory\|^actual_function$'
end
# CHECK: actual_function
# these are functions defined either in this file,
# or eagerly in share/config.fish.
# I don't know of a way to ignore just them.
#
# CHECK: bg
# CHECK: disown
# CHECK: fg
# CHECK: fish_command_not_found
# CHECK: fish_prompt
# CHECK: fish_prompt_event
# CHECK: fish_sigtrap_handler
# CHECK: fish_title
# CHECK: frob
# CHECK: kill
# CHECK: name1
# CHECK: name1a
# CHECK: name3
# CHECK: name3a
# CHECK: t
# CHECK: wait
rm -r $tmpdir
functions -e foo