From 1d159277c658ae17b0710257062888f48b87a4a0 Mon Sep 17 00:00:00 2001
From: Mahmoud Al-Qudsi <mqudsi@neosmart.net>
Date: Sat, 1 Jun 2024 11:15:19 -0500
Subject: [PATCH] Move Block fields specific to certain block types to separate
 enum

This has a few advantages,
* We now statically assert that all fields used by a particular block type are
  correctly initialized (i.e. you can't assign the function name but forget to
  assign its arguments),
* Conversely, we can match directly on `BlockData` and be guaranteed that the
  fields we want to access are initialized and present,
* We reduce the number of assertions, effectively "unwrapping" only once based
  off the block type instead of each time we try to access a conditional field,
* We reduce the size of the `Block` struct by coalescing fields that cannot
  co-exist, bringing it down from 104 bytes to 88 bytes.

It would be nice to make all of `Block` itself an enum, but it currently
requires `Copy` and we take advantage of that to copy it around everywhere.
Putting these fields directly in `Block` directly would mean a lot more memory
traffic just checking block types.
---
 src/parse_execution.rs |  17 +-
 src/parser.rs          |  89 ++++--
 src/wutil/mod.rs2      | 692 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 757 insertions(+), 41 deletions(-)
 create mode 100644 src/wutil/mod.rs2
diff --git a/src/parse_execution.rs b/src/parse_execution.rs
index 06da5979d..a56b84f98 100644
--- a/src/parse_execution.rs
+++ b/src/parse_execution.rs
@@ -33,7 +33,7 @@
 };
 use crate::parse_tree::{NodeRef, ParsedSourceRef};
 use crate::parse_util::parse_util_unescape_wildcards;
-use crate::parser::{Block, BlockId, BlockType, LoopStatus, Parser, ProfileItem};
+use crate::parser::{Block, BlockData, BlockId, BlockType, LoopStatus, Parser, ProfileItem};
 use crate::parser_keywords::parser_keywords_is_subcommand;
 use crate::path::{path_as_implicit_cd, path_try_get_path};
 use crate::pointer::ConstPointer;
@@ -441,19 +441,22 @@ fn infinite_recursive_statement_in_job_list<'b>(
         // not inside a block in that function call. If, in the future, the rules for what
         // block scopes are pushed on function invocation changes, then this check will break.
         let parser = ctx.parser();
-        let parent = {
+        let parent;
+        let parent_fn_name = {
             match (parser.block_at_index(0), parser.block_at_index(1)) {
-                (Some(current), Some(parent))
-                    if current.typ() == BlockType::top && parent.is_function_call() =>
-                {
-                    parent
+                (Some(current), Some(p)) if current.typ() == BlockType::top => {
+                    parent = p;
+                    match &parent.data {
+                        BlockData::Function { name, .. } => name,
+                        _ => return None,
+                    }
                 }
                 _ => return None, // Not within function call.
             }
         };
 
         // Get the function name of the immediate block.
-        let forbidden_function_name = &parent.function_name;
+        let forbidden_function_name = parent_fn_name;
 
         // Get the first job in the job list.
         let jc = &jobs.get(0)?;
diff --git a/src/parser.rs b/src/parser.rs
index af223599e..0453f87f6 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -43,27 +43,37 @@
     Arc,
 };
 
+#[derive(Default)]
+pub enum BlockData {
+    #[default]
+    None,
+    Function {
+        /// Name of the function
+        name: WString,
+        /// Arguments passed to the function
+        args: Vec<WString>,
+    },
+    Event(Rc<Event>),
+    Source {
+        /// The sourced file
+        file: Arc<WString>,
+    },
+}
+
 /// block_t represents a block of commands.
 #[derive(Default)]
 pub struct Block {
-    /// If this is a function block, the function name. Otherwise empty.
-    pub function_name: WString,
+    /// [`BlockType`]-specific data.
+    ///
+    /// Ideally this would be coalesced into `BlockType` but we currently require that to implement
+    /// `Copy`, so now we have to awkwardly deal with a discriminant stored separately.
+    pub data: BlockData,
 
     /// List of event blocks.
     pub event_blocks: u64,
 
-    /// If this is a function block, the function args. Otherwise empty.
-    pub function_args: Vec<WString>,
-
-    /// Name of file that created this block.
-    pub src_filename: Option<FilenameRef>,
-
-    // If this is an event block, the event. Otherwise ignored.
-    pub event: Option<Rc<Event>>,
-
-    // If this is a source block, the source'd file, interned.
-    // Otherwise nothing.
-    pub sourced_file: Option<FilenameRef>,
+    /// Name of the file that created this block
+    pub src_filename: Option<Arc<WString>>,
 
     /// Line number where this block was created.
     pub src_lineno: Option<usize>,
@@ -133,18 +143,17 @@ pub fn if_block() -> Block {
     }
     pub fn event_block(event: Event) -> Block {
         let mut b = Block::new(BlockType::event);
-        b.event = Some(Rc::new(event));
+        b.data = BlockData::Event(Rc::new(event));
         b
     }
     pub fn function_block(name: WString, args: Vec<WString>, shadows: bool) -> Block {
         let mut b = Block::new(BlockType::function_call { shadows });
-        b.function_name = name;
-        b.function_args = args;
+        b.data = BlockData::Function { name, args };
         b
     }
     pub fn source_block(src: FilenameRef) -> Block {
         let mut b = Block::new(BlockType::source);
-        b.sourced_file = Some(src);
+        b.data = BlockData::Source { file: src };
         b
     }
     pub fn for_block() -> Block {
@@ -843,8 +852,10 @@ pub fn get_function_name(&self, level: i32) -> Option<WString> {
                 .iter()
                 .rev()
                 .skip_while(|b| b.typ() != BlockType::breakpoint)
-                .find(|b| b.is_function_call())
-                .map(|b| b.function_name.clone());
+                .find_map(|b| match &b.data {
+                    BlockData::Function { name, .. } => Some(name.clone()),
+                    _ => None,
+                });
         }
 
         self.blocks()
@@ -863,7 +874,12 @@ pub fn get_function_name(&self, level: i32) -> Option<WString> {
             })
             .skip_while(|(_, l)| *l != level)
             .inspect(|(b, _)| debug_assert!(b.is_function_call()))
-            .map(|(b, _)| b.function_name.clone())
+            .map(|(b, _)| {
+                let BlockData::Function { name, .. } = &b.data else {
+                    unreachable!()
+                };
+                name.clone()
+            })
             .next()
     }
 
@@ -994,15 +1010,12 @@ pub fn current_filename(&self) -> Option<FilenameRef> {
         self.blocks()
             .iter()
             .rev()
-            .find_map(|b| {
-                if b.is_function_call() {
-                    function::get_props(&b.function_name)
-                        .and_then(|props| props.definition_file.clone())
-                } else if b.typ() == BlockType::source {
-                    b.sourced_file.clone()
-                } else {
-                    None
+            .find_map(|b| match &b.data {
+                BlockData::Function { name, .. } => {
+                    function::get_props(name).and_then(|props| props.definition_file.clone())
                 }
+                BlockData::Source { file } => Some(file.clone()),
+                _ => None,
             })
             .or_else(|| self.libdata().current_filename.clone())
     }
@@ -1120,10 +1133,13 @@ fn append_block_description_to_stack_trace(parser: &Parser, b: &Block, trace: &m
     let mut print_call_site = false;
     match b.typ() {
         BlockType::function_call { .. } => {
-            trace.push_utfstr(&wgettext_fmt!("in function '%ls'", &b.function_name));
+            let BlockData::Function { name, args, .. } = &b.data else {
+                unreachable!()
+            };
+            trace.push_utfstr(&wgettext_fmt!("in function '%ls'", name));
             // Print arguments on the same line.
             let mut args_str = WString::new();
-            for arg in &b.function_args {
+            for arg in args {
                 if !args_str.is_empty() {
                     args_str.push(' ');
                 }
@@ -1150,7 +1166,10 @@ fn append_block_description_to_stack_trace(parser: &Parser, b: &Block, trace: &m
             print_call_site = true;
         }
         BlockType::source => {
-            let source_dest = b.sourced_file.as_ref().unwrap();
+            let BlockData::Source { file, .. } = &b.data else {
+                unreachable!()
+            };
+            let source_dest = file;
             trace.push_utfstr(&wgettext_fmt!(
                 "from sourcing file %ls\n",
                 &user_presentable_path(source_dest, parser.vars())
@@ -1158,8 +1177,10 @@ fn append_block_description_to_stack_trace(parser: &Parser, b: &Block, trace: &m
             print_call_site = true;
         }
         BlockType::event => {
-            let description =
-                event::get_desc(parser, b.event.as_ref().expect("Should have an event"));
+            let BlockData::Event(event) = &b.data else {
+                unreachable!()
+            };
+            let description = event::get_desc(parser, event);
             trace.push_utfstr(&wgettext_fmt!("in event handler: %ls\n", &description));
             print_call_site = true;
         }
diff --git a/src/wutil/mod.rs2 b/src/wutil/mod.rs2
new file mode 100644
index 000000000..c35cf55c3
--- /dev/null
+++ b/src/wutil/mod.rs2
@@ -0,0 +1,692 @@
+pub mod dir_iter;
+pub mod encoding;
+pub mod errors;
+pub mod fileid;
+pub mod gettext;
+pub mod printf;
+#[cfg(test)]
+mod tests;
+pub mod wcstod;
+pub mod wcstoi;
+
+use crate::common::{
+    cstr2wcstring, fish_reserved_codepoint, str2wcstring, wcs2osstring, wcs2string, wcs2zstring,
+};
+use crate::fallback;
+use crate::fds::AutoCloseFd;
+use crate::flog::FLOGF;
+use crate::wchar::{wstr, WString, L};
+use crate::wchar_ext::WExt;
+use crate::wcstringutil::{join_strings, wcs2string_callback};
+use errno::errno;
+pub use gettext::{wgettext, wgettext_fmt, wgettext_maybe_fmt, wgettext_str};
+pub use printf::sprintf;
+use std::ffi::{CStr, OsStr};
+use std::fs::{self, canonicalize};
+use std::io::{self, Write};
+use std::os::unix::prelude::*;
+
+pub use wcstoi::*;
+
+/// Wide character version of opendir(). Note that opendir() is guaranteed to set close-on-exec by
+/// POSIX (hooray).
+pub fn wopendir(name: &wstr) -> *mut libc::DIR {
+    let tmp = wcs2zstring(name);
+    unsafe { libc::opendir(tmp.as_ptr()) }
+}
+
+/// Wide character version of stat().
+pub fn wstat(file_name: &wstr) -> io::Result<fs::Metadata> {
+    let tmp = wcs2osstring(file_name);
+    fs::metadata(tmp)
+}
+
+/// Wide character version of lstat().
+pub fn lwstat(file_name: &wstr) -> io::Result<fs::Metadata> {
+    let tmp = wcs2osstring(file_name);
+    fs::symlink_metadata(tmp)
+}
+
+/// Wide character version of access().
+pub fn waccess(file_name: &wstr, mode: libc::c_int) -> libc::c_int {
+    let tmp = wcs2zstring(file_name);
+    unsafe { libc::access(tmp.as_ptr(), mode) }
+}
+
+/// Wide character version of unlink().
+pub fn wunlink(file_name: &wstr) -> libc::c_int {
+    let tmp = wcs2zstring(file_name);
+    unsafe { libc::unlink(tmp.as_ptr()) }
+}
+
+pub fn wperror(s: &wstr) {
+    let bytes = wcs2string(s);
+    // We can't guarantee the string is 100% Unicode (why?), so we don't use std::str::from_utf8()
+    let s = OsStr::from_bytes(&bytes).to_string_lossy();
+    perror(&s)
+}
+
+/// Port of the wide-string wperror from `src/wutil.cpp` but for rust `&str`.
+pub fn perror(s: &str) {
+    let e = errno().0;
+    let mut stderr = std::io::stderr().lock();
+    if !s.is_empty() {
+        let _ = write!(stderr, "{s}: ");
+    }
+    let slice = unsafe {
+        let msg = libc::strerror(e) as *const u8;
+        let len = libc::strlen(msg as *const _);
+        std::slice::from_raw_parts(msg, len)
+    };
+    let _ = stderr.write_all(slice);
+    let _ = stderr.write_all(b"\n");
+}
+
+pub fn perror_io(s: &str, e: &io::Error) {
+    eprintln!("{}: {}", s, e);
+}
+
+/// Wide character version of getcwd().
+pub fn wgetcwd() -> WString {
+    let mut cwd = [b'\0'; libc::PATH_MAX as usize];
+    let res = unsafe {
+        libc::getcwd(
+            std::ptr::addr_of_mut!(cwd).cast(),
+            std::mem::size_of_val(&cwd),
+        )
+    };
+    if !res.is_null() {
+        return cstr2wcstring(&cwd);
+    }
+
+    FLOGF!(
+        error,
+        "getcwd() failed with errno %d/%s",
+        errno::errno().0,
+        errno::errno().to_string()
+    );
+    WString::new()
+}
+
+/// Wide character version of readlink().
+pub fn wreadlink(file_name: &wstr) -> Option<WString> {
+    let md = lwstat(file_name).ok()?;
+    let bufsize = usize::try_from(md.len()).unwrap() + 1;
+    let mut target_buf = vec![b'\0'; bufsize];
+    let tmp = wcs2zstring(file_name);
+    let nbytes = unsafe {
+        libc::readlink(
+            tmp.as_ptr(),
+            std::ptr::addr_of_mut!(target_buf[0]).cast(),
+            bufsize,
+        )
+    };
+    if nbytes == -1 {
+        perror("readlink");
+        return None;
+    }
+    // The link might have been modified after our call to lstat.  If the link now points to a path
+    // that's longer than the original one, we can't read everything in our buffer.  Simply give
+    // up. We don't need to report an error since our only caller will already fall back to ENOENT.
+    let nbytes = usize::try_from(nbytes).unwrap();
+    if nbytes == bufsize {
+        return None;
+    }
+    Some(str2wcstring(&target_buf[0..nbytes]))
+}
+
+/// Wide character realpath. The last path component does not need to be valid. If an error occurs,
+/// `wrealpath()` returns `None`
+pub fn wrealpath(pathname: &wstr) -> Option<WString> {
+    if pathname.is_empty() {
+        return None;
+    }
+
+    let mut narrow_path: Vec<u8> = wcs2zstring(pathname).into();
+
+    // Strip trailing slashes. This is treats "/a//" as equivalent to "/a" if /a is a non-directory.
+    while narrow_path.len() > 1 && narrow_path[narrow_path.len() - 1] == b'/' {
+        narrow_path.pop();
+    }
+
+    let narrow_res = canonicalize(OsStr::from_bytes(&narrow_path));
+
+    let real_path = if let Ok(result) = narrow_res {
+        result.into_os_string().into_vec()
+    } else {
+        // Check if everything up to the last path component is valid.
+        let pathsep_idx = narrow_path.iter().rposition(|&c| c == b'/');
+
+        if pathsep_idx == Some(0) {
+            // If the only pathsep is the first character then it's an absolute path with a
+            // single path component and thus doesn't need conversion.
+            narrow_path
+        } else {
+            // Only call realpath() on the portion up to the last component.
+            let narrow_res = if let Some(pathsep_idx) = pathsep_idx {
+                // Only call realpath() on the portion up to the last component.
+                canonicalize(OsStr::from_bytes(&narrow_path[0..pathsep_idx]))
+            } else {
+                // If there is no "/", this is a file in $PWD, so give the realpath to that.
+                canonicalize(".")
+            };
+
+            let Ok(narrow_result) = narrow_res else {
+                return None;
+            };
+
+            let pathsep_idx = pathsep_idx.map_or(0, |idx| idx + 1);
+
+            let mut real_path = narrow_result.into_os_string().into_vec();
+
+            // This test is to deal with cases such as /../../x => //x.
+            if real_path.len() > 1 {
+                real_path.push(b'/');
+            }
+
+            real_path.extend_from_slice(&narrow_path[pathsep_idx..]);
+
+            real_path
+        }
+    };
+
+    Some(str2wcstring(&real_path))
+}
+
+/// Given an input path, "normalize" it:
+/// 1. Collapse multiple /s into a single /, except maybe at the beginning.
+/// 2. .. goes up a level.
+/// 3. Remove /./ in the middle.
+pub fn normalize_path(path: &wstr, allow_leading_double_slashes: bool) -> WString {
+    // Count the leading slashes.
+    let sep = '/';
+    let mut leading_slashes: usize = 0;
+    for c in path.chars() {
+        if c != sep {
+            break;
+        }
+        leading_slashes += 1;
+    }
+
+    let comps: Vec<&wstr> = path.split(sep).collect();
+    let mut new_comps = Vec::new();
+    for comp in comps {
+        if comp.is_empty() || comp == "." {
+            continue;
+        } else if comp != ".." {
+            new_comps.push(comp);
+        } else if !new_comps.is_empty() && new_comps.last().unwrap() != ".." {
+            // '..' with a real path component, drop that path component.
+            new_comps.pop();
+        } else if leading_slashes == 0 {
+            // We underflowed the .. and are a relative (not absolute) path.
+            new_comps.push(L!(".."));
+        }
+    }
+    let mut result = join_strings(&new_comps, sep);
+    // If we don't allow leading double slashes, collapse them to 1 if there are any.
+    let mut numslashes = if leading_slashes > 0 { 1 } else { 0 };
+    // If we do, prepend one or two leading slashes.
+    // Yes, three+ slashes are collapsed to one. (!)
+    if allow_leading_double_slashes && leading_slashes == 2 {
+        numslashes = 2;
+    }
+    for _ in 0..numslashes {
+        result.insert(0, sep);
+    }
+    // Ensure ./ normalizes to . and not empty.
+    if result.is_empty() {
+        result.push('.');
+    }
+    result
+}
+
+#[test]
+fn test_normalize_path() {
+    fn norm_path(path: &wstr) -> WString {
+        normalize_path(path, true)
+    }
+    assert_eq!(norm_path(L!("")), ".");
+    assert_eq!(norm_path(L!("..")), "..");
+    assert_eq!(norm_path(L!("./")), ".");
+    assert_eq!(norm_path(L!("./.")), ".");
+    assert_eq!(norm_path(L!("/")), "/");
+    assert_eq!(norm_path(L!("//")), "//");
+    assert_eq!(norm_path(L!("///")), "/");
+    assert_eq!(norm_path(L!("////")), "/");
+    assert_eq!(norm_path(L!("/.///")), "/");
+    assert_eq!(norm_path(L!(".//")), ".");
+    assert_eq!(norm_path(L!("/.//../")), "/");
+    assert_eq!(norm_path(L!("////abc")), "/abc");
+    assert_eq!(norm_path(L!("/abc")), "/abc");
+    assert_eq!(norm_path(L!("/abc/")), "/abc");
+    assert_eq!(norm_path(L!("/abc/..def/")), "/abc/..def");
+    assert_eq!(norm_path(L!("//abc/../def/")), "//def");
+    assert_eq!(norm_path(L!("abc/../abc/../abc/../abc")), "abc");
+    assert_eq!(norm_path(L!("../../")), "../..");
+    assert_eq!(norm_path(L!("foo/./bar")), "foo/bar");
+    assert_eq!(norm_path(L!("foo/../")), ".");
+    assert_eq!(norm_path(L!("foo/../foo")), "foo");
+    assert_eq!(norm_path(L!("foo/../foo/")), "foo");
+    assert_eq!(norm_path(L!("foo/././bar/.././baz")), "foo/baz");
+}
+
+/// Given an input path `path` and a working directory `wd`, do a "normalizing join" in a way
+/// appropriate for cd. That is, return effectively wd + path while resolving leading ../s from
+/// path. The intent here is to allow 'cd' out of a directory which may no longer exist, without
+/// allowing 'cd' into a directory that may not exist; see #5341.
+pub fn path_normalize_for_cd(wd: &wstr, path: &wstr) -> WString {
+    use std::collections::VecDeque;
+
+    // Fast paths.
+    const SEP: char = '/';
+    assert!(
+        wd.as_char_slice().first() == Some(&'/') && wd.as_char_slice().last() == Some(&'/'),
+        "Invalid working directory, it must start and end with /"
+    );
+    if path.is_empty() {
+        return wd.to_owned();
+    } else if path.as_char_slice().first() == Some(&SEP) {
+        return path.to_owned();
+    } else if path.as_char_slice().first() != Some(&'.') {
+        return wd.to_owned() + path;
+    }
+
+    // Split our strings by the sep.
+    let wd_comps: VecDeque<_> = wd.split(SEP).collect();
+    let mut path_comps = path.split(SEP).peekable();
+
+    // Remove empty segments from wd_comps.
+    // In particular this removes the leading and trailing empties.
+    let mut wd_comps = wd_comps.into_iter().filter(|comp| !comp.is_empty());
+
+    // Erase leading . and .. components from path_comps, popping from wd_comps as we go.
+    while let Some(comp) = path_comps.peek() {
+        if comp.is_empty() || comp == "." {
+            path_comps.next();
+        } else if comp == ".." && wd_comps.next_back().is_some() {
+            path_comps.next();
+        } else {
+            break;
+        }
+    }
+
+    // Append un-erased elements to wd_comps and join them, then prepend the leading /.
+    let paths: Vec<_> = wd_comps.chain(path_comps).collect();
+    let mut result = join_strings(&paths, SEP);
+    result.insert(0, '/');
+    result
+}
+
+#[cfg(test)]
+mod path_tests {
+    use crate::wchar::L;
+    use super::path_normalize_for_cd;
+
+    #[test]
+    fn test_relative_path() {
+        let wd = L!("/home/user/");
+        let path = L!("projects");
+        eprintln!("({}, {})", wd, path);
+        assert_eq!(path_normalize_for_cd(wd, path), L!("/home/user/projects"));
+    }
+
+    #[test]
+    fn test_absolute_path() {
+        let wd = L!("/home/user/");
+        let path = L!("/etc");
+        eprintln!("({}, {})", wd, path);
+        assert_eq!(path_normalize_for_cd(wd, path), L!("/etc"));
+    }
+
+    #[test]
+    fn test_parent_directory() {
+        let wd = L!("/home/user/projects/");
+        let path = L!("../docs");
+        eprintln!("({}, {})", wd, path);
+        assert_eq!(path_normalize_for_cd(wd, path), L!("/home/user/docs"));
+    }
+
+    #[test]
+    fn test_current_directory() {
+        let wd = L!("/home/user/");
+        let path = L!("./");
+        eprintln!("({}, {})", wd, path);
+        assert_eq!(path_normalize_for_cd(wd, path), L!("/home/user"));
+    }
+
+    #[test]
+    fn test_nested_parent_directory() {
+        let wd = L!("/home/user/projects/");
+        let path = L!("../../");
+        eprintln!("({}, {})", wd, path);
+        assert_eq!(path_normalize_for_cd(wd, path), L!("/home"));
+    }
+
+    #[test]
+    fn test_complex_path() {
+        let wd = L!("/home/user/projects/");
+        let path = L!("./../other/projects/./.././../docs");
+        eprintln!("({}, {})", wd, path);
+        assert_eq!(path_normalize_for_cd(wd, path), L!("/home/user/other/projects/./.././../docs"));
+    }
+
+    #[test]
+    fn test_root_directory() {
+        let wd = L!("/");
+        let path = L!("..");
+        eprintln!("({}, {})", wd, path);
+        assert_eq!(path_normalize_for_cd(wd, path), L!("/.."));
+    }
+
+    #[test]
+    fn test_empty_path() {
+        let wd = L!("/home/user/");
+        let path = L!("");
+        eprintln!("({}, {})", wd, path);
+        assert_eq!(path_normalize_for_cd(wd, path), L!("/home/user/"));
+    }
+
+    #[test]
+    fn test_trailing_slash() {
+        let wd = L!("/home/user/projects/");
+        let path = L!("docs/");
+        eprintln!("({}, {})", wd, path);
+        assert_eq!(path_normalize_for_cd(wd, path), L!("/home/user/projects/docs/"));
+    }
+}
+
+/// Wide character version of dirname().
+pub fn wdirname(mut path: &wstr) -> &wstr {
+    // Do not use system-provided dirname (#7837).
+    // On Mac it's not thread safe, and will error for paths exceeding PATH_MAX.
+    // This follows OpenGroup dirname recipe.
+
+    // 1: Double-slash stays.
+    if path == "//" {
+        return path;
+    }
+
+    // 2: All slashes => return slash.
+    if !path.is_empty() && path.chars().all(|c| c == '/') {
+        return L!("/");
+    }
+
+    // 3: Trim trailing slashes.
+    while path.as_char_slice().last() == Some(&'/') {
+        path = path.slice_to(path.char_count() - 1);
+    }
+
+    // 4: No slashes left => return period.
+    let Some(last_slash) = path.chars().rposition(|c| c == '/') else {
+        return L!(".");
+    };
+
+    // 5: Remove trailing non-slashes.
+    path = path.slice_to(last_slash + 1);
+
+    // 6: Skip as permitted.
+    // 7: Remove trailing slashes again.
+    while path.as_char_slice().last() == Some(&'/') {
+        path = path.slice_to(path.char_count() - 1);
+    }
+
+    // 8: Empty => return slash.
+    if path.is_empty() {
+        return L!("/");
+    }
+    path
+}
+
+/// Wide character version of basename().
+pub fn wbasename(mut path: &wstr) -> &wstr {
+    // This follows OpenGroup basename recipe.
+    // 1: empty => allowed to return ".". This is what system impls do.
+    if path.is_empty() {
+        return L!(".");
+    }
+
+    // 2: Skip as permitted.
+    // 3: All slashes => return slash.
+    if !path.is_empty() && path.chars().all(|c| c == '/') {
+        return L!("/");
+    }
+
+    // 4: Remove trailing slashes.
+    while path.as_char_slice().last() == Some(&'/') {
+        path = path.slice_to(path.char_count() - 1);
+    }
+
+    // 5: Remove up to and including last slash.
+    if let Some(last_slash) = path.chars().rposition(|c| c == '/') {
+        path = path.slice_from(last_slash + 1);
+    }
+    path
+}
+
+/// Wide character version of mkdir.
+pub fn wmkdir(name: &wstr, mode: libc::mode_t) -> libc::c_int {
+    let name_narrow = wcs2zstring(name);
+    unsafe { libc::mkdir(name_narrow.as_ptr(), mode) }
+}
+
+/// Wide character version of rename.
+pub fn wrename(old_name: &wstr, new_name: &wstr) -> libc::c_int {
+    let old_narrow = wcs2zstring(old_name);
+    let new_narrow = wcs2zstring(new_name);
+    unsafe { libc::rename(old_narrow.as_ptr(), new_narrow.as_ptr()) }
+}
+
+pub fn write_to_fd(input: &[u8], fd: RawFd) -> nix::Result<usize> {
+    nix::unistd::write(fd, input)
+}
+
+/// Write a wide string to a file descriptor. This avoids doing any additional allocation.
+/// This does NOT retry on EINTR or EAGAIN, it simply returns.
+/// Return -1 on error in which case errno will have been set. In this event, the number of bytes
+/// actually written cannot be obtained.
+pub fn wwrite_to_fd(input: &wstr, fd: RawFd) -> Option<usize> {
+    // Accumulate data in a local buffer.
+    let mut accum = [b'\0'; 512];
+    let mut accumlen = 0;
+    let maxaccum: usize = std::mem::size_of_val(&accum);
+
+    // Helper to perform a write to 'fd', looping as necessary.
+    // Return true on success, false on error.
+    let mut total_written = 0;
+
+    fn do_write(fd: RawFd, total_written: &mut usize, mut buf: &[u8]) -> bool {
+        while !buf.is_empty() {
+            let Ok(amt) = write_to_fd(buf, fd) else {
+                return false;
+            };
+            *total_written += amt;
+            assert!(amt <= buf.len(), "Wrote more than requested");
+            buf = &buf[amt..];
+        }
+        true
+    }
+
+    // Helper to flush the accumulation buffer.
+    let flush_accum = |total_written: &mut usize, accum: &[u8], accumlen: &mut usize| {
+        if !do_write(fd, total_written, &accum[..*accumlen]) {
+            return false;
+        }
+        *accumlen = 0;
+        true
+    };
+
+    let mut success = wcs2string_callback(input, |buff: &[u8]| {
+        if buff.len() + accumlen > maxaccum {
+            // We have to flush.
+            if !flush_accum(&mut total_written, &accum, &mut accumlen) {
+                return false;
+            }
+        }
+        if buff.len() + accumlen <= maxaccum {
+            // Accumulate more.
+            unsafe {
+                std::ptr::copy(&buff[0], &mut accum[accumlen], buff.len());
+            }
+            accumlen += buff.len();
+            true
+        } else {
+            // Too much data to even fit, just write it immediately.
+            do_write(fd, &mut total_written, buff)
+        }
+    });
+    // Flush any remaining.
+    if success {
+        success = flush_accum(&mut total_written, &accum, &mut accumlen);
+    }
+    if success {
+        Some(total_written)
+    } else {
+        None
+    }
+}
+
+const PUA1_START: char = '\u{E000}';
+const PUA1_END: char = '\u{F900}';
+// const PUA2_START: char = '\u{F0000}';
+// const PUA2_END: char = '\u{FFFFE}';
+// const PUA3_START: char = '\u{100000}';
+// const PUA3_END: char = '\u{10FFFE}';
+
+/// Return one if the code point is in a Unicode private use area.
+pub(crate) fn fish_is_pua(c: char) -> bool {
+    PUA1_START <= c && c < PUA1_END
+}
+
+/// We need this because there are too many implementations that don't return the proper answer for
+/// some code points. See issue #3050.
+pub fn fish_iswalnum(c: char) -> bool {
+    !fish_reserved_codepoint(c) && !fish_is_pua(c) && c.is_alphanumeric()
+}
+
+extern "C" {
+    fn iswgraph(wc: libc::wchar_t) -> libc::c_int; // Technically it's wint_t
+}
+
+/// We need this because there are too many implementations that don't return the proper answer for
+/// some code points. See issue #3050.
+pub fn fish_iswgraph(c: char) -> bool {
+    !fish_reserved_codepoint(c) && (fish_is_pua(c) || unsafe { iswgraph(c as libc::wchar_t) } != 0)
+}
+
+pub fn fish_wcswidth(s: &wstr) -> isize {
+    fallback::fish_wcswidth(s)
+}
+
+/// Class for representing a file's inode. We use this to detect and avoid symlink loops, among
+/// other things. While an inode / dev pair is sufficient to distinguish co-existing files, Linux
+/// seems to aggressively re-use inodes, so it cannot determine if a file has been deleted (ABA
+/// problem). Therefore we include richer information.
+#[derive(Debug, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct FileId {
+    pub device: libc::dev_t,
+    pub inode: libc::ino_t,
+    pub size: u64,
+    pub change_seconds: libc::time_t,
+    pub change_nanoseconds: i64,
+    pub mod_seconds: libc::time_t,
+    pub mod_nanoseconds: i64,
+}
+
+impl FileId {
+    pub const fn new() -> Self {
+        FileId {
+            device: -1 as _,
+            inode: -1 as _,
+            size: -1 as _,
+            change_seconds: libc::time_t::MIN,
+            change_nanoseconds: i64::MIN,
+            mod_seconds: libc::time_t::MIN,
+            mod_nanoseconds: -1 as _,
+        }
+    }
+    pub fn from_stat(buf: &libc::stat) -> FileId {
+        let mut result = FileId::new();
+        result.device = buf.st_dev;
+        result.inode = buf.st_ino;
+        result.size = buf.st_size as u64;
+        result.change_seconds = buf.st_ctime;
+        result.mod_seconds = buf.st_mtime;
+        #[allow(clippy::unnecessary_cast)] // platform-dependent
+        #[cfg(not(target_os = "netbsd"))]
+        {
+            result.change_nanoseconds = buf.st_ctime_nsec as _;
+            result.mod_nanoseconds = buf.st_mtime_nsec as _;
+        }
+        #[cfg(target_os = "netbsd")]
+        {
+            result.change_nanoseconds = buf.st_ctimensec as _;
+            result.mod_nanoseconds = buf.st_mtimensec as _;
+        }
+        result
+    }
+
+    /// Return true if \param rhs has higher mtime seconds than this file_id_t.
+    /// If identical, nanoseconds are compared.
+    pub fn older_than(&self, rhs: &FileId) -> bool {
+        let lhs = (self.mod_seconds, self.mod_nanoseconds);
+        let rhs = (rhs.mod_seconds, rhs.mod_nanoseconds);
+        lhs.cmp(&rhs).is_lt()
+    }
+}
+
+pub const INVALID_FILE_ID: FileId = FileId::new();
+
+pub fn file_id_for_fd(fd: BorrowedFd<'_>) -> FileId {
+    let mut result = INVALID_FILE_ID;
+    let mut buf: libc::stat = unsafe { std::mem::zeroed() };
+    if unsafe { libc::fstat(fd.as_raw_fd(), &mut buf) } == 0 {
+        result = FileId::from_stat(&buf);
+    }
+    result
+}
+
+pub fn file_id_for_autoclose_fd(fd: &AutoCloseFd) -> FileId {
+    file_id_for_fd(fd.as_fd())
+}
+
+pub fn file_id_for_path(path: &wstr) -> FileId {
+    file_id_for_path_narrow(&wcs2zstring(path))
+}
+
+pub fn file_id_for_path_narrow(path: &CStr) -> FileId {
+    let mut result = INVALID_FILE_ID;
+    let mut buf: libc::stat = unsafe { std::mem::zeroed() };
+    if unsafe { libc::stat(path.as_ptr(), &mut buf) } == 0 {
+        result = FileId::from_stat(&buf);
+    }
+    result
+}
+/// Given that `cursor` is a pointer into `base`, return the offset in characters.
+/// This emulates C pointer arithmetic:
+///    `wstr_offset_in(cursor, base)` is equivalent to C++ `cursor - base`.
+pub fn wstr_offset_in(cursor: &wstr, base: &wstr) -> usize {
+    let cursor = cursor.as_slice();
+    let base = base.as_slice();
+    // cursor may be a zero-length slice at the end of base,
+    // which base.as_ptr_range().contains(cursor.as_ptr()) will reject.
+    let base_range = base.as_ptr_range();
+    let curs_range = cursor.as_ptr_range();
+    assert!(
+        base_range.start <= curs_range.start && curs_range.end <= base_range.end,
+        "cursor should be a subslice of base"
+    );
+    let offset = unsafe { cursor.as_ptr().offset_from(base.as_ptr()) };
+    assert!(offset >= 0, "offset should be non-negative");
+    offset as usize
+}
+
+#[test]
+fn test_wstr_offset_in() {
+    use crate::wchar::L;
+    let base = L!("hello world");
+    assert_eq!(wstr_offset_in(&base[6..], base), 6);
+    assert_eq!(wstr_offset_in(&base[0..], base), 0);
+    assert_eq!(wstr_offset_in(&base[6..], &base[6..]), 0);
+    assert_eq!(wstr_offset_in(&base[base.len()..], base), base.len());
+}