wchar: remove dependency on fish_common

The `fish_wchar` crate no longer has any local dependencies, making it easy to depend on it in other crates without worrying about cyclic dependencies. Additionally, remove the (non-fish) `widestring` crate as a direct dependency of the main crate. Now, only the `fish_wchar` and `fish_printf` crates directly depend on `widestring`. `fish_printf` could also depend on `fish_wchar` instead, but I left that as is: `fish_printf` has been published, and having it depend on a crate which is not published to crates.io does not seem like a good idea. Part of #12313
2026-06-01 13:01:21 -03:00 · 2026-01-12 17:08:44 +01:00
parent 385cdef89b
commit 36db3b7f3f
15 changed files with 57 additions and 61 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -184,7 +184,6 @@ dependencies = [
 "serial_test",
 "terminfo",
 "unix_path",
- "widestring",
 "xterm-color",
 ]

@@ -208,9 +207,9 @@ name = "fish-common"
 version = "0.0.0"
 dependencies = [
 "bitflags",
+ "fish-wchar",
 "libc",
 "nix",
- "widestring",
 ]

 [[package]]
@@ -280,7 +279,6 @@ dependencies = [
 name = "fish-wchar"
 version = "0.0.0"
 dependencies = [
- "fish-common",
 "widestring",
 ]

--- a/Cargo.toml
+++ b/Cargo.toml
@@ -111,7 +111,6 @@ pcre2.workspace = true
 rand.workspace = true
 terminfo.workspace = true
 xterm-color.workspace = true
-widestring.workspace = true

 [target.'cfg(not(target_has_atomic = "64"))'.dependencies]
 portable-atomic.workspace = true
@@ -187,7 +186,7 @@ rustdoc.private_intra_doc_links = "allow"
 assigning_clones = "warn"
 implicit_clone = "warn"
 cloned_instead_of_copied = "warn"
-len_without_is_empty = "allow"     # we're not a library crate
+len_without_is_empty = "allow"    # we're not a library crate
 let_and_return = "allow"
 manual_range_contains = "allow"
 map_unwrap_or = "warn"
--- a/crates/common/Cargo.toml
+++ b/crates/common/Cargo.toml
@@ -8,9 +8,9 @@ license.workspace = true

 [dependencies]
 bitflags.workspace = true
+fish-wchar.workspace = true
 libc.workspace = true
 nix.workspace = true
-widestring.workspace = true

 [lints]
 workspace = true
--- a/crates/common/src/lib.rs
+++ b/crates/common/src/lib.rs
@@ -1,4 +1,5 @@
 use bitflags::bitflags;
+use fish_wchar::{L, char_offset, wstr};
 use libc::{SIG_IGN, SIGTTOU, STDIN_FILENO};
 use std::cell::{Cell, RefCell};
 use std::io::Read;
@@ -8,7 +9,6 @@
 use std::sync::OnceLock;
 use std::sync::atomic::{AtomicI32, AtomicU32, Ordering};
 use std::{env, mem, time};
-use widestring::Utf32Str as wstr;

 pub const PACKAGE_NAME: &str = env!("CARGO_PKG_NAME");

@@ -40,21 +40,6 @@
 // Unicode range for our needs.
 const _: () = assert!(WILDCARD_RESERVED_END <= RESERVED_CHAR_END);

-// These are in the Unicode private-use range. We really shouldn't use this
-// range but have little choice in the matter given how our lexer/parser works.
-// We can't use non-characters for these two ranges because there are only 66 of
-// them and we need at least 256 + 64.
-//
-// If sizeof(wchar_t))==4 we could avoid using private-use chars; however, that
-// would result in fish having different behavior on machines with 16 versus 32
-// bit wchar_t. It's better that fish behave the same on both types of systems.
-//
-// Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know
-// of at least one use of a codepoint in that range: the Apple symbol (0xF8FF)
-// on Mac OS X. See http://www.unicode.org/faq/private_use.html.
-pub const ENCODE_DIRECT_BASE: char = '\u{F600}';
-pub const ENCODE_DIRECT_END: char = char_offset(ENCODE_DIRECT_BASE, 256);
-
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum EscapeStringStyle {
    Script(EscapeFlags),
@@ -78,7 +63,7 @@ fn try_from(s: &wstr) -> Result<Self, Self::Error> {
            s if s == "var" => Ok(Var),
            s if s == "url" => Ok(Url),
            s if s == "regex" => Ok(Regex),
-            _ => Err(widestring::utf32str!("Invalid escape style")),
+            _ => Err(L!("Invalid escape style")),
        }
    }
 }
@@ -124,7 +109,7 @@ fn try_from(s: &wstr) -> Result<Self, Self::Error> {
            s if s == "script" => Ok(Self::default()),
            s if s == "var" => Ok(Var),
            s if s == "url" => Ok(Url),
-            _ => Err(widestring::utf32str!("Invalid escape style")),
+            _ => Err(L!("Invalid escape style")),
        }
    }
 }
@@ -142,20 +127,6 @@ pub struct UnescapeFlags: u32 {
    }
 }

-pub const fn char_offset(base: char, offset: u32) -> char {
-    match char::from_u32(base as u32 + offset) {
-        Some(c) => c,
-        None => panic!("not a valid char"),
-    }
-}
-
-pub fn subslice_position<T: Eq>(a: &[T], b: &[T]) -> Option<usize> {
-    if b.is_empty() {
-        return Some(0);
-    }
-    a.windows(b.len()).position(|aw| aw == b)
-}
-
 /// This function attempts to distinguish between a console session (at the actual login vty) and a
 /// session within a terminal emulator inside a desktop environment or over SSH. Unfortunately
 /// there are few values of $TERM that we can interpret as being exclusively console sessions, and
@@ -196,7 +167,7 @@ pub fn get_ellipsis_char() -> char {
 /// The character or string to use where text has been truncated (ellipsis if possible, otherwise
 /// ...)
 pub fn get_ellipsis_str() -> &'static wstr {
-    widestring::utf32str!("\u{2026}")
+    L!("\u{2026}")
 }

 // Only pub for `src/common.rs`
@@ -570,7 +541,7 @@ pub const fn assert_sync<T: Sync>() {}
 /// # Examples
 ///
 /// ```
-/// use widestring::{utf32str as L,Utf32Str as wstr};
+/// use fish_wchar::{L, wstr};
 /// use fish_common::assert_sorted_by_name;
 ///
 /// const COLORS: &[(&wstr, u32)] = &[
@@ -586,7 +557,7 @@ pub const fn assert_sync<T: Sync>() {}
 /// While this example would fail to compile:
 ///
 /// ```compile_fail
-/// use widestring::{utf32str as L,Utf32Str as wstr};
+/// use fish_wchar::{L, wstr};
 /// use fish_common::assert_sorted_by_name;
 ///
 /// const COLORS: &[(&wstr, u32)] = &[
@@ -728,7 +699,6 @@ fn test_scope_guard() {

    #[test]
    fn test_truncate_at_nul() {
-        use widestring::utf32str as L;
        assert_eq!(truncate_at_nul(L!("abc\0def")), L!("abc"));
        assert_eq!(truncate_at_nul(L!("abc")), L!("abc"));
        assert_eq!(truncate_at_nul(L!("\0abc")), L!(""));
--- a/crates/wchar/Cargo.toml
+++ b/crates/wchar/Cargo.toml
@@ -7,7 +7,6 @@ repository.workspace = true
 license.workspace = true

 [dependencies]
-fish-common.workspace = true
 widestring.workspace = true

 [lints]
--- a/crates/wchar/src/lib.rs
+++ b/crates/wchar/src/lib.rs
@@ -6,7 +6,6 @@

 pub mod word_char;

-use fish_common::{ENCODE_DIRECT_BASE, ENCODE_DIRECT_END, subslice_position};
 use std::{iter, slice};
 pub use widestring::{Utf32Str as wstr, Utf32String as WString, utf32str as L, utfstr::CharsUtf32};

@@ -14,6 +13,21 @@ pub mod prelude {
    pub use crate::{IntoCharIter, L, ToWString, WExt, WString, wstr};
 }

+// These are in the Unicode private-use range. We really shouldn't use this
+// range but have little choice in the matter given how our lexer/parser works.
+// We can't use non-characters for these two ranges because there are only 66 of
+// them and we need at least 256 + 64.
+//
+// If sizeof(wchar_t)==4 we could avoid using private-use chars; however, that
+// would result in fish having different behavior on machines with 16 versus 32
+// bit wchar_t. It's better that fish behave the same on both types of systems.
+//
+// Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know
+// of at least one use of a codepoint in that range: the Apple symbol (0xF8FF)
+// on Mac OS X. See http://www.unicode.org/faq/private_use.html.
+pub const ENCODE_DIRECT_BASE: char = '\u{F600}';
+pub const ENCODE_DIRECT_END: char = char_offset(ENCODE_DIRECT_BASE, 256);
+
 /// Encode a literal byte in a UTF-32 character. This is required for e.g. the echo builtin, whose
 /// escape sequences can be used to construct raw byte sequences which are then interpreted as e.g.
 /// UTF-8 by the terminal. If we were to interpret each of those bytes as a codepoint and encode it
@@ -39,6 +53,20 @@ pub fn decode_byte_from_char(c: char) -> Option<u8> {
    }
 }

+pub const fn char_offset(base: char, offset: u32) -> char {
+    match char::from_u32(base as u32 + offset) {
+        Some(c) => c,
+        None => panic!("not a valid char"),
+    }
+}
+
+pub fn subslice_position<T: Eq>(a: &[T], b: &[T]) -> Option<usize> {
+    if b.is_empty() {
+        return Some(0);
+    }
+    a.windows(b.len()).position(|aw| aw == b)
+}
+
 /// Helpers to convert things to widestring.
 /// This is like std::string::ToString.
 pub trait ToWString {
--- a/src/builtins/math.rs
+++ b/src/builtins/math.rs
@@ -1,5 +1,5 @@
+use fish_wchar::L;
 use num_traits::pow;
-use widestring::utf32str;

 use super::prelude::*;
 use crate::tinyexpr::te_interp;
@@ -78,13 +78,13 @@ fn parse_cmd_opts(
            }
            'm' => {
                let optarg = w.woptarg.unwrap();
-                if optarg.eq(utf32str!("truncate")) || optarg.eq(utf32str!("trunc")) {
+                if optarg.eq(L!("truncate")) || optarg.eq(L!("trunc")) {
                    opts.scale_mode = ScaleMode::Truncate;
-                } else if optarg.eq(utf32str!("round")) {
+                } else if optarg.eq(L!("round")) {
                    opts.scale_mode = ScaleMode::Round;
-                } else if optarg.eq(utf32str!("floor")) {
+                } else if optarg.eq(L!("floor")) {
                    opts.scale_mode = ScaleMode::Floor;
-                } else if optarg.eq(utf32str!("ceiling")) || optarg.eq(utf32str!("ceil")) {
+                } else if optarg.eq(L!("ceiling")) || optarg.eq(L!("ceil")) {
                    opts.scale_mode = ScaleMode::Ceiling;
                } else {
                    streams
--- a/src/common.rs
+++ b/src/common.rs
@@ -16,7 +16,9 @@
 use crate::wildcard::{ANY_CHAR, ANY_STRING, ANY_STRING_RECURSIVE};
 use crate::wutil::fish_iswalnum;
 use fish_fallback::fish_wcwidth;
-use fish_wchar::{decode_byte_from_char, encode_byte_to_char};
+use fish_wchar::{
+    ENCODE_DIRECT_END, decode_byte_from_char, encode_byte_to_char, subslice_position,
+};
 use std::env;
 use std::ffi::{CStr, CString, OsString};
 use std::os::unix::prelude::*;
@@ -1381,8 +1383,7 @@ mod tests {
        bytes2wcstring, escape_string, unescape_string, wcs2bytes,
    };
    use crate::util::get_seeded_rng;
-    use fish_common::ENCODE_DIRECT_BASE;
-    use fish_wchar::{L, WString, wstr};
+    use fish_wchar::{ENCODE_DIRECT_BASE, L, WString, wstr};
    use rand::{Rng, RngCore};

    #[test]
--- a/src/env_universal_common.rs
+++ b/src/env_universal_common.rs
@@ -807,9 +807,8 @@ fn skip_spaces(mut s: &wstr) -> &wstr {

 #[cfg(test)]
 mod tests {
-    use fish_common::ENCODE_DIRECT_BASE;
-    use fish_common::char_offset;
    use fish_tempfile::TempDir;
+    use fish_wchar::{ENCODE_DIRECT_BASE, char_offset};

    use crate::common::bytes2wcstring;
    use crate::common::wcs2osstring;
--- a/src/expand.rs
+++ b/src/expand.rs
@@ -30,7 +30,8 @@
 use crate::wildcard::{wildcard_expand_string, wildcard_has_internal};
 use crate::wutil::{Options, normalize_path, wcstoi_partial};
 use bitflags::bitflags;
-use fish_common::{EXPAND_RESERVED_BASE, EXPAND_RESERVED_END, char_offset};
+use fish_common::{EXPAND_RESERVED_BASE, EXPAND_RESERVED_END};
+use fish_wchar::char_offset;
 use std::mem::MaybeUninit;

 bitflags! {
--- a/src/history/yaml_backend.rs
+++ b/src/history/yaml_backend.rs
@@ -5,7 +5,7 @@
    time::{Duration, SystemTime, UNIX_EPOCH},
 };

-use fish_common::subslice_position;
+use fish_wchar::subslice_position;

 use super::{HistoryItem, PersistenceMode};
 use crate::{common::bytes2wcstring, flog::flog};
--- a/src/localization/gettext.rs
+++ b/src/localization/gettext.rs
@@ -119,7 +119,7 @@ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 #[cfg(feature = "gettext-extract")]
 macro_rules! localizable_string {
    ($string:literal) => {
-        $crate::localization::LocalizableString::Static(widestring::utf32str!(
+        $crate::localization::LocalizableString::Static(fish_wchar::L!(
            fish_gettext_extraction::gettext_extract!($string)
        ))
    };
@@ -128,7 +128,7 @@ macro_rules! localizable_string {
 #[cfg(not(feature = "gettext-extract"))]
 macro_rules! localizable_string {
    ($string:literal) => {
-        $crate::localization::LocalizableString::Static(widestring::utf32str!($string))
+        $crate::localization::LocalizableString::Static(fish_wchar::L!($string))
    };
 }
 pub use localizable_string;
--- a/src/wcstringutil.rs
+++ b/src/wcstringutil.rs
@@ -213,7 +213,7 @@ pub fn try_create(
        // Helper to lazily compute if case insensitive matches should use icase or smartcase.
        // Use icase if the input contains any uppercase characters, smartcase otherwise.
        #[inline(always)]
-        fn get_case_fold(s: &widestring::Utf32Str) -> CaseSensitivity {
+        fn get_case_fold(s: &wstr) -> CaseSensitivity {
            if s.chars().any(|c| c.is_uppercase()) {
                CaseSensitivity::Insensitive
            } else {
--- a/src/wildcard.rs
+++ b/src/wildcard.rs
@@ -1,6 +1,7 @@
 // Enumeration of all wildcard types.

-use fish_common::{WILDCARD_RESERVED_BASE, char_offset};
+use fish_common::WILDCARD_RESERVED_BASE;
+use fish_wchar::char_offset;
 use libc::X_OK;
 use std::cmp::Ordering;
 use std::collections::HashSet;
--- a/src/wutil/printf.rs
+++ b/src/wutil/printf.rs
@@ -4,7 +4,7 @@ macro_rules! sprintf {
    // Allow a `&str` or `&Utf32Str` as a format, and return a `Utf32String`.
    ($fmt:expr $(, $arg:expr)* $(,)?) => {
        {
-            let mut target = widestring::Utf32String::new();
+            let mut target = fish_wchar::WString::new();
            $crate::sprintf!(=> &mut target, $fmt, $($arg),*);
            target
        }