wchar: remove dependency on fish_common

Now, the `fish_wchar` crate does not have any local dependencies, making
it easy to depend on it in other crates without worrying about cyclic
dependencies.

Additionally, remove the (non-fish) `widestring` crate as a direct
dependency of the main crate.
Now, only the `fish_wchar` and `fish_printf` crates directly depend on
`widestring`. `fish_printf` could depend on `fish_wchar` instead, but I
left that as is: `fish_printf` is published on crates.io, so making it
depend on a crate which is not published there does not seem like a
good idea.

Part of #12313
This commit is contained in:
Daniel Rainer
2026-01-12 17:08:44 +01:00
committed by Johannes Altmanninger
parent 385cdef89b
commit 36db3b7f3f
15 changed files with 57 additions and 61 deletions

4
Cargo.lock generated
View File

@@ -184,7 +184,6 @@ dependencies = [
"serial_test",
"terminfo",
"unix_path",
"widestring",
"xterm-color",
]
@@ -208,9 +207,9 @@ name = "fish-common"
version = "0.0.0"
dependencies = [
"bitflags",
"fish-wchar",
"libc",
"nix",
"widestring",
]
[[package]]
@@ -280,7 +279,6 @@ dependencies = [
name = "fish-wchar"
version = "0.0.0"
dependencies = [
"fish-common",
"widestring",
]

View File

@@ -111,7 +111,6 @@ pcre2.workspace = true
rand.workspace = true
terminfo.workspace = true
xterm-color.workspace = true
widestring.workspace = true
[target.'cfg(not(target_has_atomic = "64"))'.dependencies]
portable-atomic.workspace = true
@@ -187,7 +186,7 @@ rustdoc.private_intra_doc_links = "allow"
assigning_clones = "warn"
implicit_clone = "warn"
cloned_instead_of_copied = "warn"
len_without_is_empty = "allow" # we're not a library crate
len_without_is_empty = "allow" # we're not a library crate
let_and_return = "allow"
manual_range_contains = "allow"
map_unwrap_or = "warn"

View File

@@ -8,9 +8,9 @@ license.workspace = true
[dependencies]
bitflags.workspace = true
fish-wchar.workspace = true
libc.workspace = true
nix.workspace = true
widestring.workspace = true
[lints]
workspace = true

View File

@@ -1,4 +1,5 @@
use bitflags::bitflags;
use fish_wchar::{L, char_offset, wstr};
use libc::{SIG_IGN, SIGTTOU, STDIN_FILENO};
use std::cell::{Cell, RefCell};
use std::io::Read;
@@ -8,7 +9,6 @@
use std::sync::OnceLock;
use std::sync::atomic::{AtomicI32, AtomicU32, Ordering};
use std::{env, mem, time};
use widestring::Utf32Str as wstr;
pub const PACKAGE_NAME: &str = env!("CARGO_PKG_NAME");
@@ -40,21 +40,6 @@
// Unicode range for our needs.
const _: () = assert!(WILDCARD_RESERVED_END <= RESERVED_CHAR_END);
// These are in the Unicode private-use range. We really shouldn't use this
// range but have little choice in the matter given how our lexer/parser works.
// We can't use non-characters for these two ranges because there are only 66 of
// them and we need at least 256 + 64.
//
// If sizeof(wchar_t))==4 we could avoid using private-use chars; however, that
// would result in fish having different behavior on machines with 16 versus 32
// bit wchar_t. It's better that fish behave the same on both types of systems.
//
// Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know
// of at least one use of a codepoint in that range: the Apple symbol (0xF8FF)
// on Mac OS X. See http://www.unicode.org/faq/private_use.html.
pub const ENCODE_DIRECT_BASE: char = '\u{F600}';
pub const ENCODE_DIRECT_END: char = char_offset(ENCODE_DIRECT_BASE, 256);
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EscapeStringStyle {
Script(EscapeFlags),
@@ -78,7 +63,7 @@ fn try_from(s: &wstr) -> Result<Self, Self::Error> {
s if s == "var" => Ok(Var),
s if s == "url" => Ok(Url),
s if s == "regex" => Ok(Regex),
_ => Err(widestring::utf32str!("Invalid escape style")),
_ => Err(L!("Invalid escape style")),
}
}
}
@@ -124,7 +109,7 @@ fn try_from(s: &wstr) -> Result<Self, Self::Error> {
s if s == "script" => Ok(Self::default()),
s if s == "var" => Ok(Var),
s if s == "url" => Ok(Url),
_ => Err(widestring::utf32str!("Invalid escape style")),
_ => Err(L!("Invalid escape style")),
}
}
}
@@ -142,20 +127,6 @@ pub struct UnescapeFlags: u32 {
}
}
pub const fn char_offset(base: char, offset: u32) -> char {
match char::from_u32(base as u32 + offset) {
Some(c) => c,
None => panic!("not a valid char"),
}
}
pub fn subslice_position<T: Eq>(a: &[T], b: &[T]) -> Option<usize> {
if b.is_empty() {
return Some(0);
}
a.windows(b.len()).position(|aw| aw == b)
}
/// This function attempts to distinguish between a console session (at the actual login vty) and a
/// session within a terminal emulator inside a desktop environment or over SSH. Unfortunately
/// there are few values of $TERM that we can interpret as being exclusively console sessions, and
@@ -196,7 +167,7 @@ pub fn get_ellipsis_char() -> char {
/// The character or string to use where text has been truncated (ellipsis if possible, otherwise
/// ...)
pub fn get_ellipsis_str() -> &'static wstr {
widestring::utf32str!("\u{2026}")
L!("\u{2026}")
}
// Only pub for `src/common.rs`
@@ -570,7 +541,7 @@ pub const fn assert_sync<T: Sync>() {}
/// # Examples
///
/// ```
/// use widestring::{utf32str as L,Utf32Str as wstr};
/// use fish_wchar::{L, wstr};
/// use fish_common::assert_sorted_by_name;
///
/// const COLORS: &[(&wstr, u32)] = &[
@@ -586,7 +557,7 @@ pub const fn assert_sync<T: Sync>() {}
/// While this example would fail to compile:
///
/// ```compile_fail
/// use widestring::{utf32str as L,Utf32Str as wstr};
/// use fish_wchar::{L, wstr};
/// use fish_common::assert_sorted_by_name;
///
/// const COLORS: &[(&wstr, u32)] = &[
@@ -728,7 +699,6 @@ fn test_scope_guard() {
#[test]
fn test_truncate_at_nul() {
use widestring::utf32str as L;
assert_eq!(truncate_at_nul(L!("abc\0def")), L!("abc"));
assert_eq!(truncate_at_nul(L!("abc")), L!("abc"));
assert_eq!(truncate_at_nul(L!("\0abc")), L!(""));

View File

@@ -7,7 +7,6 @@ repository.workspace = true
license.workspace = true
[dependencies]
fish-common.workspace = true
widestring.workspace = true
[lints]

View File

@@ -6,7 +6,6 @@
pub mod word_char;
use fish_common::{ENCODE_DIRECT_BASE, ENCODE_DIRECT_END, subslice_position};
use std::{iter, slice};
pub use widestring::{Utf32Str as wstr, Utf32String as WString, utf32str as L, utfstr::CharsUtf32};
@@ -14,6 +13,21 @@ pub mod prelude {
pub use crate::{IntoCharIter, L, ToWString, WExt, WString, wstr};
}
// These are in the Unicode private-use range. We really shouldn't use this
// range but have little choice in the matter given how our lexer/parser works.
// We can't use non-characters for these two ranges because there are only 66 of
// them and we need at least 256 + 64.
//
// If sizeof(wchar_t) == 4 we could avoid using private-use chars; however, that
// would result in fish having different behavior on machines with 16 versus 32
// bit wchar_t. It's better that fish behave the same on both types of systems.
//
// Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know
// of at least one use of a codepoint in that range: the Apple symbol (0xF8FF)
// on Mac OS X. See http://www.unicode.org/faq/private_use.html.
pub const ENCODE_DIRECT_BASE: char = '\u{F600}';
pub const ENCODE_DIRECT_END: char = char_offset(ENCODE_DIRECT_BASE, 256);
/// Encode a literal byte in a UTF-32 character. This is required for e.g. the echo builtin, whose
/// escape sequences can be used to construct raw byte sequences which are then interpreted as e.g.
/// UTF-8 by the terminal. If we were to interpret each of those bytes as a codepoint and encode it
@@ -39,6 +53,20 @@ pub fn decode_byte_from_char(c: char) -> Option<u8> {
}
}
/// Returns the character whose code point lies `offset` past that of `base`.
///
/// This is a `const fn`, so it can be used to derive one character constant from another
/// (for example the end of a reserved range from its base).
///
/// # Panics
///
/// Panics with "not a valid char" if `base as u32 + offset` overflows `u32`, or if the
/// resulting value is not a valid `char` (`char::from_u32` returns `None`).
pub const fn char_offset(base: char, offset: u32) -> char {
    // Checked addition keeps the failure mode identical in every build profile: without it,
    // a build with overflow checks disabled would wrap around and could hand back an
    // unrelated but valid character instead of panicking.
    match (base as u32).checked_add(offset) {
        Some(code_point) => match char::from_u32(code_point) {
            Some(c) => c,
            None => panic!("not a valid char"),
        },
        None => panic!("not a valid char"),
    }
}
/// Finds the first place where `b` occurs in `a` as a contiguous run of elements.
///
/// Returns the index into `a` at which the first match starts, or `None` if `b` does not
/// occur in `a`. An empty `b` matches at index 0, even when `a` is empty as well.
pub fn subslice_position<T: Eq>(a: &[T], b: &[T]) -> Option<usize> {
    match b.len() {
        // `windows` panics on a size of 0, so the empty needle is answered up front.
        0 => Some(0),
        needle_len => a
            .windows(needle_len)
            .enumerate()
            .find_map(|(start, window)| (window == b).then_some(start)),
    }
}
/// Helpers to convert things to widestring.
/// This is like std::string::ToString.
pub trait ToWString {

View File

@@ -1,5 +1,5 @@
use fish_wchar::L;
use num_traits::pow;
use widestring::utf32str;
use super::prelude::*;
use crate::tinyexpr::te_interp;
@@ -78,13 +78,13 @@ fn parse_cmd_opts(
}
'm' => {
let optarg = w.woptarg.unwrap();
if optarg.eq(utf32str!("truncate")) || optarg.eq(utf32str!("trunc")) {
if optarg.eq(L!("truncate")) || optarg.eq(L!("trunc")) {
opts.scale_mode = ScaleMode::Truncate;
} else if optarg.eq(utf32str!("round")) {
} else if optarg.eq(L!("round")) {
opts.scale_mode = ScaleMode::Round;
} else if optarg.eq(utf32str!("floor")) {
} else if optarg.eq(L!("floor")) {
opts.scale_mode = ScaleMode::Floor;
} else if optarg.eq(utf32str!("ceiling")) || optarg.eq(utf32str!("ceil")) {
} else if optarg.eq(L!("ceiling")) || optarg.eq(L!("ceil")) {
opts.scale_mode = ScaleMode::Ceiling;
} else {
streams

View File

@@ -16,7 +16,9 @@
use crate::wildcard::{ANY_CHAR, ANY_STRING, ANY_STRING_RECURSIVE};
use crate::wutil::fish_iswalnum;
use fish_fallback::fish_wcwidth;
use fish_wchar::{decode_byte_from_char, encode_byte_to_char};
use fish_wchar::{
ENCODE_DIRECT_END, decode_byte_from_char, encode_byte_to_char, subslice_position,
};
use std::env;
use std::ffi::{CStr, CString, OsString};
use std::os::unix::prelude::*;
@@ -1381,8 +1383,7 @@ mod tests {
bytes2wcstring, escape_string, unescape_string, wcs2bytes,
};
use crate::util::get_seeded_rng;
use fish_common::ENCODE_DIRECT_BASE;
use fish_wchar::{L, WString, wstr};
use fish_wchar::{ENCODE_DIRECT_BASE, L, WString, wstr};
use rand::{Rng, RngCore};
#[test]

View File

@@ -807,9 +807,8 @@ fn skip_spaces(mut s: &wstr) -> &wstr {
#[cfg(test)]
mod tests {
use fish_common::ENCODE_DIRECT_BASE;
use fish_common::char_offset;
use fish_tempfile::TempDir;
use fish_wchar::{ENCODE_DIRECT_BASE, char_offset};
use crate::common::bytes2wcstring;
use crate::common::wcs2osstring;

View File

@@ -30,7 +30,8 @@
use crate::wildcard::{wildcard_expand_string, wildcard_has_internal};
use crate::wutil::{Options, normalize_path, wcstoi_partial};
use bitflags::bitflags;
use fish_common::{EXPAND_RESERVED_BASE, EXPAND_RESERVED_END, char_offset};
use fish_common::{EXPAND_RESERVED_BASE, EXPAND_RESERVED_END};
use fish_wchar::char_offset;
use std::mem::MaybeUninit;
bitflags! {

View File

@@ -5,7 +5,7 @@
time::{Duration, SystemTime, UNIX_EPOCH},
};
use fish_common::subslice_position;
use fish_wchar::subslice_position;
use super::{HistoryItem, PersistenceMode};
use crate::{common::bytes2wcstring, flog::flog};

View File

@@ -119,7 +119,7 @@ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
#[cfg(feature = "gettext-extract")]
macro_rules! localizable_string {
($string:literal) => {
$crate::localization::LocalizableString::Static(widestring::utf32str!(
$crate::localization::LocalizableString::Static(fish_wchar::L!(
fish_gettext_extraction::gettext_extract!($string)
))
};
@@ -128,7 +128,7 @@ macro_rules! localizable_string {
#[cfg(not(feature = "gettext-extract"))]
macro_rules! localizable_string {
($string:literal) => {
$crate::localization::LocalizableString::Static(widestring::utf32str!($string))
$crate::localization::LocalizableString::Static(fish_wchar::L!($string))
};
}
pub use localizable_string;

View File

@@ -213,7 +213,7 @@ pub fn try_create(
// Helper to lazily compute if case insensitive matches should use icase or smartcase.
// Use icase if the input contains any uppercase characters, smartcase otherwise.
#[inline(always)]
fn get_case_fold(s: &widestring::Utf32Str) -> CaseSensitivity {
fn get_case_fold(s: &wstr) -> CaseSensitivity {
if s.chars().any(|c| c.is_uppercase()) {
CaseSensitivity::Insensitive
} else {

View File

@@ -1,6 +1,7 @@
// Enumeration of all wildcard types.
use fish_common::{WILDCARD_RESERVED_BASE, char_offset};
use fish_common::WILDCARD_RESERVED_BASE;
use fish_wchar::char_offset;
use libc::X_OK;
use std::cmp::Ordering;
use std::collections::HashSet;

View File

@@ -4,7 +4,7 @@ macro_rules! sprintf {
// Allow a `&str` or `&Utf32Str` as a format, and return a `Utf32String`.
($fmt:expr $(, $arg:expr)* $(,)?) => {
{
let mut target = widestring::Utf32String::new();
let mut target = fish_wchar::WString::new();
$crate::sprintf!(=> &mut target, $fmt, $($arg),*);
target
}