wchar: extract logic into separate crate

Another reduction in size of the main crate. Also allows other crates to
depend on the new wchar crate.

The original `src/wchar.rs` file is kept around for now to keep the
prelude imports working.

Part of #12182
This commit is contained in:
Daniel Rainer
2025-12-17 23:45:35 +01:00
committed by Johannes Altmanninger
parent 5a35acf2e7
commit 2f37eda9d9
32 changed files with 128 additions and 126 deletions

9
Cargo.lock generated
View File

@@ -166,6 +166,7 @@ dependencies = [
"fish-gettext-mo-file-parser",
"fish-printf",
"fish-tempfile",
"fish-wchar",
"fish-widecharwidth",
"libc",
"lru",
@@ -256,6 +257,14 @@ dependencies = [
"rand 0.9.2",
]
[[package]]
name = "fish-wchar"
version = "0.0.0"
dependencies = [
"fish-common",
"widestring",
]
[[package]]
name = "fish-widecharwidth"
version = "0.0.0"

View File

@@ -24,6 +24,7 @@ fish-gettext-maps = { path = "crates/gettext-maps" }
fish-gettext-mo-file-parser = { path = "crates/gettext-mo-file-parser" }
fish-printf = { path = "crates/printf", features = ["widestring"] }
fish-tempfile = { path = "crates/tempfile" }
fish-wchar = { path = "crates/wchar" }
fish-widecharwidth = { path = "crates/widecharwidth" }
libc = "0.2.177"
# lru pulls in hashbrown by default, which uses a faster (though less DoS resistant) hashing algo.
@@ -96,6 +97,7 @@ fish-gettext = { workspace = true, optional = true }
fish-gettext-extraction = { workspace = true, optional = true }
fish-printf.workspace = true
fish-tempfile.workspace = true
fish-wchar.workspace = true
fish-widecharwidth.workspace = true
libc.workspace = true
lru.workspace = true

14
crates/wchar/Cargo.toml Normal file
View File

@@ -0,0 +1,14 @@
[package]
name = "fish-wchar"
edition.workspace = true
rust-version.workspace = true
version = "0.0.0"
repository.workspace = true
license.workspace = true
[dependencies]
fish-common.workspace = true
widestring.workspace = true
[lints]
workspace = true

View File

@@ -1,11 +1,51 @@
use std::{iter, slice};
//! Support for wide strings.
//!
//! There are two wide string types that are commonly used:
//! - wstr: a string slice without a nul terminator. Like `&str` but wide chars.
//! - WString: an owning string without a nul terminator. Like `String` but wide chars.
use crate::{
L,
wchar::{WString, wstr},
};
use fish_common::subslice_position;
use widestring::utfstr::CharsUtf32;
use fish_common::{ENCODE_DIRECT_BASE, ENCODE_DIRECT_END, subslice_position};
use std::{iter, slice};
pub use widestring::{Utf32Str as wstr, Utf32String as WString, utfstr::CharsUtf32};
pub mod prelude {
pub use crate::{IntoCharIter, L, ToWString, WExt, WString, wstr};
}
/// Builds a `wstr` string slice from a string literal, in the spirit of the C++ `L"..."` prefix.
///
/// The expression this expands to is of type `wstr`, and the resulting slice is NOT
/// nul-terminated.
///
/// NOTE: the expansion refers to `widestring::utf32str!` directly (not through `$crate`), so
/// the crate invoking this macro must be able to resolve `widestring` itself.
#[macro_export]
macro_rules! L {
    ($literal:expr) => {
        widestring::utf32str!($literal)
    };
}
/// Wrap a raw byte in a single UTF-32 character by offsetting it from `ENCODE_DIRECT_BASE`.
///
/// Builtins such as `echo` let users assemble arbitrary byte sequences through escape
/// sequences, and the terminal later interprets those bytes as e.g. UTF-8. Treating each byte as
/// its own codepoint and encoding that as a UTF-32 character would print several characters
/// where a single UTF-8 character was intended, so the byte is instead stored as
/// `ENCODE_DIRECT_BASE + byte` and can be recovered later.
///
/// See <https://github.com/fish-shell/fish-shell/issues/1894>.
///
/// # Panics
///
/// Panics if `ENCODE_DIRECT_BASE + byte` is not a valid `char`.
pub fn encode_byte_to_char(byte: u8) -> char {
    // Do the arithmetic on plain code point values; `char` itself has no addition.
    let codepoint = u32::from(ENCODE_DIRECT_BASE) + u32::from(byte);
    char::from_u32(codepoint).expect("private-use codepoint should be valid char")
}
/// Recover the raw byte stored in a UTF-32 character, if there is one.
///
/// Returns `Some(byte)` when `c` lies in the half-open range
/// `ENCODE_DIRECT_BASE..ENCODE_DIRECT_END`, where `byte` is the distance of `c` from
/// `ENCODE_DIRECT_BASE`. Returns `None` for every other character.
///
/// # Panics
///
/// Panics if that distance does not fit in a `u8`. Presumably the range spans at most 256 code
/// points so this cannot happen; confirm against the constants' definitions.
pub fn decode_byte_from_char(c: char) -> Option<u8> {
    // Anything outside the direct-encoding range does not carry a byte.
    if !(ENCODE_DIRECT_BASE..ENCODE_DIRECT_END).contains(&c) {
        return None;
    }
    let offset = u32::from(c) - u32::from(ENCODE_DIRECT_BASE);
    Some(u8::try_from(offset).unwrap())
}
/// Helpers to convert things to widestring.
/// This is like std::string::ToString.
@@ -303,7 +343,6 @@ fn as_char_slice(&self) -> &[char] {
#[cfg(test)]
mod tests {
use super::*;
use crate::wchar::L;
#[test]
fn test_to_wstring() {

View File

@@ -6,9 +6,8 @@
use crate::flogf;
use crate::io::IoChain;
use crate::parser::Parser;
use crate::wchar::{L, WString, wstr};
use crate::wchar_ext::WExt;
use crate::wutil::{FileId, INVALID_FILE_ID, file_id_for_path};
use fish_wchar::{L, WExt, WString, wstr};
use lru::LruCache;
use rust_embed::RustEmbed;
use std::collections::{HashMap, HashSet};

View File

@@ -4,7 +4,8 @@
use crate::io::IoStreams;
use crate::parser::Parser;
use crate::proc::{Job, add_disowned_job};
use crate::{builtins::shared::HelpOnlyCmdOpts, wchar::wstr, wutil::wgettext_fmt};
use crate::{builtins::shared::HelpOnlyCmdOpts, wutil::wgettext_fmt};
use fish_wchar::wstr;
use libc::SIGCONT;
/// Helper for builtin_disown.

View File

@@ -1,7 +1,7 @@
//! Implementation of the echo builtin.
use super::prelude::*;
use crate::wchar::encode_byte_to_char;
use fish_wchar::encode_byte_to_char;
#[derive(Debug, Clone, Copy)]
struct Options {

View File

@@ -6,13 +6,10 @@
use crate::job_group::{JobId, MaybeJobId};
use crate::parser::Parser;
use crate::proc::{Job, clock_ticks_to_seconds, have_proc_stat, proc_get_jiffies};
use crate::wchar_ext::WExt;
use crate::wgetopt::{ArgType, WGetopter, WOption, wopt};
use crate::wutil::wgettext;
use crate::{
wchar::{L, WString, wstr},
wutil::{fish_wcstoi, wgettext_fmt},
};
use crate::wutil::{fish_wcstoi, wgettext_fmt};
use fish_wchar::{L, WExt, WString, wstr};
use std::num::NonZeroU32;
/// Print modes for the jobs builtin.

View File

@@ -50,7 +50,6 @@
use super::prelude::*;
use crate::locale::{Locale, get_numeric_locale};
use crate::wchar::{decode_byte_from_char, encode_byte_to_char};
use crate::wutil::{
errors::Error,
wcstod::wcstod,
@@ -58,6 +57,7 @@
wstr_offset_in,
};
use fish_printf::{ToArg, sprintf_locale};
use fish_wchar::{decode_byte_from_char, encode_byte_to_char};
/// Return true if `c` is an octal digit.
fn is_octal_digit(c: char) -> bool {

View File

@@ -5,9 +5,9 @@
use crate::parse_util::parse_util_argument_is_help;
use crate::parser::{BlockType, LoopStatus};
use crate::proc::{Pid, ProcStatus, no_exec};
use crate::wchar::L;
use crate::{builtins::*, wutil};
use errno::errno;
use fish_wchar::L;
use std::fs::File;
use std::io::{BufRead, BufReader, Read};

View File

@@ -12,12 +12,13 @@
use crate::parse_util::parse_util_escape_string_with_quote;
use crate::terminal::Output;
use crate::termsize::Termsize;
use crate::wchar::{decode_byte_from_char, encode_byte_to_char, prelude::*};
use crate::wchar::prelude::*;
use crate::wcstringutil::wcs2bytes_callback;
use crate::wildcard::{ANY_CHAR, ANY_STRING, ANY_STRING_RECURSIVE};
use crate::wutil::fish_iswalnum;
use bitflags::bitflags;
use fish_common::{ENCODE_DIRECT_END, char_offset, subslice_position};
use fish_wchar::{decode_byte_from_char, encode_byte_to_char};
use libc::{SIG_IGN, SIGTTOU, STDIN_FILENO};
use once_cell::sync::OnceCell;
use std::cell::{Cell, RefCell};
@@ -1995,16 +1996,15 @@ macro_rules! env_stack_set_from_env {
#[cfg(test)]
mod tests {
use super::{
ENCODE_DIRECT_END, ESCAPE_TEST_CHAR, EscapeFlags, EscapeStringStyle, ScopeGuard,
ScopedCell, ScopedRefCell, UnescapeStringStyle, bytes2wcstring, escape_string,
truncate_at_nul, unescape_string, wcs2bytes,
};
use crate::{
util::get_seeded_rng,
wchar::{L, WString, wstr},
};
use crate::util::get_seeded_rng;
use fish_common::ENCODE_DIRECT_BASE;
use fish_wchar::{L, WString, wstr};
use rand::{Rng, RngCore};
#[test]

View File

@@ -10,18 +10,6 @@
time::{Duration, Instant},
};
use crate::{
ast::unescape_keyword,
autoload::AutoloadResult,
common::charptr2wcstring,
reader::{get_quote, is_backslashed},
util::wcsfilecmp,
wcstringutil::{string_suffixes_string_case_insensitive, strip_executable_suffix},
wutil::{LocalizableString, localizable_string},
};
use bitflags::bitflags;
use once_cell::sync::Lazy;
use crate::{
abbrs::with_abbrs,
autoload::Autoload,
@@ -49,7 +37,6 @@
path::{path_get_path, path_try_get_path},
tokenizer::{Tok, TokFlags, TokenType, Tokenizer, variable_assignment_equals_pos},
wchar::prelude::*,
wchar_ext::WExt,
wcstringutil::{
StringFuzzyMatch, string_fuzzy_match_string, string_prefixes_string,
string_prefixes_string_case_insensitive,
@@ -57,6 +44,18 @@
wildcard::{wildcard_complete, wildcard_has, wildcard_match},
wutil::wrealpath,
};
use crate::{
ast::unescape_keyword,
autoload::AutoloadResult,
common::charptr2wcstring,
reader::{get_quote, is_backslashed},
util::wcsfilecmp,
wcstringutil::{string_suffixes_string_case_insensitive, strip_executable_suffix},
wutil::{LocalizableString, localizable_string},
};
use bitflags::bitflags;
use fish_wchar::WExt;
use once_cell::sync::Lazy;
// Completion description strings, mostly for different types of files, such as sockets, block
// devices, etc.

2
src/env/var.rs vendored
View File

@@ -1,7 +1,7 @@
use crate::signal::Signal;
use crate::wchar::{L, WString, wstr};
use crate::wcstringutil::join_strings;
use bitflags::bitflags;
use fish_wchar::{L, WString, wstr};
use libc::c_int;
use std::collections::HashMap;
use std::sync::Arc;

View File

@@ -5,9 +5,10 @@
use crate::flog::{flog, flogf};
use crate::fs::{PotentialUpdate, lock_and_load, rewrite_via_temporary_file};
use crate::path::path_get_config;
use crate::wchar::{decode_byte_from_char, prelude::*};
use crate::wchar::prelude::*;
use crate::wcstringutil::{LineIterator, join_strings};
use crate::wutil::{FileId, INVALID_FILE_ID, file_id_for_file, file_id_for_path_narrow, wrealpath};
use fish_wchar::decode_byte_from_char;
use std::collections::HashSet;
use std::collections::hash_map::Entry;
use std::ffi::CString;

View File

@@ -44,9 +44,9 @@
use crate::trace::trace_if_enabled_with_args;
use crate::tty_handoff::TtyHandoff;
use crate::wchar::prelude::*;
use crate::wchar_ext::ToWString;
use crate::wutil::{fish_wcstol, perror};
use errno::{errno, set_errno};
use fish_wchar::ToWString;
use libc::{
EACCES, ENOENT, ENOEXEC, ENOTDIR, EPIPE, EXIT_FAILURE, EXIT_SUCCESS, STDERR_FILENO,
STDIN_FILENO, STDOUT_FILENO,

View File

@@ -12,8 +12,6 @@
use crate::path::path_apply_working_directory;
use crate::redirection::RedirectionMode;
use crate::threads::assert_is_background_thread;
use crate::wchar::{L, WString, wstr};
use crate::wchar_ext::WExt;
use crate::wcstringutil::{
string_prefixes_string, string_prefixes_string_case_insensitive, string_suffixes_string,
};
@@ -21,6 +19,7 @@
use crate::wutil::{
dir_iter::DirIter, fish_wcstoi, normalize_path, waccess, wbasename, wdirname, wstat,
};
use fish_wchar::{L, WExt, WString, wstr};
use libc::PATH_MAX;
use std::collections::{HashMap, HashSet};
use std::os::fd::RawFd;

View File

@@ -30,9 +30,8 @@
use crate::text_face::{SpecifiedTextFace, TextFace, UnderlineStyle, parse_text_face};
use crate::threads::assert_is_background_thread;
use crate::tokenizer::{PipeOrRedir, variable_assignment_equals_pos};
use crate::wchar::{L, WString, wstr};
use crate::wchar_ext::WExt;
use crate::wcstringutil::string_prefixes_string;
use fish_wchar::{L, WExt, WString, wstr};
use std::collections::HashMap;
use std::collections::hash_map::Entry;

View File

@@ -16,8 +16,9 @@
maybe_set_kitty_keyboard_capability, maybe_set_scroll_content_up_capability,
};
use crate::universal_notifier::default_notifier;
use crate::wchar::{encode_byte_to_char, prelude::*};
use crate::wchar::prelude::*;
use crate::wutil::{fish_is_pua, fish_wcstol};
use fish_wchar::encode_byte_to_char;
use std::cell::{RefCell, RefMut};
use std::collections::VecDeque;
use std::mem::MaybeUninit;

View File

@@ -6,9 +6,10 @@
flog::FloggableDebug,
future_feature_flags::{FeatureFlag, test as feature_test},
reader::safe_get_terminal_mode_on_startup,
wchar::{decode_byte_from_char, prelude::*},
wchar::prelude::*,
wutil::fish_wcstoul,
};
use fish_wchar::decode_byte_from_char;
pub(crate) const Backspace: char = '\u{F500}'; // below ENCODE_DIRECT_BASE
pub(crate) const Delete: char = '\u{F501}';

View File

@@ -75,7 +75,6 @@
pub mod util;
pub mod wait_handle;
pub mod wchar;
pub mod wchar_ext;
pub mod wcstringutil;
pub mod wgetopt;
pub mod wildcard;

View File

@@ -49,8 +49,8 @@
use crate::tokenizer::{PipeOrRedir, TokenType, variable_assignment_equals_pos};
use crate::trace::{trace_if_enabled, trace_if_enabled_with_args};
use crate::wchar::prelude::*;
use crate::wchar_ext::WExt;
use crate::wildcard::wildcard_match;
use fish_wchar::WExt;
use libc::{ENOTDIR, EXIT_SUCCESS, STDERR_FILENO, STDOUT_FILENO, c_int};
use std::io::ErrorKind;
use std::rc::Rc;

View File

@@ -32,9 +32,9 @@
use crate::util::get_time;
use crate::wait_handle::WaitHandleStore;
use crate::wchar::prelude::*;
use crate::wchar_ext::WExt;
use crate::wutil::perror;
use crate::{flog, flogf, function};
use fish_wchar::WExt;
use libc::c_int;
#[cfg(not(target_has_atomic = "64"))]
use portable_atomic::AtomicU64;

View File

@@ -20,9 +20,9 @@
use crate::topic_monitor::{GenerationsList, Topic, topic_monitor_principal};
use crate::wait_handle::{InternalJobId, WaitHandle, WaitHandleRef, WaitHandleStore};
use crate::wchar::prelude::*;
use crate::wchar_ext::ToWString;
use crate::wutil::{wbasename, wperror};
use cfg_if::cfg_if;
use fish_wchar::ToWString;
use libc::{
_SC_CLK_TCK, EXIT_SUCCESS, SIG_DFL, SIG_IGN, SIGABRT, SIGBUS, SIGCONT, SIGFPE, SIGHUP, SIGILL,
SIGINT, SIGKILL, SIGPIPE, SIGQUIT, SIGSEGV, SIGSYS, SIGTTOU, WCONTINUED, WEXITSTATUS,

View File

@@ -23,8 +23,8 @@
use crate::fd_readable_set::{FdReadableSet, Timeout};
use crate::fds::{self, AutoClosePipes, make_fd_nonblocking};
use crate::flog::{FloggableDebug, flog};
use crate::wchar::WString;
use crate::wutil::perror;
use fish_wchar::WString;
use nix::errno::Errno;
use nix::unistd;
use std::cell::Cell;

View File

@@ -17,8 +17,8 @@
use crate::terminal::{Output, Outputter};
use crate::threads::assert_is_main_thread;
use crate::wchar::prelude::*;
use crate::wchar_ext::ToWString;
use crate::wutil::{perror, wcstoi};
use fish_wchar::ToWString;
use libc::{EINVAL, ENOTTY, EPERM, STDIN_FILENO, WNOHANG};
use once_cell::sync::OnceCell;
use std::mem::MaybeUninit;

View File

@@ -68,10 +68,8 @@ fn notification_fd_became_readable(&self, fd: RawFd) -> bool {
#[cfg(test)]
mod tests {
use super::InotifyNotifier;
use crate::{
universal_notifier::{UniversalNotifier, test_helpers::test_notifiers},
wchar::WString,
};
use crate::universal_notifier::{UniversalNotifier, test_helpers::test_notifiers};
use fish_wchar::WString;
#[test]
fn test_inotify_notifiers() {

View File

@@ -181,10 +181,8 @@ fn notification_fd_became_readable(&self, fd: RawFd) -> bool {
#[cfg(test)]
mod tests {
use super::KqueueNotifier;
use crate::{
universal_notifier::{UniversalNotifier, test_helpers::test_notifiers},
wchar::WString,
};
use crate::universal_notifier::{UniversalNotifier, test_helpers::test_notifiers};
use fish_wchar::WString;
#[test]
fn test_kqueue_notifiers() {

View File

@@ -1,58 +1,7 @@
//! Support for wide strings.
//!
//! There are two wide string types that are commonly used:
//! - wstr: a string slice without a nul terminator. Like `&str` but wide chars.
//! - WString: an owning string without a nul terminator. Like `String` but wide chars.
use fish_common::{ENCODE_DIRECT_BASE, ENCODE_DIRECT_END};
pub use widestring::{Utf32Str as wstr, Utf32String as WString};
/// Pull in our extensions.
pub use crate::wchar_ext::IntoCharIter;
pub mod prelude {
pub use crate::{
wchar::{IntoCharIter, L, WString, wstr},
wchar_ext::{ToWString, WExt},
wutil::{
LocalizableString, eprintf, localizable_consts, localizable_string, sprintf, wgettext,
wgettext_fmt,
},
pub use crate::wutil::{
LocalizableString, eprintf, localizable_consts, localizable_string, sprintf, wgettext,
wgettext_fmt,
};
}
/// Creates a wstr string slice, like the "L" prefix of C++.
/// The result is of type wstr.
/// It is NOT nul-terminated.
#[macro_export]
macro_rules! L {
($string:expr) => {
widestring::utf32str!($string)
};
}
pub use L;
/// Encode a literal byte in a UTF-32 character. This is required for e.g. the echo builtin, whose
/// escape sequences can be used to construct raw byte sequences which are then interpreted as e.g.
/// UTF-8 by the terminal. If we were to interpret each of those bytes as a codepoint and encode it
/// as a UTF-32 character, printing them would result in several characters instead of one UTF-8
/// character.
///
/// See <https://github.com/fish-shell/fish-shell/issues/1894>.
pub fn encode_byte_to_char(byte: u8) -> char {
char::from_u32(u32::from(ENCODE_DIRECT_BASE) + u32::from(byte))
.expect("private-use codepoint should be valid char")
}
/// Decode a literal byte from a UTF-32 character.
pub fn decode_byte_from_char(c: char) -> Option<u8> {
if c >= ENCODE_DIRECT_BASE && c < ENCODE_DIRECT_END {
Some(
(u32::from(c) - u32::from(ENCODE_DIRECT_BASE))
.try_into()
.unwrap(),
)
} else {
None
}
pub use fish_wchar::prelude::*;
}

View File

@@ -2,7 +2,8 @@
use crate::common::{get_ellipsis_char, get_ellipsis_str};
use crate::fallback::{fish_wcwidth, wcscasecmp, wcscasecmp_fuzzy};
use crate::wchar::{decode_byte_from_char, prelude::*};
use crate::wchar::prelude::*;
use fish_wchar::decode_byte_from_char;
/// Return the number of newlines in a string.
pub fn count_newlines(s: &wstr) -> usize {

View File

@@ -1,8 +1,8 @@
use super::wopendir;
use crate::common::{bytes2wcstring, wcs2zstring};
use crate::wchar::{WString, wstr};
use crate::wutil::DevInode;
use cfg_if::cfg_if;
use fish_wchar::{WString, wstr};
use libc::{
DT_BLK, DT_CHR, DT_DIR, DT_FIFO, DT_LNK, DT_REG, DT_SOCK, EACCES, EIO, ELOOP, ENAMETOOLONG,
ENODEV, ENOENT, ENOTDIR, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG,
@@ -329,6 +329,7 @@ fn next(&mut self) -> Option<Self::Item> {
mod tests {
use super::{DirEntryType, DirIter};
use crate::wchar::prelude::*;
use fish_wchar::L;
use nix::sys::stat::Mode;
use std::fs::File;
use std::path::PathBuf;
@@ -336,14 +337,12 @@ mod tests {
#[test]
fn test_dir_iter_bad_path() {
// Regression test: DirIter does not crash given a bad path.
use crate::wchar::L;
let dir = DirIter::new(L!("/a/bogus/path/which/does/notexist"));
assert!(dir.is_err());
}
#[test]
fn test_no_dots() {
use crate::wchar::L;
// DirIter does not return . or .. by default.
let dir = DirIter::new(L!(".")).expect("Should be able to open CWD");
for entry in dir {
@@ -355,7 +354,6 @@ fn test_no_dots() {
#[test]
fn test_dots() {
use crate::wchar::L;
// DirIter returns . or .. if you ask nicely.
let dir = DirIter::new_with_dots(L!(".")).expect("Should be able to open CWD");
let mut seen_dot = false;
@@ -375,7 +373,6 @@ fn test_dots() {
#[test]
#[allow(clippy::if_same_then_else)]
fn test_dir_iter() {
use crate::wchar::L;
use libc::{EACCES, ENOENT};
let baditer = DirIter::new(L!("/definitely/not/a/valid/directory/for/sure"));

View File

@@ -11,11 +11,10 @@
use crate::common::{
bytes2wcstring, fish_reserved_codepoint, wcs2bytes, wcs2osstring, wcs2zstring,
};
use crate::wchar::{L, WString, wstr};
use crate::wchar_ext::WExt;
use crate::wcstringutil::{join_strings, wcs2bytes_callback};
use crate::{fallback, flog};
use errno::errno;
use fish_wchar::{L, WExt, WString, wstr};
pub use gettext::{
LocalizableString, localizable_consts, localizable_string, wgettext, wgettext_fmt,
};
@@ -484,7 +483,7 @@ mod tests {
mod test_path_normalize_for_cd {
use super::super::path_normalize_for_cd;
use crate::wchar::L;
use fish_wchar::L;
#[test]
fn relative_path() {
@@ -694,7 +693,7 @@ fn test_wwrite_to_fd() {
#[test]
fn test_wstr_offset_in() {
use crate::wchar::L;
use fish_wchar::L;
let base = L!("hello world");
assert_eq!(wstr_offset_in(&base[6..], base), 6);
assert_eq!(wstr_offset_in(&base[0..], base), 0);

View File

@@ -1,6 +1,6 @@
use super::errors::Error;
use super::hex_float;
use crate::wchar::IntoCharIter;
use fish_wchar::IntoCharIter;
// Parse a decimal float from a sequence of characters.
// Return the parsed float, and (on success) the number of characters consumed.