diff --git a/Cargo.lock b/Cargo.lock index 6b84bf11e..a9852c068 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -184,7 +184,6 @@ dependencies = [ "serial_test", "terminfo", "unix_path", - "widestring", "xterm-color", ] @@ -208,9 +207,9 @@ name = "fish-common" version = "0.0.0" dependencies = [ "bitflags", + "fish-wchar", "libc", "nix", - "widestring", ] [[package]] @@ -280,7 +279,6 @@ dependencies = [ name = "fish-wchar" version = "0.0.0" dependencies = [ - "fish-common", "widestring", ] diff --git a/Cargo.toml b/Cargo.toml index 0f18405ca..e461f1db7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -111,7 +111,6 @@ pcre2.workspace = true rand.workspace = true terminfo.workspace = true xterm-color.workspace = true -widestring.workspace = true [target.'cfg(not(target_has_atomic = "64"))'.dependencies] portable-atomic.workspace = true @@ -187,7 +186,7 @@ rustdoc.private_intra_doc_links = "allow" assigning_clones = "warn" implicit_clone = "warn" cloned_instead_of_copied = "warn" -len_without_is_empty = "allow" # we're not a library crate +len_without_is_empty = "allow" # we're not a library crate let_and_return = "allow" manual_range_contains = "allow" map_unwrap_or = "warn" diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml index 351bb8734..479015893 100644 --- a/crates/common/Cargo.toml +++ b/crates/common/Cargo.toml @@ -8,9 +8,9 @@ license.workspace = true [dependencies] bitflags.workspace = true +fish-wchar.workspace = true libc.workspace = true nix.workspace = true -widestring.workspace = true [lints] workspace = true diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs index fa066e4a1..c84aaf48a 100644 --- a/crates/common/src/lib.rs +++ b/crates/common/src/lib.rs @@ -1,4 +1,5 @@ use bitflags::bitflags; +use fish_wchar::{L, char_offset, wstr}; use libc::{SIG_IGN, SIGTTOU, STDIN_FILENO}; use std::cell::{Cell, RefCell}; use std::io::Read; @@ -8,7 +9,6 @@ use std::sync::OnceLock; use std::sync::atomic::{AtomicI32, AtomicU32, Ordering}; use std::{env, mem, time}; -use widestring::Utf32Str as wstr; pub const PACKAGE_NAME: &str = env!("CARGO_PKG_NAME"); @@ -40,21 +40,6 @@ // Unicode range for our needs. const _: () = assert!(WILDCARD_RESERVED_END <= RESERVED_CHAR_END); -// These are in the Unicode private-use range. We really shouldn't use this -// range but have little choice in the matter given how our lexer/parser works. -// We can't use non-characters for these two ranges because there are only 66 of -// them and we need at least 256 + 64. -// -// If sizeof(wchar_t))==4 we could avoid using private-use chars; however, that -// would result in fish having different behavior on machines with 16 versus 32 -// bit wchar_t. It's better that fish behave the same on both types of systems. -// -// Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know -// of at least one use of a codepoint in that range: the Apple symbol (0xF8FF) -// on Mac OS X. See http://www.unicode.org/faq/private_use.html. -pub const ENCODE_DIRECT_BASE: char = '\u{F600}'; -pub const ENCODE_DIRECT_END: char = char_offset(ENCODE_DIRECT_BASE, 256); - #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum EscapeStringStyle { Script(EscapeFlags), @@ -78,7 +63,7 @@ fn try_from(s: &wstr) -> Result { s if s == "var" => Ok(Var), s if s == "url" => Ok(Url), s if s == "regex" => Ok(Regex), - _ => Err(widestring::utf32str!("Invalid escape style")), + _ => Err(L!("Invalid escape style")), } } } @@ -124,7 +109,7 @@ fn try_from(s: &wstr) -> Result { s if s == "script" => Ok(Self::default()), s if s == "var" => Ok(Var), s if s == "url" => Ok(Url), - _ => Err(widestring::utf32str!("Invalid escape style")), + _ => Err(L!("Invalid escape style")), } } } @@ -142,20 +127,6 @@ pub struct UnescapeFlags: u32 { } } -pub const fn char_offset(base: char, offset: u32) -> char { - match char::from_u32(base as u32 + offset) { - Some(c) => c, - None => panic!("not a valid char"), - } -} - -pub fn subslice_position(a: &[T], b: &[T]) -> Option { - if b.is_empty() { - return Some(0); - } - a.windows(b.len()).position(|aw| aw == b) -} - /// This function attempts to distinguish between a console session (at the actual login vty) and a /// session within a terminal emulator inside a desktop environment or over SSH. Unfortunately /// there are few values of $TERM that we can interpret as being exclusively console sessions, and @@ -196,7 +167,7 @@ pub fn get_ellipsis_char() -> char { /// The character or string to use where text has been truncated (ellipsis if possible, otherwise /// ...) pub fn get_ellipsis_str() -> &'static wstr { - widestring::utf32str!("\u{2026}") + L!("\u{2026}") } // Only pub for `src/common.rs` @@ -570,7 +541,7 @@ pub const fn assert_sync() {} /// # Examples /// /// ``` -/// use widestring::{utf32str as L,Utf32Str as wstr}; +/// use fish_wchar::{L, wstr}; /// use fish_common::assert_sorted_by_name; /// /// const COLORS: &[(&wstr, u32)] = &[ @@ -586,7 +557,7 @@ pub const fn assert_sync() {} /// While this example would fail to compile: /// /// ```compile_fail -/// use widestring::{utf32str as L,Utf32Str as wstr}; +/// use fish_wchar::{L, wstr}; /// use fish_common::assert_sorted_by_name; /// /// const COLORS: &[(&wstr, u32)] = &[ @@ -728,7 +699,6 @@ fn test_scope_guard() { #[test] fn test_truncate_at_nul() { - use widestring::utf32str as L; assert_eq!(truncate_at_nul(L!("abc\0def")), L!("abc")); assert_eq!(truncate_at_nul(L!("abc")), L!("abc")); assert_eq!(truncate_at_nul(L!("\0abc")), L!("")); diff --git a/crates/wchar/Cargo.toml b/crates/wchar/Cargo.toml index 1525719fe..5633925cd 100644 --- a/crates/wchar/Cargo.toml +++ b/crates/wchar/Cargo.toml @@ -7,7 +7,6 @@ repository.workspace = true license.workspace = true [dependencies] -fish-common.workspace = true widestring.workspace = true [lints] diff --git a/crates/wchar/src/lib.rs b/crates/wchar/src/lib.rs index 23471efb1..629705833 100644 --- a/crates/wchar/src/lib.rs +++ b/crates/wchar/src/lib.rs @@ -6,7 +6,6 @@ pub mod word_char; -use fish_common::{ENCODE_DIRECT_BASE, ENCODE_DIRECT_END, subslice_position}; use std::{iter, slice}; pub use widestring::{Utf32Str as wstr, Utf32String as WString, utf32str as L, utfstr::CharsUtf32}; @@ -14,6 +13,21 @@ pub mod prelude { pub use crate::{IntoCharIter, L, ToWString, WExt, WString, wstr}; } +// These are in the Unicode private-use range. We really shouldn't use this +// range but have little choice in the matter given how our lexer/parser works. +// We can't use non-characters for these two ranges because there are only 66 of +// them and we need at least 256 + 64. +// +// If sizeof(wchar_t))==4 we could avoid using private-use chars; however, that +// would result in fish having different behavior on machines with 16 versus 32 +// bit wchar_t. It's better that fish behave the same on both types of systems. +// +// Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know +// of at least one use of a codepoint in that range: the Apple symbol (0xF8FF) +// on Mac OS X. See http://www.unicode.org/faq/private_use.html. +pub const ENCODE_DIRECT_BASE: char = '\u{F600}'; +pub const ENCODE_DIRECT_END: char = char_offset(ENCODE_DIRECT_BASE, 256); + /// Encode a literal byte in a UTF-32 character. This is required for e.g. the echo builtin, whose /// escape sequences can be used to construct raw byte sequences which are then interpreted as e.g. /// UTF-8 by the terminal. If we were to interpret each of those bytes as a codepoint and encode it @@ -39,6 +53,20 @@ pub fn decode_byte_from_char(c: char) -> Option { } } +pub const fn char_offset(base: char, offset: u32) -> char { + match char::from_u32(base as u32 + offset) { + Some(c) => c, + None => panic!("not a valid char"), + } +} + +pub fn subslice_position(a: &[T], b: &[T]) -> Option { + if b.is_empty() { + return Some(0); + } + a.windows(b.len()).position(|aw| aw == b) +} + /// Helpers to convert things to widestring. /// This is like std::string::ToString. pub trait ToWString { diff --git a/src/builtins/math.rs b/src/builtins/math.rs index 332d8c91a..8fc3f5a06 100644 --- a/src/builtins/math.rs +++ b/src/builtins/math.rs @@ -1,5 +1,5 @@ +use fish_wchar::L; use num_traits::pow; -use widestring::utf32str; use super::prelude::*; use crate::tinyexpr::te_interp; @@ -78,13 +78,13 @@ fn parse_cmd_opts( } 'm' => { let optarg = w.woptarg.unwrap(); - if optarg.eq(utf32str!("truncate")) || optarg.eq(utf32str!("trunc")) { + if optarg.eq(L!("truncate")) || optarg.eq(L!("trunc")) { opts.scale_mode = ScaleMode::Truncate; - } else if optarg.eq(utf32str!("round")) { + } else if optarg.eq(L!("round")) { opts.scale_mode = ScaleMode::Round; - } else if optarg.eq(utf32str!("floor")) { + } else if optarg.eq(L!("floor")) { opts.scale_mode = ScaleMode::Floor; - } else if optarg.eq(utf32str!("ceiling")) || optarg.eq(utf32str!("ceil")) { + } else if optarg.eq(L!("ceiling")) || optarg.eq(L!("ceil")) { opts.scale_mode = ScaleMode::Ceiling; } else { streams diff --git a/src/common.rs b/src/common.rs index dd100f400..9d153aa65 100644 --- a/src/common.rs +++ b/src/common.rs @@ -16,7 +16,9 @@ use crate::wildcard::{ANY_CHAR, ANY_STRING, ANY_STRING_RECURSIVE}; use crate::wutil::fish_iswalnum; use fish_fallback::fish_wcwidth; -use fish_wchar::{decode_byte_from_char, encode_byte_to_char}; +use fish_wchar::{ + ENCODE_DIRECT_END, decode_byte_from_char, encode_byte_to_char, subslice_position, +}; use std::env; use std::ffi::{CStr, CString, OsString}; use std::os::unix::prelude::*; @@ -1381,8 +1383,7 @@ mod tests { bytes2wcstring, escape_string, unescape_string, wcs2bytes, }; use crate::util::get_seeded_rng; - use fish_common::ENCODE_DIRECT_BASE; - use fish_wchar::{L, WString, wstr}; + use fish_wchar::{ENCODE_DIRECT_BASE, L, WString, wstr}; use rand::{Rng, RngCore}; #[test] diff --git a/src/env_universal_common.rs b/src/env_universal_common.rs index c199b622d..d14bfbf12 100644 --- a/src/env_universal_common.rs +++ b/src/env_universal_common.rs @@ -807,9 +807,8 @@ fn skip_spaces(mut s: &wstr) -> &wstr { #[cfg(test)] mod tests { - use fish_common::ENCODE_DIRECT_BASE; - use fish_common::char_offset; use fish_tempfile::TempDir; + use fish_wchar::{ENCODE_DIRECT_BASE, char_offset}; use crate::common::bytes2wcstring; use crate::common::wcs2osstring; diff --git a/src/expand.rs b/src/expand.rs index f120d9344..5b1c1a506 100644 --- a/src/expand.rs +++ b/src/expand.rs @@ -30,7 +30,8 @@ use crate::wildcard::{wildcard_expand_string, wildcard_has_internal}; use crate::wutil::{Options, normalize_path, wcstoi_partial}; use bitflags::bitflags; -use fish_common::{EXPAND_RESERVED_BASE, EXPAND_RESERVED_END, char_offset}; +use fish_common::{EXPAND_RESERVED_BASE, EXPAND_RESERVED_END}; +use fish_wchar::char_offset; use std::mem::MaybeUninit; bitflags! { diff --git a/src/history/yaml_backend.rs b/src/history/yaml_backend.rs index a8c89e488..face6a9a0 100644 --- a/src/history/yaml_backend.rs +++ b/src/history/yaml_backend.rs @@ -5,7 +5,7 @@ time::{Duration, SystemTime, UNIX_EPOCH}, }; -use fish_common::subslice_position; +use fish_wchar::subslice_position; use super::{HistoryItem, PersistenceMode}; use crate::{common::bytes2wcstring, flog::flog}; diff --git a/src/localization/gettext.rs b/src/localization/gettext.rs index c75f6c1ec..a2da55117 100644 --- a/src/localization/gettext.rs +++ b/src/localization/gettext.rs @@ -119,7 +119,7 @@ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { #[cfg(feature = "gettext-extract")] macro_rules! localizable_string { ($string:literal) => { - $crate::localization::LocalizableString::Static(widestring::utf32str!( + $crate::localization::LocalizableString::Static(fish_wchar::L!( fish_gettext_extraction::gettext_extract!($string) )) }; @@ -128,7 +128,7 @@ macro_rules! localizable_string { #[cfg(not(feature = "gettext-extract"))] macro_rules! localizable_string { ($string:literal) => { - $crate::localization::LocalizableString::Static(widestring::utf32str!($string)) + $crate::localization::LocalizableString::Static(fish_wchar::L!($string)) }; } pub use localizable_string; diff --git a/src/wcstringutil.rs b/src/wcstringutil.rs index 31ca4f343..f6f04b116 100644 --- a/src/wcstringutil.rs +++ b/src/wcstringutil.rs @@ -213,7 +213,7 @@ pub fn try_create( // Helper to lazily compute if case insensitive matches should use icase or smartcase. // Use icase if the input contains any uppercase characters, smartcase otherwise. #[inline(always)] - fn get_case_fold(s: &widestring::Utf32Str) -> CaseSensitivity { + fn get_case_fold(s: &wstr) -> CaseSensitivity { if s.chars().any(|c| c.is_uppercase()) { CaseSensitivity::Insensitive } else { diff --git a/src/wildcard.rs b/src/wildcard.rs index ca2b13cf0..c0219014b 100644 --- a/src/wildcard.rs +++ b/src/wildcard.rs @@ -1,6 +1,7 @@ // Enumeration of all wildcard types. -use fish_common::{WILDCARD_RESERVED_BASE, char_offset}; +use fish_common::WILDCARD_RESERVED_BASE; +use fish_wchar::char_offset; use libc::X_OK; use std::cmp::Ordering; use std::collections::HashSet; diff --git a/src/wutil/printf.rs b/src/wutil/printf.rs index 9ee890f6b..0fb6750c2 100644 --- a/src/wutil/printf.rs +++ b/src/wutil/printf.rs @@ -4,7 +4,7 @@ macro_rules! sprintf { // Allow a `&str` or `&Utf32Str` as a format, and return a `Utf32String`. ($fmt:expr $(, $arg:expr)* $(,)?) => { { - let mut target = widestring::Utf32String::new(); + let mut target = fish_wchar::WString::new(); $crate::sprintf!(=> &mut target, $fmt, $($arg),*); target }