From 1d159277c658ae17b0710257062888f48b87a4a0 Mon Sep 17 00:00:00 2001 From: Mahmoud Al-Qudsi Date: Sat, 1 Jun 2024 11:15:19 -0500 Subject: [PATCH] Move Block fields specific to certain block types to separate enum This has a few advantages: * We now statically assert that all fields used by a particular block type are correctly initialized (i.e. you can't assign the function name but forget to assign its arguments), * Conversely, we can match directly on `BlockData` and be guaranteed that the fields we want to access are initialized and present, * We reduce the number of assertions, effectively "unwrapping" only once based off the block type instead of each time we try to access a conditional field, * We reduce the size of the `Block` struct by coalescing fields that cannot co-exist, bringing it down from 104 bytes to 88 bytes. It would be nice to make all of `Block` itself an enum, but it currently requires `Copy` and we take advantage of that to copy it around everywhere. Putting these fields in `Block` directly would mean a lot more memory traffic just checking block types. 
--- src/parse_execution.rs | 17 +- src/parser.rs | 89 ++++-- src/wutil/mod.rs2 | 692 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 757 insertions(+), 41 deletions(-) create mode 100644 src/wutil/mod.rs2 diff --git a/src/parse_execution.rs b/src/parse_execution.rs index 06da5979d..a56b84f98 100644 --- a/src/parse_execution.rs +++ b/src/parse_execution.rs @@ -33,7 +33,7 @@ }; use crate::parse_tree::{NodeRef, ParsedSourceRef}; use crate::parse_util::parse_util_unescape_wildcards; -use crate::parser::{Block, BlockId, BlockType, LoopStatus, Parser, ProfileItem}; +use crate::parser::{Block, BlockData, BlockId, BlockType, LoopStatus, Parser, ProfileItem}; use crate::parser_keywords::parser_keywords_is_subcommand; use crate::path::{path_as_implicit_cd, path_try_get_path}; use crate::pointer::ConstPointer; @@ -441,19 +441,22 @@ fn infinite_recursive_statement_in_job_list<'b>( // not inside a block in that function call. If, in the future, the rules for what // block scopes are pushed on function invocation changes, then this check will break. let parser = ctx.parser(); - let parent = { + let parent; + let parent_fn_name = { match (parser.block_at_index(0), parser.block_at_index(1)) { - (Some(current), Some(parent)) - if current.typ() == BlockType::top && parent.is_function_call() => - { - parent + (Some(current), Some(p)) if current.typ() == BlockType::top => { + parent = p; + match &parent.data { + BlockData::Function { name, .. } => name, + _ => return None, + } } _ => return None, // Not within function call. } }; // Get the function name of the immediate block. - let forbidden_function_name = &parent.function_name; + let forbidden_function_name = parent_fn_name; // Get the first job in the job list. 
let jc = &jobs.get(0)?; diff --git a/src/parser.rs b/src/parser.rs index af223599e..0453f87f6 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -43,27 +43,37 @@ Arc, }; +#[derive(Default)] +pub enum BlockData { + #[default] + None, + Function { + /// Name of the function + name: WString, + /// Arguments passed to the function + args: Vec, + }, + Event(Rc), + Source { + /// The sourced file + file: Arc, + }, +} + /// block_t represents a block of commands. #[derive(Default)] pub struct Block { - /// If this is a function block, the function name. Otherwise empty. - pub function_name: WString, + /// [`BlockType`]-specific data. + /// + /// Ideally this would be coalesced into `BlockType` but we currently require that to implement + /// `Copy`, so now we have to awkwardly deal with a discriminant stored separately. + pub data: BlockData, /// List of event blocks. pub event_blocks: u64, - /// If this is a function block, the function args. Otherwise empty. - pub function_args: Vec, - - /// Name of file that created this block. - pub src_filename: Option, - - // If this is an event block, the event. Otherwise ignored. - pub event: Option>, - - // If this is a source block, the source'd file, interned. - // Otherwise nothing. - pub sourced_file: Option, + /// Name of the file that created this block + pub src_filename: Option>, /// Line number where this block was created. 
pub src_lineno: Option, @@ -133,18 +143,17 @@ pub fn if_block() -> Block { } pub fn event_block(event: Event) -> Block { let mut b = Block::new(BlockType::event); - b.event = Some(Rc::new(event)); + b.data = BlockData::Event(Rc::new(event)); b } pub fn function_block(name: WString, args: Vec, shadows: bool) -> Block { let mut b = Block::new(BlockType::function_call { shadows }); - b.function_name = name; - b.function_args = args; + b.data = BlockData::Function { name, args }; b } pub fn source_block(src: FilenameRef) -> Block { let mut b = Block::new(BlockType::source); - b.sourced_file = Some(src); + b.data = BlockData::Source { file: src }; b } pub fn for_block() -> Block { @@ -843,8 +852,10 @@ pub fn get_function_name(&self, level: i32) -> Option { .iter() .rev() .skip_while(|b| b.typ() != BlockType::breakpoint) - .find(|b| b.is_function_call()) - .map(|b| b.function_name.clone()); + .find_map(|b| match &b.data { + BlockData::Function { name, .. } => Some(name.clone()), + _ => None, + }); } self.blocks() @@ -863,7 +874,12 @@ pub fn get_function_name(&self, level: i32) -> Option { }) .skip_while(|(_, l)| *l != level) .inspect(|(b, _)| debug_assert!(b.is_function_call())) - .map(|(b, _)| b.function_name.clone()) + .map(|(b, _)| { + let BlockData::Function { name, .. } = &b.data else { + unreachable!() + }; + name.clone() + }) .next() } @@ -994,15 +1010,12 @@ pub fn current_filename(&self) -> Option { self.blocks() .iter() .rev() - .find_map(|b| { - if b.is_function_call() { - function::get_props(&b.function_name) - .and_then(|props| props.definition_file.clone()) - } else if b.typ() == BlockType::source { - b.sourced_file.clone() - } else { - None + .find_map(|b| match &b.data { + BlockData::Function { name, .. 
} => { + function::get_props(name).and_then(|props| props.definition_file.clone()) } + BlockData::Source { file } => Some(file.clone()), + _ => None, }) .or_else(|| self.libdata().current_filename.clone()) } @@ -1120,10 +1133,13 @@ fn append_block_description_to_stack_trace(parser: &Parser, b: &Block, trace: &m let mut print_call_site = false; match b.typ() { BlockType::function_call { .. } => { - trace.push_utfstr(&wgettext_fmt!("in function '%ls'", &b.function_name)); + let BlockData::Function { name, args, .. } = &b.data else { + unreachable!() + }; + trace.push_utfstr(&wgettext_fmt!("in function '%ls'", name)); // Print arguments on the same line. let mut args_str = WString::new(); - for arg in &b.function_args { + for arg in args { if !args_str.is_empty() { args_str.push(' '); } @@ -1150,7 +1166,10 @@ fn append_block_description_to_stack_trace(parser: &Parser, b: &Block, trace: &m print_call_site = true; } BlockType::source => { - let source_dest = b.sourced_file.as_ref().unwrap(); + let BlockData::Source { file, .. 
} = &b.data else { + unreachable!() + }; + let source_dest = file; trace.push_utfstr(&wgettext_fmt!( "from sourcing file %ls\n", &user_presentable_path(source_dest, parser.vars()) @@ -1158,8 +1177,10 @@ fn append_block_description_to_stack_trace(parser: &Parser, b: &Block, trace: &m print_call_site = true; } BlockType::event => { - let description = - event::get_desc(parser, b.event.as_ref().expect("Should have an event")); + let BlockData::Event(event) = &b.data else { + unreachable!() + }; + let description = event::get_desc(parser, event); trace.push_utfstr(&wgettext_fmt!("in event handler: %ls\n", &description)); print_call_site = true; } diff --git a/src/wutil/mod.rs2 b/src/wutil/mod.rs2 new file mode 100644 index 000000000..c35cf55c3 --- /dev/null +++ b/src/wutil/mod.rs2 @@ -0,0 +1,692 @@ +pub mod dir_iter; +pub mod encoding; +pub mod errors; +pub mod fileid; +pub mod gettext; +pub mod printf; +#[cfg(test)] +mod tests; +pub mod wcstod; +pub mod wcstoi; + +use crate::common::{ + cstr2wcstring, fish_reserved_codepoint, str2wcstring, wcs2osstring, wcs2string, wcs2zstring, +}; +use crate::fallback; +use crate::fds::AutoCloseFd; +use crate::flog::FLOGF; +use crate::wchar::{wstr, WString, L}; +use crate::wchar_ext::WExt; +use crate::wcstringutil::{join_strings, wcs2string_callback}; +use errno::errno; +pub use gettext::{wgettext, wgettext_fmt, wgettext_maybe_fmt, wgettext_str}; +pub use printf::sprintf; +use std::ffi::{CStr, OsStr}; +use std::fs::{self, canonicalize}; +use std::io::{self, Write}; +use std::os::unix::prelude::*; + +pub use wcstoi::*; + +/// Wide character version of opendir(). Note that opendir() is guaranteed to set close-on-exec by +/// POSIX (hooray). +pub fn wopendir(name: &wstr) -> *mut libc::DIR { + let tmp = wcs2zstring(name); + unsafe { libc::opendir(tmp.as_ptr()) } +} + +/// Wide character version of stat(). 
+pub fn wstat(file_name: &wstr) -> io::Result { + let tmp = wcs2osstring(file_name); + fs::metadata(tmp) +} + +/// Wide character version of lstat(). +pub fn lwstat(file_name: &wstr) -> io::Result { + let tmp = wcs2osstring(file_name); + fs::symlink_metadata(tmp) +} + +/// Wide character version of access(). +pub fn waccess(file_name: &wstr, mode: libc::c_int) -> libc::c_int { + let tmp = wcs2zstring(file_name); + unsafe { libc::access(tmp.as_ptr(), mode) } +} + +/// Wide character version of unlink(). +pub fn wunlink(file_name: &wstr) -> libc::c_int { + let tmp = wcs2zstring(file_name); + unsafe { libc::unlink(tmp.as_ptr()) } +} + +pub fn wperror(s: &wstr) { + let bytes = wcs2string(s); + // We can't guarantee the string is 100% Unicode (why?), so we don't use std::str::from_utf8() + let s = OsStr::from_bytes(&bytes).to_string_lossy(); + perror(&s) +} + +/// Port of the wide-string wperror from `src/wutil.cpp` but for rust `&str`. +pub fn perror(s: &str) { + let e = errno().0; + let mut stderr = std::io::stderr().lock(); + if !s.is_empty() { + let _ = write!(stderr, "{s}: "); + } + let slice = unsafe { + let msg = libc::strerror(e) as *const u8; + let len = libc::strlen(msg as *const _); + std::slice::from_raw_parts(msg, len) + }; + let _ = stderr.write_all(slice); + let _ = stderr.write_all(b"\n"); +} + +pub fn perror_io(s: &str, e: &io::Error) { + eprintln!("{}: {}", s, e); +} + +/// Wide character version of getcwd(). +pub fn wgetcwd() -> WString { + let mut cwd = [b'\0'; libc::PATH_MAX as usize]; + let res = unsafe { + libc::getcwd( + std::ptr::addr_of_mut!(cwd).cast(), + std::mem::size_of_val(&cwd), + ) + }; + if !res.is_null() { + return cstr2wcstring(&cwd); + } + + FLOGF!( + error, + "getcwd() failed with errno %d/%s", + errno::errno().0, + errno::errno().to_string() + ); + WString::new() +} + +/// Wide character version of readlink(). 
+pub fn wreadlink(file_name: &wstr) -> Option { + let md = lwstat(file_name).ok()?; + let bufsize = usize::try_from(md.len()).unwrap() + 1; + let mut target_buf = vec![b'\0'; bufsize]; + let tmp = wcs2zstring(file_name); + let nbytes = unsafe { + libc::readlink( + tmp.as_ptr(), + std::ptr::addr_of_mut!(target_buf[0]).cast(), + bufsize, + ) + }; + if nbytes == -1 { + perror("readlink"); + return None; + } + // The link might have been modified after our call to lstat. If the link now points to a path + // that's longer than the original one, we can't read everything in our buffer. Simply give + // up. We don't need to report an error since our only caller will already fall back to ENOENT. + let nbytes = usize::try_from(nbytes).unwrap(); + if nbytes == bufsize { + return None; + } + Some(str2wcstring(&target_buf[0..nbytes])) +} + +/// Wide character realpath. The last path component does not need to be valid. If an error occurs, +/// `wrealpath()` returns `None` +pub fn wrealpath(pathname: &wstr) -> Option { + if pathname.is_empty() { + return None; + } + + let mut narrow_path: Vec = wcs2zstring(pathname).into(); + + // Strip trailing slashes. This is treats "/a//" as equivalent to "/a" if /a is a non-directory. + while narrow_path.len() > 1 && narrow_path[narrow_path.len() - 1] == b'/' { + narrow_path.pop(); + } + + let narrow_res = canonicalize(OsStr::from_bytes(&narrow_path)); + + let real_path = if let Ok(result) = narrow_res { + result.into_os_string().into_vec() + } else { + // Check if everything up to the last path component is valid. + let pathsep_idx = narrow_path.iter().rposition(|&c| c == b'/'); + + if pathsep_idx == Some(0) { + // If the only pathsep is the first character then it's an absolute path with a + // single path component and thus doesn't need conversion. + narrow_path + } else { + // Only call realpath() on the portion up to the last component. 
+ let narrow_res = if let Some(pathsep_idx) = pathsep_idx { + // Only call realpath() on the portion up to the last component. + canonicalize(OsStr::from_bytes(&narrow_path[0..pathsep_idx])) + } else { + // If there is no "/", this is a file in $PWD, so give the realpath to that. + canonicalize(".") + }; + + let Ok(narrow_result) = narrow_res else { + return None; + }; + + let pathsep_idx = pathsep_idx.map_or(0, |idx| idx + 1); + + let mut real_path = narrow_result.into_os_string().into_vec(); + + // This test is to deal with cases such as /../../x => //x. + if real_path.len() > 1 { + real_path.push(b'/'); + } + + real_path.extend_from_slice(&narrow_path[pathsep_idx..]); + + real_path + } + }; + + Some(str2wcstring(&real_path)) +} + +/// Given an input path, "normalize" it: +/// 1. Collapse multiple /s into a single /, except maybe at the beginning. +/// 2. .. goes up a level. +/// 3. Remove /./ in the middle. +pub fn normalize_path(path: &wstr, allow_leading_double_slashes: bool) -> WString { + // Count the leading slashes. + let sep = '/'; + let mut leading_slashes: usize = 0; + for c in path.chars() { + if c != sep { + break; + } + leading_slashes += 1; + } + + let comps: Vec<&wstr> = path.split(sep).collect(); + let mut new_comps = Vec::new(); + for comp in comps { + if comp.is_empty() || comp == "." { + continue; + } else if comp != ".." { + new_comps.push(comp); + } else if !new_comps.is_empty() && new_comps.last().unwrap() != ".." { + // '..' with a real path component, drop that path component. + new_comps.pop(); + } else if leading_slashes == 0 { + // We underflowed the .. and are a relative (not absolute) path. + new_comps.push(L!("..")); + } + } + let mut result = join_strings(&new_comps, sep); + // If we don't allow leading double slashes, collapse them to 1 if there are any. + let mut numslashes = if leading_slashes > 0 { 1 } else { 0 }; + // If we do, prepend one or two leading slashes. + // Yes, three+ slashes are collapsed to one. (!) 
+ if allow_leading_double_slashes && leading_slashes == 2 { + numslashes = 2; + } + for _ in 0..numslashes { + result.insert(0, sep); + } + // Ensure ./ normalizes to . and not empty. + if result.is_empty() { + result.push('.'); + } + result +} + +#[test] +fn test_normalize_path() { + fn norm_path(path: &wstr) -> WString { + normalize_path(path, true) + } + assert_eq!(norm_path(L!("")), "."); + assert_eq!(norm_path(L!("..")), ".."); + assert_eq!(norm_path(L!("./")), "."); + assert_eq!(norm_path(L!("./.")), "."); + assert_eq!(norm_path(L!("/")), "/"); + assert_eq!(norm_path(L!("//")), "//"); + assert_eq!(norm_path(L!("///")), "/"); + assert_eq!(norm_path(L!("////")), "/"); + assert_eq!(norm_path(L!("/.///")), "/"); + assert_eq!(norm_path(L!(".//")), "."); + assert_eq!(norm_path(L!("/.//../")), "/"); + assert_eq!(norm_path(L!("////abc")), "/abc"); + assert_eq!(norm_path(L!("/abc")), "/abc"); + assert_eq!(norm_path(L!("/abc/")), "/abc"); + assert_eq!(norm_path(L!("/abc/..def/")), "/abc/..def"); + assert_eq!(norm_path(L!("//abc/../def/")), "//def"); + assert_eq!(norm_path(L!("abc/../abc/../abc/../abc")), "abc"); + assert_eq!(norm_path(L!("../../")), "../.."); + assert_eq!(norm_path(L!("foo/./bar")), "foo/bar"); + assert_eq!(norm_path(L!("foo/../")), "."); + assert_eq!(norm_path(L!("foo/../foo")), "foo"); + assert_eq!(norm_path(L!("foo/../foo/")), "foo"); + assert_eq!(norm_path(L!("foo/././bar/.././baz")), "foo/baz"); +} + +/// Given an input path `path` and a working directory `wd`, do a "normalizing join" in a way +/// appropriate for cd. That is, return effectively wd + path while resolving leading ../s from +/// path. The intent here is to allow 'cd' out of a directory which may no longer exist, without +/// allowing 'cd' into a directory that may not exist; see #5341. +pub fn path_normalize_for_cd(wd: &wstr, path: &wstr) -> WString { + use std::collections::VecDeque; + + // Fast paths. 
+ const SEP: char = '/'; + assert!( + wd.as_char_slice().first() == Some(&'/') && wd.as_char_slice().last() == Some(&'/'), + "Invalid working directory, it must start and end with /" + ); + if path.is_empty() { + return wd.to_owned(); + } else if path.as_char_slice().first() == Some(&SEP) { + return path.to_owned(); + } else if path.as_char_slice().first() != Some(&'.') { + return wd.to_owned() + path; + } + + // Split our strings by the sep. + let wd_comps: VecDeque<_> = wd.split(SEP).collect(); + let mut path_comps = path.split(SEP).peekable(); + + // Remove empty segments from wd_comps. + // In particular this removes the leading and trailing empties. + let mut wd_comps = wd_comps.into_iter().filter(|comp| !comp.is_empty()); + + // Erase leading . and .. components from path_comps, popping from wd_comps as we go. + while let Some(comp) = path_comps.peek() { + if comp.is_empty() || comp == "." { + path_comps.next(); + } else if comp == ".." && wd_comps.next_back().is_some() { + path_comps.next(); + } else { + break; + } + } + + // Append un-erased elements to wd_comps and join them, then prepend the leading /. 
+ let paths: Vec<_> = wd_comps.chain(path_comps).collect(); + let mut result = join_strings(&paths, SEP); + result.insert(0, '/'); + result +} + +#[cfg(test)] +mod path_tests { + use crate::wchar::L; + use super::path_normalize_for_cd; + + #[test] + fn test_relative_path() { + let wd = L!("/home/user/"); + let path = L!("projects"); + eprintln!("({}, {})", wd, path); + assert_eq!(path_normalize_for_cd(wd, path), L!("/home/user/projects")); + } + + #[test] + fn test_absolute_path() { + let wd = L!("/home/user/"); + let path = L!("/etc"); + eprintln!("({}, {})", wd, path); + assert_eq!(path_normalize_for_cd(wd, path), L!("/etc")); + } + + #[test] + fn test_parent_directory() { + let wd = L!("/home/user/projects/"); + let path = L!("../docs"); + eprintln!("({}, {})", wd, path); + assert_eq!(path_normalize_for_cd(wd, path), L!("/home/user/docs")); + } + + #[test] + fn test_current_directory() { + let wd = L!("/home/user/"); + let path = L!("./"); + eprintln!("({}, {})", wd, path); + assert_eq!(path_normalize_for_cd(wd, path), L!("/home/user")); + } + + #[test] + fn test_nested_parent_directory() { + let wd = L!("/home/user/projects/"); + let path = L!("../../"); + eprintln!("({}, {})", wd, path); + assert_eq!(path_normalize_for_cd(wd, path), L!("/home")); + } + + #[test] + fn test_complex_path() { + let wd = L!("/home/user/projects/"); + let path = L!("./../other/projects/./.././../docs"); + eprintln!("({}, {})", wd, path); + assert_eq!(path_normalize_for_cd(wd, path), L!("/home/user/other/projects/./.././../docs")); + } + + #[test] + fn test_root_directory() { + let wd = L!("/"); + let path = L!(".."); + eprintln!("({}, {})", wd, path); + assert_eq!(path_normalize_for_cd(wd, path), L!("/..")); + } + + #[test] + fn test_empty_path() { + let wd = L!("/home/user/"); + let path = L!(""); + eprintln!("({}, {})", wd, path); + assert_eq!(path_normalize_for_cd(wd, path), L!("/home/user/")); + } + + #[test] + fn test_trailing_slash() { + let wd = L!("/home/user/projects/"); + 
let path = L!("docs/"); + eprintln!("({}, {})", wd, path); + assert_eq!(path_normalize_for_cd(wd, path), L!("/home/user/projects/docs/")); + } +} + +/// Wide character version of dirname(). +pub fn wdirname(mut path: &wstr) -> &wstr { + // Do not use system-provided dirname (#7837). + // On Mac it's not thread safe, and will error for paths exceeding PATH_MAX. + // This follows OpenGroup dirname recipe. + + // 1: Double-slash stays. + if path == "//" { + return path; + } + + // 2: All slashes => return slash. + if !path.is_empty() && path.chars().all(|c| c == '/') { + return L!("/"); + } + + // 3: Trim trailing slashes. + while path.as_char_slice().last() == Some(&'/') { + path = path.slice_to(path.char_count() - 1); + } + + // 4: No slashes left => return period. + let Some(last_slash) = path.chars().rposition(|c| c == '/') else { + return L!("."); + }; + + // 5: Remove trailing non-slashes. + path = path.slice_to(last_slash + 1); + + // 6: Skip as permitted. + // 7: Remove trailing slashes again. + while path.as_char_slice().last() == Some(&'/') { + path = path.slice_to(path.char_count() - 1); + } + + // 8: Empty => return slash. + if path.is_empty() { + return L!("/"); + } + path +} + +/// Wide character version of basename(). +pub fn wbasename(mut path: &wstr) -> &wstr { + // This follows OpenGroup basename recipe. + // 1: empty => allowed to return ".". This is what system impls do. + if path.is_empty() { + return L!("."); + } + + // 2: Skip as permitted. + // 3: All slashes => return slash. + if !path.is_empty() && path.chars().all(|c| c == '/') { + return L!("/"); + } + + // 4: Remove trailing slashes. + while path.as_char_slice().last() == Some(&'/') { + path = path.slice_to(path.char_count() - 1); + } + + // 5: Remove up to and including last slash. + if let Some(last_slash) = path.chars().rposition(|c| c == '/') { + path = path.slice_from(last_slash + 1); + } + path +} + +/// Wide character version of mkdir. 
+pub fn wmkdir(name: &wstr, mode: libc::mode_t) -> libc::c_int { + let name_narrow = wcs2zstring(name); + unsafe { libc::mkdir(name_narrow.as_ptr(), mode) } +} + +/// Wide character version of rename. +pub fn wrename(old_name: &wstr, new_name: &wstr) -> libc::c_int { + let old_narrow = wcs2zstring(old_name); + let new_narrow = wcs2zstring(new_name); + unsafe { libc::rename(old_narrow.as_ptr(), new_narrow.as_ptr()) } +} + +pub fn write_to_fd(input: &[u8], fd: RawFd) -> nix::Result { + nix::unistd::write(fd, input) +} + +/// Write a wide string to a file descriptor. This avoids doing any additional allocation. +/// This does NOT retry on EINTR or EAGAIN, it simply returns. +/// Return -1 on error in which case errno will have been set. In this event, the number of bytes +/// actually written cannot be obtained. +pub fn wwrite_to_fd(input: &wstr, fd: RawFd) -> Option { + // Accumulate data in a local buffer. + let mut accum = [b'\0'; 512]; + let mut accumlen = 0; + let maxaccum: usize = std::mem::size_of_val(&accum); + + // Helper to perform a write to 'fd', looping as necessary. + // Return true on success, false on error. + let mut total_written = 0; + + fn do_write(fd: RawFd, total_written: &mut usize, mut buf: &[u8]) -> bool { + while !buf.is_empty() { + let Ok(amt) = write_to_fd(buf, fd) else { + return false; + }; + *total_written += amt; + assert!(amt <= buf.len(), "Wrote more than requested"); + buf = &buf[amt..]; + } + true + } + + // Helper to flush the accumulation buffer. + let flush_accum = |total_written: &mut usize, accum: &[u8], accumlen: &mut usize| { + if !do_write(fd, total_written, &accum[..*accumlen]) { + return false; + } + *accumlen = 0; + true + }; + + let mut success = wcs2string_callback(input, |buff: &[u8]| { + if buff.len() + accumlen > maxaccum { + // We have to flush. + if !flush_accum(&mut total_written, &accum, &mut accumlen) { + return false; + } + } + if buff.len() + accumlen <= maxaccum { + // Accumulate more. 
+ unsafe { + std::ptr::copy(&buff[0], &mut accum[accumlen], buff.len()); + } + accumlen += buff.len(); + true + } else { + // Too much data to even fit, just write it immediately. + do_write(fd, &mut total_written, buff) + } + }); + // Flush any remaining. + if success { + success = flush_accum(&mut total_written, &accum, &mut accumlen); + } + if success { + Some(total_written) + } else { + None + } +} + +const PUA1_START: char = '\u{E000}'; +const PUA1_END: char = '\u{F900}'; +// const PUA2_START: char = '\u{F0000}'; +// const PUA2_END: char = '\u{FFFFE}'; +// const PUA3_START: char = '\u{100000}'; +// const PUA3_END: char = '\u{10FFFE}'; + +/// Return one if the code point is in a Unicode private use area. +pub(crate) fn fish_is_pua(c: char) -> bool { + PUA1_START <= c && c < PUA1_END +} + +/// We need this because there are too many implementations that don't return the proper answer for +/// some code points. See issue #3050. +pub fn fish_iswalnum(c: char) -> bool { + !fish_reserved_codepoint(c) && !fish_is_pua(c) && c.is_alphanumeric() +} + +extern "C" { + fn iswgraph(wc: libc::wchar_t) -> libc::c_int; // Technically it's wint_t +} + +/// We need this because there are too many implementations that don't return the proper answer for +/// some code points. See issue #3050. +pub fn fish_iswgraph(c: char) -> bool { + !fish_reserved_codepoint(c) && (fish_is_pua(c) || unsafe { iswgraph(c as libc::wchar_t) } != 0) +} + +pub fn fish_wcswidth(s: &wstr) -> isize { + fallback::fish_wcswidth(s) +} + +/// Class for representing a file's inode. We use this to detect and avoid symlink loops, among +/// other things. While an inode / dev pair is sufficient to distinguish co-existing files, Linux +/// seems to aggressively re-use inodes, so it cannot determine if a file has been deleted (ABA +/// problem). Therefore we include richer information. 
+#[derive(Debug, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct FileId { + pub device: libc::dev_t, + pub inode: libc::ino_t, + pub size: u64, + pub change_seconds: libc::time_t, + pub change_nanoseconds: i64, + pub mod_seconds: libc::time_t, + pub mod_nanoseconds: i64, +} + +impl FileId { + pub const fn new() -> Self { + FileId { + device: -1 as _, + inode: -1 as _, + size: -1 as _, + change_seconds: libc::time_t::MIN, + change_nanoseconds: i64::MIN, + mod_seconds: libc::time_t::MIN, + mod_nanoseconds: -1 as _, + } + } + pub fn from_stat(buf: &libc::stat) -> FileId { + let mut result = FileId::new(); + result.device = buf.st_dev; + result.inode = buf.st_ino; + result.size = buf.st_size as u64; + result.change_seconds = buf.st_ctime; + result.mod_seconds = buf.st_mtime; + #[allow(clippy::unnecessary_cast)] // platform-dependent + #[cfg(not(target_os = "netbsd"))] + { + result.change_nanoseconds = buf.st_ctime_nsec as _; + result.mod_nanoseconds = buf.st_mtime_nsec as _; + } + #[cfg(target_os = "netbsd")] + { + result.change_nanoseconds = buf.st_ctimensec as _; + result.mod_nanoseconds = buf.st_mtimensec as _; + } + result + } + + /// Return true if \param rhs has higher mtime seconds than this file_id_t. + /// If identical, nanoseconds are compared. 
+ pub fn older_than(&self, rhs: &FileId) -> bool { + let lhs = (self.mod_seconds, self.mod_nanoseconds); + let rhs = (rhs.mod_seconds, rhs.mod_nanoseconds); + lhs.cmp(&rhs).is_lt() + } +} + +pub const INVALID_FILE_ID: FileId = FileId::new(); + +pub fn file_id_for_fd(fd: BorrowedFd<'_>) -> FileId { + let mut result = INVALID_FILE_ID; + let mut buf: libc::stat = unsafe { std::mem::zeroed() }; + if unsafe { libc::fstat(fd.as_raw_fd(), &mut buf) } == 0 { + result = FileId::from_stat(&buf); + } + result +} + +pub fn file_id_for_autoclose_fd(fd: &AutoCloseFd) -> FileId { + file_id_for_fd(fd.as_fd()) +} + +pub fn file_id_for_path(path: &wstr) -> FileId { + file_id_for_path_narrow(&wcs2zstring(path)) +} + +pub fn file_id_for_path_narrow(path: &CStr) -> FileId { + let mut result = INVALID_FILE_ID; + let mut buf: libc::stat = unsafe { std::mem::zeroed() }; + if unsafe { libc::stat(path.as_ptr(), &mut buf) } == 0 { + result = FileId::from_stat(&buf); + } + result +} +/// Given that `cursor` is a pointer into `base`, return the offset in characters. +/// This emulates C pointer arithmetic: +/// `wstr_offset_in(cursor, base)` is equivalent to C++ `cursor - base`. +pub fn wstr_offset_in(cursor: &wstr, base: &wstr) -> usize { + let cursor = cursor.as_slice(); + let base = base.as_slice(); + // cursor may be a zero-length slice at the end of base, + // which base.as_ptr_range().contains(cursor.as_ptr()) will reject. 
+ let base_range = base.as_ptr_range(); + let curs_range = cursor.as_ptr_range(); + assert!( + base_range.start <= curs_range.start && curs_range.end <= base_range.end, + "cursor should be a subslice of base" + ); + let offset = unsafe { cursor.as_ptr().offset_from(base.as_ptr()) }; + assert!(offset >= 0, "offset should be non-negative"); + offset as usize +} + +#[test] +fn test_wstr_offset_in() { + use crate::wchar::L; + let base = L!("hello world"); + assert_eq!(wstr_offset_in(&base[6..], base), 6); + assert_eq!(wstr_offset_in(&base[0..], base), 0); + assert_eq!(wstr_offset_in(&base[6..], &base[6..]), 0); + assert_eq!(wstr_offset_in(&base[base.len()..], base), base.len()); +}