From c7ea768a746f69562f12d967bd909651974a4f25 Mon Sep 17 00:00:00 2001 From: Victor Song Date: Sun, 26 Feb 2023 22:13:40 -0500 Subject: [PATCH] Rewrite `wrealpath` from `wutil` in Rust (#9613) * wutil: Rewrite `wrealpath` in Rust * Reduce use of FFI types in `wrealpath` * Addressed PR comments regarding allocation * Replace let binding assignment with regular comparison --- fish-rust/src/ffi.rs | 2 + fish-rust/src/wchar_ffi.rs | 12 ++++++ fish-rust/src/wutil/mod.rs | 2 + fish-rust/src/wutil/wrealpath.rs | 74 ++++++++++++++++++++++++++++++++ src/common.h | 4 +- 5 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 fish-rust/src/wutil/wrealpath.rs diff --git a/fish-rust/src/ffi.rs b/fish-rust/src/ffi.rs index a544b1946..a9675742a 100644 --- a/fish-rust/src/ffi.rs +++ b/fish-rust/src/ffi.rs @@ -91,6 +91,8 @@ generate!("re::regex_t") generate!("re::regex_result_ffi") generate!("re::try_compile_ffi") + generate!("wcs2string") + generate!("str2wcstring") } impl parser_t { diff --git a/fish-rust/src/wchar_ffi.rs b/fish-rust/src/wchar_ffi.rs index cc00c1ea7..a0eb61821 100644 --- a/fish-rust/src/wchar_ffi.rs +++ b/fish-rust/src/wchar_ffi.rs @@ -143,3 +143,15 @@ fn from_ffi(&self) -> WString { WString::from_chars(self.as_chars()) } } + +impl WCharFromFFI> for cxx::UniquePtr { + fn from_ffi(&self) -> Vec { + self.as_bytes().to_vec() + } +} + +impl WCharFromFFI> for cxx::SharedPtr { + fn from_ffi(&self) -> Vec { + self.as_bytes().to_vec() + } +} diff --git a/fish-rust/src/wutil/mod.rs b/fish-rust/src/wutil/mod.rs index f4bec1c99..7f30eca98 100644 --- a/fish-rust/src/wutil/mod.rs +++ b/fish-rust/src/wutil/mod.rs @@ -1,12 +1,14 @@ pub mod format; pub mod gettext; mod wcstoi; +mod wrealpath; use std::io::Write; pub(crate) use format::printf::sprintf; pub(crate) use gettext::{wgettext, wgettext_fmt}; pub use wcstoi::*; +pub use wrealpath::*; /// Port of the wide-string wperror from `src/wutil.cpp` but for rust `&str`. pub fn perror(s: &str) { diff --git a/fish-rust/src/wutil/wrealpath.rs b/fish-rust/src/wutil/wrealpath.rs new file mode 100644 index 000000000..87e6872e5 --- /dev/null +++ b/fish-rust/src/wutil/wrealpath.rs @@ -0,0 +1,74 @@ +use std::{ + ffi::OsStr, + fs::canonicalize, + os::unix::prelude::{OsStrExt, OsStringExt}, +}; + +use cxx::let_cxx_string; + +use crate::{ + ffi::{str2wcstring, wcs2string}, + wchar::{wstr, WString}, + wchar_ffi::{WCharFromFFI, WCharToFFI}, +}; + +/// Wide character realpath. The last path component does not need to be valid. If an error occurs, +/// `wrealpath()` returns `None` +pub fn wrealpath(pathname: &wstr) -> Option { + if pathname.is_empty() { + return None; + } + + let mut narrow_path: Vec = wcs2string(&pathname.to_ffi()).from_ffi(); + + // Strip trailing slashes. This is treats "/a//" as equivalent to "/a" if /a is a non-directory. + while narrow_path.len() > 1 && narrow_path[narrow_path.len() - 1] == b'/' { + narrow_path.pop(); + } + + // `from_bytes` is Unix specific but there isn't really any other way to do this + // since `libc::realpath` is also Unix specific. I also don't think we support Windows + // outside of WSL + Cygwin (which should be fairly Unix-like anyways) + let narrow_res = canonicalize(OsStr::from_bytes(&narrow_path)); + + let real_path = if let Ok(result) = narrow_res { + result.into_os_string().into_vec() + } else { + // Check if everything up to the last path component is valid. + let pathsep_idx = narrow_path.iter().rposition(|&c| c == b'/'); + + if pathsep_idx == Some(0) { + // If the only pathsep is the first character then it's an absolute path with a + // single path component and thus doesn't need conversion. + narrow_path + } else { + // Only call realpath() on the portion up to the last component. + let narrow_res = if let Some(pathsep_idx) = pathsep_idx { + // Only call realpath() on the portion up to the last component. + canonicalize(OsStr::from_bytes(&narrow_path[0..pathsep_idx])) + } else { + // If there is no "/", this is a file in $PWD, so give the realpath to that. + canonicalize(".") + }; + + let Ok(narrow_result) = narrow_res else { return None; }; + + let pathsep_idx = pathsep_idx.map_or(0, |idx| idx + 1); + + let mut real_path = narrow_result.into_os_string().into_vec(); + + // This test is to deal with cases such as /../../x => //x. + if real_path.len() > 1 { + real_path.push(b'/'); + } + + real_path.extend_from_slice(&narrow_path[pathsep_idx..]); + + real_path + } + }; + + let_cxx_string!(s = real_path); + + Some(str2wcstring(&s).from_ffi()) +} diff --git a/src/common.h b/src/common.h index 8997be79e..343e3150f 100644 --- a/src/common.h +++ b/src/common.h @@ -289,10 +289,10 @@ void show_stackframe(int frame_count = 100, int skip_levels = 0); /// /// This function encodes illegal character sequences in a reversible way using the private use /// area. -wcstring str2wcstring(const char *in); -wcstring str2wcstring(const char *in, size_t len); wcstring str2wcstring(const std::string &in); wcstring str2wcstring(const std::string &in, size_t len); +wcstring str2wcstring(const char *in); +wcstring str2wcstring(const char *in, size_t len); /// Returns a newly allocated multibyte character string equivalent of the specified wide character /// string.