From 60d439ab22228622ade90a63a60b038e41d2d044 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 14 May 2023 17:25:55 -0700 Subject: [PATCH] Rationalize fish_wcstoi/d and friends Historically fish has used the functions `fish_wcstol`, `fish_wcstoi`, and `fish_wcstoul` (and some long long variants) for most integer conversions. These have semantics that are deliberately different from the libc functions, such as consuming trailing whitespace, and disallowing `-` in unsigned versions. fish has started to drift away from these semantics; some divergence from C++ has crept in. Rename the existing `fish_wcs*` functions in Rust to remove the fish prefix, to express that they attempt to mirror libc semantics; then introduce `fish_` wrappers which are ported from C++. Also fix some miscellaneous bugs which have crept in, such as missing range checks. --- fish-rust/src/builtins/bg.rs | 33 +++--- fish-rust/src/builtins/math.rs | 54 +++++----- fish-rust/src/builtins/printf.rs | 6 +- fish-rust/src/builtins/random.rs | 41 ++++--- fish-rust/src/env/environment_impl.rs | 11 +- fish-rust/src/termsize.rs | 14 ++- fish-rust/src/wutil/wcstoi.rs | 149 ++++++++++++++++++++++---- src/parser.cpp | 2 +- src/parser.h | 2 +- 9 files changed, 211 insertions(+), 101 deletions(-) diff --git a/fish-rust/src/builtins/bg.rs b/fish-rust/src/builtins/bg.rs index affdb02f3..6e1b1315a 100644 --- a/fish-rust/src/builtins/bg.rs +++ b/fish-rust/src/builtins/bg.rs @@ -93,24 +93,25 @@ pub fn bg(parser: &mut parser_t, streams: &mut io_streams_t, args: &mut [&wstr]) } // The user specified at least one job to be backgrounded. + let mut pids: Vec = Vec::new(); // If one argument is not a valid pid (i.e. integer >= 0), fail without backgrounding anything, // but still print errors for all of them. - let mut retval = STATUS_CMD_OK; - let pids: Vec = args[opts.optind..] - .iter() - .map(|&arg| { - fish_wcstoi(arg).unwrap_or_else(|_| { - streams.err.append(wgettext_fmt!( - "%ls: '%ls' is not a valid job specifier\n", - cmd, - arg - )); - retval = STATUS_INVALID_ARGS; - 0 - }) - }) - .collect(); + let mut retval: Option = STATUS_CMD_OK; + for arg in &args[opts.optind..] { + let pid = fish_wcstoi(arg); + #[allow(clippy::unnecessary_unwrap)] + if pid.is_err() || pid.unwrap() < 0 { + streams.err.append(wgettext_fmt!( + "%ls: '%ls' is not a valid job specifier\n", + cmd, + arg + )); + retval = STATUS_INVALID_ARGS; + } else { + pids.push(pid.unwrap()); + } + } if retval != STATUS_CMD_OK { return retval; @@ -122,7 +123,7 @@ pub fn bg(parser: &mut parser_t, streams: &mut io_streams_t, args: &mut [&wstr]) let mut job_pos = 0; let job = unsafe { parser - .job_get_from_pid1(pid, Pin::new(&mut job_pos)) + .job_get_from_pid1(autocxx::c_int(pid), Pin::new(&mut job_pos)) .as_ref() }; diff --git a/fish-rust/src/builtins/math.rs b/fish-rust/src/builtins/math.rs index ef48df357..e5e02756c 100644 --- a/fish-rust/src/builtins/math.rs +++ b/fish-rust/src/builtins/math.rs @@ -59,35 +59,41 @@ fn parse_cmd_opts( let optarg = w.woptarg.unwrap(); have_scale = true; // "max" is the special value that tells us to pick the maximum scale. - opts.scale = if optarg == "max"L { - 15 - } else if let Ok(base) = fish_wcstoi(optarg) { - base + if optarg == "max" { + opts.scale = 15; } else { - streams.err.append(wgettext_fmt!( - "%ls: %ls: invalid base value\n", - cmd, - optarg - )); - return Err(STATUS_INVALID_ARGS); - }; + let scale = fish_wcstoi(optarg); + if scale.is_err() || scale.unwrap() < 0 || scale.unwrap() > 15 { + streams.err.append(wgettext_fmt!( + "%ls: %ls: invalid base value\n", + cmd, + optarg + )); + return Err(STATUS_INVALID_ARGS); + } + // We know the value is in the range [0, 15] + opts.scale = scale.unwrap() as usize; + } } 'b' => { let optarg = w.woptarg.unwrap(); - opts.base = if optarg == "hex"L { - 16 - } else if optarg == "octal"L { - 8 - } else if let Ok(base) = fish_wcstoi(optarg) { - base + if optarg == "hex" { + opts.base = 16; + } else if optarg == "octal" { + opts.base = 8; } else { - streams.err.append(wgettext_fmt!( - "%ls: %ls: invalid base value\n", - cmd, - optarg - )); - return Err(STATUS_INVALID_ARGS); - }; + let base = fish_wcstoi(optarg); + if base.is_err() || (base.unwrap() != 8 && base.unwrap() != 16) { + streams.err.append(wgettext_fmt!( + "%ls: %ls: invalid base value\n", + cmd, + optarg + )); + return Err(STATUS_INVALID_ARGS); + } + // We know the value is 8 or 16. + opts.base = base.unwrap() as usize; + } } 'h' => { opts.print_help = true; diff --git a/fish-rust/src/builtins/printf.rs b/fish-rust/src/builtins/printf.rs index c76efc976..000c2456d 100644 --- a/fish-rust/src/builtins/printf.rs +++ b/fish-rust/src/builtins/printf.rs @@ -59,7 +59,7 @@ use crate::wutil::errors::Error; use crate::wutil::gettext::{wgettext, wgettext_fmt}; use crate::wutil::wcstod::wcstod; -use crate::wutil::wcstoi::{fish_wcstoi_partial, Options as WcstoiOpts}; +use crate::wutil::wcstoi::{wcstoi_partial, Options as WcstoiOpts}; use crate::wutil::{sprintf, wstr_offset_in}; use printf_compat::args::ToArg; use printf_compat::printf::sprintf_locale; @@ -129,7 +129,7 @@ fn raw_string_to_scalar_type<'a>( end: &mut &'a wstr, ) -> Result { let mut consumed = 0; - let res = fish_wcstoi_partial(s, WcstoiOpts::default(), &mut consumed); + let res = wcstoi_partial(s, WcstoiOpts::default(), &mut consumed); *end = s.slice_from(consumed); res } @@ -142,7 +142,7 @@ fn raw_string_to_scalar_type<'a>( end: &mut &'a wstr, ) -> Result { let mut consumed = 0; - let res = fish_wcstoi_partial( + let res = wcstoi_partial( s, WcstoiOpts { wrap_negatives: true, diff --git a/fish-rust/src/builtins/random.rs b/fish-rust/src/builtins/random.rs index 747a81821..613ce3b21 100644 --- a/fish-rust/src/builtins/random.rs +++ b/fish-rust/src/builtins/random.rs @@ -7,12 +7,10 @@ use crate::ffi::parser_t; use crate::wchar::{wstr, L}; use crate::wgetopt::{wgetopter_t, wopt, woption, woption_argument_t}; -use crate::wutil::{self, fish_wcstoi_opts, sprintf, wgettext_fmt, Options as WcstoiOptions}; -use num_traits::PrimInt; +use crate::wutil::{self, fish_wcstol, fish_wcstoul, sprintf, wgettext_fmt}; use once_cell::sync::Lazy; use rand::rngs::SmallRng; use rand::{Rng, SeedableRng}; -use std::default::Default; use std::sync::Mutex; static RNG: Lazy> = Lazy::new(|| Mutex::new(SmallRng::from_entropy())); @@ -69,22 +67,21 @@ pub fn random( .append(sprintf!(L!("%ls\n"), argv[i + 1 + rand])); return STATUS_CMD_OK; } - fn parse( - streams: &mut io_streams_t, - cmd: &wstr, - num: &wstr, - ) -> Result { - let res = fish_wcstoi_opts( - num, - WcstoiOptions { - consume_all: true, - ..Default::default() - }, - ); + fn parse_ll(streams: &mut io_streams_t, cmd: &wstr, num: &wstr) -> Result { + let res = fish_wcstol(num); if res.is_err() { streams .err - .append(wgettext_fmt!("%ls: %ls: invalid integer\n", cmd, num,)); + .append(wgettext_fmt!("%ls: %ls: invalid integer\n", cmd, num)); + } + return res; + } + fn parse_ull(streams: &mut io_streams_t, cmd: &wstr, num: &wstr) -> Result { + let res = fish_wcstoul(num); + if res.is_err() { + streams + .err + .append(wgettext_fmt!("%ls: %ls: invalid integer\n", cmd, num)); } return res; } @@ -95,7 +92,7 @@ fn parse( } 1 => { // Seed the engine persistently - let num = parse::(streams, cmd, argv[i]); + let num = parse_ll(streams, cmd, argv[i]); match num { Err(_) => return STATUS_INVALID_ARGS, Ok(x) => { @@ -107,25 +104,25 @@ fn parse( } 2 => { // start is first, end is second - match parse::(streams, cmd, argv[i]) { + match parse_ll(streams, cmd, argv[i]) { Err(_) => return STATUS_INVALID_ARGS, Ok(x) => start = x, } - match parse::(streams, cmd, argv[i + 1]) { + match parse_ll(streams, cmd, argv[i + 1]) { Err(_) => return STATUS_INVALID_ARGS, Ok(x) => end = x, } } 3 => { // start, step, end - match parse::(streams, cmd, argv[i]) { + match parse_ll(streams, cmd, argv[i]) { Err(_) => return STATUS_INVALID_ARGS, Ok(x) => start = x, } // start, step, end - match parse::(streams, cmd, argv[i + 1]) { + match parse_ull(streams, cmd, argv[i + 1]) { Err(_) => return STATUS_INVALID_ARGS, Ok(0) => { streams @@ -136,7 +133,7 @@ fn parse( Ok(x) => step = x, } - match parse::(streams, cmd, argv[i + 2]) { + match parse_ll(streams, cmd, argv[i + 2]) { Err(_) => return STATUS_INVALID_ARGS, Ok(x) => end = x, } diff --git a/fish-rust/src/env/environment_impl.rs b/fish-rust/src/env/environment_impl.rs index 525b554dd..3f69f72ff 100644 --- a/fish-rust/src/env/environment_impl.rs +++ b/fish-rust/src/env/environment_impl.rs @@ -11,7 +11,7 @@ use crate::wchar::{widestrs, wstr, WExt, WString, L}; use crate::wchar_ext::ToWString; use crate::wchar_ffi::{WCharFromFFI, WCharToFFI}; -use crate::wutil::{fish_wcstoi_opts, sprintf, Options}; +use crate::wutil::{fish_wcstol_radix, sprintf}; use autocxx::WithinUniquePtr; use cxx::UniquePtr; @@ -95,12 +95,7 @@ fn set_umask(list_val: &Vec) -> EnvStackSetResult { if list_val.len() != 1 || list_val[0].is_empty() { return EnvStackSetResult::ENV_INVALID; } - let opts = Options { - wrap_negatives: false, - consume_all: false, - mradix: Some(8), - }; - let Ok(mask) = fish_wcstoi_opts(&list_val[0], opts) else { + let Ok(mask) = fish_wcstol_radix(&list_val[0], 8) else { return EnvStackSetResult::ENV_INVALID; }; @@ -114,7 +109,7 @@ fn set_umask(list_val: &Vec) -> EnvStackSetResult { } // Do not actually create a umask variable. On env_stack_t::get() it will be calculated. // SAFETY: umask cannot fail. - unsafe { libc::umask(mask) }; + unsafe { libc::umask(mask as libc::mode_t) }; EnvStackSetResult::ENV_OK } diff --git a/fish-rust/src/termsize.rs b/fish-rust/src/termsize.rs index 089d62f6c..fd26e1536 100644 --- a/fish-rust/src/termsize.rs +++ b/fish-rust/src/termsize.rs @@ -40,17 +40,15 @@ pub struct Termsize { /// Convert an environment variable to an int, or return a default value. /// The int must be >0 and , default: isize) -> isize { - match var { - Some(s) => { - let proposed = fish_wcstoi(&s); - if let Ok(proposed) = proposed { - proposed - } else { - default + if var.is_some() && !var.as_ref().unwrap().is_empty() { + #[allow(clippy::unnecessary_unwrap)] + if let Ok(proposed) = fish_wcstoi(&var.unwrap()) { + if proposed > 0 && proposed <= u16::MAX as i32 { + return proposed as isize; } } - None => default, } + default } /// \return a termsize from ioctl, or None on error or if not supported. diff --git a/fish-rust/src/wutil/wcstoi.rs b/fish-rust/src/wutil/wcstoi.rs index b49e5959a..8c84c4be9 100644 --- a/fish-rust/src/wutil/wcstoi.rs +++ b/fish-rust/src/wutil/wcstoi.rs @@ -1,5 +1,6 @@ pub use super::errors::Error; -use crate::wchar::IntoCharIter; +use crate::wchar::{wstr, IntoCharIter}; +use crate::wchar_ext::WExt; use num_traits::{NumCast, PrimInt}; use std::default::Default; use std::iter::{Fuse, Peekable}; @@ -64,7 +65,7 @@ fn parse_radix>( error_if_negative: bool, ) -> Result { if let Some(r) = mradix { - assert!((2..=36).contains(&r), "fish_parse_radix: invalid radix {r}"); + assert!((2..=36).contains(&r), "parse_radix: invalid radix {r}"); } // Construct a CharsIterator to keep track of how many we consume. @@ -161,7 +162,7 @@ fn parse_radix>( } /// Parse some iterator over Chars into some Integer type, optionally with a radix. -fn fish_wcstoi_impl( +fn wcstoi_impl( src: Chars, options: Options, out_consumed: &mut usize, @@ -171,7 +172,7 @@ fn fish_wcstoi_impl( Int: PrimInt, { let bits = Int::zero().count_zeros(); - assert!(bits <= 64, "fish_wcstoi: Int must be <= 64 bits"); + assert!(bits <= 64, "wcstoi: Int must be <= 64 bits"); let signed = Int::min_value() < Int::zero(); let Options { @@ -228,22 +229,22 @@ fn fish_wcstoi_impl( /// - Leading whitespace is skipped. /// - 0 means octal, 0x means hex /// - Leading + is supported. -pub fn fish_wcstoi(src: Chars) -> Result +pub fn wcstoi(src: Chars) -> Result where Chars: IntoCharIter, Int: PrimInt, { - fish_wcstoi_impl(src.chars(), Default::default(), &mut 0) + wcstoi_impl(src.chars(), Default::default(), &mut 0) } /// Convert the given wide string to an integer using the given radix. /// Leading whitespace is skipped. -pub fn fish_wcstoi_opts(src: Chars, options: Options) -> Result +pub fn wcstoi_opts(src: Chars, options: Options) -> Result where Chars: IntoCharIter, Int: PrimInt, { - fish_wcstoi_impl(src.chars(), options, &mut 0) + wcstoi_impl(src.chars(), options, &mut 0) } /// Convert the given wide string to an integer. @@ -252,7 +253,7 @@ pub fn fish_wcstoi_opts(src: Chars, options: Options) -> Result( +pub fn wcstoi_partial( src: Chars, options: Options, out_consumed: &mut usize, @@ -261,23 +262,93 @@ pub fn fish_wcstoi_partial( Chars: IntoCharIter, Int: PrimInt, { - fish_wcstoi_impl(src.chars(), options, out_consumed) + wcstoi_impl(src.chars(), options, out_consumed) +} + +/// A historic "enhanced" version of wcstol. +/// Leading whitespace is ignored (per wcstol). +/// Trailing whitespace is also ignored. +/// Trailing characters other than whitespace are errors. +pub fn fish_wcstol_radix(mut src: &wstr, radix: u32) -> Result { + // Unlike wcstol, we do not infer the radix. + assert!(radix > 0, "radix cannot be 0"); + let options: Options = Options { + mradix: Some(radix), + ..Default::default() + }; + let mut consumed = 0; + let result = wcstoi_partial(src, options, &mut consumed)?; + // Skip trailing whitespace, erroring if we encounter a non-whitespace character. + src = src.slice_from(consumed); + while !src.is_empty() && src.char_at(0).is_whitespace() { + src = src.slice_from(1); + } + if !src.is_empty() { + return Err(Error::InvalidChar); + } + Ok(result) +} + +/// Variant of fish_wcstol_radix which assumes base 10. +pub fn fish_wcstol(src: &wstr) -> Result { + fish_wcstol_radix(src, 10) +} + +/// Variant of fish_wcstol for ints, erroring if it does not fit. +pub fn fish_wcstoi(src: &wstr) -> Result { + let res = fish_wcstol(src)?; + if let Ok(val) = res.try_into() { + Ok(val) + } else { + Err(Error::Overflow) + } +} + +/// Historic "enhanced" version of wcstoul. +/// Leading minus is considered invalid. +/// Leading whitespace is ignored (per wcstoul). +/// Trailing whitespace is also ignored. +pub fn fish_wcstoul(mut src: &wstr) -> Result { + // Skip leading whitespace. + while !src.is_empty() && src.char_at(0).is_whitespace() { + src = src.slice_from(1); + } + // Disallow minus as the first character to avoid questionable wrap-around. + if src.is_empty() || src.char_at(0) == '-' { + return Err(Error::InvalidChar); + } + let options: Options = Options { + mradix: Some(10), + ..Default::default() + }; + let mut consumed = 0; + let result = wcstoi_partial(src, options, &mut consumed)?; + // Skip trailling whitespace. + src = src.slice_from(consumed); + while !src.is_empty() && src.char_at(0).is_whitespace() { + src = src.slice_from(1); + } + if !src.is_empty() { + return Err(Error::InvalidChar); + } + Ok(result) } #[cfg(test)] mod tests { use super::*; + use crate::wchar::L; fn test_min_max(min: Int, max: Int) { - assert_eq!(fish_wcstoi(min.to_string().chars()), Ok(min)); - assert_eq!(fish_wcstoi(max.to_string().chars()), Ok(max)); + assert_eq!(wcstoi(min.to_string().chars()), Ok(min)); + assert_eq!(wcstoi(max.to_string().chars()), Ok(max)); } #[test] fn test_signed() { - let run1 = |s: &str| -> Result { fish_wcstoi(s.chars()) }; + let run1 = |s: &str| -> Result { wcstoi(s.chars()) }; let run1_rad = |s: &str, radix: u32| -> Result { - fish_wcstoi_opts( + wcstoi_opts( s.chars(), Options { mradix: Some(radix), @@ -335,7 +406,7 @@ fn negu(x: u64) -> u64 { } let run1 = |s: &str| -> Result { - fish_wcstoi_opts( + wcstoi_opts( s.chars(), Options { wrap_negatives: true, @@ -344,7 +415,7 @@ fn negu(x: u64) -> u64 { ) }; let run1_rad = |s: &str, radix: u32| -> Result { - fish_wcstoi_opts( + wcstoi_opts( s.chars(), Options { wrap_negatives: true, @@ -394,7 +465,7 @@ fn negu(x: u64) -> u64 { std::u64::MAX - x + 1 } - let run1 = |s: &str, opts: Options| -> Result { fish_wcstoi_opts(s, opts) }; + let run1 = |s: &str, opts: Options| -> Result { wcstoi_opts(s, opts) }; let mut opts = Options::default(); assert_eq!(run1("-123", opts), Err(Error::InvalidChar)); assert_eq!(run1("-0x123", opts), Err(Error::InvalidChar)); @@ -410,7 +481,7 @@ fn negu(x: u64) -> u64 { fn test_partial() { let run1 = |s: &str| -> (i32, usize) { let mut consumed = 0; - let res = fish_wcstoi_partial(s, Default::default(), &mut consumed) + let res = wcstoi_partial(s, Default::default(), &mut consumed) .expect("Should have parsed an int"); (res, consumed) }; @@ -430,4 +501,46 @@ fn test_partial() { assert_eq!(run1("0x"), (0, 1)); assert_eq!(run1("0xx"), (0, 1)); } + + #[test] + fn test_fish_wcstol() { + assert_eq!(fish_wcstol(L!("0")), Ok(0)); + assert_eq!(fish_wcstol(L!("10")), Ok(10)); + assert_eq!(fish_wcstol(L!(" 10")), Ok(10)); + assert_eq!(fish_wcstol(L!(" 10 ")), Ok(10)); + assert_eq!(fish_wcstol(L!("-10")), Ok(-10)); + assert_eq!(fish_wcstol(L!(" +10 ")), Ok(10)); + assert_eq!(fish_wcstol(L!("10foo")), Err(Error::InvalidChar)); + assert_eq!(fish_wcstol(L!("10.5")), Err(Error::InvalidChar)); + assert_eq!(fish_wcstol(L!("10 x ")), Err(Error::InvalidChar)); + } + + #[test] + fn test_fish_wcstoi() { + assert_eq!(fish_wcstoi(L!("0")), Ok(0)); + assert_eq!(fish_wcstoi(L!("10")), Ok(10)); + assert_eq!(fish_wcstoi(L!(" 10")), Ok(10)); + assert_eq!(fish_wcstoi(L!(" 10 ")), Ok(10)); + assert_eq!(fish_wcstoi(L!("-10")), Ok(-10)); + assert_eq!(fish_wcstoi(L!(" +10 ")), Ok(10)); + assert_eq!(fish_wcstoi(L!(" 2147483647 ")), Ok(2147483647)); + assert_eq!(fish_wcstoi(L!(" 2147483648 ")), Err(Error::Overflow)); + assert_eq!(fish_wcstoi(L!(" -2147483647 ")), Ok(-2147483647)); + assert_eq!(fish_wcstoi(L!(" -2147483648 ")), Ok(-2147483648)); + assert_eq!(fish_wcstoi(L!(" -2147483649 ")), Err(Error::Overflow)); + assert_eq!(fish_wcstoi(L!("10foo")), Err(Error::InvalidChar)); + assert_eq!(fish_wcstoi(L!("10.5")), Err(Error::InvalidChar)); + } + + #[test] + fn test_fish_wcstoul() { + assert_eq!(fish_wcstoul(L!("0")), Ok(0)); + assert_eq!(fish_wcstoul(L!("10")), Ok(10)); + assert_eq!(fish_wcstoul(L!(" +10")), Ok(10)); + assert_eq!(fish_wcstoul(L!(" -10")), Err(Error::InvalidChar)); + assert_eq!(fish_wcstoul(L!(" 10 ")), Ok(10)); + assert_eq!(fish_wcstoul(L!("10foo")), Err(Error::InvalidChar)); + assert_eq!(fish_wcstoul(L!("10.5")), Err(Error::InvalidChar)); + assert_eq!(fish_wcstoul(L!("18446744073709551615")), Ok(u64::MAX)); + } } diff --git a/src/parser.cpp b/src/parser.cpp index 207f5434a..58da3fbe4 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -501,7 +501,7 @@ job_t *parser_t::job_get_from_pid(pid_t pid) const { return job_get_from_pid(pid, job_pos); } -job_t *parser_t::job_get_from_pid(int64_t pid, size_t &job_pos) const { +job_t *parser_t::job_get_from_pid(int pid, size_t &job_pos) const { for (auto it = job_list.begin(); it != job_list.end(); ++it) { for (const process_ptr_t &p : (*it)->processes) { if (p->pid == pid) { diff --git a/src/parser.h b/src/parser.h index e897229bd..8feb28acc 100644 --- a/src/parser.h +++ b/src/parser.h @@ -449,7 +449,7 @@ class parser_t : public std::enable_shared_from_this { job_t *job_get_from_pid(pid_t pid) const; /// Returns the job and position with the given pid. - job_t *job_get_from_pid(int64_t pid, size_t &job_pos) const; + job_t *job_get_from_pid(int pid, size_t &job_pos) const; /// Returns a new profile item if profiling is active. The caller should fill it in. /// The parser_t will deallocate it.