Files
fish-shell/src/util.rs
Johannes Altmanninger 525c9bbdcb Move Rust tests to implementation files
For historical reasons[^1], most of our Rust tests are in src/tests,
which
1. is unconventional (Rust unit tests are supposed to be either in the
   same module as the implementation, or in a child module).
   This makes them slightly harder to discover, navigate etc.
2. can't test private APIs (motivating some of the "exposed for
   testing" comments).

Fix this by moving tests to the corresponding implementation file.
Reviewed with

        git show $commit \
            --color-moved=dimmed-zebra \
            --color-moved-ws=allow-indentation-change

- Shared test-only code lives in
  src/tests/prelude.rs,
  src/builtins/string/test_helpers.rs
  src/universal_notifier/test_helpers.rs
  We might want to slim down the prelude in future.
- I put our two benchmarks below tests ("mod tests" followed by "mod bench").

Verified that "cargo +nightly bench --features=benchmark" still
compiles and runs.

[^1]: Separate files are idiomatic in most other languages; separate
files also make tests easy to ignore when navigating the call graph
("rg --vimgrep | rg -v tests/").  Fortunately, rust-analyzer provides
a setting called references.excludeTests for textDocument/references,
the textDocument/prepareCallHierarchy family, and potentially
textDocument/documentHighlight (which can be used to find all
references in the current file).

Closes #11992
2025-11-01 12:42:55 +01:00

330 lines
12 KiB
Rust

//! Generic utilities library.
use crate::wchar::prelude::*;
use rand::{SeedableRng, rngs::SmallRng};
use std::cmp::Ordering;
use std::time;
use std::time::{SystemTime, UNIX_EPOCH};
/// Orders two wide strings the way a person would expect file names to be ordered, by
/// treating every run of ASCII digits as one number rather than as separate characters.
///
/// A plain character-by-character sort puts 'file1.txt', 'file5.txt' and 'file12.txt' in
/// this order:
///
/// file1.txt
/// file12.txt
/// file5.txt
///
/// Because digit runs are compared as whole numbers here, the result is instead:
///
/// file1.txt
/// file5.txt
/// file12.txt
///
/// Numbers written in bases above ten (e.g. hexadecimal) are not recognised as such, so
/// their order is not ideal, but the resulting sort order is still stable.
///
/// The comparison is two-tiered: letter case and the count of leading zeroes in a number
/// only matter when no other difference between the strings is found. Thus 'file1' and
/// 'File1' are never considered identical (their relative order is not arbitrary), while
/// 'file1', 'File2' and 'file3' still sort in exactly that order.
pub fn wcsfilecmp(a: &wstr, b: &wstr) -> Ordering {
    /// Maps a character to the value it is ordered by in the first comparison tier.
    fn sort_key(c: char) -> char {
        // Sort dashes after Z - see #5634
        let c = if c == '-' { '[' } else { c };
        // TODO Compare the tail (enabled by Rust's Unicode support).
        c.to_uppercase().next().unwrap()
    }

    let lhs = a.as_char_slice();
    let rhs = b.as_char_slice();
    let (mut i, mut j) = (0, 0);
    while i < a.len() && j < b.len() {
        let (x, y) = (lhs[i], rhs[j]);

        if x.is_ascii_digit() && y.is_ascii_digit() {
            // Both sides start a number here: compare the numbers as a whole.
            let (order, x_step, y_step) = wcsfilecmp_leading_digits(&a[i..], &b[j..]);
            if order != Ordering::Equal {
                // We already know the strings aren't logically equal.
                return order;
            }
            i += x_step;
            j += y_step;
            // The loop condition takes care of stopping once either string is exhausted.
            continue;
        }

        // Fast path: identical characters need no case folding.
        if x != y {
            match sort_key(x).cmp(&sort_key(y)) {
                Ordering::Equal => {}
                unequal => return unequal,
            }
        }
        i += 1;
        j += 1;
    }

    match (i == a.len(), j == b.len()) {
        // The strings are logically equal. They may or may not be the same length depending
        // on whether numbers were present but that doesn't matter. Disambiguate strings that
        // differ by letter case or length. The case where the names are literally identical
        // is not optimized because it won't occur given how this function is used, and would
        // be too rare to matter even if it did.
        (true, true) => a.cmp(b),
        // String a is a prefix of b and b is longer.
        (true, false) => Ordering::Less,
        // String b is a prefix of a and a is longer.
        (false, _) => {
            assert!(j == b.len());
            Ordering::Greater
        }
    }
}
/// A snapshot of the [`wcsfilecmp`] ordering, frozen in time for glob usage.
///
/// Unlike `wcsfilecmp`, differing characters are folded to lowercase before being compared
/// and '-' receives no special placement.
pub fn wcsfilecmp_glob(a: &wstr, b: &wstr) -> Ordering {
    /// Maps a character to the value it is ordered by in the first comparison tier.
    fn sort_key(c: char) -> char {
        // TODO Compare the tail (enabled by Rust's Unicode support).
        c.to_lowercase().next().unwrap()
    }

    let lhs = a.as_char_slice();
    let rhs = b.as_char_slice();
    let (mut i, mut j) = (0, 0);
    while i < a.len() && j < b.len() {
        let (x, y) = (lhs[i], rhs[j]);

        if x.is_ascii_digit() && y.is_ascii_digit() {
            // Both sides start a number here: compare the numbers as a whole.
            let (order, x_step, y_step) = wcsfilecmp_leading_digits(&a[i..], &b[j..]);
            if order != Ordering::Equal {
                // We already know the strings aren't logically equal.
                return order;
            }
            i += x_step;
            j += y_step;
            // If the end of one or both strings was reached, the loop condition stops the
            // iteration over the chars in each string.
            continue;
        }

        // Fast path: identical characters need no case folding.
        if x != y {
            match sort_key(x).cmp(&sort_key(y)) {
                Ordering::Equal => {}
                unequal => return unequal,
            }
        }
        i += 1;
        j += 1;
    }

    match (i == a.len(), j == b.len()) {
        // The strings are logically equal. They may or may not be the same length depending
        // on whether numbers were present but that doesn't matter. Disambiguate strings that
        // differ by letter case or length. The case where the names are literally identical
        // is not optimized because it won't occur given how this function is used, and would
        // be too rare to matter even if it did.
        (true, true) => a.cmp(b),
        // String a is a prefix of b and b is longer.
        (true, false) => Ordering::Less,
        // String b is a prefix of a and a is longer.
        (false, _) => {
            assert!(j == b.len());
            Ordering::Greater
        }
    }
}
/// Get the current time in microseconds since Jan 1, 1970.
///
/// The result is negative when the system clock reports a time before the epoch.
/// Magnitudes that do not fit in an `i64` saturate instead of silently wrapping around.
pub fn get_time() -> i64 {
    // `Duration::as_micros` yields a `u128`; clamp before narrowing so that the cast can
    // never wrap (and so that negating the result below can never overflow).
    let saturating_micros = |d: time::Duration| d.as_micros().min(i64::MAX as u128) as i64;
    match time::SystemTime::now().duration_since(time::UNIX_EPOCH) {
        Ok(since_epoch) => saturating_micros(since_epoch),
        // The clock is set before the epoch: report how far before, as a negative value.
        Err(until_epoch) => -saturating_micros(until_epoch.duration()),
    }
}
/// Helper to get a small RNG seed: the number of nanoseconds since the Unix epoch, or 0 if
/// the system clock is set before the epoch.
pub fn get_rng_seed() -> u128 {
    // An explicit, time-based seed is used to avoid the "getrandom" crate, which uses
    // `getentropy()` on macOS, and that is not available before macOS 10.12.
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH);
    since_epoch.map_or(0, |elapsed| elapsed.as_nanos())
}
/// Helper to get a small RNG with a seed.
///
/// The 128-bit `seed` is folded into 64 bits by XOR-ing its upper and lower halves.
pub fn get_seeded_rng(seed: u128) -> SmallRng {
    let upper_half = (seed >> 64) as u64;
    // Truncation is intended here: it keeps the low 64 bits.
    let lower_half = seed as u64;
    SmallRng::seed_from_u64(upper_half ^ lower_half)
}
/// Helper to get a small RNG using the current time.
pub fn get_rng() -> SmallRng {
    let time_based_seed = get_rng_seed();
    get_seeded_rng(time_based_seed)
}
/// Compare the strings to see if they begin with an integer that can be compared and return
/// the result of that comparison.
///
/// The returned tuple holds the ordering of the two leading numbers, followed by the offsets
/// reached in `a` and in `b` respectively. Leading zeroes are skipped before comparing, and
/// whitespace directly after the compared digits is skipped as well.
fn wcsfilecmp_leading_digits(a: &wstr, b: &wstr) -> (Ordering, usize, usize) {
    /// Counts the '0' characters at the start of `chars`.
    fn leading_zeroes(chars: &[char]) -> usize {
        chars.iter().take_while(|&&c| c == '0').count()
    }

    /// Counts the whitespace characters at the start of `chars`.
    fn leading_whitespace(chars: &[char]) -> usize {
        chars.iter().take_while(|c| c.is_whitespace()).count()
    }

    let lhs = a.as_char_slice();
    let rhs = b.as_char_slice();

    // Ignore leading 0s.
    let mut i = leading_zeroes(lhs);
    let mut j = leading_zeroes(rhs);

    let mut order = Ordering::Equal;
    loop {
        // Running off the end of a string is handled like meeting a non-digit.
        let x = lhs.get(i).copied().unwrap_or('\0');
        let y = rhs.get(j).copied().unwrap_or('\0');
        match (x.is_ascii_digit(), y.is_ascii_digit()) {
            // Keep the comparison of the first differing digit; if both numbers turn out to
            // have the same length, that is the result. Comparing the characters is the same
            // as comparing numerically for digits.
            (true, true) => order = order.then(x.cmp(&y)),
            // There are no negative numbers, only ints are allowed, and leading zeroes have
            // already been skipped, so the longer number is larger automatically.
            (true, false) => {
                order = Ordering::Greater;
                break;
            }
            (false, true) => {
                order = Ordering::Less;
                break;
            }
            (false, false) => break,
        }
        i += 1;
        j += 1;
    }

    // For historical reasons, we skip trailing whitespace
    // like fish_wcstol does!
    // This is used in sorting globs, and that's supposed to be stable.
    i += leading_whitespace(&lhs[i..]);
    j += leading_whitespace(&rhs[j..]);
    (order, i, j)
}
/// Finds `needle` in a `haystack` and returns the index of the first matching element, if any.
///
/// An empty `needle` matches at index 0, even when `haystack` is empty too.
///
/// # Examples
///
/// ```
/// use fish::util::find_subslice;
/// let haystack = b"ABC ABCDAB ABCDABCDABDE";
///
/// assert_eq!(find_subslice(b"ABCDABD", haystack), Some(15));
/// assert_eq!(find_subslice(b"ABCDE", haystack), None);
/// ```
pub fn find_subslice<T: PartialEq>(
    needle: impl AsRef<[T]>,
    haystack: impl AsRef<[T]>,
) -> Option<usize> {
    let pattern = needle.as_ref();
    let text = haystack.as_ref();
    match pattern.len() {
        // `windows` panics on a zero width, so the empty pattern is answered up front.
        0 => Some(0),
        width => text
            .windows(width)
            .position(|candidate| candidate == pattern),
    }
}
#[cfg(test)]
mod tests {
    use super::{find_subslice, wcsfilecmp, wcsfilecmp_glob};
    use crate::wchar::prelude::*;
    use std::cmp::Ordering;

    /// Verify the behavior of the `wcsfilecmp()` function.
    #[test]
    fn test_wcsfilecmp() {
        macro_rules! validate {
            ($str1:expr, $str2:expr, $expected_rc:expr) => {
                assert_eq!(wcsfilecmp(L!($str1), L!($str2)), $expected_rc)
            };
        }

        // Not using L as suffix because the macro munges error locations.
        validate!("", "", Ordering::Equal);
        validate!("", "def", Ordering::Less);
        validate!("abc", "", Ordering::Greater);
        validate!("abc", "def", Ordering::Less);
        validate!("abc", "DEF", Ordering::Less);
        validate!("DEF", "abc", Ordering::Greater);
        validate!("abc", "abc", Ordering::Equal);
        validate!("ABC", "ABC", Ordering::Equal);
        validate!("AbC", "abc", Ordering::Less);
        validate!("AbC", "ABC", Ordering::Greater);
        validate!("def", "abc", Ordering::Greater);
        validate!("1ghi", "1gHi", Ordering::Greater);
        validate!("1ghi", "2ghi", Ordering::Less);
        validate!("1ghi", "01ghi", Ordering::Greater);
        validate!("1ghi", "02ghi", Ordering::Less);
        validate!("01ghi", "1ghi", Ordering::Less);
        validate!("1ghi", "002ghi", Ordering::Less);
        validate!("002ghi", "1ghi", Ordering::Greater);
        validate!("abc01def", "abc1def", Ordering::Less);
        validate!("abc1def", "abc01def", Ordering::Greater);
        validate!("abc12", "abc5", Ordering::Greater);
        validate!("51abc", "050abc", Ordering::Greater);
        validate!("abc5", "abc12", Ordering::Less);
        validate!("5abc", "12ABC", Ordering::Less);
        validate!("abc0789", "abc789", Ordering::Less);
        validate!("abc0xA789", "abc0xA0789", Ordering::Greater);
        validate!("abc002", "abc2", Ordering::Less);
        validate!("abc002g", "abc002", Ordering::Greater);
        validate!("abc002g", "abc02g", Ordering::Less);
        validate!("abc002.txt", "abc02.txt", Ordering::Less);
        validate!("abc005", "abc012", Ordering::Less);
        validate!("abc02", "abc002", Ordering::Greater);
        validate!("GHI1abc2.txt", "ghi1abc2.txt", Ordering::Less);
        validate!("a0", "a00", Ordering::Less);
        validate!("a00b", "a0b", Ordering::Less);
        validate!("a0b", "a00b", Ordering::Greater);
        // Dashes sort after Z.
        validate!("a-b", "azb", Ordering::Greater);
    }

    /// Verify the behavior of the `wcsfilecmp_glob()` function.
    #[test]
    fn test_wcsfilecmp_glob() {
        macro_rules! validate {
            ($str1:expr, $str2:expr, $expected_rc:expr) => {
                assert_eq!(wcsfilecmp_glob(L!($str1), L!($str2)), $expected_rc)
            };
        }

        validate!("", "", Ordering::Equal);
        validate!("", "def", Ordering::Less);
        validate!("abc", "", Ordering::Greater);
        validate!("abc", "DEF", Ordering::Less);
        validate!("AbC", "abc", Ordering::Less);
        validate!("abc5", "abc12", Ordering::Less);
        validate!("abc01def", "abc1def", Ordering::Less);
        // The glob ordering gives dashes no special placement, so they sort before letters.
        validate!("a-b", "azb", Ordering::Less);
    }

    /// Verify the edge cases of the `find_subslice()` function.
    #[test]
    fn test_find_subslice() {
        let haystack: &[u8] = b"ABC ABCDAB ABCDABCDABDE";
        let empty: &[u8] = &[];

        assert_eq!(find_subslice(&b"ABCDABD"[..], haystack), Some(15));
        assert_eq!(find_subslice(&b"ABCDE"[..], haystack), None);
        // An empty needle matches at the start, even in an empty haystack.
        assert_eq!(find_subslice(empty, haystack), Some(0));
        assert_eq!(find_subslice(empty, empty), Some(0));
        // A needle longer than the haystack can never match.
        assert_eq!(find_subslice(haystack, &b"ABC"[..]), None);
    }
}