Wildcard tree walking to only rely on dev, inode to detect changes

When applying a wildcard, it's important to keep track of the files that have
been visited, to avoid symlink loops. Previously fish used a FileId for the
purpose. However FileId also includes richer information like modification time;
thus if a file is modified during wildcard expansion then fish may believe that
the file is different and visit it twice.

The richer information like modification time is important for atomic file
writes but should be ignored for wildcard expansion; just use the (dev, inode)
pair instead.

This also somewhat reduces our reliance on struct stat, but we still need it for
fstatat which Rust does not expose.
This commit is contained in:
Peter Ammon
2024-07-27 18:23:21 -07:00
parent 89794ccfdb
commit 3d816174fd
4 changed files with 41 additions and 28 deletions

View File

@@ -449,7 +449,7 @@ mod expander {
use crate::{
common::scoped_push,
path::append_path_component,
wutil::{dir_iter::DirIter, normalize_path, FileId},
wutil::{dir_iter::DirIter, normalize_path, DevInode},
};
use super::*;
@@ -461,8 +461,8 @@ pub struct WildCardExpander<'e> {
working_directory: &'e wstr,
/// The set of items we have resolved, used to efficiently avoid duplication.
completion_set: HashSet<WString>,
/// The set of file IDs we have visited, used to avoid symlink loops.
visited_files: HashSet<FileId>,
/// The set of (device, inode) pairs we have visited, used to avoid symlink loops.
visited_files: HashSet<DevInode>,
/// Flags controlling expansion.
flags: ExpandFlags,
/// Resolved items get inserted into here. This is transient of course.
@@ -736,12 +736,11 @@ fn expand_intermediate_segment(
continue;
}
let Some(statbuf) = entry.stat() else {
let Some(dev_inode) = entry.dev_inode() else {
continue;
};
let file_id = FileId::from_stat(&statbuf);
if !self.visited_files.insert(file_id) {
if !self.visited_files.insert(dev_inode) {
// Symlink loop! This directory was already visited, so skip it.
continue;
}
@@ -753,7 +752,7 @@ fn expand_intermediate_segment(
// Now remove the visited file. This is for #2414: only directories "beneath" us should be
// considered visited.
self.visited_files.remove(&file_id);
self.visited_files.remove(&dev_inode);
}
}

View File

@@ -1,6 +1,7 @@
use super::wopendir;
use crate::common::{cstr2wcstring, wcs2zstring};
use crate::wchar::{wstr, WString};
use crate::wutil::DevInode;
use libc::{
DT_BLK, DT_CHR, DT_DIR, DT_FIFO, DT_LNK, DT_REG, DT_SOCK, EACCES, EIO, ELOOP, ENAMETOOLONG,
ENODEV, ENOENT, ENOTDIR, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG,
@@ -35,8 +36,8 @@ pub struct DirEntry {
/// inode of this entry.
pub inode: libc::ino_t,
// Stat buff for this entry, or none if not yet computed.
stat: Cell<Option<libc::stat>>,
// Device, inode pair for this entry, or none if not yet computed.
dev_inode: Cell<Option<DevInode>>,
// The type of the entry. This is initially none; it may be populated eagerly via readdir()
// on some filesystems, or later via stat(). If stat() fails, the error is silently ignored
@@ -76,12 +77,12 @@ pub fn is_dir(&self) -> bool {
pub fn is_possible_link(&self) -> Option<bool> {
self.possible_link
}
/// Return the stat buff for this entry, invoking stat() if necessary.
pub fn stat(&self) -> Option<libc::stat> {
if self.stat.get().is_none() {
/// Return the device, inode pair for this entry, invoking stat() if necessary.
pub fn dev_inode(&self) -> Option<DevInode> {
if self.dev_inode.get().is_none() {
self.do_stat();
}
self.stat.get()
self.dev_inode.get()
}
// Reset our fields.
@@ -89,7 +90,7 @@ fn reset(&mut self) {
self.name.clear();
self.inode = unsafe { std::mem::zeroed() };
self.typ.set(None);
self.stat.set(None);
self.dev_inode.set(None);
}
// Populate our stat buffer, and type. Errors are silently ignored.
@@ -104,7 +105,11 @@ fn do_stat(&self) {
let narrow = wcs2zstring(&self.name);
let mut s: libc::stat = unsafe { std::mem::zeroed() };
if unsafe { libc::fstatat(fd, narrow.as_ptr(), &mut s, 0) } == 0 {
self.stat.set(Some(s));
let dev_inode = DevInode {
device: s.st_dev as u64,
inode: s.st_ino as u64,
};
self.dev_inode.set(Some(dev_inode));
self.typ.set(stat_mode_to_entry_type(s.st_mode));
} else {
match errno::errno().0 {
@@ -219,7 +224,7 @@ fn new_impl(path: &wstr, withdot: bool) -> io::Result<Self> {
let entry = DirEntry {
name: WString::new(),
inode: 0,
stat: Cell::new(None),
dev_inode: Cell::new(None),
typ: Cell::new(None),
dirfd: dir.clone(),
possible_link: None,

View File

@@ -6,13 +6,19 @@
use std::os::unix::prelude::*;
/// Struct for representing a file's inode. We use this to detect and avoid symlink loops, among
/// other things. While an inode / dev pair is sufficient to distinguish co-existing files, Linux
/// seems to aggressively re-use inodes, so it cannot determine if a file has been deleted (ABA
/// problem). Therefore we include richer information.
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct FileId {
/// other things.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DevInode {
pub device: u64,
pub inode: u64,
}
/// While an inode / dev pair is sufficient to distinguish co-existing files, Linux
/// seems to aggressively re-use inodes, so it cannot determine if a file has been deleted
/// (ABA problem). Therefore we include richer information to detect file changes.
#[derive(Debug, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct FileId {
pub dev_inode: DevInode,
pub size: u64,
pub change_seconds: i64,
pub change_nanoseconds: i64,
@@ -26,8 +32,10 @@ pub fn from_md(buf: &Metadata) -> Self {
// on different platforms.
#[allow(clippy::useless_conversion)]
FileId {
device: buf.dev(),
inode: buf.ino(),
dev_inode: DevInode {
device: buf.dev(),
inode: buf.ino(),
},
size: buf.size(),
change_seconds: buf.ctime().into(),
change_nanoseconds: buf.ctime_nsec().into(),
@@ -58,8 +66,7 @@ pub fn from_stat(buf: &libc::stat) -> FileId {
mod_nanoseconds = buf.st_mtimensec as _;
}
FileId {
device,
inode,
dev_inode: DevInode { device, inode },
size,
change_seconds,
change_nanoseconds,
@@ -78,8 +85,10 @@ pub fn older_than(&self, rhs: &FileId) -> bool {
}
pub const INVALID_FILE_ID: FileId = FileId {
device: u64::MAX,
inode: u64::MAX,
dev_inode: DevInode {
device: u64::MAX,
inode: u64::MAX,
},
size: u64::MAX,
change_seconds: i64::MIN,
change_nanoseconds: i64::MIN,

View File

@@ -29,7 +29,7 @@
pub use fish_printf::sprintf;
pub use fileid::{
file_id_for_fd, file_id_for_path, file_id_for_path_narrow, FileId, INVALID_FILE_ID,
file_id_for_fd, file_id_for_path, file_id_for_path_narrow, DevInode, FileId, INVALID_FILE_ID,
};
pub use wcstoi::*;