Introduce dir_iter_t

This introduces dir_iter_t, a new class for iterating the contents of a
directory. dir_iter_t encapsulates the logic that tries to avoid using
stat() to determine the type of a file, when possible.
This commit is contained in:
ridiculousfish
2022-09-25 12:04:33 -07:00
parent ef844a63b9
commit b684f7b076
5 changed files with 369 additions and 3 deletions

View File

@@ -261,6 +261,11 @@ bool contains(const Col &col, const T2 &val) {
return std::find(std::begin(col), std::end(col), val) != std::end(col);
}
/// \return true if \p val compares equal to at least one element of the braced list \p col.
/// This overload allows call sites of the form contains({a, b, c}, x).
template <typename T1, typename T2>
bool contains(std::initializer_list<T1> col, const T2 &val) {
    for (const T1 &candidate : col) {
        if (candidate == val) return true;
    }
    return false;
}
/// Append a vector \p donator to the vector \p receiver.
template <typename T>
void vec_append(std::vector<T> &receiver, std::vector<T> &&donator) {

View File

@@ -1707,6 +1707,93 @@ static void test_is_sorted_by_name() {
static_assert(!is_sorted_by_name(not_sorted), "is_sorted_by_name failure");
}
/// Exercise dir_iter_t: first against a path that cannot be opened, then against a freshly created
/// temporary directory holding one entry of each interesting kind (directory, regular file, fifo,
/// and symlinks to a file, a directory, nowhere, and itself).
void test_dir_iter() {
    dir_iter_t baditer(L"/definitely/not/a/valid/directory/for/sure");
    do_test(!baditer.valid());
    do_test(baditer.error() == ENOENT || baditer.error() == EACCES);
    do_test(baditer.next() == nullptr);

    char t1[] = "/tmp/fish_test_dir_iter.XXXXXX";
    // mkdtemp() returns nullptr on failure. Constructing a std::string from nullptr is undefined
    // behavior, so report the failure and give up on the rest of the test instead.
    const char *tmpdir = mkdtemp(t1);
    do_test(tmpdir != nullptr);
    if (tmpdir == nullptr) {
        return;
    }
    const std::string basepathn = tmpdir;
    const wcstring basepath = str2wcstring(basepathn);
    auto makepath = [&](const wcstring &s) { return wcs2string(basepath + L"/" + s); };

    const wcstring dirname = L"dir";
    const wcstring regname = L"reg";
    const wcstring reglinkname = L"reglink";    // link to regular file
    const wcstring dirlinkname = L"dirlink";    // link to directory
    const wcstring badlinkname = L"badlink";    // link to nowhere
    const wcstring selflinkname = L"selflink";  // link to self
    const wcstring fifoname = L"fifo";
    const wcstring_list_t names = {dirname,     regname,      reglinkname, dirlinkname,
                                   badlinkname, selflinkname, fifoname};

    const auto is_link_name = [&](const wcstring &name) -> bool {
        return contains({reglinkname, dirlinkname, badlinkname, selflinkname}, name);
    };

    // Make our different file types
    int ret = mkdir(makepath(dirname).c_str(), 0700);
    do_test(ret == 0);

    ret = open(makepath(regname).c_str(), O_CREAT | O_WRONLY, 0600);
    do_test(ret >= 0);
    // Only close a descriptor we actually obtained.
    if (ret >= 0) {
        close(ret);
    }

    ret = symlink(makepath(regname).c_str(), makepath(reglinkname).c_str());
    do_test(ret == 0);

    ret = symlink(makepath(dirname).c_str(), makepath(dirlinkname).c_str());
    do_test(ret == 0);

    ret = symlink("/this/is/an/invalid/path", makepath(badlinkname).c_str());
    do_test(ret == 0);

    ret = symlink(makepath(selflinkname).c_str(), makepath(selflinkname).c_str());
    do_test(ret == 0);

    ret = mkfifo(makepath(fifoname).c_str(), 0600);
    do_test(ret == 0);

    dir_iter_t iter1(basepath);
    do_test(iter1.valid());
    do_test(iter1.error() == 0);
    size_t seen = 0;
    while (const auto *entry = iter1.next()) {
        seen += 1;
        do_test(entry->name != L"." && entry->name != L"..");
        do_test(contains(names, entry->name));

        // The type we expect check_type() to report once symlinks have been followed.
        maybe_t<dir_entry_type_t> expected{};
        if (entry->name == dirname) {
            expected = dir_entry_type_t::dir;
        } else if (entry->name == regname) {
            expected = dir_entry_type_t::reg;
        } else if (entry->name == reglinkname) {
            expected = dir_entry_type_t::reg;
        } else if (entry->name == dirlinkname) {
            expected = dir_entry_type_t::dir;
        } else if (entry->name == badlinkname) {
            expected = none();
        } else if (entry->name == selflinkname) {
            expected = dir_entry_type_t::lnk;
        } else if (entry->name == fifoname) {
            expected = dir_entry_type_t::fifo;
        } else {
            err(L"Unexpected file type");
            continue;
        }

        // Links should never have a fast type if we are resolving them, since we cannot resolve a
        // symlink from readdir.
        if (is_link_name(entry->name)) {
            do_test(entry->fast_type() == none());
        }
        // If we have a fast type, it should be correct.
        do_test(entry->fast_type() == none() || entry->fast_type() == expected);
        do_test(entry->check_type() == expected);
    }
    do_test(seen == names.size());

    // Clean up. unlink() fails harmlessly on the directory entry; rmdir() removes it.
    for (const auto &name : names) {
        (void)unlink(makepath(name).c_str());
    }
    (void)rmdir(makepath(dirname).c_str());
    (void)rmdir(basepathn.c_str());
}
static void test_utility_functions() {
say(L"Testing utility functions");
test_wcsfilecmp();
@@ -6947,6 +7034,7 @@ struct test_comparator_t {
#define TEST_GROUP(x) x
static const test_t s_tests[]{
{TEST_GROUP("utility_functions"), test_utility_functions},
{TEST_GROUP("dir_iter"), test_dir_iter},
{TEST_GROUP("string_split"), test_split_string_tok},
{TEST_GROUP("wwrite_to_fd"), test_wwrite_to_fd},
{TEST_GROUP("env_vars"), test_env_vars},

View File

@@ -440,7 +440,8 @@ static bool wildcard_test_flags_then_complete(const wcstring &filepath, const wc
const bool need_directory = expand_flags & expand_flag::directories_only;
// Fast path: If we need directories, and we already know it is one,
// and we don't need to do anything else, just return it.
// This is a common case for cd completions, and removes the `stat` entirely in case the system supports it.
// This is a common case for cd completions, and removes the `stat` entirely in case the system
// supports it.
if (known_dir && !executables_only && !(expand_flags & expand_flag::gen_descriptions)) {
return wildcard_complete(filename + L'/', wc, const_desc(L""), out, expand_flags,
COMPLETE_NO_SPACE) == wildcard_result_t::match;
@@ -626,7 +627,8 @@ class wildcard_expander_t {
}
void try_add_completion_result(const wcstring &filepath, const wcstring &filename,
const wcstring &wildcard, const wcstring &prefix, bool known_dir) {
const wcstring &wildcard, const wcstring &prefix,
bool known_dir) {
// This function is only for the completions case.
assert(this->flags & expand_flag::for_completions);
@@ -738,7 +740,8 @@ void wildcard_expander_t::expand_trailing_slash(const wcstring &base_dir, const
// for example, cd ../<tab> should complete "without resolving symlinks".
path = normalize_path(path);
}
while (wreaddir_resolving(dir, path, next, need_dir ? &is_dir : nullptr) && !interrupted_or_overflowed()) {
while (wreaddir_resolving(dir, path, next, need_dir ? &is_dir : nullptr) &&
!interrupted_or_overflowed()) {
if (need_dir && !is_dir) continue;
if (!next.empty() && next.at(0) != L'.') {
this->try_add_completion_result(base_dir + next, next, L"", prefix, is_dir);

View File

@@ -155,6 +155,179 @@ DIR *wopendir(const wcstring &name) {
return opendir(tmp.c_str());
}
#ifdef HAVE_STRUCT_DIRENT_D_TYPE
/// Translate the d_type value \p dt reported by readdir() into a dir_entry_type_t.
/// \return none() for DT_UNKNOWN and for any value that is not one of the handled DT_* constants.
static maybe_t<dir_entry_type_t> dirent_type_to_entry_type(uint8_t dt) {
    maybe_t<dir_entry_type_t> result = none();
    switch (dt) {
        case DT_FIFO:
            result = dir_entry_type_t::fifo;
            break;
        case DT_CHR:
            result = dir_entry_type_t::chr;
            break;
        case DT_DIR:
            result = dir_entry_type_t::dir;
            break;
        case DT_BLK:
            result = dir_entry_type_t::blk;
            break;
        case DT_REG:
            result = dir_entry_type_t::reg;
            break;
        case DT_LNK:
            result = dir_entry_type_t::lnk;
            break;
        case DT_SOCK:
            result = dir_entry_type_t::sock;
            break;
        case DT_WHT:
            result = dir_entry_type_t::whiteout;
            break;
        case DT_UNKNOWN:
        default:
            // Leave the result as none().
            break;
    }
    return result;
}
#endif
/// Translate the file-type bits of a stat() mode \p m into a dir_entry_type_t.
/// \return none() if the masked type bits match none of the handled S_IF* constants.
static maybe_t<dir_entry_type_t> stat_mode_to_entry_type(mode_t m) {
    // Only the file-type portion of the mode is relevant here.
    const auto fmt = m & S_IFMT;
    if (fmt == S_IFIFO) return dir_entry_type_t::fifo;
    if (fmt == S_IFCHR) return dir_entry_type_t::chr;
    if (fmt == S_IFDIR) return dir_entry_type_t::dir;
    if (fmt == S_IFBLK) return dir_entry_type_t::blk;
    if (fmt == S_IFREG) return dir_entry_type_t::reg;
    if (fmt == S_IFLNK) return dir_entry_type_t::lnk;
    if (fmt == S_IFSOCK) return dir_entry_type_t::sock;
#if defined(S_IFWHT)
    if (fmt == S_IFWHT) return dir_entry_type_t::whiteout;
#endif
    return none();
}
// Out-of-line defaulted constructor and destructor for dir_iter_t::entry_t.
// Both are declared in the private section of entry_t, so only entry_t itself and its friend
// dir_iter_t may create or destroy entries.
dir_iter_t::entry_t::entry_t() = default;
dir_iter_t::entry_t::~entry_t() = default;
void dir_iter_t::entry_t::reset() {
this->name.clear();
this->inode = {};
this->type_.reset();
this->stat_.reset();
}
/// \return the type of this entry, running do_stat() first if no type is cached yet.
/// The result may still be none() if do_stat() could not determine a type.
maybe_t<dir_entry_type_t> dir_iter_t::entry_t::check_type() const {
    const bool type_unknown = !type_;
    if (type_unknown) {
        // do_stat() fills in type_ where it can and handles its own errors.
        do_stat();
    }
    return type_;
}
/// \return the cached stat buffer for this entry, running do_stat() first if none is cached.
/// The result is none() if do_stat() did not populate the buffer.
const maybe_t<struct stat> &dir_iter_t::entry_t::stat() const {
    if (!stat_) do_stat();
    return stat_;
}
void dir_iter_t::entry_t::do_stat() const {
// We want to set both our type and our stat buffer.
// If we follow symlinks and stat() errors with a bad symlink, set the type to link, but do not
// populate the stat buffer.
if (this->dirfd_ < 0) {
return;
}
std::string narrow = wcs2string(this->name);
struct stat s {};
if (fstatat(this->dirfd_, narrow.c_str(), &s, 0) == 0) {
this->stat_ = s;
this->type_ = stat_mode_to_entry_type(s.st_mode);
} else {
switch (errno) {
case ELOOP:
this->type_ = dir_entry_type_t::lnk;
break;
case EACCES:
case EIO:
case ENOENT:
case ENOTDIR:
case ENAMETOOLONG:
// These are "expected" errors.
this->type_ = none();
break;
default:
wperror(L"fstatat");
break;
}
}
}
/// Open the directory at \p path via wopendir(). On success, remember the directory's file
/// descriptor in the entry so it can later call fstatat(); on failure, record errno in error_.
dir_iter_t::dir_iter_t(const wcstring &path) {
    dir_ = wopendir(path);
    if (dir_ != nullptr) {
        entry_.dirfd_ = dirfd(dir_);
    } else {
        error_ = errno;
    }
}
/// Move constructor: take over the DIR*, the error code and the current entry of \p rhs.
/// Afterwards \p rhs holds no DIR* and its entry no longer refers to the directory's fd, so
/// destroying \p rhs will not close the directory.
dir_iter_t::dir_iter_t(dir_iter_t &&rhs)
    : dir_(rhs.dir_), error_(rhs.error_), entry_(std::move(rhs.entry_)) {
    rhs.entry_.dirfd_ = -1;
    rhs.dir_ = nullptr;
}
/// Move assignment: close any directory we currently hold, then take over the DIR*, the error code
/// and the current entry of \p rhs. Afterwards \p rhs holds no DIR* and its entry no longer refers
/// to the directory's fd.
/// \return *this.
dir_iter_t &dir_iter_t::operator=(dir_iter_t &&rhs) {
    // Self-assignment must be a no-op: otherwise we would close our own directory and then null
    // out our own dir_, silently turning a valid iterator into an invalid one.
    if (this == &rhs) {
        return *this;
    }
    if (this->dir_) {
        (void)closedir(this->dir_);
    }
    this->dir_ = rhs.dir_;
    this->error_ = rhs.error_;
    this->entry_ = std::move(rhs.entry_);
    // Make sure rhs's destructor does not close the directory we now own.
    rhs.dir_ = nullptr;
    rhs.entry_.dirfd_ = -1;
    return *this;
}
/// Close the underlying directory, if one is held. The result of closedir() is ignored.
dir_iter_t::~dir_iter_t() {
    if (dir_ == nullptr) return;
    (void)closedir(dir_);
}
/// Advance to the next directory entry, skipping "." and "..".
/// \return a pointer to the internal entry (overwritten by the following call), or nullptr if no
/// directory is open, the directory is exhausted, or readdir() failed. When readdir() returns
/// null, error_ is set to the errno it left behind (0 if it did not set one).
const dir_iter_t::entry_t *dir_iter_t::next() {
    if (!dir_) return nullptr;

    struct dirent *dent = nullptr;
    for (;;) {
        // Clear errno before each readdir() so the value stored below comes from this call.
        errno = 0;
        dent = readdir(dir_);
        if (dent == nullptr) {
            error_ = errno;
            return nullptr;
        }
        const bool is_dot_entry =
            strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0;
        if (!is_dot_entry) break;
    }

    entry_.reset();
    entry_.name = str2wcstring(dent->d_name);
    entry_.inode = dent->d_ino;
#ifdef HAVE_STRUCT_DIRENT_D_TYPE
    // A symlink type is not cached: it is left as none so that a later stat resolves the link.
    auto fast = dirent_type_to_entry_type(dent->d_type);
    if (fast != dir_entry_type_t::lnk) {
        entry_.type_ = fast;
    }
#endif
    return &entry_;
}
dir_t::dir_t(const wcstring &path) {
const cstring tmp = wcs2string(path);
this->dir = opendir(tmp.c_str());

View File

@@ -7,6 +7,7 @@
#include <dirent.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/stat.h>
#include <sys/types.h>
#ifdef __APPLE__
// This include is required on macOS 10.10 for locale_t
@@ -163,6 +164,102 @@ struct file_id_t {
int compare_file_id(const file_id_t &rhs) const;
};
/// The kinds of file that a directory entry may be.
/// Values start at 1 and are stored in a single byte.
enum class dir_entry_type_t : uint8_t {
    /// FIFO (named pipe).
    fifo = 1,
    /// Character device.
    chr = 2,
    /// Directory.
    dir = 3,
    /// Block device.
    blk = 4,
    /// Regular file.
    reg = 5,
    /// Symbolic link.
    lnk = 6,
    /// Socket.
    sock = 7,
    /// Whiteout (from BSD).
    whiteout = 8,
};
/// Class for iterating over a directory, wrapping readdir().
/// This allows enumerating the contents of a directory, exposing the file type if the filesystem
/// itself exposes that from readdir(). stat() is incurred only if necessary: if the entry is a
/// symlink, or if the caller asks for the stat buffer.
/// Symlinks are followed.
/// The iterator owns the underlying DIR* and closes it on destruction; it is movable but not
/// copyable.
class dir_iter_t : noncopyable_t {
public:
struct entry_t;
/// Open a directory at a given path. On failure, \p error() will return the error code.
/// Note opendir is guaranteed to set close-on-exec by POSIX (hooray).
explicit dir_iter_t(const wcstring &path);
/// Advance this iterator. The "." and ".." entries are skipped.
/// \return a pointer to the entry, or nullptr if the iteration is finished, or an error occurred.
/// The returned pointer is only valid until the next call to next().
const entry_t *next();
/// \return the errno value for the last error, or 0 if none.
int error() const { return error_; }
/// \return if we are valid: successfully opened a directory.
bool valid() const { return dir_ != nullptr; }
/// \return the underlying file descriptor, or -1 if invalid.
int fd() const { return dir_ ? dirfd(dir_) : -1; }
/// Closes the directory, if one is open.
~dir_iter_t();
/// Moving transfers ownership of the open directory; the moved-from iterator becomes invalid.
dir_iter_t(dir_iter_t &&);
dir_iter_t &operator=(dir_iter_t &&);
/// An entry returned by dir_iter_t.
/// Entries can only be created by dir_iter_t: the constructors and destructor are private.
struct entry_t : noncopyable_t {
/// File name of this entry.
wcstring name{};
/// inode of this entry.
ino_t inode{};
/// \return the type of this entry if it is already available, otherwise none().
/// Symlinks never have a fast type, because the type of interest is that of the link target.
maybe_t<dir_entry_type_t> fast_type() const { return type_; }
/// \return the type of this entry, falling back to stat() if necessary.
/// If stat() fails because the file has disappeared, or because the entry is a symlink whose
/// target does not exist, this will return none().
/// If stat() fails with ELOOP (for example a symlink pointing to itself), this will return type
/// lnk.
maybe_t<dir_entry_type_t> check_type() const;
/// \return whether this is a directory. This may call stat().
bool is_dir() const { return check_type() == dir_entry_type_t::dir; }
/// \return the stat buffer for this entry, invoking stat() if necessary.
/// The result is none() if stat() failed.
const maybe_t<struct stat> &stat() const;
private:
// Reset our fields (all but dirfd_) so the entry can be reused for the next directory item.
void reset();
// Populate our stat buffer, and type. Expected errors (such as a vanished file) are ignored;
// unexpected errors are reported via wperror() but not propagated to the caller.
void do_stat() const;
// Stat buffer for this entry, or none if not yet computed (or if stat() failed).
mutable maybe_t<struct stat> stat_{};
// The type of the entry. This is initially none; it may be populated eagerly via readdir()
// on some filesystems, or later via stat(). If stat() fails, the type is generally left as
// none(); the exception is ELOOP, which is reported as lnk. Note this is an unavoidable race.
mutable maybe_t<dir_entry_type_t> type_{};
// fd of the DIR*, used for fstatat(). -1 when there is no open directory.
int dirfd_{-1};
entry_t();
~entry_t();
entry_t(entry_t &&) = default;
entry_t &operator=(entry_t &&) = default;
friend class dir_iter_t;
};
private:
// The open directory stream, or nullptr if opening failed or we were moved from.
DIR *dir_{nullptr};
// errno from the most recent failing opendir()/readdir(), or 0.
int error_{0};
// The single entry object handed out by next(); it is overwritten on every call.
entry_t entry_;
};
/// RAII wrapper for DIR*
struct dir_t {
DIR *dir;