mirror of
https://github.com/fish-shell/fish-shell.git
synced 2026-05-30 03:01:15 -03:00
Overhaul job and terminal control
* Instead of reaping all child processes when we receive a SIGCHLD, try reaping only processes belonging to process groups from fully- constructed jobs, which should eliminate the need for the keepalive process entirely (WSL's lack of zombies not withstanding) as now completed processes are not reaped until the job has been fully constructed (i.e. all processes launched), which means their process group should still be around for new processes to join. * When `tcgetpgrp()` calls return 0, attempt to `tcsetpgrp()` before invoking failure handling code. * When forking a builtin and not running interactively, do not bail if unable to set/restore terminal attributes. Fixes #4178. Fixes #3805. Fixes #5210.
This commit is contained in:
37
src/exec.cpp
37
src/exec.cpp
@@ -985,14 +985,11 @@ static bool exec_process_in_job(parser_t &parser, process_t *p, job_t *j,
|
||||
}
|
||||
|
||||
void exec_job(parser_t &parser, job_t *j) {
|
||||
assert(j != nullptr && "null job_t passed to exec_job!");
|
||||
|
||||
// Set to true if something goes wrong while exec:ing the job, in which case the cleanup code
|
||||
// will kick in.
|
||||
bool exec_error = false;
|
||||
bool needs_keepalive = false;
|
||||
process_t keepalive;
|
||||
|
||||
CHECK(j, );
|
||||
CHECK_BLOCK();
|
||||
|
||||
// If fish was invoked with -n or --no-execute, then no_exec will be set and we do nothing.
|
||||
if (no_exec) {
|
||||
@@ -1024,10 +1021,9 @@ void exec_job(parser_t &parser, job_t *j) {
|
||||
// We may have block IOs that conflict with fd redirections. For example, we may have a command
|
||||
// with a redireciton like <&3; we may also have chosen 3 as the fd for our pipe. Ensure we have
|
||||
// no conflicts.
|
||||
for (size_t i = 0; i < all_ios.size(); i++) {
|
||||
io_data_t *io = all_ios.at(i).get();
|
||||
for (const auto io : all_ios) {
|
||||
if (io->io_mode == IO_BUFFER) {
|
||||
io_buffer_t *io_buffer = static_cast<io_buffer_t *>(io);
|
||||
auto *io_buffer = static_cast<io_buffer_t *>(io.get());
|
||||
if (!io_buffer->avoid_conflicts_with_io_chain(all_ios)) {
|
||||
// We could not avoid conflicts, probably due to fd exhaustion. Mark an error.
|
||||
exec_error = true;
|
||||
@@ -1037,21 +1033,17 @@ void exec_job(parser_t &parser, job_t *j) {
|
||||
}
|
||||
}
|
||||
|
||||
// See if we need to create a group keepalive process. This is a process that we create to make
|
||||
// sure that the process group doesn't die accidentally, and is often needed when a
|
||||
// builtin/block/function is inside a pipeline, since that usually means we have to wait for one
|
||||
// program to exit before continuing in the pipeline, causing the group leader to exit.
|
||||
if (j->get_flag(JOB_CONTROL) && !exec_error) {
|
||||
// When running under WSL, create a keepalive process unconditionally if our first
|
||||
// process is external as WSL does not permit joining the pgrp of an exited process.
|
||||
// Fixed in Windows 10 17713 and later, but keep this hack around until an RTM build is
|
||||
// released with that resolution. See #4676, #5210, https://github.com/Microsoft/WSL/issues/1353,
|
||||
// and https://github.com/Microsoft/WSL/issues/2786.
|
||||
process_t keepalive;
|
||||
bool needs_keepalive = false;
|
||||
if (is_windows_subsystem_for_linux() && j->get_flag(JOB_CONTROL) && !exec_error) {
|
||||
for (const process_ptr_t &p : j->processes) {
|
||||
if (p->type != EXTERNAL && (!p->is_last_in_job || !p->is_first_in_job)) {
|
||||
needs_keepalive = true;
|
||||
break;
|
||||
}
|
||||
// When running under WSL, create a keepalive process unconditionally if our first process is external.
|
||||
// This is because WSL does not permit joining the pgrp of an exited process.
|
||||
// (see https://github.com/Microsoft/WSL/issues/2786), also fish PR #4676
|
||||
if (is_windows_subsystem_for_linux() && j->processes.front()->type == EXTERNAL
|
||||
&& !p->is_first_in_job) { //but not if it's the only process
|
||||
// but not if it's the only process
|
||||
if (j->processes.front()->type == EXTERNAL && !p->is_first_in_job) {
|
||||
needs_keepalive = true;
|
||||
break;
|
||||
}
|
||||
@@ -1105,7 +1097,6 @@ void exec_job(parser_t &parser, job_t *j) {
|
||||
kill(keepalive.pid, SIGKILL);
|
||||
}
|
||||
|
||||
debug(3, L"Job is constructed");
|
||||
j->set_flag(JOB_CONSTRUCTED, true);
|
||||
if (!j->get_flag(JOB_FOREGROUND)) {
|
||||
proc_last_bg_pid = j->pgid;
|
||||
|
||||
@@ -389,7 +389,6 @@ static bool input_mapping_is_match(const input_mapping_t &m) {
|
||||
const wcstring &str = m.seq;
|
||||
|
||||
assert(str.size() > 0 && "zero-length input string passed to input_mapping_is_match!");
|
||||
debug(4, L"trying to match mapping %ls", escape_string(m.seq.c_str(), ESCAPE_ALL).c_str());
|
||||
|
||||
bool timed = false;
|
||||
for (size_t i = 0; i < str.size(); ++i) {
|
||||
|
||||
61
src/proc.cpp
61
src/proc.cpp
@@ -264,6 +264,12 @@ int job_signal(job_t *j, int signal) {
|
||||
return res;
|
||||
}
|
||||
|
||||
static void mark_job_complete(const job_t *j) {
|
||||
for (auto &p : j->processes) {
|
||||
p->completed = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Store the status of the process pid that was returned by waitpid.
|
||||
static void mark_process_status(process_t *p, int status) {
|
||||
// debug( 0, L"Process %ls %ls", p->argv[0], WIFSTOPPED (status)?L"stopped":(WIFEXITED( status
|
||||
@@ -362,9 +368,12 @@ typedef unsigned int process_generation_count_t;
|
||||
/// the SIGCHLD signal handler, and therefore does not need atomics or locks.
|
||||
static volatile process_generation_count_t s_sigchld_generation_cnt = 0;
|
||||
|
||||
/// If we have received a SIGCHLD signal, process any children. If await is false, this returns
|
||||
/// immediately if no SIGCHLD has been received. If await is true, this waits for one. Returns true
|
||||
/// if something was processed. This returns the number of children processed, or -1 on error.
|
||||
/// If we have received a SIGCHLD signal, reap exited children of fully-constructed jobs. We cannot
|
||||
/// reap **all** children (as in, `waitpid(-1, ...)`) since that may reap a pgrp leader that has
|
||||
/// exited but has another process in its job that has yet to launch and join its pgrp (#5219).
|
||||
/// If await is false, this returns immediately if no SIGCHLD has been received. If await is true,
|
||||
/// this waits for one. Returns true if something was processed.
|
||||
/// This returns the number of children processed, or -1 on error.
|
||||
static int process_mark_finished_children(bool wants_await) {
|
||||
ASSERT_IS_MAIN_THREAD();
|
||||
|
||||
@@ -399,12 +408,39 @@ static int process_mark_finished_children(bool wants_await) {
|
||||
}
|
||||
|
||||
int status = -1;
|
||||
pid_t pid = waitpid(-1, &status, options);
|
||||
pid_t pid = 0;
|
||||
|
||||
bool any_jobs = false;
|
||||
// Reap only processes belonging to fully-constructed jobs to prevent reaping of processes
|
||||
// before other processes in the same process group have a chance to join their pgrp.
|
||||
job_t *j; job_iterator_t jobs;
|
||||
while ((j = jobs.next())) {
|
||||
any_jobs = true;
|
||||
if (j->pgid == -2 || !j->get_flag(JOB_CONSTRUCTED)) {
|
||||
// Job has not been fully constructed yet
|
||||
debug(4, "Skipping iteration of not fully constructed job %d", j->pgid);
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(j->pgid != 0);
|
||||
debug(4, "Waiting on processes from job %d", j->pgid);
|
||||
pid = waitpid(-1 * j->pgid, &status, options);
|
||||
if (pid != 0) {
|
||||
// We'll handle this below
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!any_jobs) {
|
||||
debug(4, "No jobs found to wait for!");
|
||||
}
|
||||
|
||||
if (pid > 0) {
|
||||
// We got a valid pid.
|
||||
handle_child_status(pid, status);
|
||||
processed_count += 1;
|
||||
} else if (pid == 0) {
|
||||
continue;
|
||||
}
|
||||
else if (pid == 0) {
|
||||
// No ready-and-waiting children, we're done.
|
||||
break;
|
||||
} else {
|
||||
@@ -789,7 +825,7 @@ bool terminal_give_to_job(const job_t *j, bool cont) {
|
||||
// http://curiousthing.org/sigttin-sigttou-deep-dive-linux In all cases, our goal here was just
|
||||
// to hand over control of the terminal to this process group, which is a no-op if it's already
|
||||
// been done.
|
||||
if (tcgetpgrp(STDIN_FILENO) == j->pgid) {
|
||||
if (j->pgid == -2 || tcgetpgrp(STDIN_FILENO) == j->pgid) {
|
||||
debug(4, L"Process group %d already has control of terminal\n", j->pgid);
|
||||
} else {
|
||||
debug(4,
|
||||
@@ -806,6 +842,7 @@ bool terminal_give_to_job(const job_t *j, bool cont) {
|
||||
// guarantee the process isn't going to exit while we wait (which would cause us to possibly
|
||||
// block indefinitely).
|
||||
while (tcsetpgrp(STDIN_FILENO, j->pgid) != 0) {
|
||||
debug(3, "tcsetpgrp failed");
|
||||
bool pgroup_terminated = false;
|
||||
if (errno == EINTR) {
|
||||
; // Always retry on EINTR, see comments in tcsetattr EINTR code below.
|
||||
@@ -844,7 +881,9 @@ bool terminal_give_to_job(const job_t *j, bool cont) {
|
||||
// the group terminated, and didn't need to access the terminal, otherwise it would
|
||||
// have hung waiting for terminal IO (SIGTTIN). We can ignore this.
|
||||
debug(3, L"tcsetpgrp called but process group %d has terminated.\n", j->pgid);
|
||||
break;
|
||||
mark_job_complete(j);
|
||||
signal_unblock();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -971,19 +1010,21 @@ void job_continue(job_t *j, bool cont) {
|
||||
|
||||
// Wait for job to report.
|
||||
while (!reader_exit_forced() && !job_is_stopped(j) && !job_is_completed(j)) {
|
||||
// debug( 1, L"select_try()" );
|
||||
switch (select_try(j)) {
|
||||
case 1: {
|
||||
// debug(1, L"select_try() 1" );
|
||||
read_try(j);
|
||||
process_mark_finished_children(false);
|
||||
break;
|
||||
}
|
||||
case 0: {
|
||||
// debug(1, L"select_try() 0" );
|
||||
// No FDs are ready. Look for finished processes.
|
||||
process_mark_finished_children(false);
|
||||
process_mark_finished_children(true);
|
||||
break;
|
||||
}
|
||||
case -1: {
|
||||
// debug(1, L"select_try() -1" );
|
||||
// If there is no funky IO magic, we can use waitpid instead of handling
|
||||
// child deaths through signals. This gives a rather large speed boost (A
|
||||
// factor 3 startup time improvement on my 300 MHz machine) on short-lived
|
||||
|
||||
@@ -1742,8 +1742,23 @@ static void reader_interactive_init() {
|
||||
// Eventually we just give up and assume we're orphaend.
|
||||
for (unsigned long loop_count = 0;; loop_count++) {
|
||||
pid_t owner = tcgetpgrp(STDIN_FILENO);
|
||||
shell_pgid = getpgrp();
|
||||
// 0 is a valid return code from `tcgetpgrp()` under at least FreeBSD and testing
|
||||
// indicates that a subsequent call to `tcsetpgrp()` will succeed. 0 is the
|
||||
// pid of the top-level kernel process, so I'm not sure if this means ownership
|
||||
// of the terminal has gone back to the kernel (i.e. it's not owned) or if it is
|
||||
// just an "invalid" pid for all intents and purposes.
|
||||
if (owner == 0) {
|
||||
tcsetpgrp(STDIN_FILENO, shell_pgid);
|
||||
// Since we expect the above to work, call `tcgetpgrp()` immediately to
|
||||
// avoid a second pass through this loop.
|
||||
owner = tcgetpgrp(STDIN_FILENO);
|
||||
}
|
||||
if (owner == -1 && errno == ENOTTY) {
|
||||
if (!is_interactive_session) {
|
||||
// It's OK if we're not able to take control of the terminal. We handle
|
||||
// the fallout from this in a few other places.
|
||||
break;
|
||||
}
|
||||
// No TTY, cannot be interactive?
|
||||
redirect_tty_output();
|
||||
debug(1, _(L"No TTY for interactive shell (tcgetpgrp failed)"));
|
||||
@@ -2437,10 +2452,12 @@ const wchar_t *reader_readline(int nchars) {
|
||||
// Get the current terminal modes. These will be restored when the function returns.
|
||||
if (tcgetattr(STDIN_FILENO, &old_modes) == -1 && errno == EIO) redirect_tty_output();
|
||||
// Set the new modes.
|
||||
if (is_interactive_session) {
|
||||
if (tcsetattr(0, TCSANOW, &shell_modes) == -1) {
|
||||
if (errno == EIO) redirect_tty_output();
|
||||
wperror(L"tcsetattr");
|
||||
}
|
||||
}
|
||||
|
||||
while (!finished && !data->end_loop) {
|
||||
if (0 < nchars && (size_t)nchars <= data->command_line.size()) {
|
||||
@@ -3317,7 +3334,9 @@ const wchar_t *reader_readline(int nchars) {
|
||||
}
|
||||
|
||||
if (!reader_exit_forced()) {
|
||||
if (tcsetattr(0, TCSANOW, &old_modes) == -1) {
|
||||
// The order of the two conditions below is important. Try to restore the mode
|
||||
// in all cases, but only complain if interactive.
|
||||
if (tcsetattr(0, TCSANOW, &old_modes) == -1 && is_interactive_session) {
|
||||
if (errno == EIO) redirect_tty_output();
|
||||
wperror(L"tcsetattr"); // return to previous mode
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user