diff --git a/src/builtin_disown.cpp b/src/builtin_disown.cpp index 70a91baa0..3c9822df7 100644 --- a/src/builtin_disown.cpp +++ b/src/builtin_disown.cpp @@ -24,8 +24,9 @@ static int disown_job(const wchar_t *cmd, parser_t &parser, io_streams_t &stream } // Stopped disowned jobs must be manually signaled; explain how to do so. + auto pgid = j->get_pgid(); if (j->is_stopped()) { - killpg(j->pgid, SIGCONT); + if (pgid) killpg(*pgid, SIGCONT); const wchar_t *fmt = _(L"%ls: job %d ('%ls') was stopped and has been signalled to continue.\n"); streams.err.append_format(fmt, cmd, j->job_id(), j->command_wcstr()); @@ -35,7 +36,7 @@ static int disown_job(const wchar_t *cmd, parser_t &parser, io_streams_t &stream // within the context of a subjob which will cause the parent job to crash in exec_job(). // Instead, we set a flag and the parser removes the job from the jobs list later. j->mut_flags().disown_requested = true; - add_disowned_pgid(j->pgid); + if (pgid) add_disowned_pgid(*pgid); return STATUS_CMD_OK; } diff --git a/src/builtin_jobs.cpp b/src/builtin_jobs.cpp index 111887d08..9ac98c1bd 100644 --- a/src/builtin_jobs.cpp +++ b/src/builtin_jobs.cpp @@ -53,6 +53,11 @@ static int cpu_use(const job_t *j) { /// Print information about the specified job. static void builtin_jobs_print(const job_t *j, int mode, int header, io_streams_t &streams) { + int pgid = INVALID_PID; + if (auto job_pgid = j->get_pgid()) { + pgid = *job_pgid; + } + switch (mode) { case JOBS_PRINT_NOTHING: { break; @@ -67,7 +72,7 @@ static void builtin_jobs_print(const job_t *j, int mode, int header, io_streams_ streams.out.append(_(L"State\tCommand\n")); } - streams.out.append_format(L"%d\t%d\t", j->job_id(), j->pgid); + streams.out.append_format(L"%d\t%d\t", j->job_id(), pgid); if (have_proc_stat()) { streams.out.append_format(L"%d%%\t", cpu_use(j)); @@ -84,7 +89,7 @@ static void builtin_jobs_print(const job_t *j, int mode, int header, io_streams_ // Print table header before first job. streams.out.append(_(L"Group\n")); } - streams.out.append_format(L"%d\n", j->pgid); + streams.out.append_format(L"%d\n", pgid); break; } case JOBS_PRINT_PID: { diff --git a/src/builtin_wait.cpp b/src/builtin_wait.cpp index c02edae96..298f942c0 100644 --- a/src/builtin_wait.cpp +++ b/src/builtin_wait.cpp @@ -19,7 +19,7 @@ /// doesn't work properly, so use this function in wait command. static job_id_t get_job_id_from_pid(pid_t pid, const parser_t &parser) { for (const auto &j : parser.jobs()) { - if (j->pgid == pid) { + if (j->get_pgid() == maybe_t{pid}) { return j->job_id(); } // Check if the specified pid is a child process of the job. diff --git a/src/exec.cpp b/src/exec.cpp index 2a9614a86..2e51d0297 100644 --- a/src/exec.cpp +++ b/src/exec.cpp @@ -202,13 +202,13 @@ static void internal_exec(env_stack_t &vars, job_t *j, const io_chain_t &block_i } /// If our pgroup assignment mode wants us to use the first external proc, then apply it here. -static void maybe_assign_pgid_from_child(const std::shared_ptr &j, pid_t child_pid) { - // If our assignment mode is the first process, then assign it. - if (j->pgid == INVALID_PID && - j->pgroup_provenance == pgroup_provenance_t::first_external_proc) { +/// \returns the job's pgid, which should always be set to something valid after this call. +static pid_t maybe_assign_pgid_from_child(const std::shared_ptr &j, pid_t child_pid) { + if (j->pgroup_provenance == pgroup_provenance_t::first_external_proc && + !j->job_tree->get_pgid()) { j->job_tree->set_pgid(child_pid); - j->pgid = child_pid; } + return *j->job_tree->get_pgid(); } /// Construct an internal process for the process p. In the background, write the data \p outdata to @@ -338,15 +338,20 @@ bool blocked_signals_for_job(const job_t &job, sigset_t *sigmask) { static bool fork_child_for_process(const std::shared_ptr &job, process_t *p, const dup2_list_t &dup2s, const char *fork_type, const std::function &child_action) { + assert(!job->job_tree->is_placeholder() && + "Placeholders are for internal functions, they should never fork"); pid_t pid = execute_fork(); if (pid == 0) { // This is the child process. Setup redirections, print correct output to // stdout and stderr, and then exit. - maybe_t new_termowner{}; p->pid = getpid(); - child_set_group(job.get(), p); - child_setup_process(job->should_claim_terminal() ? job->pgid : INVALID_PID, *job, true, - dup2s); + pid_t pgid = maybe_assign_pgid_from_child(job, p->pid); + + // The child attempts to join the pgroup. + if (int err = execute_setpgid(p->pid, pgid, false /* not parent */)) { + report_setpgid_error(err, pgid, job.get(), p); + } + child_setup_process(job->should_claim_terminal() ? pgid : INVALID_PID, *job, true, dup2s); child_action(); DIE("Child process returned control to fork_child lambda!"); } @@ -364,8 +369,13 @@ static bool fork_child_for_process(const std::shared_ptr &job, process_t p->argv0()); p->pid = pid; - maybe_assign_pgid_from_child(job, p->pid); - set_child_group(job.get(), p->pid); + pid_t pgid = maybe_assign_pgid_from_child(job, p->pid); + + // The parent attempts to send the child to its pgroup. + // EACCESS is an expected benign error as the child may have called exec(). + if (int err = execute_setpgid(p->pid, pgid, true /* is parent */)) { + if (err != EACCES) report_setpgid_error(err, pgid, job.get(), p); + } terminal_maybe_give_to_job(job.get(), false); return true; } @@ -576,12 +586,13 @@ static bool exec_external_command(parser_t &parser, const std::shared_ptr // these are all things do_fork() takes care of normally (for forked processes): p->pid = pid; - maybe_assign_pgid_from_child(j, p->pid); + pid_t pgid = maybe_assign_pgid_from_child(j, p->pid); + // posix_spawn should in principle set the pgid before returning. // In glibc, posix_spawn uses fork() and the pgid group is set on the child side; // therefore the parent may not have seen it be set yet. // Ensure it gets set. See #4715, also https://github.com/Microsoft/WSL/issues/2997. - set_child_group(j.get(), p->pid); + execute_setpgid(p->pid, pgid, true /* is parent */); terminal_maybe_give_to_job(j.get(), false); } else #endif @@ -942,17 +953,12 @@ bool exec_job(parser_t &parser, const shared_ptr &j, const io_chain_t &bl const bool reclaim_foreground_pgrp = (tcgetpgrp(STDIN_FILENO) == pgrp); // Perhaps we know our pgroup already. - assert(j->pgid == INVALID_PID && "Should not yet have a pid."); switch (j->pgroup_provenance) { case pgroup_provenance_t::lineage: { - auto pgid = j->job_tree->get_pgid(); - assert(pgid && *pgid != INVALID_PID && "Should have valid pgid"); - j->pgid = *pgid; break; } case pgroup_provenance_t::fish_itself: - j->pgid = pgrp; // TODO: should not need this 'if' here. Rationalize this. if (! j->job_tree->is_placeholder()) { j->job_tree->set_pgid(pgrp); @@ -1050,11 +1056,13 @@ bool exec_job(parser_t &parser, const shared_ptr &j, const io_chain_t &bl } FLOGF(exec_job_exec, L"Executed job %d from command '%ls' with pgrp %d", j->job_id(), - j->command_wcstr(), j->pgid); + j->command_wcstr(), j->get_pgid() ? *j->get_pgid() : -2); j->mark_constructed(); if (!j->is_foreground()) { - parser.vars().set_one(L"last_pid", ENV_GLOBAL, to_string(j->pgid)); + auto pgid = j->get_pgid(); + assert(pgid.has_value() && "Backgroudn jobs should always have a pgroup"); + parser.vars().set_one(L"last_pid", ENV_GLOBAL, to_string(*pgid)); } if (exec_error) { diff --git a/src/parser.cpp b/src/parser.cpp index 302612199..f62e76277 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -613,18 +613,10 @@ const job_t *parser_t::job_get(job_id_t id) const { } job_t *parser_t::job_get_from_pid(pid_t pid) const { - pid_t pgid = getpgid(pid); - - if (pgid == -1) { - return nullptr; - } - for (const auto &job : jobs()) { - if (job->pgid == pgid) { - for (const process_ptr_t &p : job->processes) { - if (p->pid == pid) { - return job.get(); - } + for (const process_ptr_t &p : job->processes) { + if (p->pid == pid) { + return job.get(); } } } diff --git a/src/postfork.cpp b/src/postfork.cpp index 29990a16d..1a9b29466 100644 --- a/src/postfork.cpp +++ b/src/postfork.cpp @@ -41,7 +41,7 @@ static char *get_interpreter(const char *command, char *buffer, size_t buff_size); /// Report the error code \p err for a failed setpgid call. -static void report_setpgid_error(int err, const job_t *j, const process_t *p) { +void report_setpgid_error(int err, pid_t desired_pgid, const job_t *j, const process_t *p) { char pid_buff[128]; char job_id_buff[128]; char getpgid_buff[128]; @@ -52,7 +52,7 @@ static void report_setpgid_error(int err, const job_t *j, const process_t *p) { format_long_safe(pid_buff, p->pid); format_long_safe(job_id_buff, j->job_id()); format_long_safe(getpgid_buff, getpgid(p->pid)); - format_long_safe(job_pgid_buff, j->pgid); + format_long_safe(job_pgid_buff, desired_pgid); narrow_string_safe(argv0, p->argv0()); narrow_string_safe(command, j->command_wcstr()); @@ -69,20 +69,22 @@ static void report_setpgid_error(int err, const job_t *j, const process_t *p) { safe_perror("setpgid"); } -/// Called only by the child to set its own process group (possibly creating a new group in the -/// process if it is the first in a JOB_CONTROL job. -/// Returns true on success, false on failure. -bool child_set_group(job_t *j, process_t *p) { - if (j->pgid == INVALID_PID) { - assert(j->pgroup_provenance == pgroup_provenance_t::first_external_proc && - "pgroup should only be unset if we need to become the leader"); - j->pgid = p->pid; - } - // Put a cap on how many times we retry so we are never stuck here. - for (int i = 0; i < 100; i++) { - if (setpgid(p->pid, j->pgid) == 0) { - return true; - } else if (errno == EPERM) { +int execute_setpgid(pid_t pid, pid_t pgroup, bool is_parent) { + // Historically we have looped here to support WSL. + unsigned eperm_count = 0; + for (;;) { + if (setpgid(pid, pgroup) == 0) { + return 0; + } + int err = errno; + if (err == EACCES && is_parent) { + // We are the parent process and our child has called exec(). + // This is an unavoidable benign race. + return 0; + } else if (err == EINTR) { + // Paranoia. + continue; + } else if (err == EPERM && eperm_count++ < 100) { // The setpgid(2) man page says that EPERM is returned only if attempts are made // to move processes into groups across session boundaries (which can never be // the case in fish, anywhere) or to change the process group ID of a session @@ -90,49 +92,9 @@ bool child_set_group(job_t *j, process_t *p) { // we see the same with tcsetpgrp(2) in other places and it disappears on retry. debug_safe(2, "setpgid(2) returned EPERM. Retrying"); continue; - } else if (errno == EINTR) { - // Retry on EINTR. - continue; - } else { - break; } + return err; } - report_setpgid_error(errno, j, p); - return false; -} - -/// Called only by the parent only after a child forks and successfully calls child_set_group, -/// guaranteeing the job control process group has been created and that the child belongs to the -/// correct process group. Here we can update our job_t structure to reflect the correct process -/// group in the case of JOB_CONTROL, and we can give the new process group control of the terminal -/// if it's to run in the foreground. -bool set_child_group(job_t *j, pid_t child_pid) { - if (j->wants_job_control()) { - assert(j->pgid != INVALID_PID && - "set_child_group called with JOB_CONTROL before job pgid determined!"); - - // The parent sets the child's group. This incurs the well-known unavoidable race with the - // child exiting, so ignore ESRCH and EPERM (in case the pid was recycled). - // Additionally ignoring EACCES. See #4715 and #4884. - if (setpgid(child_pid, j->pgid) < 0) { - if (errno != ESRCH && errno != EPERM && errno != EACCES) { - safe_perror("setpgid"); - return false; - } else { - // Just in case it's ever not right to ignore the setpgid call, (i.e. if this - // ever leads to a terminal hang due if both this setpgid call AND posix_spawn's - // internal setpgid calls failed), write to the debug log so a future developer - // doesn't go crazy trying to track this down. - FLOGF(proc_pgroup, - "Error %d while calling setpgid for child %d (probably harmless)", errno, - child_pid); - } - } - } else { - j->pgid = getpgrp(); - } - - return true; } int child_setup_process(pid_t new_termowner, const job_t &job, bool is_forked, @@ -240,17 +202,17 @@ bool fork_actions_make_spawn_properties(posix_spawnattr_t *attr, return false; } - bool should_set_process_group_id = false; - int desired_process_group_id = 0; - if (j->wants_job_control()) { - should_set_process_group_id = true; - - // set_child_group puts each job into its own process group - // do the same here if there is no PGID yet (i.e. PGID == -2) - desired_process_group_id = j->pgid; - if (desired_process_group_id == INVALID_PID) { - desired_process_group_id = 0; - } + // desired_pgid tracks the pgroup for the process. If it is none, the pgroup is left unchanged. + // If it is zero, create a new pgroup from the pid. If it is >0, join that pgroup. + maybe_t desired_pgid = none(); + if (auto job_pgid = j->job_tree->get_pgid()) { + desired_pgid = *job_pgid; + } else { + assert(j->pgroup_provenance == pgroup_provenance_t::first_external_proc && + "We should have already known our pgroup"); + // We are the first external proc in the job tree. Set the desired_pgid to 0 to indicate we + // should creating a new process group. + desired_pgid = 0; } // Set the handling for job control signals back to the default. @@ -263,13 +225,14 @@ bool fork_actions_make_spawn_properties(posix_spawnattr_t *attr, short flags = 0; if (reset_signal_handlers) flags |= POSIX_SPAWN_SETSIGDEF; if (reset_sigmask) flags |= POSIX_SPAWN_SETSIGMASK; - if (should_set_process_group_id) flags |= POSIX_SPAWN_SETPGROUP; + if (desired_pgid.has_value()) flags |= POSIX_SPAWN_SETPGROUP; int err = 0; if (!err) err = posix_spawnattr_setflags(attr, flags); - if (!err && should_set_process_group_id) - err = posix_spawnattr_setpgroup(attr, desired_process_group_id); + if (!err && desired_pgid.has_value()) { + err = posix_spawnattr_setpgroup(attr, *desired_pgid); + } // Everybody gets default handlers. if (!err && reset_signal_handlers) { diff --git a/src/postfork.h b/src/postfork.h index a93fa31df..78e7d7b69 100644 --- a/src/postfork.h +++ b/src/postfork.h @@ -18,8 +18,17 @@ class dup2_list_t; class job_t; class process_t; -bool set_child_group(job_t *j, pid_t child_pid); // called by parent -bool child_set_group(job_t *j, process_t *p); // called by child +/// Tell the proc \p pid to join process group \p pgrp. +/// If \p is_child is true, we are the child process; otherwise we are fish. +/// Called by both parent and child; this is an unavoidable race inherent to Unix. +/// If is_parent is set, then we are the parent process and should swallow EACCESS. +/// \return 0 on success, an errno error code on failure. +int execute_setpgid(pid_t pid, pid_t pgrp, bool is_parent); + +/// Report the error code \p err for a failed setpgid call. +/// Note not all errors should be reported; in particular EACCESS is expected and benign in the +/// parent only. +void report_setpgid_error(int err, pid_t desired_pgid, const job_t *j, const process_t *p); /// Initialize a new child process. This should be called right away after forking in the child /// process. If job control is enabled for this job, the process is put in the process group of the diff --git a/src/proc.cpp b/src/proc.cpp index 0c7e6a132..9e9fc80e2 100644 --- a/src/proc.cpp +++ b/src/proc.cpp @@ -145,7 +145,7 @@ bool job_t::should_report_process_exits() const { // itself. // TODO: rationalize this. // If we never got a pgid then we never launched the external process, so don't report it. - if (this->pgid == INVALID_PID) { + if (!this->get_pgid()) { return false; } @@ -170,11 +170,11 @@ bool job_t::signal(int signal) { // Presumably we are distinguishing between the two cases below because we do // not want to send ourselves the signal in question in case the job shares // a pgid with the shell. - - if (pgid != getpgrp()) { - if (killpg(pgid, signal) == -1) { + auto pgid = get_pgid(); + if (pgid.has_value() && *pgid != getpgrp()) { + if (killpg(*pgid, signal) == -1) { char buffer[512]; - sprintf(buffer, "killpg(%d, %s)", pgid, strsignal(signal)); + sprintf(buffer, "killpg(%d, %s)", *pgid, strsignal(signal)); wperror(str2wcstring(buffer).c_str()); return false; } @@ -692,8 +692,8 @@ static bool process_clean_after_marking(parser_t &parser, bool allow_interactive // handlers. if (!j->from_event_handler() && j->is_completed()) { if (j->should_report_process_exits()) { - exit_events.push_back( - proc_create_event(L"JOB_EXIT", event_type_t::exit, -j->pgid, 0)); + pid_t pgid = *j->get_pgid(); + exit_events.push_back(proc_create_event(L"JOB_EXIT", event_type_t::exit, -pgid, 0)); } exit_events.push_back( proc_create_event(L"JOB_EXIT", event_type_t::caller_exit, j->job_id(), 0)); @@ -796,7 +796,11 @@ int terminal_maybe_give_to_job(const job_t *j, bool continuing_from_stopped) { return notneeded; } - if (j->pgid == INVALID_PID || j->pgid == 0) { + // Get the pgid; we may not have one. + pid_t pgid{}; + if (auto mpgid = j->get_pgid()) { + pgid = *mpgid; + } else { FLOG(proc_termowner, L"terminal_give_to_job() returning early due to no process group"); return notneeded; } @@ -831,12 +835,12 @@ int terminal_maybe_give_to_job(const job_t *j, bool continuing_from_stopped) { return error; } assert(getpgrp_res >= 0); - if (getpgrp_res == j->pgid) { - FLOGF(proc_termowner, L"Process group %d already has control of terminal", j->pgid); + if (getpgrp_res == pgid) { + FLOGF(proc_termowner, L"Process group %d already has control of terminal", pgid); } else { FLOGF(proc_termowner, L"Attempting to bring process group to foreground via tcsetpgrp for job->pgid %d", - j->pgid); + pgid); // The tcsetpgrp(2) man page says that EPERM is thrown if "pgrp has a supported value, but // is not the process group ID of a process in the same session as the calling process." @@ -847,7 +851,7 @@ int terminal_maybe_give_to_job(const job_t *j, bool continuing_from_stopped) { // 4.4.0), EPERM does indeed disappear on retry. The important thing is that we can // guarantee the process isn't going to exit while we wait (which would cause us to possibly // block indefinitely). - while (tcsetpgrp(STDIN_FILENO, j->pgid) != 0) { + while (tcsetpgrp(STDIN_FILENO, pgid) != 0) { FLOGF(proc_termowner, L"tcsetpgrp failed: %d", errno); bool pgroup_terminated = false; @@ -858,7 +862,7 @@ int terminal_maybe_give_to_job(const job_t *j, bool continuing_from_stopped) { pgroup_terminated = true; } else if (errno == EPERM) { // Retry so long as this isn't because the process group is dead. - int wait_result = waitpid(-1 * j->pgid, &wait_result, WNOHANG); + int wait_result = waitpid(-1 * pgid, &wait_result, WNOHANG); if (wait_result == -1) { // Note that -1 is technically an "error" for waitpid in the sense that an // invalid argument was specified because no such process group exists any @@ -869,7 +873,7 @@ int terminal_maybe_give_to_job(const job_t *j, bool continuing_from_stopped) { } else { // Debug the original tcsetpgrp error (not the waitpid errno) to the log, and // then retry until not EPERM or the process group has exited. - FLOGF(proc_termowner, L"terminal_give_to_job(): EPERM.\n", j->pgid); + FLOGF(proc_termowner, L"terminal_give_to_job(): EPERM.\n", pgid); continue; } } else { @@ -879,7 +883,7 @@ int terminal_maybe_give_to_job(const job_t *j, bool continuing_from_stopped) { return notneeded; } else { FLOGF(warning, _(L"Could not send job %d ('%ls') with pgid %d to foreground"), - j->job_id(), j->command_wcstr(), j->pgid); + j->job_id(), j->command_wcstr(), pgid); wperror(L"tcsetpgrp"); } return error; @@ -892,7 +896,7 @@ int terminal_maybe_give_to_job(const job_t *j, bool continuing_from_stopped) { // process in the group terminated and didn't need to access the terminal, otherwise // it would have hung waiting for terminal IO (SIGTTIN). We can safely ignore this. FLOGF(proc_termowner, L"tcsetpgrp called but process group %d has terminated.\n", - j->pgid); + pgid); return notneeded; } @@ -914,12 +918,13 @@ int terminal_maybe_give_to_job(const job_t *j, bool continuing_from_stopped) { /// so that we can restore the terminal ownership to the job at a later time. static bool terminal_return_from_job(job_t *j, int restore_attrs) { errno = 0; - if (j->pgid == 0) { + auto pgid = j->get_pgid(); + if (!pgid.has_value()) { FLOG(proc_pgroup, "terminal_return_from_job() returning early due to no process group"); return true; } - FLOG(proc_pgroup, "fish reclaiming terminal after job pgid", j->pgid); + FLOG(proc_pgroup, "fish reclaiming terminal after job pgid", *pgid); if (tcsetpgrp(STDIN_FILENO, getpgrp()) == -1) { if (errno == ENOTTY) redirect_tty_output(); FLOGF(warning, _(L"Could not return shell to foreground")); @@ -957,6 +962,9 @@ void job_t::continue_job(parser_t &parser, bool reclaim_foreground_pgrp, bool se parser.job_promote(this); mut_flags().notified = false; + int pgid = -2; + if (auto tmp = get_pgid()) pgid = *tmp; + FLOGF(proc_job_run, L"%ls job %d, gid %d (%ls), %ls, %ls", send_sigcont ? L"Continue" : L"Start", job_id(), pgid, command_wcstr(), is_completed() ? L"COMPLETED" : L"UNCOMPLETED", @@ -1063,7 +1071,8 @@ void proc_wait_any(parser_t &parser) { void hup_jobs(const job_list_t &jobs) { pid_t fish_pgrp = getpgrp(); for (const auto &j : jobs) { - if (j->pgid != INVALID_PID && j->pgid != fish_pgrp && !j->is_completed()) { + auto pgid = j->get_pgid(); + if (pgid && *pgid != fish_pgrp && !j->is_completed()) { if (j->is_stopped()) { j->signal(SIGCONT); } diff --git a/src/proc.h b/src/proc.h index ebd428e2f..c8808eb4c 100644 --- a/src/proc.h +++ b/src/proc.h @@ -422,7 +422,7 @@ class job_t { } else if (p->pid <= 0) { // Can't reap without a pid. return false; - } else if (!is_constructed() && pgid > 0 && p->pid == pgid) { + } else if (!is_constructed() && this->get_pgid() == maybe_t{p->pid}) { // p is the the group leader in an under-construction job. return false; } else { @@ -459,7 +459,12 @@ class job_t { /// Process group ID for the process group that this job is running in. /// Set to a nonexistent, non-return-value of getpgid() integer by the constructor - pid_t pgid{INVALID_PID}; + // pid_t pgid{INVALID_PID}; + + /// \return the pgid for the job, based on the job tree. + /// This may be none if the job consists of just internal fish functions or builtins. + /// This may also be fish itself. + maybe_t get_pgid() const { return job_tree->get_pgid(); } /// How the above pgroup is assigned. This should be set at construction and not modified after. pgroup_provenance_t pgroup_provenance{};