diff --git a/src/common.cpp b/src/common.cpp index a1b3125cf..06cf735d9 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -1789,12 +1789,15 @@ void save_term_foreground_process_group() { } void restore_term_foreground_process_group_for_exit() { - if (initial_fg_process_group != -1) { - // This is called during shutdown and from a signal handler. We don't bother to complain on - // failure because doing so is unlikely to be noticed. - // However we want this to fail if we are not the tty owner (#7060), so clear our SIGTTOU - // handler to allow it to fail properly. Note that we are about to exit. - (void)signal(SIGTTOU, SIG_DFL); + // We wish to restore the tty to the initial owner. There's two ways this can go wrong: + // 1. We may steal the tty from someone else (#7060). + // 2. The call to tcsetpgrp may deliver SIGSTOP to us, and we will not exit. + // Hanging on exit seems worse, so ensure that SIGTTOU is ignored so we do not get SIGSTOP. + // Note initial_fg_process_group == 0 is possible with Linux pid namespaces. + // This is called during shutdown and from a signal handler. We don't bother to complain on + // failure because doing so is unlikely to be noticed. + if (initial_fg_process_group > 0 && initial_fg_process_group != getpgrp()) { + (void)signal(SIGTTOU, SIG_IGN); (void)tcsetpgrp(STDIN_FILENO, initial_fg_process_group); } } diff --git a/src/fish_test_helper.cpp b/src/fish_test_helper.cpp index 1270772db..94575754d 100644 --- a/src/fish_test_helper.cpp +++ b/src/fish_test_helper.cpp @@ -20,6 +20,26 @@ static void become_foreground_then_print_stderr() { fprintf(stderr, "become_foreground_then_print_stderr done\n"); } +static void nohup_wait() { + pid_t init_parent = getppid(); + if (signal(SIGHUP, SIG_IGN)) { + perror("tcsetgrp"); + exit(EXIT_FAILURE); + } + // Note: these silly close() calls are necessary to prevent our parent process (presumably fish) + // from getting stuck in the "E" state ("Trying to exit"). This appears to be a (kernel?) bug on + // macOS: the process is no longer running but is not a zombie either, and so cannot be reaped. + // It is unclear why closing these fds successfully works around this issue. + close(STDIN_FILENO); + close(STDOUT_FILENO); + close(STDERR_FILENO); + // To avoid leaving fish_test_helpers around, we exit once our parent changes, meaning the fish + // instance exited. + while (getppid() == init_parent) { + usleep(1000000 / 4); + } +} + static void report_foreground_loop() { int was_fg = -1; const auto grp = getpgrp(); @@ -146,6 +166,7 @@ struct fth_command_t { static fth_command_t s_commands[] = { {"become_foreground_then_print_stderr", become_foreground_then_print_stderr, "Claim the terminal (tcsetpgrp) and then print to stderr"}, + {"nohup_wait", nohup_wait, "Ignore SIGHUP and just wait"}, {"report_foreground", report_foreground, "Report to stderr whether we own the terminal"}, {"report_foreground_loop", report_foreground_loop, "Continually report to stderr whether we own the terminal"}, diff --git a/tests/pexpects/exit_nohang.py b/tests/pexpects/exit_nohang.py new file mode 100644 index 000000000..9c4a81df0 --- /dev/null +++ b/tests/pexpects/exit_nohang.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +from pexpect_helper import SpawnedProc +import subprocess +import sys +import signal +import time +import os + +sp = SpawnedProc() +send, sendline, sleep, expect_prompt, expect_re = ( + sp.send, + sp.sendline, + sp.sleep, + sp.expect_prompt, + sp.expect_re, +) + +# Helper to print an error and exit. +def error_and_exit(text): + keys = sp.colors() + print("{RED}Test failed: {NORMAL}{TEXT}".format(TEXT=text, **keys)) + sys.exit(1) + + +fish_pid = sp.spawn.pid + +# Launch fish_test_helper. +expect_prompt() +exe_path = os.environ.get("fish_test_helper") +sp.sendline(exe_path + " nohup_wait") + +# We expect it to transfer tty ownership to fish_test_helper. +sleep(0.1) +tty_owner = os.tcgetpgrp(sp.spawn.child_fd) +if fish_pid == tty_owner: + os.kill(fish_pid, signal.SIGKILL) + error_and_exit( + "Test failed: outer fish %d did not transfer tty owner to fish_test_helper" + % (fish_pid) + ) + + +# Now we are going to tell fish to exit. +# It must not hang. But it might hang when trying to restore the tty. +os.kill(fish_pid, signal.SIGTERM) + +# Loop a bit until the process exits (correct) or stops (incorrrect). +# When it exits it should be due to the SIGTERM that we sent it. +for i in range(10): + pid, status = os.waitpid(fish_pid, os.WUNTRACED | os.WNOHANG) + if pid == 0: + # No process ready yet, loop again. + sleep(0.1) + elif pid != fish_pid: + # Some other process exited (??) + os.kill(fish_pid, signal.SIGKILL) + error_and_exit( + "unexpected pid returned from waitpid. Got %d, expected %d" + % (pid, fish_pid) + ) + elif os.WIFSTOPPED(status): + # Our pid stopped instead of exiting. + # Probably it stopped because of its tcsetpgrp call during exit. + # Kill it and report a failure. + os.kill(fish_pid, signal.SIGKILL) + error_and_exit("pid %d stopped instead of exiting on SIGTERM" % pid) + elif not os.WIFSIGNALED(status): + error_and_exit("pid %d did not signal-exit" % pid) + elif os.WTERMSIG(status) != signal.SIGTERM: + error_and_exit( + "pid %d signal-exited with %d instead of %d (SIGTERM)" + % (os.WTERMSIG(status), signal.SIGTERM) + ) + else: + # Success! + sys.exit(0) +else: + # Our loop completed without the process being returned. + error_and_exit("fish with pid %d hung after SIGTERM" % fish_pid) + +# Should never get here. +error_and_exit("unknown, should be unreachable")