diff --git a/src/proc.cpp b/src/proc.cpp
index 0eb6c85ee..d38782ae6 100644
--- a/src/proc.cpp
+++ b/src/proc.cpp
@@ -334,14 +334,14 @@ io_chain_t job_t::all_io_redirections() const {
 
 typedef unsigned int process_generation_count_t;
 
-/// A static value tracking how many SIGCHLDs we have seen. This is only ever modified from within
-/// the SIGCHLD signal handler, and therefore does not need atomics or locks.
+/// A static value tracking how many SIGCHLDs we have seen, which is used in a heurstic to
+/// determine if we should call waitpid() at all in `process_mark_finished_children`.
 static volatile process_generation_count_t s_sigchld_generation_cnt = 0;
 
 /// See if any children of a fully constructed job have exited or been killed, and mark them
 /// accordingly. We cannot reap just any child that's exited, (as in, `waitpid(-1,…`) since
 /// that may reap a pgrp leader that has exited but in a job with another process that has yet to
-/// launch and join its pgrp (#5219). Returns as soon as a single child is reaped & processed.
+/// launch and join its pgrp (#5219).
 /// \param block_on_fg when true, blocks waiting for the foreground job to finish.
 /// \return whether the operation completed without incident
 static bool process_mark_finished_children(bool block_on_fg) {
@@ -350,27 +350,59 @@ static bool process_mark_finished_children(bool block_on_fg) {
     // This is always called from the main thread (and not forked), so we can cache this value.
     static pid_t shell_pgid = getpgrp();
 
+    // We can't always use SIGCHLD to determine if waitpid() should be called since it is not
+    // strictly one-SIGCHLD-per-one-child-exited (i.e. multiple exits can share a SIGCHLD call) and
+    // we a) return immediately the first time a dead child is reaped, b) explicitly skip over jobs
+    // that aren't yet fully constructed, so it's possible that we can get SIGCHLD and even find a
+    // killed child in the jobs we are reaping, but also have an exited child process in a job that
+    // hasn't been fully constructed yet - which means we can end up never knowing about the exited
+    // child process in that job if we use SIGCHLD count as the only metric for whether or not
+    // waitpid() is called.
+    // Without this optimization, the slowdown caused by calling waitpid() even just once each time
+    // `process_mark_finished_children()` is called is rather obvious (see the performance-related
+    // discussion in #5219), making it worth the complexity of this heuristic.
+
+    /// Tracks whether or not we received SIGCHLD without checking all jobs (due to jobs under
+    /// construction), forcing a full waitpid loop.
+    static bool dirty_state = true;
+    static process_generation_count_t last_sigchld_count = -1;
+
+    // If the last time that we received a SIGCHLD we did not waitpid all jobs, we cannot early out.
+    if (!dirty_state && last_sigchld_count == s_sigchld_generation_cnt) {
+        // If we have foreground jobs, we need to block on them below
+        if (!block_on_fg)
+        {
+            // We can assume that no children have exited and that all waitpid calls with
+            // WNOHANG below will confirm that.
+            return true;
+        }
+    }
+
+    last_sigchld_count = s_sigchld_generation_cnt;
+    bool jobs_skipped = false;
     bool has_error = false;
     job_t *job_fg = nullptr;
+
     // Reap only processes belonging to fully-constructed jobs to prevent reaping of processes
     // before others in the same process group have a chance to join their pgrp.
     job_iterator_t jobs;
     while (auto j = jobs.next()) {
-        // A job can have pgrp INVALID_PID if it consists solely of builtins that perform no IO
+        // (A job can have pgrp INVALID_PID if it consists solely of builtins that perform no IO)
         if (j->pgid == INVALID_PID || !j->is_constructed()) {
-            // Job has not been fully constructed yet
             debug(5, "Skipping wait on incomplete job %d (%ls)", j->job_id, j->preview().c_str());
+            jobs_skipped = true;
             continue;
         }
 
+        if (j != job_fg && j->is_foreground() && !j->is_stopped() && !j->is_completed()) {
+            assert(job_fg == nullptr && "More than one active, fully-constructed foreground job!");
+            job_fg = j;
+        }
+
         // Whether we will wait for uncompleted processes depends on the combination of `block_on_fg` and the
         // nature of the process. Default is WNOHANG, but if foreground, constructed, not stopped, *and*
         // block_on_fg is true, then no WNOHANG (i.e. "HANG").
         int options = WUNTRACED | WNOHANG;
-        if (j->is_foreground() && !j->is_stopped() && !j->is_completed()) {
-            assert(job_fg == nullptr && "More than one active, fully-constructed foreground job!");
-            job_fg = j;
-        }
 
         // We should never block twice in the same go, as `waitpid()' returning could mean one
         // process completed or many, and there is a race condition when calling `waitpid()` after
@@ -383,35 +415,60 @@ static bool process_mark_finished_children(bool block_on_fg) {
         // wait on only one pgrp at a time and we need to check all pgrps before returning, but we
         // never wait/block on fg processes after an error has been encountered to give ourselves
         // (elsewhere) a chance to handle the fallout from process termination, etc.
-        if (!has_error && block_on_fg && j->pgid != shell_pgid
-                && j == job_fg && j->get_flag(job_flag_t::JOB_CONTROL)) {
+        if (!has_error && block_on_fg && j->pgid != shell_pgid && j == job_fg
+                && j->get_flag(job_flag_t::JOB_CONTROL)) {
             debug(4, "Waiting on processes from foreground job %d", job_fg->pgid);
             options &= ~WNOHANG;
         }
 
-        int status = -1;
-        // A negative PID passed in to `waitpid()` means wait on any child in that process group
-        auto pid = waitpid(-1 * j->pgid, &status, options);
-        if (pid > 0) {
-            // A child process has been reaped
-            handle_child_status(pid, status);
-            return true;
-        }
-        else if (pid == 0) {
-            // No killed/dead children in this particular process group
-        } else {
-            // pid < 0 indicates an error. One likely failure is ECHILD (no children), which is not
-            // an error and is ignored. The other likely failure is EINTR, which means we got a
-            // signal, which is considered an error. We absolutely do not break or return on error,
-            // as we need to iterate over all constructed jobs but we only call waitpid for one pgrp
-            // at a time. We do bypass future waits in case of error, however.
-            if (errno != ECHILD) {
-                has_error = true;
-                wperror(L"waitpid in process_mark_finished_children");
+        // waitpid(2) returns 1 process each time, we need to keep calling it until we've reaped all
+        // children of the pgrp in question or else we can't reset the dirty_state flag. In all
+        // cases, calling waitpid(2) is faster than potentially calling select_try() on a process
+        // that has exited, which will force us to wait the full timeout before coming back here and
+        // calling waitpid() again.
+        while (true) {
+            int status = -1;
+            // A negative PID passed in to `waitpid()` means wait on any child in that process group
+            auto pid = waitpid(-1 * j->pgid, &status, options);
+            // Never make two calls to waitpid(2) without WNOHANG (i.e. with "HANG") in a row,
+            // because we might wait on a non-stopped job that becomes stopped, but we don't refresh
+            // our view of the process state before calling waitpid(2) again here.
+            options |= WNOHANG;
+
+            if (pid > 0) {
+                // A child process has been reaped
+                handle_child_status(pid, status);
+            }
+            else if (pid == 0) {
+                // No killed/dead children in this particular process group
+                break;
+            } else {
+                // pid < 0 indicates an error. One likely failure is ECHILD (no children), which is not
+                // an error and is ignored. The other likely failure is EINTR, which means we got a
+                // signal, which is considered an error. We absolutely do not break or return on error,
+                // as we need to iterate over all constructed jobs but we only call waitpid for one pgrp
+                // at a time. We do bypass future waits in case of error, however.
+                if (errno != ECHILD) {
+                    has_error = true;
+                    wperror(L"waitpid in process_mark_finished_children");
+                }
+                break;
             }
         }
     }
 
+    // Yes, the below can be collapsed to a single line, but it's worth being explicit about it with
+    // the comments. Fret not, the compiler will optimize it. (It better!)
+    if (jobs_skipped) {
+        // We received SIGCHLD but were not able to definitely say whether or not all children were
+        // reaped.
+        dirty_state = true;
+    }
+    else {
+        // We can safely assume that no SIGCHLD means we can just return next time around
+        dirty_state = false;
+    }
+
     return !has_error;
 }