From dcd77bd35ed3abb11957b68c99133a573b6420bc Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 7 Dec 2024 01:10:48 +0100 Subject: [PATCH 1/2] introduce jobkill() Extract it from jobwork() so that build() can call it on a signal. Signed-off-by: Paolo Bonzini --- build.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/build.c b/build.c index 7988423..b75431c 100644 --- a/build.c +++ b/build.c @@ -484,6 +484,14 @@ jobdone(struct job *j) edgedone(e); } +static void +jobkill(struct job *j) +{ + kill(j->pid, SIGTERM); + j->failed = true; + jobdone(j); +} + /* returns whether a job still has work to do. if not, sets j->failed */ static bool jobwork(struct job *j) @@ -507,16 +515,16 @@ jobwork(struct job *j) j->buf.len += n; return true; } - if (n == 0) - goto done; - warn("read:"); + if (n < 0) { + warn("read:"); + goto kill; + } -kill: - kill(j->pid, SIGTERM); - j->failed = true; -done: jobdone(j); + return false; +kill: + jobkill(j); return false; } From abf0636ceecae88d7cb3cb1e3239517a5010b1fb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 3 May 2024 12:04:04 +0200 Subject: [PATCH 2/2] handle SIGTERM and SIGINT Keep the system clean by propagating SIGTERM to all children, and by not starting new jobs on both SIGTERM and SIGINT. The only tricky bit is that previously fd[i].revents was used to skip both jobs that are not in use and jobs that did not have output; that's because negative file descriptors do not cause POLLNVAL and therefore fd[i].revents is zero for inactive jobs as well. But because all jobs must be killed, build() now has to check fd[i].fd == -1 explicitly. While at it, also clean up jobdone() by clearing job[i].edge; it's not nice to leave a dangling pointer in the jobs array, even if it's harmless. Signed-off-by: Paolo Bonzini --- build.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/build.c b/build.c index b75431c..6ecbf3e 100644 --- a/build.c +++ b/build.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -34,6 +35,10 @@ static size_t nstarted, nfinished, ntotal; static bool consoleused; static struct timespec starttime; +/* for signal handling */ +static volatile sig_atomic_t killall; +static volatile sig_atomic_t stopsig; + void buildreset(void) { @@ -115,6 +120,11 @@ queue(struct edge *e) *front = e; } +static bool have_work(void) +{ + return work && !stopsig; +} + void buildadd(struct node *n) { @@ -453,7 +463,8 @@ jobdone(struct job *j) j->failed = true; } } else if (WIFSIGNALED(status)) { - warn("job terminated due to signal %d: %s", WTERMSIG(status), j->cmd->s); + if (stopsig != WTERMSIG(status)) + warn("job terminated due to signal %d: %s", WTERMSIG(status), j->cmd->s); j->failed = true; } else { /* cannot happen according to POSIX */ @@ -465,6 +476,8 @@ jobdone(struct job *j) fwrite(j->buf.data, 1, j->buf.len, stdout); j->buf.len = 0; e = j->edge; + j->edge = NULL; + if (e->pool) { p = e->pool; @@ -546,6 +559,20 @@ queryload(void) #endif } +static void sighandler(int sig) +{ + /* + * Both SIGINT and SIGTERM will not start any more processes, + * but only SIGTERM needs to be forwarded to samurai's child + * processes. That's because samurai is a process group leader + * and SIGINT has already been sent to all children, and they + * will stop on their own. + */ + if (sig == SIGTERM) + killall = true; + stopsig = sig; +} + void build(void) { @@ -553,12 +580,19 @@ build(void) struct pollfd *fds = NULL; size_t i, next = 0, jobslen = 0, maxjobs = buildopts.maxjobs, numjobs = 0, numfail = 0; struct edge *e; + struct sigaction sa; if (ntotal == 0) { warn("nothing to do"); return; } + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = sighandler; + sa.sa_flags = SA_RESTART; + sigaction(SIGTERM, &sa, NULL); + sigaction(SIGINT, &sa, NULL); + clock_gettime(CLOCK_MONOTONIC, &starttime); formatstatus(NULL, 0); @@ -568,7 +602,7 @@ build(void) if (buildopts.maxload) maxjobs = queryload() > buildopts.maxload ? 1 : buildopts.maxjobs; /* start ready edges */ - while (work && numjobs < maxjobs && numfail < buildopts.maxfail) { + while (have_work() && numjobs < maxjobs && numfail < buildopts.maxfail) { e = work; work = work->worknext; if (e->rule != &phonyrule && buildopts.dryrun) { @@ -607,11 +641,16 @@ build(void) } if (numjobs == 0) break; - if (poll(fds, jobslen, 5000) < 0) + if (poll(fds, jobslen, 5000) < 0 && errno != EINTR) fatal("poll:"); for (i = 0; i < jobslen; ++i) { - if (!fds[i].revents || jobwork(&jobs[i])) + if (fds[i].fd == -1) continue; + else if (killall) + jobkill(&jobs[i]); + else if (!fds[i].revents || jobwork(&jobs[i])) + continue; + --numjobs; jobs[i].next = next; fds[i].fd = -1; @@ -627,6 +666,10 @@ build(void) if (numfail > 0) { if (numfail < buildopts.maxfail) fatal("cannot make progress due to previous errors"); + else if (stopsig == SIGINT) + fatal("interrupted by user"); + else if (stopsig) + fatal("interrupted by signal %d", stopsig); else if (numfail > 1) fatal("subcommands failed"); else