public inbox for pve-devel@lists.proxmox.com
From: Stefan Reiter <s.reiter@proxmox.com>
To: pve-devel@lists.proxmox.com
Cc: d.csapak@proxmox.com, w.bumiller@proxmox.com
Subject: [pve-devel] [PATCH v2 qemu-server 2/7] qmeventd: add last-ditch effort SIGKILL cleanup
Date: Mon, 19 Oct 2020 14:18:37 +0200	[thread overview]
Message-ID: <20201019121842.20277-3-s.reiter@proxmox.com> (raw)
In-Reply-To: <20201019121842.20277-1-s.reiter@proxmox.com>

'alarm' is used to schedule an additional cleanup round 5 seconds after
sending SIGTERM via terminate_client. This then sends SIGKILL via a
pidfd (if supported by the kernel) or directly via kill, making sure
that the QEMU process is *really* dead and won't be left behind in an
undetermined state.

This shouldn't be necessary under normal circumstances, but it helps
avoid stray QEMU processes being left behind if QEMU hangs after
receiving SIGTERM.

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---

v2:
* use a pidfd to avoid pid-reuse races (see the sketch below)

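[Editor's note, not part of the patch: a minimal sketch of the pidfd
pattern relied on below, assuming a kernel that provides pidfd_open()
(5.3+) and pidfd_send_signal() (5.1+). The fallback syscall numbers
mirror qmeventd.h; the target PID and the demo_* helpers are made up
purely for illustration.]

    #include <signal.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #ifndef __NR_pidfd_open
    #define __NR_pidfd_open 434
    #endif
    #ifndef __NR_pidfd_send_signal
    #define __NR_pidfd_send_signal 424
    #endif

    static int demo_pidfd_open(pid_t pid, unsigned int flags)
    {
        return syscall(__NR_pidfd_open, pid, flags);
    }

    static int demo_pidfd_send_signal(int pidfd, int sig)
    {
        return syscall(__NR_pidfd_send_signal, pidfd, sig, NULL, 0);
    }

    int main(void)
    {
        pid_t pid = 1234; // hypothetical target PID

        // the fd pins this exact process: even if the kernel recycles
        // the PID after it exits, a signal sent via the fd cannot hit
        // an unrelated new process
        int pidfd = demo_pidfd_open(pid, 0);
        if (pidfd < 0) {
            perror("pidfd_open"); // ESRCH: already gone, ENOSYS: old kernel
            return 1;
        }

        if (demo_pidfd_send_signal(pidfd, SIGKILL) < 0)
            perror("pidfd_send_signal");

        close(pidfd);
        return 0;
    }

ESRCH from pidfd_open() already means the process is gone, and ENOSYS
means an older kernel where plain kill() on the PID is the only option -
which is exactly the fallback terminate_client() takes below.
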
 qmeventd/qmeventd.c | 87 ++++++++++++++++++++++++++++++++++++++++++++-
 qmeventd/qmeventd.h | 26 ++++++++++++++
 2 files changed, 112 insertions(+), 1 deletion(-)
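
[Editor's note, also not part of the patch: a hedged, standalone sketch
of the deferred-cleanup pattern used here - the SIGALRM handler only
sets a sig_atomic_t flag, and the actual SIGKILL pass runs from the main
loop after epoll_wait() returns, including when it fails with EINTR. All
names are illustrative, not qmeventd's own.]

    #include <errno.h>
    #include <signal.h>
    #include <sys/epoll.h>
    #include <unistd.h>

    static volatile sig_atomic_t cleanup_due = 0;

    static void on_alarm(int signum)
    {
        (void)signum;
        cleanup_due = 1; // async-signal-safe: only set a flag here
    }

    static void run_deferred_cleanup(void)
    {
        if (!cleanup_due)
            return;
        cleanup_due = 0;
        // ... iterate pending entries, SIGKILL whatever is still alive ...
    }

    int main(void)
    {
        signal(SIGALRM, on_alarm);

        int epfd = epoll_create1(0);
        if (epfd < 0)
            return 1;

        alarm(5); // one timer per process; calling alarm() again resets it

        for (;;) {
            struct epoll_event ev;
            int n = epoll_wait(epfd, &ev, 1, -1);
            if (n < 0 && errno == EINTR) {
                // interrupted by SIGALRM: run the cleanup, then wait again
                run_deferred_cleanup();
                continue;
            }
            if (n < 0)
                return 1;
            // ... handle ready fds here ...
            run_deferred_cleanup();
        }
    }

epoll_wait() is one of the calls that always fails with EINTR when a
signal handler interrupts it, regardless of SA_RESTART, so checking the
flag in both branches of the loop is sufficient.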

diff --git a/qmeventd/qmeventd.c b/qmeventd/qmeventd.c
index 6b02a06..57f1867 100644
--- a/qmeventd/qmeventd.c
+++ b/qmeventd/qmeventd.c
@@ -63,6 +63,8 @@ static int verbose = 0;
 static int epoll_fd = 0;
 static const char *progname;
 GHashTable *vm_clients; // key=vmid (freed on remove), value=*Client (free manually)
+GSList *forced_cleanups;
+volatile sig_atomic_t alarm_triggered = 0;
 
 /*
  * Helper functions
@@ -468,8 +470,39 @@ terminate_client(struct Client *client)
 
     client->state = STATE_TERMINATING;
 
+    // open a pidfd before kill for later cleanup
+    int pidfd = pidfd_open(client->pid, 0);
+    if (pidfd < 0) {
+	switch (errno) {
+	    case ESRCH:
+		// process already dead for some reason, cleanup done
+		VERBOSE_PRINT("%s: failed to open pidfd, process already dead (pid %d)\n",
+			      client->qemu.vmid, client->pid);
+		return;
+
+	    // otherwise fall back to just using the PID directly, but don't
+	    // print if we only failed because we're running on an older kernel
+	    case ENOSYS:
+		break;
+	    default:
+		perror("failed to open QEMU pidfd for cleanup");
+		break;
+	}
+    }
+
     int err = kill(client->pid, SIGTERM);
     log_neg(err, "kill");
+
+    struct CleanupData *data_ptr = malloc(sizeof(struct CleanupData));
+    struct CleanupData data = {
+	.pid = client->pid,
+	.pidfd = pidfd
+    };
+    *data_ptr = data;
+    forced_cleanups = g_slist_prepend(forced_cleanups, (void *)data_ptr);
+
+    // resets any other alarms, but will fire eventually and cleanup all
+    alarm(5);
 }
 
 void
@@ -545,6 +578,55 @@ handle_client(struct Client *client)
 }
 
 
+/*
+ * SIGALRM and cleanup handling
+ *
+ * terminate_client will set an alarm for 5 seconds and add its client's PID to
+ * the forced_cleanups list - when the timer expires, we iterate the list and
+ * attempt to issue SIGKILL to all processes which haven't yet stopped.
+ */
+
+static void
+alarm_handler(__attribute__((unused)) int signum)
+{
+    alarm_triggered = 1;
+}
+
+static void
+sigkill(void *ptr, __attribute__((unused)) void *unused)
+{
+    struct CleanupData data = *((struct CleanupData *)ptr);
+    int err;
+
+    if (data.pidfd > 0) {
+	err = pidfd_send_signal(data.pidfd, SIGKILL, NULL, 0);
+    } else {
+	err = kill(data.pid, SIGKILL);
+    }
+
+    if (err < 0) {
+	if (errno != ESRCH) {
+	    fprintf(stderr, "SIGKILL cleanup of pid '%d' failed - %s\n",
+		    data.pid, strerror(errno));
+	}
+    } else {
+	fprintf(stderr, "cleanup failed, terminating pid '%d' with SIGKILL\n",
+		data.pid);
+    }
+}
+
+static void
+handle_forced_cleanup()
+{
+    if (alarm_triggered) {
+	alarm_triggered = 0;
+	g_slist_foreach(forced_cleanups, sigkill, NULL);
+	g_slist_free_full(forced_cleanups, free);
+	forced_cleanups = NULL;
+    }
+}
+
+
 int
 main(int argc, char *argv[])
 {
@@ -577,6 +659,7 @@ main(int argc, char *argv[])
     }
 
     signal(SIGCHLD, SIG_IGN);
+    signal(SIGALRM, alarm_handler);
 
     socket_path = argv[optind];
 
@@ -612,7 +695,7 @@ main(int argc, char *argv[])
     for(;;) {
 	nevents = epoll_wait(epoll_fd, events, 1, -1);
 	if (nevents < 0 && errno == EINTR) {
-	    // signal happened, try again
+	    handle_forced_cleanup();
 	    continue;
 	}
 	bail_neg(nevents, "epoll_wait");
@@ -630,5 +713,7 @@ main(int argc, char *argv[])
 		handle_client((struct Client *)events[n].data.ptr);
 	    }
 	}
+
+	handle_forced_cleanup();
     }
 }
diff --git a/qmeventd/qmeventd.h b/qmeventd/qmeventd.h
index 30aea98..1921ef3 100644
--- a/qmeventd/qmeventd.h
+++ b/qmeventd/qmeventd.h
@@ -21,6 +21,15 @@
     Author: Dominik Csapak <d.csapak@proxmox.com>
 */
 
+#include <sys/syscall.h>
+
+#ifndef __NR_pidfd_open
+#define __NR_pidfd_open 434
+#endif
+#ifndef __NR_pidfd_send_signal
+#define __NR_pidfd_send_signal 424
+#endif
+
 #define VERBOSE_PRINT(...) do { if (verbose) { printf(__VA_ARGS__); } } while (0)
 
 static inline void log_neg(int errval, const char *msg)
@@ -38,6 +47,18 @@ static inline void bail_neg(int errval, const char *msg)
     }
 }
 
+static inline int
+pidfd_open(pid_t pid, unsigned int flags)
+{
+    return syscall(__NR_pidfd_open, pid, flags);
+}
+
+static inline int
+pidfd_send_signal(int pidfd, int sig, siginfo_t *info, unsigned int flags)
+{
+    return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);
+}
+
 typedef enum {
     CLIENT_NONE,
     CLIENT_QEMU,
@@ -77,6 +98,11 @@ struct Client {
     } vzdump;
 };
 
+struct CleanupData {
+    pid_t pid;
+    int pidfd;
+};
+
 void handle_qmp_handshake(struct Client *client);
 void handle_qmp_event(struct Client *client, struct json_object *obj);
 void handle_qmp_return(struct Client *client, struct json_object *data, bool error);
-- 
2.20.1

Thread overview: 9+ messages
2020-10-19 12:18 [pve-devel] [PATCH v2 0/7] Handle guest shutdown during backups Stefan Reiter
2020-10-19 12:18 ` [pve-devel] [PATCH v2 qemu-server 1/7] qmeventd: add handling for -no-shutdown QEMU instances Stefan Reiter
2020-10-19 12:18 ` Stefan Reiter [this message]
2020-10-19 12:18 ` [pve-devel] [PATCH v2 qemu-server 3/7] vzdump: connect to qmeventd for duration of backup Stefan Reiter
2020-10-19 12:18 ` [pve-devel] [PATCH v2 qemu-server 4/7] vzdump: use dirty bitmap for not running VMs too Stefan Reiter
2020-10-19 12:18 ` [pve-devel] [PATCH v2 qemu-server 5/7] config_to_command: use -no-shutdown option Stefan Reiter
2020-10-19 12:18 ` [pve-devel] [PATCH v2 qemu-server 6/7] fix vm_resume and allow vm_start with QMP status 'shutdown' Stefan Reiter
2020-10-19 12:18 ` [pve-devel] [PATCH v2 manager 7/7] ui: qemu: set correct disabled state for start button Stefan Reiter
2020-11-05 12:35 ` [pve-devel] applied-series: [PATCH v2 0/7] Handle guest shutdown during backups Thomas Lamprecht
