From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <d.csapak@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id 1488B943A2
 for <pve-devel@lists.proxmox.com>; Wed, 21 Sep 2022 14:49:45 +0200 (CEST)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id 0C53F276F4
 for <pve-devel@lists.proxmox.com>; Wed, 21 Sep 2022 14:49:15 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [94.136.29.106])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS
 for <pve-devel@lists.proxmox.com>; Wed, 21 Sep 2022 14:49:12 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id BA11744206
 for <pve-devel@lists.proxmox.com>; Wed, 21 Sep 2022 14:49:12 +0200 (CEST)
From: Dominik Csapak <d.csapak@proxmox.com>
To: pve-devel@lists.proxmox.com
Date: Wed, 21 Sep 2022 14:49:09 +0200
Message-Id: <20220921124911.3224970-2-d.csapak@proxmox.com>
X-Mailer: git-send-email 2.30.2
In-Reply-To: <20220921124911.3224970-1-d.csapak@proxmox.com>
References: <20220921124911.3224970-1-d.csapak@proxmox.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.077 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
Subject: [pve-devel] [PATCH qemu-server 1/3] qmeventd: rework
 'forced_cleanup' handling and set timeout to 60s
X-BeenThere: pve-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox VE development discussion <pve-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pve-devel/>
List-Post: <mailto:pve-devel@lists.proxmox.com>
List-Help: <mailto:pve-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Wed, 21 Sep 2022 12:49:45 -0000

currently, the 'forced_cleanup' (sending SIGKILL to the qemu process),
is intended to be triggered 5 seconds after sending the initial shutdown
signal (SIGTERM) which is sadly not enough for some setups.

Accidentally, it could be triggered earlier than 5 seconds, if a
SIGALRM triggers in the timespan directly before setting it again.

Also, this approach means that depending on when machines are shutdown
their forced cleanup may happen after 5 seconds, or any time after, if
new vms are shut off in the meantime.

To improve this situation, rework the way we deal with this cleanup, by
saving the time incl. timeout in the CleanupData, and trigger a
forced_cleanup every 10 seconds. If that happends, remove it from
the forced_cleanup list.

To improve the shutdown behaviour, increase the default timeout to 60
seconds, which should be enough, but add a commandline toggle where
users can set it to a different value.

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
 qmeventd/qmeventd.c | 75 +++++++++++++++++++++++++--------------------
 qmeventd/qmeventd.h |  2 ++
 2 files changed, 44 insertions(+), 33 deletions(-)

diff --git a/qmeventd/qmeventd.c b/qmeventd/qmeventd.c
index 8d32827..e9ff5b3 100644
--- a/qmeventd/qmeventd.c
+++ b/qmeventd/qmeventd.c
@@ -29,6 +29,7 @@
 #include <signal.h>
 #include <stdbool.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <sys/epoll.h>
 #include <sys/socket.h>
@@ -36,15 +37,18 @@
 #include <sys/un.h>
 #include <sys/wait.h>
 #include <unistd.h>
+#include <time.h>
 
 #include "qmeventd.h"
 
+#define DEFAULT_KILL_TIMEOUT 60
+
 static int verbose = 0;
+static int kill_timeout = DEFAULT_KILL_TIMEOUT;
 static int epoll_fd = 0;
 static const char *progname;
 GHashTable *vm_clients; // key=vmid (freed on remove), value=*Client (free manually)
 GSList *forced_cleanups;
-volatile sig_atomic_t alarm_triggered = 0;
 
 /*
  * Helper functions
@@ -54,9 +58,10 @@ static void
 usage()
 {
     fprintf(stderr, "Usage: %s [-f] [-v] PATH\n", progname);
-    fprintf(stderr, "  -f       run in foreground (default: false)\n");
-    fprintf(stderr, "  -v       verbose (default: false)\n");
-    fprintf(stderr, "  PATH     use PATH for socket\n");
+    fprintf(stderr, "  -f           run in foreground (default: false)\n");
+    fprintf(stderr, "  -v           verbose (default: false)\n");
+    fprintf(stderr, "  -t timeout   kill timeout (default: %ds)\n", DEFAULT_KILL_TIMEOUT);
+    fprintf(stderr, "  PATH         use PATH for socket\n");
 }
 
 static pid_t
@@ -469,16 +474,16 @@ terminate_client(struct Client *client)
     int err = kill(client->pid, SIGTERM);
     log_neg(err, "kill");
 
+    time_t timeout = time(NULL) + kill_timeout;
+
     struct CleanupData *data_ptr = malloc(sizeof(struct CleanupData));
     struct CleanupData data = {
 	.pid = client->pid,
-	.pidfd = pidfd
+	.pidfd = pidfd,
+	.timeout = timeout,
     };
     *data_ptr = data;
     forced_cleanups = g_slist_prepend(forced_cleanups, (void *)data_ptr);
-
-    // resets any other alarms, but will fire eventually and cleanup all
-    alarm(5);
 }
 
 void
@@ -551,27 +556,16 @@ handle_client(struct Client *client)
     json_tokener_free(tok);
 }
 
-
-/*
- * SIGALRM and cleanup handling
- *
- * terminate_client will set an alarm for 5 seconds and add its client's PID to
- * the forced_cleanups list - when the timer expires, we iterate the list and
- * attempt to issue SIGKILL to all processes which haven't yet stopped.
- */
-
-static void
-alarm_handler(__attribute__((unused)) int signum)
-{
-    alarm_triggered = 1;
-}
-
 static void
 sigkill(void *ptr, __attribute__((unused)) void *unused)
 {
     struct CleanupData data = *((struct CleanupData *)ptr);
     int err;
 
+    if (data.timeout > time(NULL)) {
+	return;
+    }
+
     if (data.pidfd > 0) {
 	err = pidfd_send_signal(data.pidfd, SIGKILL, NULL, 0);
 	(void)close(data.pidfd);
@@ -588,21 +582,29 @@ sigkill(void *ptr, __attribute__((unused)) void *unused)
 	fprintf(stderr, "cleanup failed, terminating pid '%d' with SIGKILL\n",
 		data.pid);
     }
+
+    // remove ourselves from the list
+    forced_cleanups = g_slist_remove(forced_cleanups, ptr);
+    free(ptr);
 }
 
+/*
+ * SIGALRM and cleanup handling
+ *
+ * handles the cleanup on non terminated qemu processes, will be called every
+ * 10 seconds by setting 'alarm(10)' at the end again
+ */
+
 static void
-handle_forced_cleanup()
+alarm_handler(__attribute__((unused)) int signum)
 {
-    if (alarm_triggered) {
+    if (g_slist_length(forced_cleanups) > 0) {
 	VERBOSE_PRINT("clearing forced cleanup backlog\n");
-	alarm_triggered = 0;
 	g_slist_foreach(forced_cleanups, sigkill, NULL);
-	g_slist_free_full(forced_cleanups, free);
-	forced_cleanups = NULL;
     }
+    alarm(10);
 }
 
-
 int
 main(int argc, char *argv[])
 {
@@ -611,7 +613,7 @@ main(int argc, char *argv[])
     char *socket_path = NULL;
     progname = argv[0];
 
-    while ((opt = getopt(argc, argv, "hfv")) != -1) {
+    while ((opt = getopt(argc, argv, "hfvt:")) != -1) {
 	switch (opt) {
 	    case 'f':
 		daemonize = 0;
@@ -619,6 +621,15 @@ main(int argc, char *argv[])
 	    case 'v':
 		verbose = 1;
 		break;
+	    case 't':
+		errno = 0;
+		char *endptr = NULL;
+		kill_timeout = strtoul(optarg, &endptr, 10);
+		if (errno != 0 || *endptr != '\0' || kill_timeout == 0) {
+		    usage();
+		    exit(EXIT_FAILURE);
+		}
+		break;
 	    case 'h':
 		usage();
 		exit(EXIT_SUCCESS);
@@ -668,10 +679,10 @@ main(int argc, char *argv[])
 
     int nevents;
 
+    alarm(10);
     for(;;) {
 	nevents = epoll_wait(epoll_fd, events, 1, -1);
 	if (nevents < 0 && errno == EINTR) {
-	    handle_forced_cleanup();
 	    continue;
 	}
 	bail_neg(nevents, "epoll_wait");
@@ -688,7 +699,5 @@ main(int argc, char *argv[])
 		handle_client((struct Client *)events[n].data.ptr);
 	    }
 	}
-
-	handle_forced_cleanup();
     }
 }
diff --git a/qmeventd/qmeventd.h b/qmeventd/qmeventd.h
index 2cf1947..1b9cea1 100644
--- a/qmeventd/qmeventd.h
+++ b/qmeventd/qmeventd.h
@@ -7,6 +7,7 @@
 */
 
 #include <sys/syscall.h>
+#include <time.h>
 
 #ifndef __NR_pidfd_open
 #define __NR_pidfd_open 434
@@ -86,6 +87,7 @@ struct Client {
 struct CleanupData {
     pid_t pid;
     int pidfd;
+    time_t timeout;
 };
 
 void handle_qmp_handshake(struct Client *client);
-- 
2.30.2