From: Dominik Csapak <d.csapak@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH qemu-server v4 1/2] qmeventd: rework 'forced_cleanup' handling and set timeout to 60s
Date: Fri, 23 Sep 2022 11:51:14 +0200 [thread overview]
Message-ID: <20220923095115.1974215-2-d.csapak@proxmox.com> (raw)
In-Reply-To: <20220923095115.1974215-1-d.csapak@proxmox.com>
currently, the 'forced_cleanup' (sending SIGKILL to the qemu process),
is intended to be triggered 5 seconds after sending the initial shutdown
signal (SIGTERM) which is sadly not enough for some setups.
Accidentally, it could be triggered earlier than 5 seconds, if a
SIGALRM triggers in the timespan directly before setting it again.
Also, this approach means that depending on when machines are shutdown
their forced cleanup may happen after 5 seconds, or any time after, if
new vms are shut off in the meantime.
Improve this situation by reworking the way we deal with this cleanup.
We save the pidfd, time incl. timeout in the Client, and set a timeout
to 'epoll_wait' of 10 seconds, which will then trigger a forced_cleanup.
Remove entries from the forced_cleanup list when that entry is killed,
or when the normal cleanup took place.
To improve the shutdown behaviour, increase the default timeout to 60
seconds, which should be enough, but add a commandline toggle where
users can set it to a different value.
Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
qmeventd/qmeventd.c | 98 +++++++++++++++++++++++----------------------
qmeventd/qmeventd.h | 8 ++--
2 files changed, 54 insertions(+), 52 deletions(-)
diff --git a/qmeventd/qmeventd.c b/qmeventd/qmeventd.c
index 8d32827..3256d42 100644
--- a/qmeventd/qmeventd.c
+++ b/qmeventd/qmeventd.c
@@ -29,6 +29,7 @@
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/socket.h>
@@ -36,15 +37,19 @@
#include <sys/un.h>
#include <sys/wait.h>
#include <unistd.h>
+#include <time.h>
#include "qmeventd.h"
+#define DEFAULT_KILL_TIMEOUT 60
+
static int verbose = 0;
+static int kill_timeout = DEFAULT_KILL_TIMEOUT;
static int epoll_fd = 0;
static const char *progname;
GHashTable *vm_clients; // key=vmid (freed on remove), value=*Client (free manually)
GSList *forced_cleanups;
-volatile sig_atomic_t alarm_triggered = 0;
+static int needs_cleanup = 0;
/*
* Helper functions
@@ -56,6 +61,7 @@ usage()
fprintf(stderr, "Usage: %s [-f] [-v] PATH\n", progname);
fprintf(stderr, " -f run in foreground (default: false)\n");
fprintf(stderr, " -v verbose (default: false)\n");
+ fprintf(stderr, " -t <s> kill timeout (default: %ds)\n", DEFAULT_KILL_TIMEOUT);
fprintf(stderr, " PATH use PATH for socket\n");
}
@@ -436,6 +442,11 @@ cleanup_client(struct Client *client)
break;
}
+ if (client->pidfd > 0) {
+ (void)close(client->pidfd);
+ }
+ VERBOSE_PRINT("removing %s from forced cleanups\n", client->qemu.vmid);
+ forced_cleanups = g_slist_remove(forced_cleanups, client);
free(client);
}
@@ -469,16 +480,13 @@ terminate_client(struct Client *client)
int err = kill(client->pid, SIGTERM);
log_neg(err, "kill");
- struct CleanupData *data_ptr = malloc(sizeof(struct CleanupData));
- struct CleanupData data = {
- .pid = client->pid,
- .pidfd = pidfd
- };
- *data_ptr = data;
- forced_cleanups = g_slist_prepend(forced_cleanups, (void *)data_ptr);
+ time_t timeout = time(NULL) + kill_timeout;
+
+ client->pidfd = pidfd;
+ client->timeout = timeout;
- // resets any other alarms, but will fire eventually and cleanup all
- alarm(5);
+ forced_cleanups = g_slist_prepend(forced_cleanups, (void *)client);
+ needs_cleanup = 1;
}
void
@@ -551,58 +559,51 @@ handle_client(struct Client *client)
json_tokener_free(tok);
}
-
-/*
- * SIGALRM and cleanup handling
- *
- * terminate_client will set an alarm for 5 seconds and add its client's PID to
- * the forced_cleanups list - when the timer expires, we iterate the list and
- * attempt to issue SIGKILL to all processes which haven't yet stopped.
- */
-
-static void
-alarm_handler(__attribute__((unused)) int signum)
-{
- alarm_triggered = 1;
-}
-
static void
-sigkill(void *ptr, __attribute__((unused)) void *unused)
+sigkill(void *ptr, void *time_ptr)
{
- struct CleanupData data = *((struct CleanupData *)ptr);
+ struct Client *data = ptr;
int err;
- if (data.pidfd > 0) {
- err = pidfd_send_signal(data.pidfd, SIGKILL, NULL, 0);
- (void)close(data.pidfd);
+ if (data->timeout != 0 && data->timeout > *(time_t *)time_ptr) {
+ return;
+ }
+
+ if (data->pidfd > 0) {
+ err = pidfd_send_signal(data->pidfd, SIGKILL, NULL, 0);
+ (void)close(data->pidfd);
+ data->pidfd = -1;
} else {
- err = kill(data.pid, SIGKILL);
+ err = kill(data->pid, SIGKILL);
}
if (err < 0) {
if (errno != ESRCH) {
fprintf(stderr, "SIGKILL cleanup of pid '%d' failed - %s\n",
- data.pid, strerror(errno));
+ data->pid, strerror(errno));
}
} else {
fprintf(stderr, "cleanup failed, terminating pid '%d' with SIGKILL\n",
- data.pid);
+ data->pid);
}
+
+ data->timeout = 0;
+
+ // remove ourselves from the list
+ forced_cleanups = g_slist_remove(forced_cleanups, ptr);
}
static void
handle_forced_cleanup()
{
- if (alarm_triggered) {
+ if (g_slist_length(forced_cleanups) > 0) {
VERBOSE_PRINT("clearing forced cleanup backlog\n");
- alarm_triggered = 0;
- g_slist_foreach(forced_cleanups, sigkill, NULL);
- g_slist_free_full(forced_cleanups, free);
- forced_cleanups = NULL;
+ time_t cur_time = time(NULL);
+ g_slist_foreach(forced_cleanups, sigkill, &cur_time);
}
+ needs_cleanup = g_slist_length(forced_cleanups) > 0;
}
-
int
main(int argc, char *argv[])
{
@@ -611,7 +612,7 @@ main(int argc, char *argv[])
char *socket_path = NULL;
progname = argv[0];
- while ((opt = getopt(argc, argv, "hfv")) != -1) {
+ while ((opt = getopt(argc, argv, "hfvt:")) != -1) {
switch (opt) {
case 'f':
daemonize = 0;
@@ -619,6 +620,15 @@ main(int argc, char *argv[])
case 'v':
verbose = 1;
break;
+ case 't':
+ errno = 0;
+ char *endptr = NULL;
+ kill_timeout = strtoul(optarg, &endptr, 10);
+ if (errno != 0 || *endptr != '\0' || kill_timeout == 0) {
+ usage();
+ exit(EXIT_FAILURE);
+ }
+ break;
case 'h':
usage();
exit(EXIT_SUCCESS);
@@ -635,7 +645,6 @@ main(int argc, char *argv[])
}
signal(SIGCHLD, SIG_IGN);
- signal(SIGALRM, alarm_handler);
socket_path = argv[optind];
@@ -669,11 +678,7 @@ main(int argc, char *argv[])
int nevents;
for(;;) {
- nevents = epoll_wait(epoll_fd, events, 1, -1);
- if (nevents < 0 && errno == EINTR) {
- handle_forced_cleanup();
- continue;
- }
+ nevents = epoll_wait(epoll_fd, events, 1, needs_cleanup ? 10*1000 : -1);
bail_neg(nevents, "epoll_wait");
for (int n = 0; n < nevents; n++) {
@@ -688,7 +693,6 @@ main(int argc, char *argv[])
handle_client((struct Client *)events[n].data.ptr);
}
}
-
handle_forced_cleanup();
}
}
diff --git a/qmeventd/qmeventd.h b/qmeventd/qmeventd.h
index 2cf1947..9afc935 100644
--- a/qmeventd/qmeventd.h
+++ b/qmeventd/qmeventd.h
@@ -7,6 +7,7 @@
*/
#include <sys/syscall.h>
+#include <time.h>
#ifndef __NR_pidfd_open
#define __NR_pidfd_open 434
@@ -63,6 +64,8 @@ struct Client {
int fd;
pid_t pid;
+ int pidfd;
+ time_t timeout;
ClientType type;
ClientState state;
@@ -83,11 +86,6 @@ struct Client {
} vzdump;
};
-struct CleanupData {
- pid_t pid;
- int pidfd;
-};
-
void handle_qmp_handshake(struct Client *client);
void handle_qmp_event(struct Client *client, struct json_object *obj);
void handle_qmp_return(struct Client *client, struct json_object *data, bool error);
--
2.30.2
next prev parent reply other threads:[~2022-09-23 9:51 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-09-23 9:51 [pve-devel] [PATCH qemu-server v4 0/2] qmeventd: improve shutdown behaviour Dominik Csapak
2022-09-23 9:51 ` Dominik Csapak [this message]
2022-09-23 9:51 ` [pve-devel] [PATCH qemu-server v4 2/2] qmeventd: send QMP 'quit' command instead of SIGTERM Dominik Csapak
2022-09-23 10:20 ` [pve-devel] applied-series: [PATCH qemu-server v4 0/2] qmeventd: improve shutdown behaviour Wolfgang Bumiller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220923095115.1974215-2-d.csapak@proxmox.com \
--to=d.csapak@proxmox.com \
--cc=pve-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox