From: Stefan Reiter <s.reiter@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH proxmox-backup 16/22] file-restore-daemon: add watchdog module
Date: Tue, 16 Feb 2021 18:07:04 +0100 [thread overview]
Message-ID: <20210216170710.31767-17-s.reiter@proxmox.com> (raw)
In-Reply-To: <20210216170710.31767-1-s.reiter@proxmox.com>
Add a watchdog that will automatically shut down the VM after 10
minutes, if no API call is received.
This is handled using the unix 'alarm' syscall.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
src/api2/types/file_restore.rs | 3 ++
src/bin/proxmox-restore-daemon.rs | 5 ++
src/bin/proxmox_restore_daemon/api.rs | 22 ++++++--
src/bin/proxmox_restore_daemon/mod.rs | 3 ++
src/bin/proxmox_restore_daemon/watchdog.rs | 63 ++++++++++++++++++++++
5 files changed, 91 insertions(+), 5 deletions(-)
create mode 100644 src/bin/proxmox_restore_daemon/watchdog.rs
diff --git a/src/api2/types/file_restore.rs b/src/api2/types/file_restore.rs
index cd8df16a..710c6d83 100644
--- a/src/api2/types/file_restore.rs
+++ b/src/api2/types/file_restore.rs
@@ -8,5 +8,8 @@ use proxmox::api::api;
pub struct RestoreDaemonStatus {
/// VM uptime in seconds
pub uptime: i64,
+ /// time left until auto-shutdown, keep in mind that this is inaccurate when 'keep-timeout' is
+ /// not set, as then after the status call the timer will have reset
+ pub timeout: i64,
}
diff --git a/src/bin/proxmox-restore-daemon.rs b/src/bin/proxmox-restore-daemon.rs
index 1ec90794..d30da563 100644
--- a/src/bin/proxmox-restore-daemon.rs
+++ b/src/bin/proxmox-restore-daemon.rs
@@ -40,6 +40,9 @@ fn main() -> Result<(), Error> {
.write_style(env_logger::WriteStyle::Never)
.init();
+ // start watchdog, failure is a critical error as it leads to a scenario where we never exit
+ watchdog_init()?;
+
proxmox_backup::tools::runtime::main(run())
}
@@ -77,6 +80,8 @@ fn accept_vsock_connections(
Ok(stream) => {
if sender.send(Ok(stream)).await.is_err() {
error!("connection accept channel was closed");
+ } else {
+ watchdog_ping();
}
}
Err(err) => {
diff --git a/src/bin/proxmox_restore_daemon/api.rs b/src/bin/proxmox_restore_daemon/api.rs
index 3c642aaf..8eb727df 100644
--- a/src/bin/proxmox_restore_daemon/api.rs
+++ b/src/bin/proxmox_restore_daemon/api.rs
@@ -8,6 +8,8 @@ use proxmox::list_subdirs_api_method;
use proxmox_backup::api2::types::*;
+use super::{watchdog_remaining, watchdog_undo_ping};
+
// NOTE: All API endpoints must have Permission::World, as the configs for authentication do not
// exist within the restore VM. Safety is guaranteed since we use a low port, so only root on the
// host can contact us - and there the proxmox-backup-client validates permissions already.
@@ -25,6 +27,16 @@ fn read_uptime() -> Result<f32, Error> {
}
#[api(
+ input: {
+ properties: {
+ "keep-timeout": {
+ type: bool,
+ description: "If true, do not reset the watchdog timer on this API call.",
+ default: false,
+ optional: true,
+ },
+ },
+ },
access: {
description: "Permissions are handled outside restore VM.",
permission: &Permission::World,
@@ -34,12 +46,12 @@ fn read_uptime() -> Result<f32, Error> {
}
)]
/// General status information
-fn status(
- _param: Value,
- _info: &ApiMethod,
- _rpcenv: &mut dyn RpcEnvironment,
-) -> Result<RestoreDaemonStatus, Error> {
+fn status(keep_timeout: bool) -> Result<RestoreDaemonStatus, Error> {
+ if keep_timeout {
+ watchdog_undo_ping();
+ }
Ok(RestoreDaemonStatus {
uptime: read_uptime()? as i64,
+ timeout: watchdog_remaining(false),
})
}
diff --git a/src/bin/proxmox_restore_daemon/mod.rs b/src/bin/proxmox_restore_daemon/mod.rs
index d938a5bb..6802d31c 100644
--- a/src/bin/proxmox_restore_daemon/mod.rs
+++ b/src/bin/proxmox_restore_daemon/mod.rs
@@ -1,3 +1,6 @@
///! File restore VM related functionality
mod api;
pub use api::*;
+
+mod watchdog;
+pub use watchdog::*;
diff --git a/src/bin/proxmox_restore_daemon/watchdog.rs b/src/bin/proxmox_restore_daemon/watchdog.rs
new file mode 100644
index 00000000..f722be0b
--- /dev/null
+++ b/src/bin/proxmox_restore_daemon/watchdog.rs
@@ -0,0 +1,63 @@
+//! SIGALRM/alarm(1) based watchdog that shuts down the VM if not pinged for TIMEOUT
+use anyhow::Error;
+use std::sync::atomic::{AtomicI64, Ordering};
+
+use nix::sys::{reboot, signal::*};
+use nix::unistd::alarm;
+
+const TIMEOUT: u32 = 600; // seconds
+static TRIGGERED: AtomicI64 = AtomicI64::new(0);
+static LAST_TRIGGERED: AtomicI64 = AtomicI64::new(0);
+
+/// Handler is called when alarm-watchdog expires, immediately shuts down VM when triggered
+extern "C" fn alarm_handler(_signal: nix::libc::c_int) {
+ // use println! instead of log, since log might buffer and not print before shut down
+ println!("Watchdog expired, shutting down VM...");
+ let err = reboot::reboot(reboot::RebootMode::RB_POWER_OFF).unwrap_err();
+ println!("'reboot' syscall failed: {}", err);
+ std::process::exit(1);
+}
+
+/// Initialize alarm() based watchdog
+pub fn watchdog_init() -> Result<(), Error> {
+ unsafe {
+ sigaction(
+ Signal::SIGALRM,
+ &SigAction::new(
+ SigHandler::Handler(alarm_handler),
+ SaFlags::empty(),
+ SigSet::empty(),
+ ),
+ )?;
+ }
+
+ watchdog_ping();
+
+ Ok(())
+}
+
+/// Trigger watchdog keepalive
+pub fn watchdog_ping() {
+ alarm::set(TIMEOUT);
+ let cur_time = proxmox::tools::time::epoch_i64();
+ let last = TRIGGERED.swap(cur_time, Ordering::SeqCst);
+ LAST_TRIGGERED.store(last, Ordering::SeqCst);
+}
+
+/// Returns the remaining time before watchdog expiry in seconds if 'current' is true, otherwise it
+/// returns the remaining time before the last ping (which is probably what you want in the API, as
+/// from an API call 'current'=true will *always* return TIMEOUT)
+pub fn watchdog_remaining(current: bool) -> i64 {
+ let cur_time = proxmox::tools::time::epoch_i64();
+ let last_time = (if current { &TRIGGERED } else { &LAST_TRIGGERED }).load(Ordering::SeqCst);
+ TIMEOUT as i64 - (cur_time - last_time)
+}
+
+/// Undo the last watchdog ping and set timer back to previous state, call this in the API to fake
+/// a non-resetting call
+pub fn watchdog_undo_ping() {
+ let set = watchdog_remaining(false);
+ TRIGGERED.store(LAST_TRIGGERED.load(Ordering::SeqCst), Ordering::SeqCst);
+ // make sure argument cannot be 0, as that would cancel any alarm
+ alarm::set(1.max(set) as u32);
+}
--
2.20.1
next prev parent reply other threads:[~2021-02-16 17:07 UTC|newest]
Thread overview: 50+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-02-16 17:06 [pbs-devel] [PATCH 00/22] Single file restore for VM images Stefan Reiter
2021-02-16 17:06 ` [pbs-devel] [PATCH pxar 01/22] decoder/aio: add contents() and content_size() calls Stefan Reiter
2021-02-17 7:56 ` Wolfgang Bumiller
2021-02-16 17:06 ` [pbs-devel] [PATCH pxar 02/22] decoder: add peek() Stefan Reiter
2021-02-17 8:20 ` Wolfgang Bumiller
2021-02-17 8:38 ` Stefan Reiter
2021-02-16 17:06 ` [pbs-devel] [PATCH proxmox-restore-vm-data 03/22] initial commit Stefan Reiter
2021-03-15 18:35 ` [pbs-devel] applied: " Thomas Lamprecht
2021-03-16 15:33 ` Stefan Reiter
2021-02-16 17:06 ` [pbs-devel] [PATCH proxmox-backup 04/22] api2/admin/datastore: refactor list_dir_content in catalog_reader Stefan Reiter
2021-02-17 7:50 ` [pbs-devel] applied: " Thomas Lamprecht
2021-02-16 17:06 ` [pbs-devel] [PATCH proxmox-backup 05/22] api2/admin/datastore: accept "/" as path for root Stefan Reiter
2021-02-17 7:50 ` [pbs-devel] applied: " Thomas Lamprecht
2021-02-16 17:06 ` [pbs-devel] [PATCH proxmox-backup 06/22] api2/admin/datastore: refactor create_zip into pxar/extract Stefan Reiter
2021-02-17 7:50 ` [pbs-devel] applied: " Thomas Lamprecht
2021-02-16 17:06 ` [pbs-devel] [PATCH proxmox-backup 07/22] pxar/extract: add extract_sub_dir Stefan Reiter
2021-02-17 7:51 ` [pbs-devel] applied: " Thomas Lamprecht
2021-02-16 17:06 ` [pbs-devel] [PATCH proxmox-backup 08/22] pxar/extract: add sequential variants to create_zip, extract_sub_dir Stefan Reiter
2021-02-16 17:06 ` [pbs-devel] [PATCH proxmox-backup 09/22] client: extract common functions to proxmox_client_tools module Stefan Reiter
2021-02-17 6:49 ` Dietmar Maurer
2021-02-17 7:58 ` Stefan Reiter
2021-02-17 8:50 ` Dietmar Maurer
2021-02-17 9:47 ` Stefan Reiter
2021-02-17 10:12 ` Dietmar Maurer
2021-02-17 9:13 ` [pbs-devel] applied: " Dietmar Maurer
2021-02-16 17:06 ` [pbs-devel] [PATCH proxmox-backup 10/22] proxmox_client_tools: extract 'key' from client module Stefan Reiter
2021-02-17 9:11 ` Dietmar Maurer
2021-02-16 17:06 ` [pbs-devel] [PATCH proxmox-backup 11/22] file-restore: add binary and basic commands Stefan Reiter
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 12/22] file-restore: allow specifying output-format Stefan Reiter
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 13/22] rest: implement tower service for UnixStream Stefan Reiter
2021-02-17 6:52 ` [pbs-devel] applied: " Dietmar Maurer
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 14/22] client: add VsockClient to connect to virtio-vsock VMs Stefan Reiter
2021-02-17 7:24 ` [pbs-devel] applied: " Dietmar Maurer
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 15/22] file-restore-daemon: add binary with virtio-vsock API server Stefan Reiter
2021-02-17 10:17 ` Dietmar Maurer
2021-02-17 10:25 ` Dietmar Maurer
2021-02-17 10:30 ` Stefan Reiter
2021-02-17 11:13 ` Dietmar Maurer
2021-02-17 11:26 ` Dietmar Maurer
2021-02-16 17:07 ` Stefan Reiter [this message]
2021-02-17 10:52 ` [pbs-devel] [PATCH proxmox-backup 16/22] file-restore-daemon: add watchdog module Wolfgang Bumiller
2021-02-17 11:14 ` Stefan Reiter
2021-02-17 11:29 ` Wolfgang Bumiller
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 17/22] file-restore-daemon: add disk module Stefan Reiter
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 18/22] file-restore: add basic VM/block device support Stefan Reiter
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 19/22] file-restore: improve logging of VM with logrotate Stefan Reiter
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 20/22] debian/client: add postinst hook to rebuild file-restore initramfs Stefan Reiter
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 21/22] file-restore(-daemon): implement list API Stefan Reiter
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 22/22] file-restore: add 'extract' command for VM file restore Stefan Reiter
2021-02-16 17:11 ` [pbs-devel] [PATCH 00/22] Single file restore for VM images Stefan Reiter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210216170710.31767-17-s.reiter@proxmox.com \
--to=s.reiter@proxmox.com \
--cc=pbs-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.