public inbox for pbs-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Stefan Reiter <s.reiter@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH proxmox-backup 16/22] file-restore-daemon: add watchdog module
Date: Tue, 16 Feb 2021 18:07:04 +0100	[thread overview]
Message-ID: <20210216170710.31767-17-s.reiter@proxmox.com> (raw)
In-Reply-To: <20210216170710.31767-1-s.reiter@proxmox.com>

Add a watchdog that will automatically shut down the VM after 10
minutes, if no API call is received.

This is handled using the unix 'alarm' syscall.

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
 src/api2/types/file_restore.rs             |  3 ++
 src/bin/proxmox-restore-daemon.rs          |  5 ++
 src/bin/proxmox_restore_daemon/api.rs      | 22 ++++++--
 src/bin/proxmox_restore_daemon/mod.rs      |  3 ++
 src/bin/proxmox_restore_daemon/watchdog.rs | 63 ++++++++++++++++++++++
 5 files changed, 91 insertions(+), 5 deletions(-)
 create mode 100644 src/bin/proxmox_restore_daemon/watchdog.rs

diff --git a/src/api2/types/file_restore.rs b/src/api2/types/file_restore.rs
index cd8df16a..710c6d83 100644
--- a/src/api2/types/file_restore.rs
+++ b/src/api2/types/file_restore.rs
@@ -8,5 +8,8 @@ use proxmox::api::api;
 pub struct RestoreDaemonStatus {
     /// VM uptime in seconds
     pub uptime: i64,
+    /// time left (in seconds) until auto-shutdown; note that this is inaccurate when
+    /// 'keep-timeout' is not set, since the status call itself will have reset the timer
+    pub timeout: i64,
 }
 
diff --git a/src/bin/proxmox-restore-daemon.rs b/src/bin/proxmox-restore-daemon.rs
index 1ec90794..d30da563 100644
--- a/src/bin/proxmox-restore-daemon.rs
+++ b/src/bin/proxmox-restore-daemon.rs
@@ -40,6 +40,9 @@ fn main() -> Result<(), Error> {
         .write_style(env_logger::WriteStyle::Never)
         .init();
 
+    // start watchdog, failure is a critical error as it leads to a scenario where we never exit
+    watchdog_init()?;
+
     proxmox_backup::tools::runtime::main(run())
 }
 
@@ -77,6 +80,8 @@ fn accept_vsock_connections(
                 Ok(stream) => {
                     if sender.send(Ok(stream)).await.is_err() {
                         error!("connection accept channel was closed");
+                    } else {
+                        watchdog_ping();
                     }
                 }
                 Err(err) => {
diff --git a/src/bin/proxmox_restore_daemon/api.rs b/src/bin/proxmox_restore_daemon/api.rs
index 3c642aaf..8eb727df 100644
--- a/src/bin/proxmox_restore_daemon/api.rs
+++ b/src/bin/proxmox_restore_daemon/api.rs
@@ -8,6 +8,8 @@ use proxmox::list_subdirs_api_method;
 
 use proxmox_backup::api2::types::*;
 
+use super::{watchdog_remaining, watchdog_undo_ping};
+
 // NOTE: All API endpoints must have Permission::World, as the configs for authentication do not
 // exist within the restore VM. Safety is guaranteed since we use a low port, so only root on the
 // host can contact us - and there the proxmox-backup-client validates permissions already.
@@ -25,6 +27,16 @@ fn read_uptime() -> Result<f32, Error> {
 }
 
 #[api(
+    input: {
+        properties: {
+            "keep-timeout": {
+                type: bool,
+                description: "If true, do not reset the watchdog timer on this API call.",
+                default: false,
+                optional: true,
+            },
+        },
+    },
     access: {
         description: "Permissions are handled outside restore VM.",
         permission: &Permission::World,
@@ -34,12 +46,12 @@ fn read_uptime() -> Result<f32, Error> {
     }
 )]
 /// General status information
-fn status(
-    _param: Value,
-    _info: &ApiMethod,
-    _rpcenv: &mut dyn RpcEnvironment,
-) -> Result<RestoreDaemonStatus, Error> {
+fn status(keep_timeout: bool) -> Result<RestoreDaemonStatus, Error> {
+    if keep_timeout {
+        watchdog_undo_ping();
+    }
     Ok(RestoreDaemonStatus {
         uptime: read_uptime()? as i64,
+        timeout: watchdog_remaining(false),
     })
 }
diff --git a/src/bin/proxmox_restore_daemon/mod.rs b/src/bin/proxmox_restore_daemon/mod.rs
index d938a5bb..6802d31c 100644
--- a/src/bin/proxmox_restore_daemon/mod.rs
+++ b/src/bin/proxmox_restore_daemon/mod.rs
@@ -1,3 +1,6 @@
 ///! File restore VM related functionality
 mod api;
 pub use api::*;
+
+mod watchdog;
+pub use watchdog::*;
diff --git a/src/bin/proxmox_restore_daemon/watchdog.rs b/src/bin/proxmox_restore_daemon/watchdog.rs
new file mode 100644
index 00000000..f722be0b
--- /dev/null
+++ b/src/bin/proxmox_restore_daemon/watchdog.rs
@@ -0,0 +1,63 @@
+//! SIGALRM/alarm(2) based watchdog that shuts down the VM if not pinged for TIMEOUT seconds
+use anyhow::Error;
+use std::sync::atomic::{AtomicI64, Ordering};
+
+use nix::sys::{reboot, signal::*};
+use nix::unistd::alarm;
+
+const TIMEOUT: u32 = 600; // seconds
+static TRIGGERED: AtomicI64 = AtomicI64::new(0);
+static LAST_TRIGGERED: AtomicI64 = AtomicI64::new(0);
+
+/// SIGALRM handler, called when the alarm-watchdog expires: powers off the VM, exit(1) on failure
+extern "C" fn alarm_handler(_signal: nix::libc::c_int) {
+    // println! instead of log, as log might buffer; NOTE(review): println! is not signal-safe
+    println!("Watchdog expired, shutting down VM...");
+    let err = reboot::reboot(reboot::RebootMode::RB_POWER_OFF).unwrap_err();
+    println!("'reboot' syscall failed: {}", err);
+    std::process::exit(1);
+}
+
+/// Installs the SIGALRM handler and arms the alarm() based watchdog; Err if sigaction fails
+pub fn watchdog_init() -> Result<(), Error> {
+    unsafe {
+        sigaction(
+            Signal::SIGALRM,
+            &SigAction::new(
+                SigHandler::Handler(alarm_handler),
+                SaFlags::empty(),
+                SigSet::empty(),
+            ),
+        )?;
+    }
+
+    watchdog_ping();
+
+    Ok(())
+}
+
+/// Watchdog keepalive: re-arms the alarm for TIMEOUT seconds and records the ping timestamp
+pub fn watchdog_ping() {
+    alarm::set(TIMEOUT);
+    let cur_time = proxmox::tools::time::epoch_i64();
+    let last = TRIGGERED.swap(cur_time, Ordering::SeqCst);
+    LAST_TRIGGERED.store(last, Ordering::SeqCst);
+}
+
+/// Returns the seconds left before watchdog expiry, based on the latest ping if 'current' is true,
+/// otherwise based on the ping before that (which is probably what you want in the API, as from
+/// an API call 'current'=true will *always* return TIMEOUT). The value is not clamped at 0.
+pub fn watchdog_remaining(current: bool) -> i64 {
+    let cur_time = proxmox::tools::time::epoch_i64();
+    let last_time = (if current { &TRIGGERED } else { &LAST_TRIGGERED }).load(Ordering::SeqCst);
+    TIMEOUT as i64 - (cur_time - last_time)
+}
+
+/// Reverts the most recent watchdog ping: restores the previous ping timestamp and re-arms the
+/// alarm with the time that was left before it. Call this in the API to fake a non-resetting call
+pub fn watchdog_undo_ping() {
+    let set = watchdog_remaining(false);
+    TRIGGERED.store(LAST_TRIGGERED.load(Ordering::SeqCst), Ordering::SeqCst);
+    // clamp to at least 1 second: alarm(0) would cancel the pending alarm instead of firing
+    alarm::set(1.max(set) as u32);
+}
-- 
2.20.1





  parent reply	other threads:[~2021-02-16 17:07 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-16 17:06 [pbs-devel] [PATCH 00/22] Single file restore for VM images Stefan Reiter
2021-02-16 17:06 ` [pbs-devel] [PATCH pxar 01/22] decoder/aio: add contents() and content_size() calls Stefan Reiter
2021-02-17  7:56   ` Wolfgang Bumiller
2021-02-16 17:06 ` [pbs-devel] [PATCH pxar 02/22] decoder: add peek() Stefan Reiter
2021-02-17  8:20   ` Wolfgang Bumiller
2021-02-17  8:38     ` Stefan Reiter
2021-02-16 17:06 ` [pbs-devel] [PATCH proxmox-restore-vm-data 03/22] initial commit Stefan Reiter
2021-03-15 18:35   ` [pbs-devel] applied: " Thomas Lamprecht
2021-03-16 15:33     ` Stefan Reiter
2021-02-16 17:06 ` [pbs-devel] [PATCH proxmox-backup 04/22] api2/admin/datastore: refactor list_dir_content in catalog_reader Stefan Reiter
2021-02-17  7:50   ` [pbs-devel] applied: " Thomas Lamprecht
2021-02-16 17:06 ` [pbs-devel] [PATCH proxmox-backup 05/22] api2/admin/datastore: accept "/" as path for root Stefan Reiter
2021-02-17  7:50   ` [pbs-devel] applied: " Thomas Lamprecht
2021-02-16 17:06 ` [pbs-devel] [PATCH proxmox-backup 06/22] api2/admin/datastore: refactor create_zip into pxar/extract Stefan Reiter
2021-02-17  7:50   ` [pbs-devel] applied: " Thomas Lamprecht
2021-02-16 17:06 ` [pbs-devel] [PATCH proxmox-backup 07/22] pxar/extract: add extract_sub_dir Stefan Reiter
2021-02-17  7:51   ` [pbs-devel] applied: " Thomas Lamprecht
2021-02-16 17:06 ` [pbs-devel] [PATCH proxmox-backup 08/22] pxar/extract: add sequential variants to create_zip, extract_sub_dir Stefan Reiter
2021-02-16 17:06 ` [pbs-devel] [PATCH proxmox-backup 09/22] client: extract common functions to proxmox_client_tools module Stefan Reiter
2021-02-17  6:49   ` Dietmar Maurer
2021-02-17  7:58     ` Stefan Reiter
2021-02-17  8:50       ` Dietmar Maurer
2021-02-17  9:47         ` Stefan Reiter
2021-02-17 10:12           ` Dietmar Maurer
2021-02-17  9:13   ` [pbs-devel] applied: " Dietmar Maurer
2021-02-16 17:06 ` [pbs-devel] [PATCH proxmox-backup 10/22] proxmox_client_tools: extract 'key' from client module Stefan Reiter
2021-02-17  9:11   ` Dietmar Maurer
2021-02-16 17:06 ` [pbs-devel] [PATCH proxmox-backup 11/22] file-restore: add binary and basic commands Stefan Reiter
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 12/22] file-restore: allow specifying output-format Stefan Reiter
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 13/22] rest: implement tower service for UnixStream Stefan Reiter
2021-02-17  6:52   ` [pbs-devel] applied: " Dietmar Maurer
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 14/22] client: add VsockClient to connect to virtio-vsock VMs Stefan Reiter
2021-02-17  7:24   ` [pbs-devel] applied: " Dietmar Maurer
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 15/22] file-restore-daemon: add binary with virtio-vsock API server Stefan Reiter
2021-02-17 10:17   ` Dietmar Maurer
2021-02-17 10:25   ` Dietmar Maurer
2021-02-17 10:30     ` Stefan Reiter
2021-02-17 11:13       ` Dietmar Maurer
2021-02-17 11:26   ` Dietmar Maurer
2021-02-16 17:07 ` Stefan Reiter [this message]
2021-02-17 10:52   ` [pbs-devel] [PATCH proxmox-backup 16/22] file-restore-daemon: add watchdog module Wolfgang Bumiller
2021-02-17 11:14     ` Stefan Reiter
2021-02-17 11:29       ` Wolfgang Bumiller
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 17/22] file-restore-daemon: add disk module Stefan Reiter
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 18/22] file-restore: add basic VM/block device support Stefan Reiter
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 19/22] file-restore: improve logging of VM with logrotate Stefan Reiter
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 20/22] debian/client: add postinst hook to rebuild file-restore initramfs Stefan Reiter
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 21/22] file-restore(-daemon): implement list API Stefan Reiter
2021-02-16 17:07 ` [pbs-devel] [PATCH proxmox-backup 22/22] file-restore: add 'extract' command for VM file restore Stefan Reiter
2021-02-16 17:11 ` [pbs-devel] [PATCH 00/22] Single file restore for VM images Stefan Reiter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210216170710.31767-17-s.reiter@proxmox.com \
    --to=s.reiter@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal