From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 8F9896A1FC for ; Wed, 24 Mar 2021 16:21:40 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 9C933C2F6 for ; Wed, 24 Mar 2021 16:21:08 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 224D0C1F3 for ; Wed, 24 Mar 2021 16:21:03 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id DBD9F46467 for ; Wed, 24 Mar 2021 16:21:02 +0100 (CET) From: Stefan Reiter To: pbs-devel@lists.proxmox.com Date: Wed, 24 Mar 2021 16:18:18 +0100 Message-Id: <20210324151827.26200-12-s.reiter@proxmox.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20210324151827.26200-1-s.reiter@proxmox.com> References: <20210324151827.26200-1-s.reiter@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.022 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_DNSWL_MED -2.3 Sender listed at https://www.dnswl.org/, medium trust SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. 
[mod.rs, watchdog.rs, api.rs, proxmox-restore-daemon.rs] Subject: [pbs-devel] [PATCH v2 proxmox-backup 11/20] file-restore-daemon: add watchdog module X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 24 Mar 2021 15:21:40 -0000 Add a watchdog that will automatically shut down the VM after 10 minutes, if no API call is received. Signed-off-by: Stefan Reiter <s.reiter@proxmox.com> --- v2: * use tokio instead of alarm() src/api2/types/file_restore.rs | 3 ++ src/bin/proxmox-restore-daemon.rs | 2 ++ src/bin/proxmox_restore_daemon/api.rs | 26 ++++++++++---- src/bin/proxmox_restore_daemon/mod.rs | 3 ++ src/bin/proxmox_restore_daemon/watchdog.rs | 41 ++++++++++++++++++++++ 5 files changed, 68 insertions(+), 7 deletions(-) create mode 100644 src/bin/proxmox_restore_daemon/watchdog.rs diff --git a/src/api2/types/file_restore.rs b/src/api2/types/file_restore.rs index cd8df16a..29085c31 100644 --- a/src/api2/types/file_restore.rs +++ b/src/api2/types/file_restore.rs @@ -8,5 +8,8 @@ use proxmox::api::api; pub struct RestoreDaemonStatus { /// VM uptime in seconds pub uptime: i64, + /// time left until auto-shutdown, keep in mind that this is useless when 'keep-timeout' is + /// not set, as then the status call will have reset the timer before returning the value + pub timeout: i64, } diff --git a/src/bin/proxmox-restore-daemon.rs b/src/bin/proxmox-restore-daemon.rs index e803238a..6b453ad3 100644 --- a/src/bin/proxmox-restore-daemon.rs +++ b/src/bin/proxmox-restore-daemon.rs @@ -45,6 +45,8 @@ fn main() -> Result<(), Error> { } async fn run() -> Result<(), Error> { + watchdog_init(); + let auth_config = Arc::new( auth::ticket_auth().map_err(|err| format_err!("reading ticket file failed: {}", err))?, ); diff --git a/src/bin/proxmox_restore_daemon/api.rs b/src/bin/proxmox_restore_daemon/api.rs index 2dec11fe..4c78a0e8 
100644 --- a/src/bin/proxmox_restore_daemon/api.rs +++ b/src/bin/proxmox_restore_daemon/api.rs @@ -8,6 +8,8 @@ use proxmox::list_subdirs_api_method; use proxmox_backup::api2::types::*; +use super::{watchdog_remaining, watchdog_ping}; + // NOTE: All API endpoints must have Permission::Superuser, as the configs for authentication do // not exist within the restore VM. Safety is guaranteed by checking a ticket via a custom ApiAuth. @@ -27,22 +29,32 @@ fn read_uptime() -> Result { } #[api( + input: { + properties: { + "keep-timeout": { + type: bool, + description: "If true, do not reset the watchdog timer on this API call.", + default: false, + optional: true, + }, + }, + }, access: { - description: "Permissions are handled outside restore VM.", - permission: &Permission::Superuser, + description: "Permissions are handled outside restore VM. This call can be made without a ticket, but keep-timeout is always assumed 'true' then.", + permission: &Permission::World, }, returns: { type: RestoreDaemonStatus, } )] /// General status information -fn status( - _param: Value, - _info: &ApiMethod, - _rpcenv: &mut dyn RpcEnvironment, -) -> Result<RestoreDaemonStatus, Error> { +fn status(rpcenv: &mut dyn RpcEnvironment, keep_timeout: bool) -> Result<RestoreDaemonStatus, Error> { + if !keep_timeout && rpcenv.get_auth_id().is_some() { + watchdog_ping(); + } Ok(RestoreDaemonStatus { uptime: read_uptime()? as i64, + timeout: watchdog_remaining(), }) } diff --git a/src/bin/proxmox_restore_daemon/mod.rs b/src/bin/proxmox_restore_daemon/mod.rs index 8396ebc5..3b52cf06 100644 --- a/src/bin/proxmox_restore_daemon/mod.rs +++ b/src/bin/proxmox_restore_daemon/mod.rs @@ -3,3 +3,6 @@ mod api; pub use api::*; pub mod auth; + +mod watchdog; +pub use watchdog::*; diff --git a/src/bin/proxmox_restore_daemon/watchdog.rs b/src/bin/proxmox_restore_daemon/watchdog.rs new file mode 100644 index 00000000..776d66e3 --- /dev/null +++ b/src/bin/proxmox_restore_daemon/watchdog.rs @@ -0,0 +1,41 @@ +//! 
Tokio-based watchdog that shuts down the VM if not pinged for TIMEOUT +use std::sync::atomic::{AtomicI64, Ordering}; +use proxmox::tools::time::epoch_i64; + +const TIMEOUT: i64 = 600; // seconds +static TRIGGERED: AtomicI64 = AtomicI64::new(0); + +fn handle_expired() -> ! { + use nix::sys::reboot; + println!("watchdog expired, shutting down"); + let err = reboot::reboot(reboot::RebootMode::RB_POWER_OFF).unwrap_err(); + println!("'reboot' syscall failed: {}", err); + std::process::exit(1); +} + +async fn watchdog_loop() { + use tokio::time::{sleep, Duration}; + loop { + let remaining = watchdog_remaining(); + if remaining <= 0 { + handle_expired(); + } + sleep(Duration::from_secs(remaining as u64)).await; + } +} + +/// Initialize watchdog +pub fn watchdog_init() { + watchdog_ping(); + tokio::spawn(watchdog_loop()); +} + +/// Trigger watchdog keepalive +pub fn watchdog_ping() { + TRIGGERED.store(epoch_i64(), Ordering::SeqCst); +} + +/// Returns the remaining time before watchdog expiry in seconds +pub fn watchdog_remaining() -> i64 { + TIMEOUT - (epoch_i64() - TRIGGERED.load(Ordering::SeqCst)) +} -- 2.20.1