From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 4744674955 for ; Wed, 2 Jun 2021 14:19:56 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 36E2AAF52 for ; Wed, 2 Jun 2021 14:19:26 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id AF771AF34 for ; Wed, 2 Jun 2021 14:19:21 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 822B9466F1 for ; Wed, 2 Jun 2021 14:19:21 +0200 (CEST) From: Dominik Csapak To: pbs-devel@lists.proxmox.com Date: Wed, 2 Jun 2021 14:19:19 +0200 Message-Id: <20210602121919.31852-1-d.csapak@proxmox.com> X-Mailer: git-send-email 2.20.1 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.045 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. 
[mod.rs, backup.rs] Subject: [pbs-devel] [PATCH proxmox-backup v2] tape/drive: improve tape device locking behaviour X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 02 Jun 2021 12:19:56 -0000 by implementing a custom error type that is either 'TimeOut' or 'Other'. In the api, check in the worker loop for exactly 'TimeOut' errors and continue only then. All other errors lead to an aborted task. Signed-off-by: Dominik Csapak --- changes from v1: * implement custom error types, so that we can detect the one special error condition more cleanly src/api2/tape/backup.rs | 14 +++++++++----- src/tape/drive/mod.rs | 34 +++++++++++++++++++++++++++++----- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/src/api2/tape/backup.rs b/src/api2/tape/backup.rs index 77b11bb0..c3b541c7 100644 --- a/src/api2/tape/backup.rs +++ b/src/api2/tape/backup.rs @@ -65,6 +65,7 @@ use crate::{ drive::{ media_changer, lock_tape_device, + TapeLockError, set_tape_device_state, }, changer::update_changer_online_status, @@ -203,12 +204,15 @@ pub fn do_tape_backup_job( // for scheduled tape backup jobs, we wait indefinitely for the lock task_log!(worker, "waiting for drive lock..."); loop { - if let Ok(lock) = lock_tape_device(&drive_config, &setup.drive) { - drive_lock = Some(lock); - break; - } // ignore errors - worker.check_abort()?; + match lock_tape_device(&drive_config, &setup.drive) { + Ok(lock) => { + drive_lock = Some(lock); + break; + } + Err(TapeLockError::TimeOut) => continue, + Err(TapeLockError::Other(err)) => return Err(err), + } } } set_tape_device_state(&setup.drive, &worker.upid().to_string())?; diff --git a/src/tape/drive/mod.rs b/src/tape/drive/mod.rs index f72e0b51..7bc02f9e 100644 --- a/src/tape/drive/mod.rs +++ b/src/tape/drive/mod.rs @@ -477,16 +477,34 @@ pub fn 
request_and_load_media( } } +#[derive(thiserror::Error, Debug)] +pub enum TapeLockError { + #[error("timeout while trying to lock")] + TimeOut, + #[error("{0}")] + Other(#[from] Error), +} + +impl From<std::io::Error> for TapeLockError { + fn from(error: std::io::Error) -> Self { + Self::Other(error.into()) + } +} + /// Acquires an exclusive lock for the tape device /// /// Basically calls lock_device_path() using the configured drive path. pub fn lock_tape_device( config: &SectionConfigData, drive: &str, -) -> Result<DeviceLockGuard, Error> { +) -> Result<DeviceLockGuard, TapeLockError> { let path = tape_device_path(config, drive)?; - lock_device_path(&path) - .map_err(|err| format_err!("unable to lock drive '{}' - {}", drive, err)) + lock_device_path(&path).map_err(|err| match err { + TapeLockError::Other(err) => { + TapeLockError::Other(format_err!("unable to lock drive '{}' - {}", drive, err)) + } + other => other, + }) } /// Writes the given state for the specified drive @@ -555,7 +573,7 @@ pub struct DeviceLockGuard(std::fs::File); // // Uses systemd escape_unit to compute a file name from `device_path`, the try // to lock `/var/lock/<name>`. -fn lock_device_path(device_path: &str) -> Result<DeviceLockGuard, Error> { +fn lock_device_path(device_path: &str) -> Result<DeviceLockGuard, TapeLockError> { let lock_name = crate::tools::systemd::escape_unit(device_path, true); @@ -564,7 +582,13 @@ fn lock_device_path(device_path: &str) -> Result<DeviceLockGuard, Error> { let timeout = std::time::Duration::new(10, 0); let mut file = std::fs::OpenOptions::new().create(true).append(true).open(path)?; - proxmox::tools::fs::lock_file(&mut file, true, Some(timeout))?; + if let Err(err) = proxmox::tools::fs::lock_file(&mut file, true, Some(timeout)) { + if err.kind() == std::io::ErrorKind::Interrupted { + return Err(TapeLockError::TimeOut); + } else { + return Err(err.into()); + } + } let backup_user = crate::backup::backup_user()?; fchown(file.as_raw_fd(), Some(backup_user.uid), Some(backup_user.gid))?; -- 2.20.1