From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id E3D2C7479A for ; Wed, 2 Jun 2021 10:20:19 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id CCBC48660 for ; Wed, 2 Jun 2021 10:19:49 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 4D2D3864C for ; Wed, 2 Jun 2021 10:19:45 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 19A56466EB for ; Wed, 2 Jun 2021 10:19:39 +0200 (CEST) From: Dominik Csapak To: pbs-devel@lists.proxmox.com Date: Wed, 2 Jun 2021 10:19:36 +0200 Message-Id: <20210602081936.20907-1-d.csapak@proxmox.com> X-Mailer: git-send-email 2.20.1 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.048 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. 
[backup.rs, mod.rs] Subject: [pbs-devel] [PATCH proxmox-backup] tape/drive: improve tape device locking behaviour X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 02 Jun 2021 08:20:20 -0000 by passing through errors if they are of the 'Interrupted' kind, since that happens mostly when the lock is interrupted by the timeout timer signal. In the api, check in the worker loop for exactly this error and continue only then. All other errors lead to an aborted task. Signed-off-by: Dominik Csapak --- an alternative solution would be to change the function signature to return an Option instead and check that, but this would be a 'weird' interface for a locking function... src/api2/tape/backup.rs | 20 +++++++++++++++----- src/tape/drive/mod.rs | 10 ++++++++-- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/api2/tape/backup.rs b/src/api2/tape/backup.rs index 77b11bb0..7e1de88e 100644 --- a/src/api2/tape/backup.rs +++ b/src/api2/tape/backup.rs @@ -203,12 +203,22 @@ pub fn do_tape_backup_job( // for scheduled tape backup jobs, we wait indefinitely for the lock task_log!(worker, "waiting for drive lock..."); loop { - if let Ok(lock) = lock_tape_device(&drive_config, &setup.drive) { - drive_lock = Some(lock); - break; - } // ignore errors - worker.check_abort()?; + match lock_tape_device(&drive_config, &setup.drive) { + Ok(lock) => { + drive_lock = Some(lock); + break; + } + Err(err) => { + if let Some(err) = err.downcast_ref::() { + if err.kind() == std::io::ErrorKind::Interrupted { + // locking was probably interrupted due to a timeout + continue; + } + } + return Err(err); + } + } } } set_tape_device_state(&setup.drive, &worker.upid().to_string())?; diff --git a/src/tape/drive/mod.rs b/src/tape/drive/mod.rs index f72e0b51..5cc81924 100644 --- a/src/tape/drive/mod.rs +++ 
b/src/tape/drive/mod.rs @@ -485,8 +485,14 @@ pub fn lock_tape_device( drive: &str, ) -> Result { let path = tape_device_path(config, drive)?; - lock_device_path(&path) - .map_err(|err| format_err!("unable to lock drive '{}' - {}", drive, err)) + match lock_device_path(&path) { + Ok(lock) => Ok(lock), + // we do not change interrrupted errors, so that the caller can catch that + Err(err) => match err.downcast_ref::() { + Some(e) if e.kind() == std::io::ErrorKind::Interrupted => Err(err), + _ => bail!("unable to lock drive '{}' - {}", drive, err), + } + } } /// Writes the given state for the specified drive -- 2.20.1