From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <d.csapak@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id 4744674955
 for <pbs-devel@lists.proxmox.com>; Wed,  2 Jun 2021 14:19:56 +0200 (CEST)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id 36E2AAF52
 for <pbs-devel@lists.proxmox.com>; Wed,  2 Jun 2021 14:19:26 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [94.136.29.106])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS id AF771AF34
 for <pbs-devel@lists.proxmox.com>; Wed,  2 Jun 2021 14:19:21 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 822B9466F1
 for <pbs-devel@lists.proxmox.com>; Wed,  2 Jun 2021 14:19:21 +0200 (CEST)
From: Dominik Csapak <d.csapak@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Wed,  2 Jun 2021 14:19:19 +0200
Message-Id: <20210602121919.31852-1-d.csapak@proxmox.com>
X-Mailer: git-send-email 2.20.1
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.045 Adjusted score from AWL reputation of From: address
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
 URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See
 http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more
 information. [mod.rs, backup.rs]
Subject: [pbs-devel] [PATCH proxmox-backup v2] tape/drive: improve tape
 device locking behaviour
X-BeenThere: pbs-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox Backup Server development discussion
 <pbs-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pbs-devel/>
List-Post: <mailto:pbs-devel@lists.proxmox.com>
List-Help: <mailto:pbs-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Wed, 02 Jun 2021 12:19:56 -0000

by implementing a custom error type that is either 'TimeOut' or
'Other'.

In the api, check in the worker loop for exactly 'TimeOut' errors and continue only
then. All other errors lead to an aborted task.

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
changes from v1:
* implement custom error types, so that we can detect the one
  special error condition more cleanly

 src/api2/tape/backup.rs | 14 +++++++++-----
 src/tape/drive/mod.rs   | 34 +++++++++++++++++++++++++++++-----
 2 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/src/api2/tape/backup.rs b/src/api2/tape/backup.rs
index 77b11bb0..c3b541c7 100644
--- a/src/api2/tape/backup.rs
+++ b/src/api2/tape/backup.rs
@@ -65,6 +65,7 @@ use crate::{
         drive::{
             media_changer,
             lock_tape_device,
+            TapeLockError,
             set_tape_device_state,
         },
         changer::update_changer_online_status,
@@ -203,12 +204,15 @@ pub fn do_tape_backup_job(
                     // for scheduled tape backup jobs, we wait indefinitely for the lock
                     task_log!(worker, "waiting for drive lock...");
                     loop {
-                        if let Ok(lock) = lock_tape_device(&drive_config, &setup.drive) {
-                            drive_lock = Some(lock);
-                            break;
-                        } // ignore errors
-
                         worker.check_abort()?;
+                        match lock_tape_device(&drive_config, &setup.drive) {
+                            Ok(lock) => {
+                                drive_lock = Some(lock);
+                                break;
+                            }
+                            Err(TapeLockError::TimeOut) => continue,
+                            Err(TapeLockError::Other(err)) => return Err(err),
+                        }
                     }
                 }
                 set_tape_device_state(&setup.drive, &worker.upid().to_string())?;
diff --git a/src/tape/drive/mod.rs b/src/tape/drive/mod.rs
index f72e0b51..7bc02f9e 100644
--- a/src/tape/drive/mod.rs
+++ b/src/tape/drive/mod.rs
@@ -477,16 +477,34 @@ pub fn request_and_load_media(
     }
 }
 
+#[derive(thiserror::Error, Debug)]
+pub enum TapeLockError {
+    #[error("timeout while trying to lock")]
+    TimeOut,
+    #[error("{0}")]
+    Other(#[from] Error),
+}
+
+impl From<std::io::Error> for TapeLockError {
+    fn from(error: std::io::Error) -> Self {
+        Self::Other(error.into())
+    }
+}
+
 /// Acquires an exclusive lock for the tape device
 ///
 /// Basically calls lock_device_path() using the configured drive path.
 pub fn lock_tape_device(
     config: &SectionConfigData,
     drive: &str,
-) -> Result<DeviceLockGuard, Error> {
+) -> Result<DeviceLockGuard, TapeLockError> {
     let path = tape_device_path(config, drive)?;
-    lock_device_path(&path)
-        .map_err(|err| format_err!("unable to lock drive '{}' - {}", drive, err))
+    lock_device_path(&path).map_err(|err| match err {
+        TapeLockError::Other(err) => {
+            TapeLockError::Other(format_err!("unable to lock drive '{}' - {}", drive, err))
+        }
+        other => other,
+    })
 }
 
 /// Writes the given state for the specified drive
@@ -555,7 +573,7 @@ pub struct DeviceLockGuard(std::fs::File);
 //
 // Uses systemd escape_unit to compute a file name from `device_path`, the try
 // to lock `/var/lock/<name>`.
-fn lock_device_path(device_path: &str) -> Result<DeviceLockGuard, Error> {
+fn lock_device_path(device_path: &str) -> Result<DeviceLockGuard, TapeLockError> {
 
     let lock_name = crate::tools::systemd::escape_unit(device_path, true);
 
@@ -564,7 +582,13 @@ fn lock_device_path(device_path: &str) -> Result<DeviceLockGuard, Error> {
 
     let timeout = std::time::Duration::new(10, 0);
     let mut file = std::fs::OpenOptions::new().create(true).append(true).open(path)?;
-    proxmox::tools::fs::lock_file(&mut file, true, Some(timeout))?;
+    if let Err(err) =  proxmox::tools::fs::lock_file(&mut file, true, Some(timeout)) {
+        if err.kind() == std::io::ErrorKind::Interrupted {
+            return Err(TapeLockError::TimeOut);
+        } else {
+            return Err(err.into());
+        }
+    }
 
     let backup_user = crate::backup::backup_user()?;
     fchown(file.as_raw_fd(), Some(backup_user.uid), Some(backup_user.gid))?;
-- 
2.20.1