From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <d.csapak@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256)
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id E3D2C7479A
 for <pbs-devel@lists.proxmox.com>; Wed,  2 Jun 2021 10:20:19 +0200 (CEST)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id CCBC48660
 for <pbs-devel@lists.proxmox.com>; Wed,  2 Jun 2021 10:19:49 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [94.136.29.106])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256)
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS id 4D2D3864C
 for <pbs-devel@lists.proxmox.com>; Wed,  2 Jun 2021 10:19:45 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 19A56466EB
 for <pbs-devel@lists.proxmox.com>; Wed,  2 Jun 2021 10:19:39 +0200 (CEST)
From: Dominik Csapak <d.csapak@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Wed,  2 Jun 2021 10:19:36 +0200
Message-Id: <20210602081936.20907-1-d.csapak@proxmox.com>
X-Mailer: git-send-email 2.20.1
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.048 Adjusted score from AWL reputation of From: address
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
 URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See
 http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more
 information. [backup.rs, mod.rs]
Subject: [pbs-devel] [PATCH proxmox-backup] tape/drive: improve tape device
 locking behaviour
X-BeenThere: pbs-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox Backup Server development discussion
 <pbs-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pbs-devel/>
List-Post: <mailto:pbs-devel@lists.proxmox.com>
List-Help: <mailto:pbs-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel>, 
 <mailto:pbs-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Wed, 02 Jun 2021 08:20:20 -0000

by passing through errors if they are of the 'Interrupted' kind,
since that happens mostly when the lock is interrupted by the
timeout timer signal.

In the API, check in the worker loop for exactly this error and continue only
then. All other errors lead to an aborted task.

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
an alternative solution would be to change the function signature to
return an Option<Guard> instead and check that, but this would
be a 'weird' interface for a locking function...

 src/api2/tape/backup.rs | 20 +++++++++++++++-----
 src/tape/drive/mod.rs   | 10 ++++++++--
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/src/api2/tape/backup.rs b/src/api2/tape/backup.rs
index 77b11bb0..7e1de88e 100644
--- a/src/api2/tape/backup.rs
+++ b/src/api2/tape/backup.rs
@@ -203,12 +203,22 @@ pub fn do_tape_backup_job(
                     // for scheduled tape backup jobs, we wait indefinitely for the lock
                     task_log!(worker, "waiting for drive lock...");
                     loop {
-                        if let Ok(lock) = lock_tape_device(&drive_config, &setup.drive) {
-                            drive_lock = Some(lock);
-                            break;
-                        } // ignore errors
-
                         worker.check_abort()?;
+                        match lock_tape_device(&drive_config, &setup.drive) {
+                            Ok(lock) => {
+                                drive_lock = Some(lock);
+                                break;
+                            }
+                            Err(err) => {
+                                if let Some(err) = err.downcast_ref::<std::io::Error>() {
+                                    if err.kind() == std::io::ErrorKind::Interrupted {
+                                        // locking was probably interrupted due to a timeout
+                                        continue;
+                                    }
+                                }
+                                return Err(err);
+                            }
+                        }
                     }
                 }
                 set_tape_device_state(&setup.drive, &worker.upid().to_string())?;
diff --git a/src/tape/drive/mod.rs b/src/tape/drive/mod.rs
index f72e0b51..5cc81924 100644
--- a/src/tape/drive/mod.rs
+++ b/src/tape/drive/mod.rs
@@ -485,8 +485,14 @@ pub fn lock_tape_device(
     drive: &str,
 ) -> Result<DeviceLockGuard, Error> {
     let path = tape_device_path(config, drive)?;
-    lock_device_path(&path)
-        .map_err(|err| format_err!("unable to lock drive '{}' - {}", drive, err))
+    match lock_device_path(&path) {
+        Ok(lock) => Ok(lock),
+        // we do not change interrupted errors, so that the caller can catch that
+        Err(err) => match err.downcast_ref::<std::io::Error>() {
+            Some(e) if e.kind() == std::io::ErrorKind::Interrupted => Err(err),
+            _ => bail!("unable to lock drive '{}' - {}", drive, err),
+        }
+    }
 }
 
 /// Writes the given state for the specified drive
-- 
2.20.1