From: Shannon Sterz <s.sterz@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH proxmox-backup v9 1/4] datastore/api/backup: prepare for fix of #3935 by adding lock helpers
Date: Wed, 26 Mar 2025 12:44:11 +0100
Message-ID: <20250326114414.132478-2-s.sterz@proxmox.com>
In-Reply-To: <20250326114414.132478-1-s.sterz@proxmox.com>

to avoid duplicate code, add helpers for locking groups and snapshots
to the BackupGroup and BackupDir types respectively, and refactor
existing code to use them.

this also adapts error handling by adding relevant context at each
locking helper call site. otherwise, we might lose valuable
information that is useful for debugging. note, however, that anything
relying on the exact wording of the previous error messages will break.
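
for illustration, the new helpers end up being used roughly like this
(a minimal sketch: the lock and context calls mirror the hunks below,
while the surrounding function and its name are made up):

    use anyhow::{Context, Error};
    use pbs_datastore::backup_info::BackupDir;

    fn destroy_snapshot(snapshot: &BackupDir) -> Result<(), Error> {
        // take the exclusive lock and attach call-site context, so a
        // failed lock still reports which snapshot was affected and why
        let _guard = snapshot
            .lock()
            .with_context(|| format!("while destroying snapshot '{snapshot:?}'"))?;
        // ... remove the snapshot directory while holding the guard ...
        Ok(())
    }

the same pattern applies to BackupGroup::lock() and
BackupDir::lock_shared().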

Signed-off-by: Shannon Sterz <s.sterz@proxmox.com>
---
 pbs-datastore/src/backup_info.rs     | 45 ++++++++++++---
 pbs-datastore/src/datastore.rs       | 82 +++++++++++++---------------
 pbs-datastore/src/snapshot_reader.rs |  9 ++-
 src/api2/backup/environment.rs       | 16 +++---
 src/api2/backup/mod.rs               | 13 ++---
 src/api2/reader/mod.rs               | 11 ++--
 src/backup/verify.rs                 | 12 ++--
 src/server/sync.rs                   | 13 ++---
 8 files changed, 106 insertions(+), 95 deletions(-)

diff --git a/pbs-datastore/src/backup_info.rs b/pbs-datastore/src/backup_info.rs
index ef2b982c..c7f7ed53 100644
--- a/pbs-datastore/src/backup_info.rs
+++ b/pbs-datastore/src/backup_info.rs
@@ -3,9 +3,11 @@ use std::os::unix::io::RawFd;
 use std::path::PathBuf;
 use std::sync::Arc;
 
-use anyhow::{bail, format_err, Error};
+use anyhow::{bail, format_err, Context, Error};
 
-use proxmox_sys::fs::{lock_dir_noblock, replace_file, CreateOptions};
+use proxmox_sys::fs::{
+    lock_dir_noblock, lock_dir_noblock_shared, replace_file, CreateOptions, DirLockGuard,
+};
 
 use pbs_api_types::{
     Authid, BackupGroupDeleteStats, BackupNamespace, BackupType, GroupFilter, VerifyState,
@@ -199,9 +201,10 @@ impl BackupGroup {
     /// Returns `BackupGroupDeleteStats`, containing the number of deleted snapshots
     /// and number of protected snaphsots, which therefore were not removed.
     pub fn destroy(&self) -> Result<BackupGroupDeleteStats, Error> {
+        let _guard = self
+            .lock()
+            .with_context(|| format!("while destroying group '{self:?}'"))?;
         let path = self.full_group_path();
-        let _guard =
-            proxmox_sys::fs::lock_dir_noblock(&path, "backup group", "possible running backup")?;
 
         log::info!("removing backup group {:?}", path);
         let mut delete_stats = BackupGroupDeleteStats::default();
@@ -237,6 +240,15 @@ impl BackupGroup {
         self.store
             .set_owner(&self.ns, self.as_ref(), auth_id, force)
     }
+
+    /// Lock a group exclusively
+    pub fn lock(&self) -> Result<DirLockGuard, Error> {
+        lock_dir_noblock(
+            &self.full_group_path(),
+            "backup group",
+            "possible running backup",
+        )
+    }
 }
 
 impl AsRef<pbs_api_types::BackupNamespace> for BackupGroup {
@@ -442,15 +454,33 @@ impl BackupDir {
             .map_err(|err| format_err!("unable to acquire manifest lock {:?} - {}", &path, err))
     }
 
+    /// Lock this snapshot exclusively
+    pub fn lock(&self) -> Result<DirLockGuard, Error> {
+        lock_dir_noblock(
+            &self.full_path(),
+            "snapshot",
+            "backup is running or snapshot is in use",
+        )
+    }
+
+    /// Acquire a shared lock on this snapshot
+    pub fn lock_shared(&self) -> Result<DirLockGuard, Error> {
+        lock_dir_noblock_shared(
+            &self.full_path(),
+            "snapshot",
+            "backup is running or snapshot is in use, could not acquire shared lock",
+        )
+    }
+
     /// Destroy the whole snapshot, bails if it's protected
     ///
     /// Setting `force` to true skips locking and thus ignores if the backup is currently in use.
     pub fn destroy(&self, force: bool) -> Result<(), Error> {
-        let full_path = self.full_path();
-
         let (_guard, _manifest_guard);
         if !force {
-            _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?;
+            _guard = self
+                .lock()
+                .with_context(|| format!("while destroying snapshot '{self:?}'"))?;
             _manifest_guard = self.lock_manifest()?;
         }
 
@@ -458,6 +488,7 @@ impl BackupDir {
             bail!("cannot remove protected snapshot"); // use special error type?
         }
 
+        let full_path = self.full_path();
         log::info!("removing backup snapshot {:?}", full_path);
         std::fs::remove_dir_all(&full_path).map_err(|err| {
             format_err!("removing backup snapshot {:?} failed - {}", full_path, err,)
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index a6a91ca7..1cbd0038 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -5,7 +5,7 @@ use std::os::unix::io::AsRawFd;
 use std::path::{Path, PathBuf};
 use std::sync::{Arc, LazyLock, Mutex};
 
-use anyhow::{bail, format_err, Error};
+use anyhow::{bail, format_err, Context, Error};
 use nix::unistd::{unlinkat, UnlinkatFlags};
 use tracing::{info, warn};
 
@@ -13,8 +13,8 @@ use proxmox_human_byte::HumanByte;
 use proxmox_schema::ApiType;
 
 use proxmox_sys::error::SysError;
+use proxmox_sys::fs::DirLockGuard;
 use proxmox_sys::fs::{file_read_optional_string, replace_file, CreateOptions};
-use proxmox_sys::fs::{lock_dir_noblock, DirLockGuard};
 use proxmox_sys::linux::procfs::MountInfo;
 use proxmox_sys::process_locker::ProcessLockSharedGuard;
 use proxmox_worker_task::WorkerTaskContext;
@@ -774,41 +774,35 @@ impl DataStore {
     ///
     /// This also acquires an exclusive lock on the directory and returns the lock guard.
     pub fn create_locked_backup_group(
-        &self,
+        self: &Arc<Self>,
         ns: &BackupNamespace,
         backup_group: &pbs_api_types::BackupGroup,
         auth_id: &Authid,
     ) -> Result<(Authid, DirLockGuard), Error> {
-        // create intermediate path first:
-        let mut full_path = self.base_path();
-        for ns in ns.components() {
-            full_path.push("ns");
-            full_path.push(ns);
-        }
-        full_path.push(backup_group.ty.as_str());
-        std::fs::create_dir_all(&full_path)?;
+        let backup_group = self.backup_group(ns.clone(), backup_group.clone());
 
-        full_path.push(&backup_group.id);
+        // create intermediate path first
+        let full_path = backup_group.full_group_path();
 
-        // create the last component now
+        std::fs::create_dir_all(full_path.parent().ok_or_else(|| {
+            format_err!("could not construct parent path for group {backup_group:?}")
+        })?)?;
+
+        // now create the group, this allows us to check whether it existed before
         match std::fs::create_dir(&full_path) {
             Ok(_) => {
-                let guard = lock_dir_noblock(
-                    &full_path,
-                    "backup group",
-                    "another backup is already running",
-                )?;
-                self.set_owner(ns, backup_group, auth_id, false)?;
-                let owner = self.get_owner(ns, backup_group)?; // just to be sure
+                let guard = backup_group.lock().with_context(|| {
+                    format!("while creating new locked backup group '{backup_group:?}'")
+                })?;
+                self.set_owner(ns, backup_group.group(), auth_id, false)?;
+                let owner = self.get_owner(ns, backup_group.group())?; // just to be sure
                 Ok((owner, guard))
             }
             Err(ref err) if err.kind() == io::ErrorKind::AlreadyExists => {
-                let guard = lock_dir_noblock(
-                    &full_path,
-                    "backup group",
-                    "another backup is already running",
-                )?;
-                let owner = self.get_owner(ns, backup_group)?; // just to be sure
+                let guard = backup_group.lock().with_context(|| {
+                    format!("while creating locked backup group '{backup_group:?}'")
+                })?;
+                let owner = self.get_owner(ns, backup_group.group())?; // just to be sure
                 Ok((owner, guard))
             }
             Err(err) => bail!("unable to create backup group {:?} - {}", full_path, err),
@@ -819,29 +813,25 @@ impl DataStore {
     ///
     /// The BackupGroup directory needs to exist.
     pub fn create_locked_backup_dir(
-        &self,
+        self: &Arc<Self>,
         ns: &BackupNamespace,
         backup_dir: &pbs_api_types::BackupDir,
     ) -> Result<(PathBuf, bool, DirLockGuard), Error> {
-        let full_path = self.snapshot_path(ns, backup_dir);
-        let relative_path = full_path.strip_prefix(self.base_path()).map_err(|err| {
-            format_err!(
-                "failed to produce correct path for backup {backup_dir} in namespace {ns}: {err}"
-            )
-        })?;
+        let backup_dir = self.backup_dir(ns.clone(), backup_dir.clone())?;
+        let relative_path = backup_dir.relative_path();
 
-        let lock = || {
-            lock_dir_noblock(
-                &full_path,
-                "snapshot",
-                "internal error - tried creating snapshot that's already in use",
-            )
-        };
-
-        match std::fs::create_dir(&full_path) {
-            Ok(_) => Ok((relative_path.to_owned(), true, lock()?)),
+        match std::fs::create_dir(backup_dir.full_path()) {
+            Ok(_) => {
+                let guard = backup_dir.lock().with_context(|| {
+                    format!("while creating new locked snapshot '{backup_dir:?}'")
+                })?;
+                Ok((relative_path, true, guard))
+            }
             Err(ref e) if e.kind() == io::ErrorKind::AlreadyExists => {
-                Ok((relative_path.to_owned(), false, lock()?))
+                let guard = backup_dir
+                    .lock()
+                    .with_context(|| format!("while creating locked snapshot '{backup_dir:?}'"))?;
+                Ok((relative_path, false, guard))
             }
             Err(e) => Err(e.into()),
         }
@@ -1305,7 +1295,9 @@ impl DataStore {
             bail!("snapshot {} does not exist!", backup_dir.dir());
         }
 
-        let _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?;
+        let _guard = backup_dir.lock().with_context(|| {
+            format!("while updating the protection status of snapshot '{backup_dir:?}'")
+        })?;
 
         let protected_path = backup_dir.protected_file();
         if protection {
diff --git a/pbs-datastore/src/snapshot_reader.rs b/pbs-datastore/src/snapshot_reader.rs
index 99ae1180..edff19ef 100644
--- a/pbs-datastore/src/snapshot_reader.rs
+++ b/pbs-datastore/src/snapshot_reader.rs
@@ -4,11 +4,9 @@ use std::path::Path;
 use std::rc::Rc;
 use std::sync::Arc;
 
-use anyhow::{bail, Error};
+use anyhow::{bail, Context, Error};
 use nix::dir::Dir;
 
-use proxmox_sys::fs::lock_dir_noblock_shared;
-
 use pbs_api_types::{
     print_store_and_ns, ArchiveType, BackupNamespace, Operation, CLIENT_LOG_BLOB_NAME,
     MANIFEST_BLOB_NAME,
@@ -48,8 +46,9 @@ impl SnapshotReader {
             bail!("snapshot {} does not exist!", snapshot.dir());
         }
 
-        let locked_dir =
-            lock_dir_noblock_shared(&snapshot_path, "snapshot", "locked by another operation")?;
+        let locked_dir = snapshot
+            .lock_shared()
+            .with_context(|| format!("while trying to read snapshot '{snapshot:?}'"))?;
 
         let datastore_name = datastore.name().to_string();
         let manifest = match snapshot.load_manifest() {
diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs
index 99d885e2..7943b576 100644
--- a/src/api2/backup/environment.rs
+++ b/src/api2/backup/environment.rs
@@ -1,4 +1,4 @@
-use anyhow::{bail, format_err, Error};
+use anyhow::{bail, format_err, Context, Error};
 use nix::dir::Dir;
 use std::collections::HashMap;
 use std::sync::{Arc, Mutex};
@@ -8,7 +8,7 @@ use ::serde::Serialize;
 use serde_json::{json, Value};
 
 use proxmox_router::{RpcEnvironment, RpcEnvironmentType};
-use proxmox_sys::fs::{lock_dir_noblock_shared, replace_file, CreateOptions};
+use proxmox_sys::fs::{replace_file, CreateOptions};
 
 use pbs_api_types::Authid;
 use pbs_datastore::backup_info::{BackupDir, BackupInfo};
@@ -645,12 +645,12 @@ impl BackupEnvironment {
 
         // Downgrade to shared lock, the backup itself is finished
         drop(excl_snap_lock);
-        let snap_lock = lock_dir_noblock_shared(
-            &self.backup_dir.full_path(),
-            "snapshot",
-            "snapshot is already locked by another operation",
-        )?;
-
+        let snap_lock = self.backup_dir.lock_shared().with_context(|| {
+            format!(
+                "while trying to verify snapshot '{:?}' after completion",
+                self.backup_dir
+            )
+        })?;
         let worker_id = format!(
             "{}:{}/{}/{:08X}",
             self.datastore.name(),
diff --git a/src/api2/backup/mod.rs b/src/api2/backup/mod.rs
index efc97a1f..344c80d4 100644
--- a/src/api2/backup/mod.rs
+++ b/src/api2/backup/mod.rs
@@ -1,6 +1,6 @@
 //! Backup protocol (HTTP2 upgrade)
 
-use anyhow::{bail, format_err, Error};
+use anyhow::{bail, format_err, Context, Error};
 use futures::*;
 use hex::FromHex;
 use hyper::header::{HeaderValue, CONNECTION, UPGRADE};
@@ -17,7 +17,6 @@ use proxmox_router::{
 };
 use proxmox_schema::*;
 use proxmox_sortable_macro::sortable;
-use proxmox_sys::fs::lock_dir_noblock_shared;
 
 use pbs_api_types::{
     ArchiveType, Authid, BackupNamespace, BackupType, Operation, VerifyState,
@@ -186,12 +185,10 @@ fn upgrade_to_backup_protocol(
             }
 
             // lock last snapshot to prevent forgetting/pruning it during backup
-            let full_path = last.backup_dir.full_path();
-            Some(lock_dir_noblock_shared(
-                &full_path,
-                "snapshot",
-                "base snapshot is already locked by another operation",
-            )?)
+            let guard = last.backup_dir
+                .lock_shared()
+                .with_context(|| format!("while locking last snapshot during backup '{last:?}'"))?;
+            Some(guard)
         } else {
             None
         };
diff --git a/src/api2/reader/mod.rs b/src/api2/reader/mod.rs
index 1713f182..cc791299 100644
--- a/src/api2/reader/mod.rs
+++ b/src/api2/reader/mod.rs
@@ -1,6 +1,6 @@
 //! Backup reader/restore protocol (HTTP2 upgrade)
 
-use anyhow::{bail, format_err, Error};
+use anyhow::{bail, format_err, Context, Error};
 use futures::*;
 use hex::FromHex;
 use hyper::header::{self, HeaderValue, CONNECTION, UPGRADE};
@@ -16,7 +16,6 @@ use proxmox_router::{
 };
 use proxmox_schema::{BooleanSchema, ObjectSchema};
 use proxmox_sortable_macro::sortable;
-use proxmox_sys::fs::lock_dir_noblock_shared;
 
 use pbs_api_types::{
     ArchiveType, Authid, Operation, BACKUP_ARCHIVE_NAME_SCHEMA, BACKUP_ID_SCHEMA,
@@ -129,11 +128,9 @@ fn upgrade_to_backup_reader_protocol(
             bail!("snapshot {} does not exist.", backup_dir.dir());
         }
 
-        let _guard = lock_dir_noblock_shared(
-            &backup_dir.full_path(),
-            "snapshot",
-            "locked by another operation",
-        )?;
+        let _guard = backup_dir
+            .lock_shared()
+            .with_context(|| format!("while reading snapshot '{backup_dir:?}'"))?;
 
         let path = datastore.base_path();
 
diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index f652c262..10a64fda 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -4,10 +4,9 @@ use std::sync::{Arc, Mutex};
 use std::time::Instant;
 
 use anyhow::{bail, Error};
-use nix::dir::Dir;
+use proxmox_sys::fs::DirLockGuard;
 use tracing::{error, info, warn};
 
-use proxmox_sys::fs::lock_dir_noblock_shared;
 use proxmox_worker_task::WorkerTaskContext;
 
 use pbs_api_types::{
@@ -307,11 +306,8 @@ pub fn verify_backup_dir(
         return Ok(true);
     }
 
-    let snap_lock = lock_dir_noblock_shared(
-        &backup_dir.full_path(),
-        "snapshot",
-        "locked by another operation",
-    );
+    let snap_lock = backup_dir.lock_shared();
+
     match snap_lock {
         Ok(snap_lock) => {
             verify_backup_dir_with_lock(verify_worker, backup_dir, upid, filter, snap_lock)
@@ -334,7 +330,7 @@ pub fn verify_backup_dir_with_lock(
     backup_dir: &BackupDir,
     upid: UPID,
     filter: Option<&dyn Fn(&BackupManifest) -> bool>,
-    _snap_lock: Dir,
+    _snap_lock: DirLockGuard,
 ) -> Result<bool, Error> {
     let datastore_name = verify_worker.datastore.name();
     let backup_dir_name = backup_dir.dir();
diff --git a/src/server/sync.rs b/src/server/sync.rs
index 4dd46c5a..b6852aff 100644
--- a/src/server/sync.rs
+++ b/src/server/sync.rs
@@ -10,6 +10,7 @@ use std::time::Duration;
 use anyhow::{bail, format_err, Context, Error};
 use futures::{future::FutureExt, select};
 use hyper::http::StatusCode;
+use proxmox_sys::fs::DirLockGuard;
 use serde_json::json;
 use tracing::{info, warn};
 
@@ -105,7 +106,7 @@ pub(crate) struct RemoteSourceReader {
 }
 
 pub(crate) struct LocalSourceReader {
-    pub(crate) _dir_lock: Arc<Mutex<proxmox_sys::fs::DirLockGuard>>,
+    pub(crate) _dir_lock: Arc<Mutex<DirLockGuard>>,
     pub(crate) path: PathBuf,
     pub(crate) datastore: Arc<DataStore>,
 }
@@ -478,13 +479,11 @@ impl SyncSource for LocalSource {
         dir: &BackupDir,
     ) -> Result<Arc<dyn SyncSourceReader>, Error> {
         let dir = self.store.backup_dir(ns.clone(), dir.clone())?;
-        let dir_lock = proxmox_sys::fs::lock_dir_noblock_shared(
-            &dir.full_path(),
-            "snapshot",
-            "locked by another operation",
-        )?;
+        let guard = dir
+            .lock()
+            .with_context(|| format!("while reading snapshot '{dir:?}' for a sync job"))?;
         Ok(Arc::new(LocalSourceReader {
-            _dir_lock: Arc::new(Mutex::new(dir_lock)),
+            _dir_lock: Arc::new(Mutex::new(guard)),
             path: dir.full_path(),
             datastore: dir.datastore().clone(),
         }))
-- 
2.39.5


