public inbox for pbs-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Shannon Sterz <s.sterz@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH proxmox-backup 2/4] fix #3935: datastore/api/backup: move datastore locking to '/run'
Date: Mon, 10 Mar 2025 15:36:05 +0100	[thread overview]
Message-ID: <20250310143607.298339-3-s.sterz@proxmox.com> (raw)
In-Reply-To: <20250310143607.298339-1-s.sterz@proxmox.com>

to avoid issues when removing a group or snapshot directory where two
threads hold a lock to the same directory, move locking to the tmpfs
backed '/run' directory. also adds double stat'ing to make it possible
to remove locks without certain race condition issues.

Signed-off-by: Shannon Sterz <s.sterz@proxmox.com>
---
 pbs-config/src/lib.rs                |  13 ++-
 pbs-datastore/Cargo.toml             |   1 +
 pbs-datastore/src/backup_info.rs     | 156 ++++++++++++++++++++++-----
 pbs-datastore/src/datastore.rs       |   6 +-
 pbs-datastore/src/snapshot_reader.rs |  17 ++-
 src/api2/backup/environment.rs       |   5 +-
 src/backup/verify.rs                 |   4 +-
 src/server/sync.rs                   |   4 +-
 8 files changed, 166 insertions(+), 40 deletions(-)

diff --git a/pbs-config/src/lib.rs b/pbs-config/src/lib.rs
index 20a8238d..1b4b9989 100644
--- a/pbs-config/src/lib.rs
+++ b/pbs-config/src/lib.rs
@@ -22,6 +22,7 @@ pub use config_version_cache::ConfigVersionCache;
 
 use anyhow::{format_err, Error};
 use nix::unistd::{Gid, Group, Uid, User};
+use std::os::unix::prelude::AsRawFd;
 
 pub use pbs_buildcfg::{BACKUP_GROUP_NAME, BACKUP_USER_NAME};
 
@@ -46,13 +47,19 @@ pub fn backup_group() -> Result<nix::unistd::Group, Error> {
 }
 
 pub struct BackupLockGuard {
-    _file: Option<std::fs::File>,
+    file: Option<std::fs::File>,
+}
+
+impl AsRawFd for BackupLockGuard {
+    fn as_raw_fd(&self) -> i32 {
+        self.file.as_ref().map_or(-1, |f| f.as_raw_fd())
+    }
 }
 
 #[doc(hidden)]
 /// Note: do not use for production code, this is only intended for tests
 pub unsafe fn create_mocked_lock() -> BackupLockGuard {
-    BackupLockGuard { _file: None }
+    BackupLockGuard { file: None }
 }
 
 /// Open or create a lock file owned by user "backup" and lock it.
@@ -76,7 +83,7 @@ pub fn open_backup_lockfile<P: AsRef<std::path::Path>>(
     let timeout = timeout.unwrap_or(std::time::Duration::new(10, 0));
 
     let file = proxmox_sys::fs::open_file_locked(&path, timeout, exclusive, options)?;
-    Ok(BackupLockGuard { _file: Some(file) })
+    Ok(BackupLockGuard { file: Some(file) })
 }
 
 /// Atomically write data to file owned by "root:backup" with permission "0640"
diff --git a/pbs-datastore/Cargo.toml b/pbs-datastore/Cargo.toml
index 4ebc5fdc..7623adc2 100644
--- a/pbs-datastore/Cargo.toml
+++ b/pbs-datastore/Cargo.toml
@@ -35,6 +35,7 @@ proxmox-lang.workspace=true
 proxmox-schema = { workspace = true, features = [ "api-macro" ] }
 proxmox-serde = { workspace = true, features = [ "serde_json" ] }
 proxmox-sys.workspace = true
+proxmox-systemd.workspace = true
 proxmox-time.workspace = true
 proxmox-uuid.workspace = true
 proxmox-worker-task.workspace = true
diff --git a/pbs-datastore/src/backup_info.rs b/pbs-datastore/src/backup_info.rs
index 42ce74e9..5496e053 100644
--- a/pbs-datastore/src/backup_info.rs
+++ b/pbs-datastore/src/backup_info.rs
@@ -1,13 +1,15 @@
 use std::fmt;
-use std::os::unix::io::RawFd;
+use std::os::unix::io::{AsRawFd, RawFd};
+use std::os::unix::prelude::OsStrExt;
+use std::path::Path;
 use std::path::PathBuf;
 use std::sync::Arc;
+use std::time::Duration;
 
 use anyhow::{bail, format_err, Error};
 
-use proxmox_sys::fs::{
-    lock_dir_noblock, lock_dir_noblock_shared, replace_file, CreateOptions, DirLockGuard,
-};
+use proxmox_sys::fs::{replace_file, CreateOptions};
+use proxmox_systemd::escape_unit;
 
 use pbs_api_types::{
     Authid, BackupGroupDeleteStats, BackupNamespace, BackupType, GroupFilter, VerifyState,
@@ -18,6 +20,8 @@ use pbs_config::{open_backup_lockfile, BackupLockGuard};
 use crate::manifest::{BackupManifest, MANIFEST_LOCK_NAME};
 use crate::{DataBlob, DataStore};
 
+pub const DATASTORE_LOCKS_DIR: &str = "/run/proxmox-backup/locks";
+
 /// BackupGroup is a directory containing a list of BackupDir
 #[derive(Clone)]
 pub struct BackupGroup {
@@ -223,6 +227,7 @@ impl BackupGroup {
             delete_stats.increment_removed_groups();
         }
 
+        let _ = std::fs::remove_file(self.lock_path());
         Ok(delete_stats)
     }
 
@@ -239,13 +244,25 @@ impl BackupGroup {
             .set_owner(&self.ns, self.as_ref(), auth_id, force)
     }
 
-    /// Lock a group exclusively
-    pub fn lock(&self) -> Result<DirLockGuard, Error> {
-        lock_dir_noblock(
-            &self.full_group_path(),
-            "backup group",
-            "possible running backup",
-        )
+    /// Returns a file name for locking a group.
+    ///
+    /// The lock file will be located in:
+    /// `${DATASTORE_LOCKS_DIR}/${datastore name}/${lock_file_path_helper(rpath)}`
+    /// where `rpath` is the relative path of the group.
+    fn lock_path(&self) -> PathBuf {
+        let path = Path::new(DATASTORE_LOCKS_DIR).join(self.store.name());
+
+        let rpath = Path::new(self.group.ty.as_str()).join(&self.group.id);
+
+        path.join(lock_file_path_helper(&self.ns, rpath))
+    }
+
+    /// Locks a group exclusively.
+    pub fn lock(&self) -> Result<BackupLockGuard, Error> {
+        lock_helper(self.store.name(), &self.lock_path(), |p| {
+            open_backup_lockfile(p, Some(Duration::from_secs(0)), true)
+                .map_err(|err| format_err!("unable to acquire backup group lock {p:?} - {err}"))
+        })
     }
 }
 
@@ -452,22 +469,35 @@ impl BackupDir {
             .map_err(|err| format_err!("unable to acquire manifest lock {:?} - {}", &path, err))
     }
 
-    /// Lock this snapshot exclusively
-    pub fn lock(&self) -> Result<DirLockGuard, Error> {
-        lock_dir_noblock(
-            &self.full_path(),
-            "snapshot",
-            "backup is running or snapshot is in use",
-        )
+    /// Returns a file name for locking a snapshot.
+    ///
+    /// The lock file will be located in:
+    /// `${DATASTORE_LOCKS_DIR}/${datastore name}/${lock_file_path_helper(rpath)}`
+    /// where `rpath` is the relative path of the snapshot.
+    fn lock_path(&self) -> PathBuf {
+        let path = Path::new(DATASTORE_LOCKS_DIR).join(self.store.name());
+
+        let rpath = Path::new(self.dir.group.ty.as_str())
+            .join(&self.dir.group.id)
+            .join(&self.backup_time_string);
+
+        path.join(lock_file_path_helper(&self.ns, rpath))
     }
 
-    /// Acquire a shared lock on this snapshot
-    pub fn lock_shared(&self) -> Result<DirLockGuard, Error> {
-        lock_dir_noblock_shared(
-            &self.full_path(),
-            "snapshot",
-            "backup is running or snapshot is in use, could not acquire shared lock",
-        )
+    /// Locks a snapshot exclusively.
+    pub fn lock(&self) -> Result<BackupLockGuard, Error> {
+        lock_helper(self.store.name(), &self.lock_path(), |p| {
+            open_backup_lockfile(p, Some(Duration::from_secs(0)), true)
+                .map_err(|err| format_err!("unable to acquire snapshot lock {p:?} - {err}"))
+        })
+    }
+
+    /// Acquires a shared lock on a snapshot.
+    pub fn lock_shared(&self) -> Result<BackupLockGuard, Error> {
+        lock_helper(self.store.name(), &self.lock_path(), |p| {
+            open_backup_lockfile(p, Some(Duration::from_secs(0)), false)
+                .map_err(|err| format_err!("unable to acquire shared snapshot lock {p:?} - {err}"))
+        })
     }
 
     /// Destroy the whole snapshot, bails if it's protected
@@ -490,11 +520,13 @@ impl BackupDir {
             format_err!("removing backup snapshot {:?} failed - {}", full_path, err,)
         })?;
 
-        // the manifest doesn't exist anymore, no need to keep the lock (already done by guard?)
+        // remove no longer needed lock files
         if let Ok(path) = self.manifest_lock_path() {
             let _ = std::fs::remove_file(path); // ignore errors
         }
 
+        let _ = std::fs::remove_file(self.lock_path()); // ignore errors
+
         Ok(())
     }
 
@@ -688,3 +720,75 @@ fn list_backup_files<P: ?Sized + nix::NixPath>(
 
     Ok(files)
 }
+
+/// Creates a path to a lock file depending on the relative path of an object (snapshot, group,
+/// manifest) in a datastore. First all namespaces will be concatenated with a colon (ns-folder).
+/// Then the actual file name will depend on the length of the relative path without namespaces. If
+/// it is shorter than 255 characters in its unit encoded form, than the unit encoded form will be
+/// used directly. If not, the file name will consist of the first 80 character, the last 80
+/// characters and the hash of the unit encoded relative path without namespaces. It will also be
+/// placed into a "hashed" subfolder in the namespace folder.
+///
+/// Examples:
+///
+/// - vm-100
+/// - vm-100-2022\x2d05\x2d02T08\x3a11\x3a33Z
+/// - ns1:ns2:ns3:ns4:ns5:ns6:ns7/vm-100-2022\x2d05\x2d02T08\x3a11\x3a33Z
+///
+/// A "hashed" lock file would look like this:
+/// - ns1:ns2:ns3/hashed/$first_eighty...$last_eighty-$hash
+fn lock_file_path_helper(ns: &BackupNamespace, path: PathBuf) -> PathBuf {
+    let to_return = PathBuf::from(
+        ns.components()
+            .map(String::from)
+            .reduce(|acc, n| format!("{acc}:{n}"))
+            .unwrap_or_default(),
+    );
+
+    let path_bytes = path.as_os_str().as_bytes();
+
+    let enc = escape_unit(path_bytes, true);
+
+    if enc.len() < 255 {
+        return to_return.join(enc);
+    }
+
+    let to_return = to_return.join("hashed");
+
+    let first_eigthy = &enc[..80];
+    let last_eighty = &enc[enc.len() - 80..];
+    let hash = hex::encode(openssl::sha::sha256(path_bytes));
+
+    to_return.join(format!("{first_eigthy}...{last_eighty}-{hash}"))
+}
+
+/// Helps implement the double stat'ing procedure. It avoids certain race conditions upon lock
+/// deletion.
+///
+/// It also creates the base directory for lock files.
+fn lock_helper<F>(
+    store_name: &str,
+    path: &std::path::Path,
+    lock_fn: F,
+) -> Result<BackupLockGuard, Error>
+where
+    F: Fn(&std::path::Path) -> Result<BackupLockGuard, Error>,
+{
+    let mut lock_dir = Path::new(DATASTORE_LOCKS_DIR).join(store_name);
+
+    if let Some(parent) = path.parent() {
+        lock_dir = lock_dir.join(parent);
+    };
+
+    std::fs::create_dir_all(&lock_dir)?;
+
+    let lock = lock_fn(path)?;
+
+    let stat = nix::sys::stat::fstat(lock.as_raw_fd())?.st_ino;
+
+    if nix::sys::stat::stat(path).map_or(true, |st| stat != st.st_ino) {
+        bail!("could not acquire lock, another thread modified the lock file");
+    }
+
+    Ok(lock)
+}
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index b3804b5a..15377a8a 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -13,7 +13,6 @@ use proxmox_human_byte::HumanByte;
 use proxmox_schema::ApiType;
 
 use proxmox_sys::error::SysError;
-use proxmox_sys::fs::DirLockGuard;
 use proxmox_sys::fs::{file_read_optional_string, replace_file, CreateOptions};
 use proxmox_sys::linux::procfs::MountInfo;
 use proxmox_sys::process_locker::ProcessLockSharedGuard;
@@ -24,6 +23,7 @@ use pbs_api_types::{
     DataStoreConfig, DatastoreFSyncLevel, DatastoreTuning, GarbageCollectionStatus,
     MaintenanceMode, MaintenanceType, Operation, UPID,
 };
+use pbs_config::BackupLockGuard;
 
 use crate::backup_info::{BackupDir, BackupGroup};
 use crate::chunk_store::ChunkStore;
@@ -778,7 +778,7 @@ impl DataStore {
         ns: &BackupNamespace,
         backup_group: &pbs_api_types::BackupGroup,
         auth_id: &Authid,
-    ) -> Result<(Authid, DirLockGuard), Error> {
+    ) -> Result<(Authid, BackupLockGuard), Error> {
         let backup_group = self.backup_group(ns.clone(), backup_group.clone());
 
         // create intermediate path first
@@ -812,7 +812,7 @@ impl DataStore {
         self: &Arc<Self>,
         ns: &BackupNamespace,
         backup_dir: &pbs_api_types::BackupDir,
-    ) -> Result<(PathBuf, bool, DirLockGuard), Error> {
+    ) -> Result<(PathBuf, bool, BackupLockGuard), Error> {
         let backup_dir = self.backup_dir(ns.clone(), backup_dir.clone())?;
         let relative_path = backup_dir.relative_path();
 
diff --git a/pbs-datastore/src/snapshot_reader.rs b/pbs-datastore/src/snapshot_reader.rs
index 604d35c1..63836e39 100644
--- a/pbs-datastore/src/snapshot_reader.rs
+++ b/pbs-datastore/src/snapshot_reader.rs
@@ -4,8 +4,11 @@ use std::path::Path;
 use std::rc::Rc;
 use std::sync::Arc;
 
-use anyhow::{bail, Error};
+use anyhow::{bail, format_err, Error};
 use nix::dir::Dir;
+use nix::fcntl::OFlag;
+use nix::sys::stat::Mode;
+use pbs_config::BackupLockGuard;
 
 use pbs_api_types::{
     print_store_and_ns, ArchiveType, BackupNamespace, Operation, CLIENT_LOG_BLOB_NAME,
@@ -26,6 +29,10 @@ pub struct SnapshotReader {
     datastore_name: String,
     file_list: Vec<String>,
     locked_dir: Dir,
+
+    // while this is never read, the lock needs to be kept until the
+    // reader is dropped to ensure valid locking semantics
+    _lock: BackupLockGuard,
 }
 
 impl SnapshotReader {
@@ -46,7 +53,12 @@ impl SnapshotReader {
             bail!("snapshot {} does not exist!", snapshot.dir());
         }
 
-        let locked_dir = snapshot.lock_shared()?;
+        let lock = snapshot.lock_shared()?;
+
+        let locked_dir =
+            Dir::open(&snapshot_path, OFlag::O_RDONLY, Mode::empty()).map_err(|err| {
+                format_err!("unable to open snapshot directory {snapshot_path:?} - {err}")
+            })?;
 
         let datastore_name = datastore.name().to_string();
         let manifest = match snapshot.load_manifest() {
@@ -77,6 +89,7 @@ impl SnapshotReader {
             datastore_name,
             file_list,
             locked_dir,
+            _lock: lock,
         })
     }
 
diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs
index 133932b3..e4cd20b6 100644
--- a/src/api2/backup/environment.rs
+++ b/src/api2/backup/environment.rs
@@ -1,5 +1,6 @@
 use anyhow::{bail, format_err, Error};
-use nix::dir::Dir;
+
+use pbs_config::BackupLockGuard;
 use std::collections::HashMap;
 use std::sync::{Arc, Mutex};
 use tracing::info;
@@ -635,7 +636,7 @@ impl BackupEnvironment {
     /// If verify-new is set on the datastore, this will run a new verify task
     /// for the backup. If not, this will return and also drop the passed lock
     /// immediately.
-    pub fn verify_after_complete(&self, excl_snap_lock: Dir) -> Result<(), Error> {
+    pub fn verify_after_complete(&self, excl_snap_lock: BackupLockGuard) -> Result<(), Error> {
         self.ensure_finished()?;
 
         if !self.datastore.verify_new() {
diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index 10a64fda..3d2cba8a 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -1,10 +1,10 @@
+use pbs_config::BackupLockGuard;
 use std::collections::HashSet;
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::{Arc, Mutex};
 use std::time::Instant;
 
 use anyhow::{bail, Error};
-use proxmox_sys::fs::DirLockGuard;
 use tracing::{error, info, warn};
 
 use proxmox_worker_task::WorkerTaskContext;
@@ -330,7 +330,7 @@ pub fn verify_backup_dir_with_lock(
     backup_dir: &BackupDir,
     upid: UPID,
     filter: Option<&dyn Fn(&BackupManifest) -> bool>,
-    _snap_lock: DirLockGuard,
+    _snap_lock: BackupLockGuard,
 ) -> Result<bool, Error> {
     let datastore_name = verify_worker.datastore.name();
     let backup_dir_name = backup_dir.dir();
diff --git a/src/server/sync.rs b/src/server/sync.rs
index abe7316a..da003cdc 100644
--- a/src/server/sync.rs
+++ b/src/server/sync.rs
@@ -10,7 +10,7 @@ use std::time::Duration;
 use anyhow::{bail, format_err, Context, Error};
 use futures::{future::FutureExt, select};
 use hyper::http::StatusCode;
-use proxmox_sys::fs::DirLockGuard;
+use pbs_config::BackupLockGuard;
 use serde_json::json;
 use tracing::{info, warn};
 
@@ -106,7 +106,7 @@ pub(crate) struct RemoteSourceReader {
 }
 
 pub(crate) struct LocalSourceReader {
-    pub(crate) _dir_lock: Arc<Mutex<DirLockGuard>>,
+    pub(crate) _dir_lock: Arc<Mutex<BackupLockGuard>>,
     pub(crate) path: PathBuf,
     pub(crate) datastore: Arc<DataStore>,
 }
-- 
2.39.5



_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel


  parent reply	other threads:[~2025-03-10 14:36 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-10 14:36 [pbs-devel] [PATCH proxmox-backup 0/4] refactor datastore locking to use tmpfs Shannon Sterz
2025-03-10 14:36 ` [pbs-devel] [PATCH proxmox-backup 1/4] datastore/api/backup: prepare for fix of #3935 by adding lock helpers Shannon Sterz
2025-03-10 14:36 ` Shannon Sterz [this message]
2025-03-10 14:36 ` [pbs-devel] [PATCH proxmox-backup 3/4] fix #3935: datastore: move manifest locking to new locking method Shannon Sterz
2025-03-10 14:36 ` [pbs-devel] [PATCH proxmox-backup 4/4] fix: api: avoid race condition in set_backup_owner Shannon Sterz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250310143607.298339-3-s.sterz@proxmox.com \
    --to=s.sterz@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal