From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 7B1591FF13B for ; Wed, 11 Mar 2026 16:14:04 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id AF131302C0; Wed, 11 Mar 2026 16:13:59 +0100 (CET) From: Hannes Laimer To: pbs-devel@lists.proxmox.com Subject: [PATCH proxmox-backup v4 1/7] datastore: add namespace-level locking Date: Wed, 11 Mar 2026 16:13:09 +0100 Message-ID: <20260311151315.133637-2-h.laimer@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260311151315.133637-1-h.laimer@proxmox.com> References: <20260311151315.133637-1-h.laimer@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1773241968112 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.068 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: BRR2S37IE4YJ656M7H3WIKG7JT3ZNG4F X-Message-ID-Hash: BRR2S37IE4YJ656M7H3WIKG7JT3ZNG4F X-MailFrom: h.laimer@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox Backup Server development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Add exclusive/shared namespace locking keyed at /run/proxmox-backup/locks/{store}/{ns}/.ns-lock. 
Operations that read from or write into a namespace hold a shared lock for their duration. Structural operations (move, delete) hold an exclusive lock. The shared lock is hierarchical: locking a/b/c also locks a/b and a, so an exclusive lock on any ancestor blocks all active operations below it. Walking up the ancestor chain costs O(depth), which is bounded by the maximum namespace depth of 8, whereas locking all descendants would be arbitrarily expensive. Backup jobs and pull/push sync acquire the shared lock via create_locked_backup_group and pull_ns/push_namespace respectively. Verify and prune acquire it per snapshot/group and skip gracefully if the lock cannot be taken, since a concurrent move is a transient condition. Signed-off-by: Hannes Laimer --- pbs-datastore/src/backup_info.rs | 92 ++++++++++++++++++++++++++++++++ pbs-datastore/src/datastore.rs | 45 +++++++++++++--- src/api2/admin/namespace.rs | 6 ++- src/api2/backup/environment.rs | 4 ++ src/api2/backup/mod.rs | 14 +++-- src/api2/tape/restore.rs | 9 ++-- src/backup/verify.rs | 19 ++++++- src/server/prune_job.rs | 11 ++++ src/server/pull.rs | 8 ++- src/server/push.rs | 6 +++ 10 files changed, 193 insertions(+), 21 deletions(-) diff --git a/pbs-datastore/src/backup_info.rs b/pbs-datastore/src/backup_info.rs index c33eb307..476daa61 100644 --- a/pbs-datastore/src/backup_info.rs +++ b/pbs-datastore/src/backup_info.rs @@ -937,6 +937,98 @@ fn lock_file_path_helper(ns: &BackupNamespace, path: PathBuf) -> PathBuf { to_return.join(format!("{first_eigthy}...{last_eighty}-{hash}")) } +/// Returns the lock file path for a backup namespace. 
+/// +/// The lock file will be located at: +/// `${DATASTORE_LOCKS_DIR}/${store_name}/${ns_colon_encoded}/.ns-lock` +pub(crate) fn ns_lock_path(store_name: &str, ns: &BackupNamespace) -> PathBuf { + let ns_part = ns + .components() + .map(String::from) + .reduce(|acc, n| format!("{acc}:{n}")) + .unwrap_or_default(); + Path::new(DATASTORE_LOCKS_DIR) + .join(store_name) + .join(ns_part) + .join(".ns-lock") +} + +/// Holds namespace locks acquired for a structural operation or a read/write operation. +/// +/// For exclusive locks the first guard is exclusive (on the namespace itself), and any remaining +/// guards are shared (on ancestor namespaces). For shared locks all guards are shared. +pub struct NamespaceLockGuard { + _guards: Vec, +} + +fn lock_namespace_exclusive_single( + store_name: &str, + ns: &BackupNamespace, +) -> Result { + let path = ns_lock_path(store_name, ns); + lock_helper(store_name, &path, |p| { + open_backup_lockfile(p, Some(Duration::from_secs(0)), true) + .with_context(|| format!("unable to acquire exclusive namespace lock for '{ns}'")) + }) +} + +fn lock_namespace_shared_single( + store_name: &str, + ns: &BackupNamespace, +) -> Result { + let path = ns_lock_path(store_name, ns); + lock_helper(store_name, &path, |p| { + open_backup_lockfile(p, Some(Duration::from_secs(0)), false) + .with_context(|| format!("unable to acquire shared namespace lock for '{ns}'")) + }) +} + +/// Acquires an exclusive lock on `ns` and shared locks on all its non-root ancestors. +/// +/// Used by operations that structurally modify a namespace (e.g. move). The shared ancestor locks +/// ensure that a concurrent structural operation on any ancestor namespace blocks until this one +/// completes, and vice versa - mirroring the hierarchical behavior of `lock_namespace_shared`. +pub(crate) fn lock_namespace( + store_name: &str, + ns: &BackupNamespace, +) -> Result { + // Acquire the exclusive lock on ns first, then shared locks on ancestors. 
+ // Order matters: taking exclusive before shared avoids a scenario where we hold shared on + // an ancestor and then block waiting for exclusive on ns (which another holder of that + // ancestor's shared lock might be waiting to promote - not currently possible but fragile). + let exclusive = lock_namespace_exclusive_single(store_name, ns)?; + let mut guards = vec![exclusive]; + let mut current = ns.clone(); + while !current.is_root() { + current = current.parent(); + if !current.is_root() { + guards.push(lock_namespace_shared_single(store_name, ¤t)?); + } + } + Ok(NamespaceLockGuard { _guards: guards }) +} + +/// Acquires shared locks on a namespace and all its non-root ancestors. +/// +/// Held by operations that read from or write into a namespace (backup, sync, verify, prune), +/// preventing concurrent exclusive operations (e.g. move) on the namespace itself or any ancestor +/// from proceeding while this guard is alive. +/// +/// Locking up the ancestor chain (bounded by max namespace depth) rather than down the subtree +/// keeps the cost O(depth) regardless of how wide the namespace tree is. +pub(crate) fn lock_namespace_shared( + store_name: &str, + ns: &BackupNamespace, +) -> Result { + let mut guards = Vec::new(); + let mut current = ns.clone(); + while !current.is_root() { + guards.push(lock_namespace_shared_single(store_name, ¤t)?); + current = current.parent(); + } + Ok(NamespaceLockGuard { _guards: guards }) +} + /// Helps implement the double stat'ing procedure. It avoids certain race conditions upon lock /// deletion. 
/// diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs index ef378c69..564c44a1 100644 --- a/pbs-datastore/src/datastore.rs +++ b/pbs-datastore/src/datastore.rs @@ -38,7 +38,8 @@ use pbs_config::{BackupLockGuard, ConfigVersionCache}; use proxmox_section_config::SectionConfigData; use crate::backup_info::{ - BackupDir, BackupGroup, BackupInfo, OLD_LOCKING, PROTECTED_MARKER_FILENAME, + lock_namespace, lock_namespace_shared, BackupDir, BackupGroup, BackupInfo, NamespaceLockGuard, + OLD_LOCKING, PROTECTED_MARKER_FILENAME, }; use crate::chunk_store::ChunkStore; use crate::dynamic_index::{DynamicIndexReader, DynamicIndexWriter}; @@ -1123,18 +1124,46 @@ impl DataStore { Ok(()) } - /// Create (if it does not already exists) and lock a backup group + /// Acquires an exclusive lock on a backup namespace and shared locks on all its ancestors. /// - /// And set the owner to 'userid'. If the group already exists, it returns the - /// current owner (instead of setting the owner). + /// Operations that structurally modify a namespace (move, delete) must hold this for their + /// duration to prevent concurrent readers or writers from accessing the namespace while it is + /// being relocated or destroyed. + pub fn lock_namespace( + self: &Arc, + ns: &BackupNamespace, + ) -> Result { + lock_namespace(self.name(), ns) + } + + /// Acquires shared locks on a backup namespace and all its non-root ancestors. /// - /// This also acquires an exclusive lock on the directory and returns the lock guard. + /// Operations that read from or write into a namespace (backup, sync, verify, prune) must hold + /// this for their duration to prevent a concurrent `move_namespace` or `delete_namespace` from + /// relocating or destroying the namespace under them. + pub fn lock_namespace_shared( + self: &Arc, + ns: &BackupNamespace, + ) -> Result { + lock_namespace_shared(self.name(), ns) + } + + /// Create (if it does not already exist) and lock a backup group. 
+ /// + /// Sets the owner to `auth_id`. If the group already exists, returns the current owner. + /// + /// Returns `(owner, ns_lock, group_lock)`. Both locks must be kept alive for the duration of + /// the backup session: the shared namespace lock prevents concurrent namespace moves or + /// deletions, and the exclusive group lock prevents concurrent backups to the same group. pub fn create_locked_backup_group( self: &Arc, ns: &BackupNamespace, backup_group: &pbs_api_types::BackupGroup, auth_id: &Authid, - ) -> Result<(Authid, BackupLockGuard), Error> { + ) -> Result<(Authid, NamespaceLockGuard, BackupLockGuard), Error> { + let ns_guard = lock_namespace_shared(self.name(), ns) + .with_context(|| format!("failed to acquire shared namespace lock for '{ns}'"))?; + let backup_group = self.backup_group(ns.clone(), backup_group.clone()); // create intermediate path first @@ -1155,14 +1184,14 @@ impl DataStore { return Err(err); } let owner = self.get_owner(ns, backup_group.group())?; // just to be sure - Ok((owner, guard)) + Ok((owner, ns_guard, guard)) } Err(ref err) if err.kind() == io::ErrorKind::AlreadyExists => { let guard = backup_group.lock().with_context(|| { format!("while creating locked backup group '{backup_group:?}'") })?; let owner = self.get_owner(ns, backup_group.group())?; // just to be sure - Ok((owner, guard)) + Ok((owner, ns_guard, guard)) } Err(err) => bail!("unable to create backup group {:?} - {}", full_path, err), } diff --git a/src/api2/admin/namespace.rs b/src/api2/admin/namespace.rs index 30e24d8d..ec913001 100644 --- a/src/api2/admin/namespace.rs +++ b/src/api2/admin/namespace.rs @@ -1,4 +1,4 @@ -use anyhow::{bail, Error}; +use anyhow::{bail, Context, Error}; use pbs_config::CachedUserInfo; use proxmox_router::{http_bail, ApiMethod, Permission, Router, RpcEnvironment}; @@ -164,6 +164,10 @@ pub fn delete_namespace( let datastore = DataStore::lookup_datastore(&store, Operation::Write)?; + let _ns_lock = datastore + .lock_namespace(&ns) + 
.with_context(|| format!("failed to lock namespace '{ns}' for deletion"))?; + let (removed_all, stats) = datastore.remove_namespace_recursive(&ns, delete_groups)?; if !removed_all { let err_msg = if delete_groups { diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs index ab623f1f..44483add 100644 --- a/src/api2/backup/environment.rs +++ b/src/api2/backup/environment.rs @@ -1,5 +1,6 @@ use anyhow::{bail, format_err, Context, Error}; use pbs_config::BackupLockGuard; +use pbs_datastore::backup_info::NamespaceLockGuard; use proxmox_sys::process_locker::ProcessLockSharedGuard; use std::collections::HashMap; @@ -100,6 +101,7 @@ struct SharedBackupState { pub struct BackupLockGuards { previous_snapshot: Option, + _namespace: Option, group: Option, snapshot: Option, chunk_store: Option, @@ -108,12 +110,14 @@ pub struct BackupLockGuards { impl BackupLockGuards { pub(crate) fn new( previous_snapshot: Option, + namespace: NamespaceLockGuard, group: BackupLockGuard, snapshot: BackupLockGuard, chunk_store: ProcessLockSharedGuard, ) -> Self { Self { previous_snapshot, + _namespace: Some(namespace), group: Some(group), snapshot: Some(snapshot), chunk_store: Some(chunk_store), diff --git a/src/api2/backup/mod.rs b/src/api2/backup/mod.rs index 6df0d34b..80e70dac 100644 --- a/src/api2/backup/mod.rs +++ b/src/api2/backup/mod.rs @@ -143,8 +143,9 @@ fn upgrade_to_backup_protocol( "backup" }; - // lock backup group to only allow one backup per group at a time - let (owner, group_guard) = datastore.create_locked_backup_group( + // lock backup group to only allow one backup per group at a time, + // also acquires a shared namespace lock to prevent concurrent namespace moves + let (owner, ns_guard, group_guard) = datastore.create_locked_backup_group( backup_group.backup_ns(), backup_group.as_ref(), &auth_id, @@ -215,8 +216,13 @@ fn upgrade_to_backup_protocol( // case of errors. The former is required for immediate subsequent backups (e.g. 
// during a push sync) to be able to lock the group and snapshots. let chunk_store_guard = datastore.try_shared_chunk_store_lock()?; - let backup_lock_guards = - BackupLockGuards::new(last_guard, group_guard, snap_guard, chunk_store_guard); + let backup_lock_guards = BackupLockGuards::new( + last_guard, + ns_guard, + group_guard, + snap_guard, + chunk_store_guard, + ); let mut env = BackupEnvironment::new( env_type, diff --git a/src/api2/tape/restore.rs b/src/api2/tape/restore.rs index 92529a76..1cb85f2d 100644 --- a/src/api2/tape/restore.rs +++ b/src/api2/tape/restore.rs @@ -848,11 +848,8 @@ fn restore_list_worker( Some(restore_owner), )?; - let (owner, _group_lock) = datastore.create_locked_backup_group( - &ns, - backup_dir.as_ref(), - restore_owner, - )?; + let (owner, _ns_guard, _group_lock) = datastore + .create_locked_backup_group(&ns, backup_dir.as_ref(), restore_owner)?; if restore_owner != &owner { bail!( "cannot restore snapshot '{snapshot}' into group '{}', owner check \ @@ -1354,7 +1351,7 @@ fn restore_archive<'a>( auth_id, Some(restore_owner), )?; - let (owner, _group_lock) = datastore.create_locked_backup_group( + let (owner, _ns_guard, _group_lock) = datastore.create_locked_backup_group( &backup_ns, backup_dir.as_ref(), restore_owner, diff --git a/src/backup/verify.rs b/src/backup/verify.rs index f52d7781..ce5d6f69 100644 --- a/src/backup/verify.rs +++ b/src/backup/verify.rs @@ -393,10 +393,27 @@ impl VerifyWorker { return Ok(true); } + let ns_lock = match self.datastore.lock_namespace_shared(backup_dir.backup_ns()) { + Ok(lock) => lock, + Err(err) => { + info!( + "SKIPPED: verify {}:{} - could not acquire namespace lock: {}", + self.datastore.name(), + backup_dir.dir(), + err, + ); + return Ok(true); + } + }; + let snap_lock = backup_dir.lock_shared(); match snap_lock { - Ok(snap_lock) => self.verify_backup_dir_with_lock(backup_dir, upid, filter, snap_lock), + Ok(snap_lock) => { + let result = self.verify_backup_dir_with_lock(backup_dir, upid, 
filter, snap_lock); + drop(ns_lock); + result + } Err(err) => { info!( "SKIPPED: verify {}:{} - could not acquire snapshot lock: {}", diff --git a/src/server/prune_job.rs b/src/server/prune_job.rs index bb86a323..8103f59c 100644 --- a/src/server/prune_job.rs +++ b/src/server/prune_job.rs @@ -54,6 +54,17 @@ pub fn prune_datastore( )? { let group = group?; let ns = group.backup_ns(); + let _ns_lock = match datastore.lock_namespace_shared(ns) { + Ok(lock) => lock, + Err(err) => { + warn!( + "skipping prune of group '{}:{}' - could not acquire namespace lock: {err}", + ns, + group.group(), + ); + continue; + } + }; let list = group.list_backups()?; let mut prune_info = compute_prune_info(list, &prune_options.keep)?; diff --git a/src/server/pull.rs b/src/server/pull.rs index 0ac6b5b8..40173324 100644 --- a/src/server/pull.rs +++ b/src/server/pull.rs @@ -1061,6 +1061,12 @@ async fn pull_ns( params: &mut PullParameters, encountered_chunks: Arc>, ) -> Result<(StoreProgress, SyncStats, bool), Error> { + let _ns_lock = params + .target + .store + .lock_namespace_shared(namespace) + .with_context(|| format!("failed to acquire shared namespace lock for '{namespace}'"))?; + let list: Vec = params.source.list_groups(namespace, ¶ms.owner).await?; let unfiltered_count = list.len(); @@ -1093,7 +1099,7 @@ async fn pull_ns( progress.done_snapshots = 0; progress.group_snapshots = 0; - let (owner, _lock_guard) = + let (owner, _ns_guard, _lock_guard) = match params .target .store diff --git a/src/server/push.rs b/src/server/push.rs index 27c5b22d..47067d66 100644 --- a/src/server/push.rs +++ b/src/server/push.rs @@ -525,6 +525,12 @@ pub(crate) async fn push_namespace( check_ns_remote_datastore_privs(params, &target_namespace, PRIV_REMOTE_DATASTORE_BACKUP) .context("Pushing to remote namespace not allowed")?; + let _ns_lock = params + .source + .store + .lock_namespace_shared(namespace) + .with_context(|| format!("failed to acquire shared namespace lock for '{namespace}'"))?; + let mut 
list: Vec<BackupGroup> = params .source .list_groups(namespace, &params.local_user) -- 2.47.3